polars_core/frame/group_by/aggregations/
string.rs

1use super::*;
2
3pub fn _agg_helper_idx_bin<'a, F>(groups: &'a GroupsIdx, f: F) -> Series
4where
5    F: Fn((IdxSize, &'a IdxVec)) -> Option<&'a [u8]> + Send + Sync,
6{
7    let ca: BinaryChunked = POOL.install(|| groups.into_par_iter().map(f).collect());
8    ca.into_series()
9}
10
11pub fn _agg_helper_slice_bin<'a, F>(groups: &'a [[IdxSize; 2]], f: F) -> Series
12where
13    F: Fn([IdxSize; 2]) -> Option<&'a [u8]> + Send + Sync,
14{
15    let ca: BinaryChunked = POOL.install(|| groups.par_iter().copied().map(f).collect());
16    ca.into_series()
17}
18
19impl BinaryChunked {
20    #[allow(clippy::needless_lifetimes)]
21    pub(crate) unsafe fn agg_min<'a>(&'a self, groups: &GroupsType) -> Series {
22        // faster paths
23        match (&self.is_sorted_flag(), &self.null_count()) {
24            (IsSorted::Ascending, 0) => {
25                return self.clone().into_series().agg_first(groups);
26            },
27            (IsSorted::Descending, 0) => {
28                return self.clone().into_series().agg_last(groups);
29            },
30            _ => {},
31        }
32
33        match groups {
34            GroupsType::Idx(groups) => {
35                let ca_self = self.rechunk();
36                let arr = ca_self.downcast_as_array();
37                let no_nulls = arr.null_count() == 0;
38                _agg_helper_idx_bin(groups, |(first, idx)| {
39                    debug_assert!(idx.len() <= ca_self.len());
40                    if idx.is_empty() {
41                        None
42                    } else if idx.len() == 1 {
43                        arr.get_unchecked(first as usize)
44                    } else if no_nulls {
45                        take_agg_bin_iter_unchecked_no_null(
46                            arr,
47                            indexes_to_usizes(idx),
48                            |acc, v| if acc < v { acc } else { v },
49                        )
50                    } else {
51                        take_agg_bin_iter_unchecked(
52                            arr,
53                            indexes_to_usizes(idx),
54                            |acc, v| if acc < v { acc } else { v },
55                            idx.len() as IdxSize,
56                        )
57                    }
58                })
59            },
60            GroupsType::Slice {
61                groups: groups_slice,
62                ..
63            } => _agg_helper_slice_bin(groups_slice, |[first, len]| {
64                debug_assert!(len <= self.len() as IdxSize);
65                match len {
66                    0 => None,
67                    1 => self.get(first as usize),
68                    _ => {
69                        let arr_group = _slice_from_offsets(self, first, len);
70                        let borrowed = arr_group.min_binary();
71
72                        // SAFETY:
73                        // The borrowed has `arr_group`s lifetime, but it actually points to data
74                        // hold by self. Here we tell the compiler that.
75                        unsafe { std::mem::transmute::<Option<&[u8]>, Option<&'a [u8]>>(borrowed) }
76                    },
77                }
78            }),
79        }
80    }
81
82    #[allow(clippy::needless_lifetimes)]
83    pub(crate) unsafe fn agg_max<'a>(&'a self, groups: &GroupsType) -> Series {
84        // faster paths
85        match (self.is_sorted_flag(), self.null_count()) {
86            (IsSorted::Ascending, 0) => {
87                return self.clone().into_series().agg_last(groups);
88            },
89            (IsSorted::Descending, 0) => {
90                return self.clone().into_series().agg_first(groups);
91            },
92            _ => {},
93        }
94
95        match groups {
96            GroupsType::Idx(groups) => {
97                let ca_self = self.rechunk();
98                let arr = ca_self.downcast_as_array();
99                let no_nulls = arr.null_count() == 0;
100                _agg_helper_idx_bin(groups, |(first, idx)| {
101                    debug_assert!(idx.len() <= self.len());
102                    if idx.is_empty() {
103                        None
104                    } else if idx.len() == 1 {
105                        ca_self.get(first as usize)
106                    } else if no_nulls {
107                        take_agg_bin_iter_unchecked_no_null(
108                            arr,
109                            indexes_to_usizes(idx),
110                            |acc, v| if acc > v { acc } else { v },
111                        )
112                    } else {
113                        take_agg_bin_iter_unchecked(
114                            arr,
115                            indexes_to_usizes(idx),
116                            |acc, v| if acc > v { acc } else { v },
117                            idx.len() as IdxSize,
118                        )
119                    }
120                })
121            },
122            GroupsType::Slice {
123                groups: groups_slice,
124                ..
125            } => _agg_helper_slice_bin(groups_slice, |[first, len]| {
126                debug_assert!(len <= self.len() as IdxSize);
127                match len {
128                    0 => None,
129                    1 => self.get(first as usize),
130                    _ => {
131                        let arr_group = _slice_from_offsets(self, first, len);
132                        let borrowed = arr_group.max_binary();
133
134                        // SAFETY:
135                        // The borrowed has `arr_group`s lifetime, but it actually points to data
136                        // hold by self. Here we tell the compiler that.
137                        unsafe { std::mem::transmute::<Option<&[u8]>, Option<&'a [u8]>>(borrowed) }
138                    },
139                }
140            }),
141        }
142    }
143}
144
145impl StringChunked {
146    #[allow(clippy::needless_lifetimes)]
147    pub(crate) unsafe fn agg_min<'a>(&'a self, groups: &GroupsType) -> Series {
148        let out = self.as_binary().agg_min(groups);
149        out.binary().unwrap().to_string_unchecked().into_series()
150    }
151
152    #[allow(clippy::needless_lifetimes)]
153    pub(crate) unsafe fn agg_max<'a>(&'a self, groups: &GroupsType) -> Series {
154        let out = self.as_binary().agg_max(groups);
155        out.binary().unwrap().to_string_unchecked().into_series()
156    }
157}