// polars_core/frame/group_by/aggregations/string.rs

1use super::*;
2
3pub fn _agg_helper_idx_bin<'a, F>(groups: &'a GroupsIdx, f: F) -> Series
4where
5    F: Fn((IdxSize, &'a IdxVec)) -> Option<&'a [u8]> + Send + Sync,
6{
7    let ca: BinaryChunked = RAYON.install(|| groups.into_par_iter().map(f).collect());
8    ca.into_series()
9}
10
11pub fn _agg_helper_slice_bin<'a, F>(groups: &'a [[IdxSize; 2]], f: F) -> Series
12where
13    F: Fn([IdxSize; 2]) -> Option<&'a [u8]> + Send + Sync,
14{
15    let ca: BinaryChunked = RAYON.install(|| groups.par_iter().copied().map(f).collect());
16    ca.into_series()
17}
18
19impl BinaryChunked {
20    #[allow(clippy::needless_lifetimes)]
21    pub(crate) unsafe fn agg_min<'a>(&'a self, groups: &GroupsType) -> Series {
22        // faster paths
23        if !self.has_nulls() || matches!(groups, GroupsType::Slice { .. }) {
24            match self.is_sorted_flag() {
25                IsSorted::Ascending => {
26                    return self.clone().into_series().agg_first_non_null(groups);
27                },
28                IsSorted::Descending => {
29                    return self.clone().into_series().agg_last_non_null(groups);
30                },
31                _ => {},
32            }
33        }
34
35        match groups {
36            GroupsType::Idx(groups) => {
37                let ca_self = self.rechunk();
38                let arr = ca_self.downcast_as_array();
39                let no_nulls = arr.null_count() == 0;
40                _agg_helper_idx_bin(groups, |(first, idx)| {
41                    debug_assert!(idx.len() <= ca_self.len());
42                    if idx.is_empty() {
43                        None
44                    } else if idx.len() == 1 {
45                        arr.get_unchecked(first as usize)
46                    } else if no_nulls {
47                        take_agg_bin_iter_unchecked_no_null(
48                            arr,
49                            indexes_to_usizes(idx),
50                            |acc, v| if acc < v { acc } else { v },
51                        )
52                    } else {
53                        take_agg_bin_iter_unchecked(
54                            arr,
55                            indexes_to_usizes(idx),
56                            |acc, v| if acc < v { acc } else { v },
57                            idx.len() as IdxSize,
58                        )
59                    }
60                })
61            },
62            GroupsType::Slice {
63                groups: groups_slice,
64                ..
65            } => _agg_helper_slice_bin(groups_slice, |[first, len]| {
66                debug_assert!(len <= self.len() as IdxSize);
67                match len {
68                    0 => None,
69                    1 => self.get(first as usize),
70                    _ => {
71                        let arr_group = _slice_from_offsets(self, first, len);
72                        let borrowed = arr_group.min_binary();
73
74                        // SAFETY:
75                        // The borrowed has `arr_group`s lifetime, but it actually points to data
76                        // hold by self. Here we tell the compiler that.
77                        unsafe { std::mem::transmute::<Option<&[u8]>, Option<&'a [u8]>>(borrowed) }
78                    },
79                }
80            }),
81        }
82    }
83
84    #[allow(clippy::needless_lifetimes)]
85    pub(crate) unsafe fn agg_max<'a>(&'a self, groups: &GroupsType) -> Series {
86        // faster paths
87        if !self.has_nulls() || matches!(groups, GroupsType::Slice { .. }) {
88            match self.is_sorted_flag() {
89                IsSorted::Ascending => return self.clone().into_series().agg_last_non_null(groups),
90                IsSorted::Descending => {
91                    return self.clone().into_series().agg_first_non_null(groups);
92                },
93                _ => {},
94            }
95        }
96
97        match groups {
98            GroupsType::Idx(groups) => {
99                let ca_self = self.rechunk();
100                let arr = ca_self.downcast_as_array();
101                let no_nulls = arr.null_count() == 0;
102                _agg_helper_idx_bin(groups, |(first, idx)| {
103                    debug_assert!(idx.len() <= self.len());
104                    if idx.is_empty() {
105                        None
106                    } else if idx.len() == 1 {
107                        ca_self.get(first as usize)
108                    } else if no_nulls {
109                        take_agg_bin_iter_unchecked_no_null(
110                            arr,
111                            indexes_to_usizes(idx),
112                            |acc, v| if acc > v { acc } else { v },
113                        )
114                    } else {
115                        take_agg_bin_iter_unchecked(
116                            arr,
117                            indexes_to_usizes(idx),
118                            |acc, v| if acc > v { acc } else { v },
119                            idx.len() as IdxSize,
120                        )
121                    }
122                })
123            },
124            GroupsType::Slice {
125                groups: groups_slice,
126                ..
127            } => _agg_helper_slice_bin(groups_slice, |[first, len]| {
128                debug_assert!(len <= self.len() as IdxSize);
129                match len {
130                    0 => None,
131                    1 => self.get(first as usize),
132                    _ => {
133                        let arr_group = _slice_from_offsets(self, first, len);
134                        let borrowed = arr_group.max_binary();
135
136                        // SAFETY:
137                        // The borrowed has `arr_group`s lifetime, but it actually points to data
138                        // hold by self. Here we tell the compiler that.
139                        unsafe { std::mem::transmute::<Option<&[u8]>, Option<&'a [u8]>>(borrowed) }
140                    },
141                }
142            }),
143        }
144    }
145
146    pub(crate) unsafe fn agg_arg_min(&self, groups: &GroupsType) -> Series {
147        // fast paths, consistent with other impls
148        if !self.has_nulls() || matches!(groups, GroupsType::Slice { .. }) {
149            match self.is_sorted_flag() {
150                IsSorted::Ascending => {
151                    return self.clone().into_series().agg_arg_first_non_null(groups);
152                },
153                IsSorted::Descending => {
154                    return self.clone().into_series().agg_arg_last_non_null(groups);
155                },
156                _ => {},
157            }
158        }
159
160        let ca_self = self.rechunk();
161        let arr = ca_self.downcast_as_array();
162        let no_nulls = arr.null_count() == 0;
163        match groups {
164            GroupsType::Idx(groups) => _agg_helper_idx_idx(groups, |(first, idx)| {
165                debug_assert!(idx.len() <= ca_self.len());
166
167                if idx.is_empty() {
168                    None
169                } else if idx.len() == 1 {
170                    arr.is_valid(first as usize).then_some(0)
171                } else if no_nulls {
172                    take_agg_bin_iter_unchecked_no_null_arg(
173                        arr,
174                        indexes_to_usizes(idx),
175                        |acc, cur| if cur.1 < acc.1 { cur } else { acc },
176                    )
177                } else {
178                    take_agg_bin_iter_unchecked_arg(arr, indexes_to_usizes(idx), |acc, cur| {
179                        if cur.1 < acc.1 { cur } else { acc }
180                    })
181                }
182            }),
183
184            GroupsType::Slice {
185                groups: groups_slice,
186                ..
187            } => _agg_helper_slice_idx(groups_slice, |[first, len]| {
188                debug_assert!(len <= self.len() as IdxSize);
189                match len {
190                    0 => None,
191                    1 => arr.is_valid(first as usize).then_some(0),
192                    _ => {
193                        let arr_group = _slice_from_offsets(&ca_self, first, len);
194                        arr_group.arg_min_binary().map(|i| i as IdxSize)
195                    },
196                }
197            }),
198        }
199    }
200
201    pub(crate) unsafe fn agg_arg_max(&self, groups: &GroupsType) -> Series {
202        // fast paths
203        if !self.has_nulls() || matches!(groups, GroupsType::Slice { .. }) {
204            match self.is_sorted_flag() {
205                IsSorted::Ascending => {
206                    return self.clone().into_series().agg_arg_last_non_null(groups);
207                },
208                IsSorted::Descending => {
209                    return self.clone().into_series().agg_arg_first_non_null(groups);
210                },
211                _ => {},
212            }
213        }
214
215        let ca_self = self.rechunk();
216        let arr = ca_self.downcast_as_array();
217        let no_nulls = arr.null_count() == 0;
218
219        match groups {
220            GroupsType::Idx(groups) => _agg_helper_idx_idx(groups, |(first, idx)| {
221                debug_assert!(idx.len() <= ca_self.len());
222
223                if idx.is_empty() {
224                    None
225                } else if idx.len() == 1 {
226                    arr.is_valid(first as usize).then_some(0)
227                } else if no_nulls {
228                    take_agg_bin_iter_unchecked_no_null_arg(
229                        arr,
230                        indexes_to_usizes(idx),
231                        |acc, cur| if cur.1 > acc.1 { cur } else { acc },
232                    )
233                } else {
234                    take_agg_bin_iter_unchecked_arg(arr, indexes_to_usizes(idx), |acc, cur| {
235                        if cur.1 > acc.1 { cur } else { acc }
236                    })
237                }
238            }),
239
240            GroupsType::Slice {
241                groups: groups_slice,
242                ..
243            } => _agg_helper_slice_idx(groups_slice, |[first, len]| {
244                debug_assert!(len <= self.len() as IdxSize);
245                match len {
246                    0 => None,
247                    1 => arr.is_valid(first as usize).then_some(0),
248                    _ => {
249                        let arr_group = _slice_from_offsets(&ca_self, first, len);
250                        arr_group.arg_max_binary().map(|i| i as IdxSize)
251                    },
252                }
253            }),
254        }
255    }
256}
257
258impl StringChunked {
259    #[allow(clippy::needless_lifetimes)]
260    pub(crate) unsafe fn agg_min<'a>(&'a self, groups: &GroupsType) -> Series {
261        let out = self.as_binary().agg_min(groups);
262        out.binary().unwrap().to_string_unchecked().into_series()
263    }
264
265    #[allow(clippy::needless_lifetimes)]
266    pub(crate) unsafe fn agg_max<'a>(&'a self, groups: &GroupsType) -> Series {
267        let out = self.as_binary().agg_max(groups);
268        out.binary().unwrap().to_string_unchecked().into_series()
269    }
270
271    #[cfg(feature = "algorithm_group_by")]
272    pub(crate) unsafe fn agg_arg_min(&self, groups: &GroupsType) -> Series {
273        self.as_binary().agg_arg_min(groups)
274    }
275
276    #[cfg(feature = "algorithm_group_by")]
277    pub(crate) unsafe fn agg_arg_max(&self, groups: &GroupsType) -> Series {
278        self.as_binary().agg_arg_max(groups)
279    }
280}