polars_core/frame/group_by/aggregations/
string.rs1use super::*;
2
3pub fn _agg_helper_idx_bin<'a, F>(groups: &'a GroupsIdx, f: F) -> Series
4where
5 F: Fn((IdxSize, &'a IdxVec)) -> Option<&'a [u8]> + Send + Sync,
6{
7 let ca: BinaryChunked = RAYON.install(|| groups.into_par_iter().map(f).collect());
8 ca.into_series()
9}
10
11pub fn _agg_helper_slice_bin<'a, F>(groups: &'a [[IdxSize; 2]], f: F) -> Series
12where
13 F: Fn([IdxSize; 2]) -> Option<&'a [u8]> + Send + Sync,
14{
15 let ca: BinaryChunked = RAYON.install(|| groups.par_iter().copied().map(f).collect());
16 ca.into_series()
17}
18
19impl BinaryChunked {
20 #[allow(clippy::needless_lifetimes)]
21 pub(crate) unsafe fn agg_min<'a>(&'a self, groups: &GroupsType) -> Series {
22 if !self.has_nulls() || matches!(groups, GroupsType::Slice { .. }) {
24 match self.is_sorted_flag() {
25 IsSorted::Ascending => {
26 return self.clone().into_series().agg_first_non_null(groups);
27 },
28 IsSorted::Descending => {
29 return self.clone().into_series().agg_last_non_null(groups);
30 },
31 _ => {},
32 }
33 }
34
35 match groups {
36 GroupsType::Idx(groups) => {
37 let ca_self = self.rechunk();
38 let arr = ca_self.downcast_as_array();
39 let no_nulls = arr.null_count() == 0;
40 _agg_helper_idx_bin(groups, |(first, idx)| {
41 debug_assert!(idx.len() <= ca_self.len());
42 if idx.is_empty() {
43 None
44 } else if idx.len() == 1 {
45 arr.get_unchecked(first as usize)
46 } else if no_nulls {
47 take_agg_bin_iter_unchecked_no_null(
48 arr,
49 indexes_to_usizes(idx),
50 |acc, v| if acc < v { acc } else { v },
51 )
52 } else {
53 take_agg_bin_iter_unchecked(
54 arr,
55 indexes_to_usizes(idx),
56 |acc, v| if acc < v { acc } else { v },
57 idx.len() as IdxSize,
58 )
59 }
60 })
61 },
62 GroupsType::Slice {
63 groups: groups_slice,
64 ..
65 } => _agg_helper_slice_bin(groups_slice, |[first, len]| {
66 debug_assert!(len <= self.len() as IdxSize);
67 match len {
68 0 => None,
69 1 => self.get(first as usize),
70 _ => {
71 let arr_group = _slice_from_offsets(self, first, len);
72 let borrowed = arr_group.min_binary();
73
74 unsafe { std::mem::transmute::<Option<&[u8]>, Option<&'a [u8]>>(borrowed) }
78 },
79 }
80 }),
81 }
82 }
83
84 #[allow(clippy::needless_lifetimes)]
85 pub(crate) unsafe fn agg_max<'a>(&'a self, groups: &GroupsType) -> Series {
86 if !self.has_nulls() || matches!(groups, GroupsType::Slice { .. }) {
88 match self.is_sorted_flag() {
89 IsSorted::Ascending => return self.clone().into_series().agg_last_non_null(groups),
90 IsSorted::Descending => {
91 return self.clone().into_series().agg_first_non_null(groups);
92 },
93 _ => {},
94 }
95 }
96
97 match groups {
98 GroupsType::Idx(groups) => {
99 let ca_self = self.rechunk();
100 let arr = ca_self.downcast_as_array();
101 let no_nulls = arr.null_count() == 0;
102 _agg_helper_idx_bin(groups, |(first, idx)| {
103 debug_assert!(idx.len() <= self.len());
104 if idx.is_empty() {
105 None
106 } else if idx.len() == 1 {
107 ca_self.get(first as usize)
108 } else if no_nulls {
109 take_agg_bin_iter_unchecked_no_null(
110 arr,
111 indexes_to_usizes(idx),
112 |acc, v| if acc > v { acc } else { v },
113 )
114 } else {
115 take_agg_bin_iter_unchecked(
116 arr,
117 indexes_to_usizes(idx),
118 |acc, v| if acc > v { acc } else { v },
119 idx.len() as IdxSize,
120 )
121 }
122 })
123 },
124 GroupsType::Slice {
125 groups: groups_slice,
126 ..
127 } => _agg_helper_slice_bin(groups_slice, |[first, len]| {
128 debug_assert!(len <= self.len() as IdxSize);
129 match len {
130 0 => None,
131 1 => self.get(first as usize),
132 _ => {
133 let arr_group = _slice_from_offsets(self, first, len);
134 let borrowed = arr_group.max_binary();
135
136 unsafe { std::mem::transmute::<Option<&[u8]>, Option<&'a [u8]>>(borrowed) }
140 },
141 }
142 }),
143 }
144 }
145
146 pub(crate) unsafe fn agg_arg_min(&self, groups: &GroupsType) -> Series {
147 if !self.has_nulls() || matches!(groups, GroupsType::Slice { .. }) {
149 match self.is_sorted_flag() {
150 IsSorted::Ascending => {
151 return self.clone().into_series().agg_arg_first_non_null(groups);
152 },
153 IsSorted::Descending => {
154 return self.clone().into_series().agg_arg_last_non_null(groups);
155 },
156 _ => {},
157 }
158 }
159
160 let ca_self = self.rechunk();
161 let arr = ca_self.downcast_as_array();
162 let no_nulls = arr.null_count() == 0;
163 match groups {
164 GroupsType::Idx(groups) => _agg_helper_idx_idx(groups, |(first, idx)| {
165 debug_assert!(idx.len() <= ca_self.len());
166
167 if idx.is_empty() {
168 None
169 } else if idx.len() == 1 {
170 arr.is_valid(first as usize).then_some(0)
171 } else if no_nulls {
172 take_agg_bin_iter_unchecked_no_null_arg(
173 arr,
174 indexes_to_usizes(idx),
175 |acc, cur| if cur.1 < acc.1 { cur } else { acc },
176 )
177 } else {
178 take_agg_bin_iter_unchecked_arg(arr, indexes_to_usizes(idx), |acc, cur| {
179 if cur.1 < acc.1 { cur } else { acc }
180 })
181 }
182 }),
183
184 GroupsType::Slice {
185 groups: groups_slice,
186 ..
187 } => _agg_helper_slice_idx(groups_slice, |[first, len]| {
188 debug_assert!(len <= self.len() as IdxSize);
189 match len {
190 0 => None,
191 1 => arr.is_valid(first as usize).then_some(0),
192 _ => {
193 let arr_group = _slice_from_offsets(&ca_self, first, len);
194 arr_group.arg_min_binary().map(|i| i as IdxSize)
195 },
196 }
197 }),
198 }
199 }
200
201 pub(crate) unsafe fn agg_arg_max(&self, groups: &GroupsType) -> Series {
202 if !self.has_nulls() || matches!(groups, GroupsType::Slice { .. }) {
204 match self.is_sorted_flag() {
205 IsSorted::Ascending => {
206 return self.clone().into_series().agg_arg_last_non_null(groups);
207 },
208 IsSorted::Descending => {
209 return self.clone().into_series().agg_arg_first_non_null(groups);
210 },
211 _ => {},
212 }
213 }
214
215 let ca_self = self.rechunk();
216 let arr = ca_self.downcast_as_array();
217 let no_nulls = arr.null_count() == 0;
218
219 match groups {
220 GroupsType::Idx(groups) => _agg_helper_idx_idx(groups, |(first, idx)| {
221 debug_assert!(idx.len() <= ca_self.len());
222
223 if idx.is_empty() {
224 None
225 } else if idx.len() == 1 {
226 arr.is_valid(first as usize).then_some(0)
227 } else if no_nulls {
228 take_agg_bin_iter_unchecked_no_null_arg(
229 arr,
230 indexes_to_usizes(idx),
231 |acc, cur| if cur.1 > acc.1 { cur } else { acc },
232 )
233 } else {
234 take_agg_bin_iter_unchecked_arg(arr, indexes_to_usizes(idx), |acc, cur| {
235 if cur.1 > acc.1 { cur } else { acc }
236 })
237 }
238 }),
239
240 GroupsType::Slice {
241 groups: groups_slice,
242 ..
243 } => _agg_helper_slice_idx(groups_slice, |[first, len]| {
244 debug_assert!(len <= self.len() as IdxSize);
245 match len {
246 0 => None,
247 1 => arr.is_valid(first as usize).then_some(0),
248 _ => {
249 let arr_group = _slice_from_offsets(&ca_self, first, len);
250 arr_group.arg_max_binary().map(|i| i as IdxSize)
251 },
252 }
253 }),
254 }
255 }
256}
257
258impl StringChunked {
259 #[allow(clippy::needless_lifetimes)]
260 pub(crate) unsafe fn agg_min<'a>(&'a self, groups: &GroupsType) -> Series {
261 let out = self.as_binary().agg_min(groups);
262 out.binary().unwrap().to_string_unchecked().into_series()
263 }
264
265 #[allow(clippy::needless_lifetimes)]
266 pub(crate) unsafe fn agg_max<'a>(&'a self, groups: &GroupsType) -> Series {
267 let out = self.as_binary().agg_max(groups);
268 out.binary().unwrap().to_string_unchecked().into_series()
269 }
270
271 #[cfg(feature = "algorithm_group_by")]
272 pub(crate) unsafe fn agg_arg_min(&self, groups: &GroupsType) -> Series {
273 self.as_binary().agg_arg_min(groups)
274 }
275
276 #[cfg(feature = "algorithm_group_by")]
277 pub(crate) unsafe fn agg_arg_max(&self, groups: &GroupsType) -> Series {
278 self.as_binary().agg_arg_max(groups)
279 }
280}