polars_core/frame/group_by/aggregations/
string.rs

1use super::*;
2
3pub fn _agg_helper_idx_bin<'a, F>(groups: &'a GroupsIdx, f: F) -> Series
4where
5    F: Fn((IdxSize, &'a IdxVec)) -> Option<&'a [u8]> + Send + Sync,
6{
7    let ca: BinaryChunked = POOL.install(|| groups.into_par_iter().map(f).collect());
8    ca.into_series()
9}
10
11pub fn _agg_helper_slice_bin<'a, F>(groups: &'a [[IdxSize; 2]], f: F) -> Series
12where
13    F: Fn([IdxSize; 2]) -> Option<&'a [u8]> + Send + Sync,
14{
15    let ca: BinaryChunked = POOL.install(|| groups.par_iter().copied().map(f).collect());
16    ca.into_series()
17}
18
19impl BinaryChunked {
20    #[allow(clippy::needless_lifetimes)]
21    pub(crate) unsafe fn agg_min<'a>(&'a self, groups: &GroupsType) -> Series {
22        // faster paths
23        match self.is_sorted_flag() {
24            IsSorted::Ascending => return self.clone().into_series().agg_first_non_null(groups),
25            IsSorted::Descending => return self.clone().into_series().agg_last_non_null(groups),
26            _ => {},
27        }
28
29        match groups {
30            GroupsType::Idx(groups) => {
31                let ca_self = self.rechunk();
32                let arr = ca_self.downcast_as_array();
33                let no_nulls = arr.null_count() == 0;
34                _agg_helper_idx_bin(groups, |(first, idx)| {
35                    debug_assert!(idx.len() <= ca_self.len());
36                    if idx.is_empty() {
37                        None
38                    } else if idx.len() == 1 {
39                        arr.get_unchecked(first as usize)
40                    } else if no_nulls {
41                        take_agg_bin_iter_unchecked_no_null(
42                            arr,
43                            indexes_to_usizes(idx),
44                            |acc, v| if acc < v { acc } else { v },
45                        )
46                    } else {
47                        take_agg_bin_iter_unchecked(
48                            arr,
49                            indexes_to_usizes(idx),
50                            |acc, v| if acc < v { acc } else { v },
51                            idx.len() as IdxSize,
52                        )
53                    }
54                })
55            },
56            GroupsType::Slice {
57                groups: groups_slice,
58                ..
59            } => _agg_helper_slice_bin(groups_slice, |[first, len]| {
60                debug_assert!(len <= self.len() as IdxSize);
61                match len {
62                    0 => None,
63                    1 => self.get(first as usize),
64                    _ => {
65                        let arr_group = _slice_from_offsets(self, first, len);
66                        let borrowed = arr_group.min_binary();
67
68                        // SAFETY:
69                        // The borrowed has `arr_group`s lifetime, but it actually points to data
70                        // hold by self. Here we tell the compiler that.
71                        unsafe { std::mem::transmute::<Option<&[u8]>, Option<&'a [u8]>>(borrowed) }
72                    },
73                }
74            }),
75        }
76    }
77
78    #[allow(clippy::needless_lifetimes)]
79    pub(crate) unsafe fn agg_max<'a>(&'a self, groups: &GroupsType) -> Series {
80        // faster paths
81        match (self.is_sorted_flag(), self.null_count()) {
82            (IsSorted::Ascending, 0) => {
83                return self.clone().into_series().agg_last(groups);
84            },
85            (IsSorted::Descending, 0) => {
86                return self.clone().into_series().agg_first(groups);
87            },
88            _ => {},
89        }
90
91        match groups {
92            GroupsType::Idx(groups) => {
93                let ca_self = self.rechunk();
94                let arr = ca_self.downcast_as_array();
95                let no_nulls = arr.null_count() == 0;
96                _agg_helper_idx_bin(groups, |(first, idx)| {
97                    debug_assert!(idx.len() <= self.len());
98                    if idx.is_empty() {
99                        None
100                    } else if idx.len() == 1 {
101                        ca_self.get(first as usize)
102                    } else if no_nulls {
103                        take_agg_bin_iter_unchecked_no_null(
104                            arr,
105                            indexes_to_usizes(idx),
106                            |acc, v| if acc > v { acc } else { v },
107                        )
108                    } else {
109                        take_agg_bin_iter_unchecked(
110                            arr,
111                            indexes_to_usizes(idx),
112                            |acc, v| if acc > v { acc } else { v },
113                            idx.len() as IdxSize,
114                        )
115                    }
116                })
117            },
118            GroupsType::Slice {
119                groups: groups_slice,
120                ..
121            } => _agg_helper_slice_bin(groups_slice, |[first, len]| {
122                debug_assert!(len <= self.len() as IdxSize);
123                match len {
124                    0 => None,
125                    1 => self.get(first as usize),
126                    _ => {
127                        let arr_group = _slice_from_offsets(self, first, len);
128                        let borrowed = arr_group.max_binary();
129
130                        // SAFETY:
131                        // The borrowed has `arr_group`s lifetime, but it actually points to data
132                        // hold by self. Here we tell the compiler that.
133                        unsafe { std::mem::transmute::<Option<&[u8]>, Option<&'a [u8]>>(borrowed) }
134                    },
135                }
136            }),
137        }
138    }
139}
140
141impl StringChunked {
142    #[allow(clippy::needless_lifetimes)]
143    pub(crate) unsafe fn agg_min<'a>(&'a self, groups: &GroupsType) -> Series {
144        let out = self.as_binary().agg_min(groups);
145        out.binary().unwrap().to_string_unchecked().into_series()
146    }
147
148    #[allow(clippy::needless_lifetimes)]
149    pub(crate) unsafe fn agg_max<'a>(&'a self, groups: &GroupsType) -> Series {
150        let out = self.as_binary().agg_max(groups);
151        out.binary().unwrap().to_string_unchecked().into_series()
152    }
153}