polars_core/chunked_array/
arg_min_max.rs

1use arrow::array::Array;
2use polars_utils::arg_min_max::ArgMinMax;
3use polars_utils::min_max::{MaxIgnoreNan, MinIgnoreNan, MinMaxPolicy};
4
5use crate::chunked_array::ChunkedArray;
6use crate::chunked_array::ops::float_sorted_arg_max::{
7    float_arg_max_sorted_ascending, float_arg_max_sorted_descending,
8};
9use crate::datatypes::{
10    BinaryChunked, BooleanChunked, PolarsDataType, PolarsNumericType, StringChunked,
11};
12#[cfg(feature = "dtype-categorical")]
13use crate::datatypes::{CategoricalChunked, PolarsCategoricalType};
14use crate::series::IsSorted;
15
16pub fn arg_min_opt_iter<T, I>(iter: I) -> Option<usize>
17where
18    I: IntoIterator<Item = Option<T>>,
19    T: Ord,
20{
21    iter.into_iter()
22        .enumerate()
23        .flat_map(|(idx, val)| Some((idx, val?)))
24        .min_by(|x, y| Ord::cmp(&x.1, &y.1))
25        .map(|x| x.0)
26}
27
28pub fn arg_max_opt_iter<T, I>(iter: I) -> Option<usize>
29where
30    I: IntoIterator<Item = Option<T>>,
31    T: Ord,
32{
33    iter.into_iter()
34        .enumerate()
35        .flat_map(|(idx, val)| Some((idx, val?)))
36        .max_by(|x, y| Ord::cmp(&x.1, &y.1))
37        .map(|x| x.0)
38}
39
40pub fn arg_min_numeric<T>(ca: &ChunkedArray<T>) -> Option<usize>
41where
42    T: PolarsNumericType,
43    for<'b> &'b [T::Native]: ArgMinMax,
44{
45    if ca.null_count() == ca.len() {
46        None
47    } else if let Ok(vals) = ca.cont_slice() {
48        arg_min_numeric_slice(vals, ca.is_sorted_flag())
49    } else {
50        arg_min_numeric_chunked(ca)
51    }
52}
53
54pub fn arg_max_numeric<T>(ca: &ChunkedArray<T>) -> Option<usize>
55where
56    T: PolarsNumericType,
57    for<'b> &'b [T::Native]: ArgMinMax,
58{
59    if ca.null_count() == ca.len() {
60        None
61    } else if T::get_static_dtype().is_float() && !matches!(ca.is_sorted_flag(), IsSorted::Not) {
62        arg_max_float_sorted(ca)
63    } else if let Ok(vals) = ca.cont_slice() {
64        arg_max_numeric_slice(vals, ca.is_sorted_flag())
65    } else {
66        arg_max_numeric_chunked(ca)
67    }
68}
69
70/// # Safety
71/// `ca` has a float dtype, has at least one non-null value and is sorted.
72fn arg_max_float_sorted<T>(ca: &ChunkedArray<T>) -> Option<usize>
73where
74    T: PolarsNumericType,
75{
76    let out = match ca.is_sorted_flag() {
77        IsSorted::Ascending => float_arg_max_sorted_ascending(ca),
78        IsSorted::Descending => float_arg_max_sorted_descending(ca),
79        _ => unreachable!(),
80    };
81    Some(out)
82}
83
84#[cfg(feature = "dtype-categorical")]
85pub fn arg_min_cat<T: PolarsCategoricalType>(ca: &CategoricalChunked<T>) -> Option<usize> {
86    if ca.null_count() == ca.len() {
87        return None;
88    }
89    arg_min_opt_iter(ca.iter_str())
90}
91
92#[cfg(feature = "dtype-categorical")]
93pub fn arg_max_cat<T: PolarsCategoricalType>(ca: &CategoricalChunked<T>) -> Option<usize> {
94    if ca.null_count() == ca.len() {
95        return None;
96    }
97    arg_max_opt_iter(ca.iter_str())
98}
99
100pub fn arg_min_bool(ca: &BooleanChunked) -> Option<usize> {
101    ca.first_false_idx().or_else(|| ca.first_true_idx())
102}
103
104pub fn arg_max_bool(ca: &BooleanChunked) -> Option<usize> {
105    ca.first_true_idx().or_else(|| ca.first_false_idx())
106}
107
108pub fn arg_min_str(ca: &StringChunked) -> Option<usize> {
109    arg_min_physical_generic(ca)
110}
111
112pub fn arg_max_str(ca: &StringChunked) -> Option<usize> {
113    arg_max_physical_generic(ca)
114}
115
116pub fn arg_min_binary(ca: &BinaryChunked) -> Option<usize> {
117    arg_min_physical_generic(ca)
118}
119
120pub fn arg_max_binary(ca: &BinaryChunked) -> Option<usize> {
121    arg_max_physical_generic(ca)
122}
123
124fn arg_min_physical_generic<T>(ca: &ChunkedArray<T>) -> Option<usize>
125where
126    T: PolarsDataType,
127    for<'a> T::Physical<'a>: Ord,
128{
129    if ca.null_count() == ca.len() {
130        return None;
131    }
132    match ca.is_sorted_flag() {
133        IsSorted::Ascending => ca.first_non_null(),
134        IsSorted::Descending => ca.last_non_null(),
135        IsSorted::Not => arg_min_opt_iter(ca.iter()),
136    }
137}
138
139fn arg_max_physical_generic<T>(ca: &ChunkedArray<T>) -> Option<usize>
140where
141    T: PolarsDataType,
142    for<'a> T::Physical<'a>: Ord,
143{
144    if ca.null_count() == ca.len() {
145        return None;
146    }
147    match ca.is_sorted_flag() {
148        IsSorted::Ascending => ca.last_non_null(),
149        IsSorted::Descending => ca.first_non_null(),
150        IsSorted::Not => arg_max_opt_iter(ca.iter()),
151    }
152}
153
154fn arg_min_numeric_chunked<'a, T>(ca: &'a ChunkedArray<T>) -> Option<usize>
155where
156    T: PolarsNumericType,
157    for<'b> &'b [T::Native]: ArgMinMax,
158{
159    match ca.is_sorted_flag() {
160        IsSorted::Ascending => ca.first_non_null(),
161        IsSorted::Descending => ca.last_non_null(),
162        IsSorted::Not => {
163            let mut chunk_start_offset = 0;
164            let mut min_idx: Option<usize> = None;
165            let mut min_val: Option<T::Native> = None;
166            for arr in ca.downcast_iter() {
167                if arr.len() == arr.null_count() {
168                    chunk_start_offset += arr.len();
169                    continue;
170                }
171
172                let chunk_min: Option<(usize, T::Native)> = if arr.null_count() > 0 {
173                    arr.into_iter()
174                        .enumerate()
175                        .flat_map(|(idx, val)| Some((idx, *(val?))))
176                        .reduce(|acc, (idx, val)| {
177                            if MinIgnoreNan::is_better(&val, &acc.1) {
178                                (idx, val)
179                            } else {
180                                acc
181                            }
182                        })
183                } else {
184                    // When no nulls & array not empty => we can use fast argmin.
185                    let min_idx: usize = arr.values().as_slice().argmin();
186                    Some((min_idx, arr.value(min_idx)))
187                };
188
189                if let Some((chunk_min_idx, chunk_min_val)) = chunk_min {
190                    if min_val.is_none()
191                        || MinIgnoreNan::is_better(&chunk_min_val, &min_val.unwrap())
192                    {
193                        min_val = Some(chunk_min_val);
194                        min_idx = Some(chunk_start_offset + chunk_min_idx);
195                    }
196                }
197                chunk_start_offset += arr.len();
198            }
199            min_idx
200        },
201    }
202}
203
204fn arg_max_numeric_chunked<'a, T>(ca: &'a ChunkedArray<T>) -> Option<usize>
205where
206    T: PolarsNumericType,
207    for<'b> &'b [T::Native]: ArgMinMax,
208{
209    match ca.is_sorted_flag() {
210        IsSorted::Ascending => ca.last_non_null(),
211        IsSorted::Descending => ca.first_non_null(),
212        IsSorted::Not => {
213            let mut chunk_start_offset = 0;
214            let mut max_idx: Option<usize> = None;
215            let mut max_val: Option<T::Native> = None;
216            for arr in ca.downcast_iter() {
217                if arr.len() == arr.null_count() {
218                    chunk_start_offset += arr.len();
219                    continue;
220                }
221
222                let chunk_max: Option<(usize, T::Native)> = if arr.null_count() > 0 {
223                    arr.into_iter()
224                        .enumerate()
225                        .flat_map(|(idx, val)| Some((idx, *(val?))))
226                        .reduce(|acc, (idx, val)| {
227                            if MaxIgnoreNan::is_better(&val, &acc.1) {
228                                (idx, val)
229                            } else {
230                                acc
231                            }
232                        })
233                } else {
234                    // When no nulls & array not empty => we can use fast argmax.
235                    let max_idx: usize = arr.values().as_slice().argmax();
236                    Some((max_idx, arr.value(max_idx)))
237                };
238
239                if let Some((chunk_max_idx, chunk_max_val)) = chunk_max {
240                    if max_val.is_none()
241                        || MaxIgnoreNan::is_better(&chunk_max_val, &max_val.unwrap())
242                    {
243                        max_val = Some(chunk_max_val);
244                        max_idx = Some(chunk_start_offset + chunk_max_idx);
245                    }
246                }
247                chunk_start_offset += arr.len();
248            }
249            max_idx
250        },
251    }
252}
253
254fn arg_min_numeric_slice<T>(vals: &[T], is_sorted: IsSorted) -> Option<usize>
255where
256    for<'a> &'a [T]: ArgMinMax,
257{
258    match is_sorted {
259        // all vals are not null guarded by cont_slice
260        IsSorted::Ascending => Some(0),
261        // all vals are not null guarded by cont_slice
262        IsSorted::Descending => Some(vals.len() - 1),
263        IsSorted::Not => Some(vals.argmin()), // assumes not empty
264    }
265}
266
267fn arg_max_numeric_slice<T>(vals: &[T], is_sorted: IsSorted) -> Option<usize>
268where
269    for<'a> &'a [T]: ArgMinMax,
270{
271    match is_sorted {
272        // all vals are not null guarded by cont_slice
273        IsSorted::Ascending => Some(vals.len() - 1),
274        // all vals are not null guarded by cont_slice
275        IsSorted::Descending => Some(0),
276        IsSorted::Not => Some(vals.argmax()), // assumes not empty
277    }
278}