Skip to main content

polars_core/chunked_array/
arg_min_max.rs

1use arrow::array::Array;
2use polars_utils::arg_min_max::ArgMinMax;
3use polars_utils::min_max::{MaxIgnoreNan, MinIgnoreNan, MinMaxPolicy};
4
5use crate::chunked_array::ChunkedArray;
6use crate::chunked_array::ops::float_sorted_arg_max::{
7    float_arg_max_sorted_ascending, float_arg_max_sorted_descending,
8};
9use crate::datatypes::{
10    BinaryChunked, BinaryOffsetChunked, BooleanChunked, PolarsDataType, PolarsNumericType,
11    StringChunked,
12};
13#[cfg(feature = "dtype-categorical")]
14use crate::datatypes::{CategoricalChunked, PolarsCategoricalType};
15use crate::series::IsSorted;
16
17pub fn arg_min_opt_iter<T, I>(iter: I) -> Option<usize>
18where
19    I: IntoIterator<Item = Option<T>>,
20    T: Ord,
21{
22    iter.into_iter()
23        .enumerate()
24        .flat_map(|(idx, val)| Some((idx, val?)))
25        .min_by(|x, y| Ord::cmp(&x.1, &y.1))
26        .map(|x| x.0)
27}
28
29pub fn arg_max_opt_iter<T, I>(iter: I) -> Option<usize>
30where
31    I: IntoIterator<Item = Option<T>>,
32    T: Ord,
33{
34    iter.into_iter()
35        .enumerate()
36        .flat_map(|(idx, val)| Some((idx, val?)))
37        .max_by(|x, y| Ord::cmp(&x.1, &y.1))
38        .map(|x| x.0)
39}
40
41pub fn arg_min_numeric<T>(ca: &ChunkedArray<T>) -> Option<usize>
42where
43    T: PolarsNumericType,
44    for<'b> &'b [T::Native]: ArgMinMax,
45{
46    if ca.null_count() == ca.len() {
47        None
48    } else if let Ok(vals) = ca.cont_slice() {
49        arg_min_numeric_slice(vals, ca.is_sorted_flag())
50    } else {
51        arg_min_numeric_chunked(ca)
52    }
53}
54
55pub fn arg_max_numeric<T>(ca: &ChunkedArray<T>) -> Option<usize>
56where
57    T: PolarsNumericType,
58    for<'b> &'b [T::Native]: ArgMinMax,
59{
60    if ca.null_count() == ca.len() {
61        None
62    } else if T::get_static_dtype().is_float() && !matches!(ca.is_sorted_flag(), IsSorted::Not) {
63        arg_max_float_sorted(ca)
64    } else if let Ok(vals) = ca.cont_slice() {
65        arg_max_numeric_slice(vals, ca.is_sorted_flag())
66    } else {
67        arg_max_numeric_chunked(ca)
68    }
69}
70
71/// # Safety
72/// `ca` has a float dtype, has at least one non-null value and is sorted.
73fn arg_max_float_sorted<T>(ca: &ChunkedArray<T>) -> Option<usize>
74where
75    T: PolarsNumericType,
76{
77    let out = match ca.is_sorted_flag() {
78        IsSorted::Ascending => float_arg_max_sorted_ascending(ca),
79        IsSorted::Descending => float_arg_max_sorted_descending(ca),
80        _ => unreachable!(),
81    };
82    Some(out)
83}
84
85#[cfg(feature = "dtype-categorical")]
86pub fn arg_min_cat<T: PolarsCategoricalType>(ca: &CategoricalChunked<T>) -> Option<usize> {
87    if ca.null_count() == ca.len() {
88        return None;
89    }
90    arg_min_opt_iter(ca.iter_str())
91}
92
93#[cfg(feature = "dtype-categorical")]
94pub fn arg_max_cat<T: PolarsCategoricalType>(ca: &CategoricalChunked<T>) -> Option<usize> {
95    if ca.null_count() == ca.len() {
96        return None;
97    }
98    arg_max_opt_iter(ca.iter_str())
99}
100
101pub fn arg_min_bool(ca: &BooleanChunked) -> Option<usize> {
102    ca.first_false_idx().or_else(|| ca.first_true_idx())
103}
104
105pub fn arg_max_bool(ca: &BooleanChunked) -> Option<usize> {
106    ca.first_true_idx().or_else(|| ca.first_false_idx())
107}
108
109pub fn arg_min_str(ca: &StringChunked) -> Option<usize> {
110    arg_min_physical_generic(ca)
111}
112
113pub fn arg_max_str(ca: &StringChunked) -> Option<usize> {
114    arg_max_physical_generic(ca)
115}
116
117pub fn arg_min_binary(ca: &BinaryChunked) -> Option<usize> {
118    arg_min_physical_generic(ca)
119}
120
121pub fn arg_max_binary(ca: &BinaryChunked) -> Option<usize> {
122    arg_max_physical_generic(ca)
123}
124
125pub fn arg_min_binary_offset(ca: &BinaryOffsetChunked) -> Option<usize> {
126    arg_min_physical_generic(ca)
127}
128
129pub fn arg_max_binary_offset(ca: &BinaryOffsetChunked) -> Option<usize> {
130    arg_max_physical_generic(ca)
131}
132
133fn arg_min_physical_generic<T>(ca: &ChunkedArray<T>) -> Option<usize>
134where
135    T: PolarsDataType,
136    for<'a> T::Physical<'a>: Ord,
137{
138    if ca.null_count() == ca.len() {
139        return None;
140    }
141    match ca.is_sorted_flag() {
142        IsSorted::Ascending => ca.first_non_null(),
143        IsSorted::Descending => ca.last_non_null(),
144        IsSorted::Not => arg_min_opt_iter(ca.iter()),
145    }
146}
147
148fn arg_max_physical_generic<T>(ca: &ChunkedArray<T>) -> Option<usize>
149where
150    T: PolarsDataType,
151    for<'a> T::Physical<'a>: Ord,
152{
153    if ca.null_count() == ca.len() {
154        return None;
155    }
156    match ca.is_sorted_flag() {
157        IsSorted::Ascending => ca.last_non_null(),
158        IsSorted::Descending => ca.first_non_null(),
159        IsSorted::Not => arg_max_opt_iter(ca.iter()),
160    }
161}
162
163fn arg_min_numeric_chunked<'a, T>(ca: &'a ChunkedArray<T>) -> Option<usize>
164where
165    T: PolarsNumericType,
166    for<'b> &'b [T::Native]: ArgMinMax,
167{
168    match ca.is_sorted_flag() {
169        IsSorted::Ascending => ca.first_non_null(),
170        IsSorted::Descending => ca.last_non_null(),
171        IsSorted::Not => {
172            let mut chunk_start_offset = 0;
173            let mut min_idx: Option<usize> = None;
174            let mut min_val: Option<T::Native> = None;
175            for arr in ca.downcast_iter() {
176                if arr.len() == arr.null_count() {
177                    chunk_start_offset += arr.len();
178                    continue;
179                }
180
181                let chunk_min: Option<(usize, T::Native)> = if arr.null_count() > 0 {
182                    arr.into_iter()
183                        .enumerate()
184                        .flat_map(|(idx, val)| Some((idx, *(val?))))
185                        .reduce(|acc, (idx, val)| {
186                            if MinIgnoreNan::is_better(&val, &acc.1) {
187                                (idx, val)
188                            } else {
189                                acc
190                            }
191                        })
192                } else {
193                    // When no nulls & array not empty => we can use fast argmin.
194                    let min_idx: usize = arr.values().as_slice().argmin();
195                    Some((min_idx, arr.value(min_idx)))
196                };
197
198                if let Some((chunk_min_idx, chunk_min_val)) = chunk_min {
199                    if min_val.is_none()
200                        || MinIgnoreNan::is_better(&chunk_min_val, &min_val.unwrap())
201                    {
202                        min_val = Some(chunk_min_val);
203                        min_idx = Some(chunk_start_offset + chunk_min_idx);
204                    }
205                }
206                chunk_start_offset += arr.len();
207            }
208            min_idx
209        },
210    }
211}
212
213fn arg_max_numeric_chunked<'a, T>(ca: &'a ChunkedArray<T>) -> Option<usize>
214where
215    T: PolarsNumericType,
216    for<'b> &'b [T::Native]: ArgMinMax,
217{
218    match ca.is_sorted_flag() {
219        IsSorted::Ascending => ca.last_non_null(),
220        IsSorted::Descending => ca.first_non_null(),
221        IsSorted::Not => {
222            let mut chunk_start_offset = 0;
223            let mut max_idx: Option<usize> = None;
224            let mut max_val: Option<T::Native> = None;
225            for arr in ca.downcast_iter() {
226                if arr.len() == arr.null_count() {
227                    chunk_start_offset += arr.len();
228                    continue;
229                }
230
231                let chunk_max: Option<(usize, T::Native)> = if arr.null_count() > 0 {
232                    arr.into_iter()
233                        .enumerate()
234                        .flat_map(|(idx, val)| Some((idx, *(val?))))
235                        .reduce(|acc, (idx, val)| {
236                            if MaxIgnoreNan::is_better(&val, &acc.1) {
237                                (idx, val)
238                            } else {
239                                acc
240                            }
241                        })
242                } else {
243                    // When no nulls & array not empty => we can use fast argmax.
244                    let max_idx: usize = arr.values().as_slice().argmax();
245                    Some((max_idx, arr.value(max_idx)))
246                };
247
248                if let Some((chunk_max_idx, chunk_max_val)) = chunk_max {
249                    if max_val.is_none()
250                        || MaxIgnoreNan::is_better(&chunk_max_val, &max_val.unwrap())
251                    {
252                        max_val = Some(chunk_max_val);
253                        max_idx = Some(chunk_start_offset + chunk_max_idx);
254                    }
255                }
256                chunk_start_offset += arr.len();
257            }
258            max_idx
259        },
260    }
261}
262
263fn arg_min_numeric_slice<T>(vals: &[T], is_sorted: IsSorted) -> Option<usize>
264where
265    for<'a> &'a [T]: ArgMinMax,
266{
267    match is_sorted {
268        // all vals are not null guarded by cont_slice
269        IsSorted::Ascending => Some(0),
270        // all vals are not null guarded by cont_slice
271        IsSorted::Descending => Some(vals.len() - 1),
272        IsSorted::Not => Some(vals.argmin()), // assumes not empty
273    }
274}
275
276fn arg_max_numeric_slice<T>(vals: &[T], is_sorted: IsSorted) -> Option<usize>
277where
278    for<'a> &'a [T]: ArgMinMax,
279{
280    match is_sorted {
281        // all vals are not null guarded by cont_slice
282        IsSorted::Ascending => Some(vals.len() - 1),
283        // all vals are not null guarded by cont_slice
284        IsSorted::Descending => Some(0),
285        IsSorted::Not => Some(vals.argmax()), // assumes not empty
286    }
287}