polars_ops/chunked_array/list/
min_max.rs

1use arrow::array::{Array, PrimitiveArray};
2use arrow::bitmap::Bitmap;
3use arrow::compute::utils::combine_validities_and;
4use arrow::types::NativeType;
5use polars_compute::min_max::MinMaxKernel;
6use polars_core::prelude::*;
7use polars_core::with_match_physical_numeric_polars_type;
8
9use crate::chunked_array::list::namespace::has_inner_nulls;
10
11fn min_between_offsets<T>(values: &[T], offset: &[i64]) -> PrimitiveArray<T>
12where
13    T: NativeType,
14    [T]: for<'a> MinMaxKernel<Scalar<'a> = T>,
15{
16    let mut running_offset = offset[0];
17
18    (offset[1..])
19        .iter()
20        .map(|end| {
21            let current_offset = running_offset;
22            running_offset = *end;
23            if current_offset == *end {
24                return None;
25            }
26
27            let slice = unsafe { values.get_unchecked(current_offset as usize..*end as usize) };
28            slice.min_ignore_nan_kernel()
29        })
30        .collect()
31}
32
33fn dispatch_min<T>(arr: &dyn Array, offsets: &[i64], validity: Option<&Bitmap>) -> ArrayRef
34where
35    T: NativeType,
36    [T]: for<'a> MinMaxKernel<Scalar<'a> = T>,
37{
38    let values = arr.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
39    let values = values.values().as_slice();
40    let out = min_between_offsets(values, offsets);
41    let new_validity = combine_validities_and(out.validity(), validity);
42    out.with_validity(new_validity).to_boxed()
43}
44
45fn min_list_numerical(ca: &ListChunked, inner_type: &DataType) -> Series {
46    use DataType::*;
47    let chunks = ca
48        .downcast_iter()
49        .map(|arr| {
50            let offsets = arr.offsets().as_slice();
51            let values = arr.values().as_ref();
52
53            match inner_type {
54                Int8 => dispatch_min::<i8>(values, offsets, arr.validity()),
55                Int16 => dispatch_min::<i16>(values, offsets, arr.validity()),
56                Int32 => dispatch_min::<i32>(values, offsets, arr.validity()),
57                Int64 => dispatch_min::<i64>(values, offsets, arr.validity()),
58                Int128 => dispatch_min::<i128>(values, offsets, arr.validity()),
59                UInt8 => dispatch_min::<u8>(values, offsets, arr.validity()),
60                UInt16 => dispatch_min::<u16>(values, offsets, arr.validity()),
61                UInt32 => dispatch_min::<u32>(values, offsets, arr.validity()),
62                UInt64 => dispatch_min::<u64>(values, offsets, arr.validity()),
63                Float32 => dispatch_min::<f32>(values, offsets, arr.validity()),
64                Float64 => dispatch_min::<f64>(values, offsets, arr.validity()),
65                _ => unimplemented!(),
66            }
67        })
68        .collect::<Vec<_>>();
69
70    Series::try_from((ca.name().clone(), chunks)).unwrap()
71}
72
73pub(super) fn list_min_function(ca: &ListChunked) -> PolarsResult<Series> {
74    fn inner(ca: &ListChunked) -> PolarsResult<Series> {
75        match ca.inner_dtype() {
76            DataType::Boolean => {
77                let out: BooleanChunked = ca
78                    .apply_amortized_generic(|s| s.and_then(|s| s.as_ref().bool().unwrap().min()));
79                Ok(out.into_series())
80            },
81            dt if dt.to_physical().is_primitive_numeric() => {
82                with_match_physical_numeric_polars_type!(dt.to_physical(), |$T| {
83                    let out: ChunkedArray<$T> = ca.to_physical_repr().apply_amortized_generic(|opt_s| {
84                            let s = opt_s?;
85                            let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
86                            ca.min()
87                    });
88                    // restore logical type
89                    unsafe { out.into_series().from_physical_unchecked(dt) }
90                })
91            },
92            dt => ca
93                .try_apply_amortized(|s| {
94                    let s = s.as_ref();
95                    let sc = s.min_reduce()?;
96                    Ok(sc.into_series(s.name().clone()))
97                })?
98                .explode()
99                .unwrap()
100                .into_series()
101                .cast(dt),
102        }
103    }
104
105    if has_inner_nulls(ca) {
106        return inner(ca);
107    };
108
109    match ca.inner_dtype() {
110        dt if dt.is_primitive_numeric() => Ok(min_list_numerical(ca, dt)),
111        _ => inner(ca),
112    }
113}
114
115fn max_between_offsets<T>(values: &[T], offset: &[i64]) -> PrimitiveArray<T>
116where
117    T: NativeType,
118    [T]: for<'a> MinMaxKernel<Scalar<'a> = T>,
119{
120    let mut running_offset = offset[0];
121
122    (offset[1..])
123        .iter()
124        .map(|end| {
125            let current_offset = running_offset;
126            running_offset = *end;
127            if current_offset == *end {
128                return None;
129            }
130
131            let slice = unsafe { values.get_unchecked(current_offset as usize..*end as usize) };
132            slice.max_ignore_nan_kernel()
133        })
134        .collect()
135}
136
137fn dispatch_max<T>(arr: &dyn Array, offsets: &[i64], validity: Option<&Bitmap>) -> ArrayRef
138where
139    T: NativeType,
140    [T]: for<'a> MinMaxKernel<Scalar<'a> = T>,
141{
142    let values = arr.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
143    let values = values.values().as_slice();
144    let mut out = max_between_offsets(values, offsets);
145
146    if let Some(validity) = validity {
147        if out.null_count() > 0 {
148            out.apply_validity(|other_validity| validity & &other_validity)
149        } else {
150            out = out.with_validity(Some(validity.clone()));
151        }
152    }
153    Box::new(out)
154}
155
156fn max_list_numerical(ca: &ListChunked, inner_type: &DataType) -> Series {
157    use DataType::*;
158    let chunks = ca
159        .downcast_iter()
160        .map(|arr| {
161            let offsets = arr.offsets().as_slice();
162            let values = arr.values().as_ref();
163
164            match inner_type {
165                Int8 => dispatch_max::<i8>(values, offsets, arr.validity()),
166                Int16 => dispatch_max::<i16>(values, offsets, arr.validity()),
167                Int32 => dispatch_max::<i32>(values, offsets, arr.validity()),
168                Int64 => dispatch_max::<i64>(values, offsets, arr.validity()),
169                Int128 => dispatch_max::<i128>(values, offsets, arr.validity()),
170                UInt8 => dispatch_max::<u8>(values, offsets, arr.validity()),
171                UInt16 => dispatch_max::<u16>(values, offsets, arr.validity()),
172                UInt32 => dispatch_max::<u32>(values, offsets, arr.validity()),
173                UInt64 => dispatch_max::<u64>(values, offsets, arr.validity()),
174                Float32 => dispatch_max::<f32>(values, offsets, arr.validity()),
175                Float64 => dispatch_max::<f64>(values, offsets, arr.validity()),
176                _ => unimplemented!(),
177            }
178        })
179        .collect::<Vec<_>>();
180
181    Series::try_from((ca.name().clone(), chunks)).unwrap()
182}
183
184pub(super) fn list_max_function(ca: &ListChunked) -> PolarsResult<Series> {
185    fn inner(ca: &ListChunked) -> PolarsResult<Series> {
186        match ca.inner_dtype() {
187            DataType::Boolean => {
188                let out: BooleanChunked = ca
189                    .apply_amortized_generic(|s| s.and_then(|s| s.as_ref().bool().unwrap().max()));
190                Ok(out.into_series())
191            },
192            dt if dt.to_physical().is_primitive_numeric() => {
193                with_match_physical_numeric_polars_type!(dt.to_physical(), |$T| {
194                    let out: ChunkedArray<$T> = ca.to_physical_repr().apply_amortized_generic(|opt_s| {
195                            let s = opt_s?;
196                            let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
197                            ca.max()
198                    });
199                    // restore logical type
200                    unsafe { out.into_series().from_physical_unchecked(dt) }
201                })
202            },
203            dt => ca
204                .try_apply_amortized(|s| {
205                    let s = s.as_ref();
206                    let sc = s.max_reduce()?;
207                    Ok(sc.into_series(s.name().clone()))
208                })?
209                .explode()
210                .unwrap()
211                .into_series()
212                .cast(dt),
213        }
214    }
215
216    if has_inner_nulls(ca) {
217        return inner(ca);
218    };
219
220    match ca.inner_dtype() {
221        dt if dt.is_primitive_numeric() => Ok(max_list_numerical(ca, dt)),
222        _ => inner(ca),
223    }
224}