polars_ops/chunked_array/list/
min_max.rs

1use arrow::array::{Array, PrimitiveArray};
2use arrow::bitmap::Bitmap;
3use arrow::compute::utils::combine_validities_and;
4use arrow::types::NativeType;
5use polars_compute::min_max::MinMaxKernel;
6use polars_core::prelude::*;
7use polars_core::with_match_physical_numeric_polars_type;
8use polars_utils::float16::pf16;
9
10use crate::chunked_array::list::namespace::has_inner_nulls;
11
12fn min_between_offsets<T>(values: &[T], offset: &[i64]) -> PrimitiveArray<T>
13where
14    T: NativeType,
15    [T]: for<'a> MinMaxKernel<Scalar<'a> = T>,
16{
17    let mut running_offset = offset[0];
18
19    (offset[1..])
20        .iter()
21        .map(|end| {
22            let current_offset = running_offset;
23            running_offset = *end;
24            if current_offset == *end {
25                return None;
26            }
27
28            let slice = unsafe { values.get_unchecked(current_offset as usize..*end as usize) };
29            slice.min_ignore_nan_kernel()
30        })
31        .collect()
32}
33
34fn dispatch_min<T>(arr: &dyn Array, offsets: &[i64], validity: Option<&Bitmap>) -> ArrayRef
35where
36    T: NativeType,
37    [T]: for<'a> MinMaxKernel<Scalar<'a> = T>,
38{
39    let values = arr.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
40    let values = values.values().as_slice();
41    let out = min_between_offsets(values, offsets);
42    let new_validity = combine_validities_and(out.validity(), validity);
43    out.with_validity(new_validity).to_boxed()
44}
45
46fn min_list_numerical(ca: &ListChunked, inner_type: &DataType) -> Series {
47    use DataType::*;
48    let chunks = ca
49        .downcast_iter()
50        .map(|arr| {
51            let offsets = arr.offsets().as_slice();
52            let values = arr.values().as_ref();
53
54            match inner_type {
55                Int8 => dispatch_min::<i8>(values, offsets, arr.validity()),
56                Int16 => dispatch_min::<i16>(values, offsets, arr.validity()),
57                Int32 => dispatch_min::<i32>(values, offsets, arr.validity()),
58                Int64 => dispatch_min::<i64>(values, offsets, arr.validity()),
59                Int128 => dispatch_min::<i128>(values, offsets, arr.validity()),
60                UInt8 => dispatch_min::<u8>(values, offsets, arr.validity()),
61                UInt16 => dispatch_min::<u16>(values, offsets, arr.validity()),
62                UInt32 => dispatch_min::<u32>(values, offsets, arr.validity()),
63                UInt64 => dispatch_min::<u64>(values, offsets, arr.validity()),
64                UInt128 => dispatch_min::<u128>(values, offsets, arr.validity()),
65                Float16 => dispatch_min::<pf16>(values, offsets, arr.validity()),
66                Float32 => dispatch_min::<f32>(values, offsets, arr.validity()),
67                Float64 => dispatch_min::<f64>(values, offsets, arr.validity()),
68                _ => unimplemented!(),
69            }
70        })
71        .collect::<Vec<_>>();
72
73    Series::try_from((ca.name().clone(), chunks)).unwrap()
74}
75
76pub(super) fn list_min_function(ca: &ListChunked) -> PolarsResult<Series> {
77    fn inner(ca: &ListChunked) -> PolarsResult<Series> {
78        match ca.inner_dtype() {
79            DataType::Boolean => {
80                let out: BooleanChunked = ca
81                    .apply_amortized_generic(|s| s.and_then(|s| s.as_ref().bool().unwrap().min()));
82                Ok(out.into_series())
83            },
84            dt if dt.to_physical().is_primitive_numeric() => {
85                with_match_physical_numeric_polars_type!(dt.to_physical(), |$T| {
86                    let out: ChunkedArray<$T> = ca.to_physical_repr().apply_amortized_generic(|opt_s| {
87                            let s = opt_s?;
88                            let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
89                            ca.min()
90                    });
91                    // restore logical type
92                    unsafe { out.into_series().from_physical_unchecked(dt) }
93                })
94            },
95            dt => unsafe {
96                // SAFETY: `min_reduce` doesn't change the dtype
97                ca.try_apply_amortized_same_type(|s| {
98                    let s = s.as_ref();
99                    let sc = s.min_reduce()?;
100                    Ok(sc.into_series(s.name().clone()))
101                })?
102            }
103            .explode(ExplodeOptions {
104                empty_as_null: true,
105                keep_nulls: true,
106            })
107            .unwrap()
108            .into_series()
109            .cast(dt),
110        }
111    }
112
113    if has_inner_nulls(ca) {
114        return inner(ca);
115    };
116
117    match ca.inner_dtype() {
118        dt if dt.is_primitive_numeric() => Ok(min_list_numerical(ca, dt)),
119        _ => inner(ca),
120    }
121}
122
123fn max_between_offsets<T>(values: &[T], offset: &[i64]) -> PrimitiveArray<T>
124where
125    T: NativeType,
126    [T]: for<'a> MinMaxKernel<Scalar<'a> = T>,
127{
128    let mut running_offset = offset[0];
129
130    (offset[1..])
131        .iter()
132        .map(|end| {
133            let current_offset = running_offset;
134            running_offset = *end;
135            if current_offset == *end {
136                return None;
137            }
138
139            let slice = unsafe { values.get_unchecked(current_offset as usize..*end as usize) };
140            slice.max_ignore_nan_kernel()
141        })
142        .collect()
143}
144
145fn dispatch_max<T>(arr: &dyn Array, offsets: &[i64], validity: Option<&Bitmap>) -> ArrayRef
146where
147    T: NativeType,
148    [T]: for<'a> MinMaxKernel<Scalar<'a> = T>,
149{
150    let values = arr.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
151    let values = values.values().as_slice();
152    let mut out = max_between_offsets(values, offsets);
153
154    if let Some(validity) = validity {
155        if out.null_count() > 0 {
156            out.apply_validity(|other_validity| validity & &other_validity)
157        } else {
158            out = out.with_validity(Some(validity.clone()));
159        }
160    }
161    Box::new(out)
162}
163
164fn max_list_numerical(ca: &ListChunked, inner_type: &DataType) -> Series {
165    use DataType::*;
166    let chunks = ca
167        .downcast_iter()
168        .map(|arr| {
169            let offsets = arr.offsets().as_slice();
170            let values = arr.values().as_ref();
171
172            match inner_type {
173                Int8 => dispatch_max::<i8>(values, offsets, arr.validity()),
174                Int16 => dispatch_max::<i16>(values, offsets, arr.validity()),
175                Int32 => dispatch_max::<i32>(values, offsets, arr.validity()),
176                Int64 => dispatch_max::<i64>(values, offsets, arr.validity()),
177                Int128 => dispatch_max::<i128>(values, offsets, arr.validity()),
178                UInt8 => dispatch_max::<u8>(values, offsets, arr.validity()),
179                UInt16 => dispatch_max::<u16>(values, offsets, arr.validity()),
180                UInt32 => dispatch_max::<u32>(values, offsets, arr.validity()),
181                UInt64 => dispatch_max::<u64>(values, offsets, arr.validity()),
182                UInt128 => dispatch_max::<u128>(values, offsets, arr.validity()),
183                Float16 => dispatch_max::<pf16>(values, offsets, arr.validity()),
184                Float32 => dispatch_max::<f32>(values, offsets, arr.validity()),
185                Float64 => dispatch_max::<f64>(values, offsets, arr.validity()),
186                _ => unimplemented!(),
187            }
188        })
189        .collect::<Vec<_>>();
190
191    Series::try_from((ca.name().clone(), chunks)).unwrap()
192}
193
194pub(super) fn list_max_function(ca: &ListChunked) -> PolarsResult<Series> {
195    fn inner(ca: &ListChunked) -> PolarsResult<Series> {
196        match ca.inner_dtype() {
197            DataType::Boolean => {
198                let out: BooleanChunked = ca
199                    .apply_amortized_generic(|s| s.and_then(|s| s.as_ref().bool().unwrap().max()));
200                Ok(out.into_series())
201            },
202            dt if dt.to_physical().is_primitive_numeric() => {
203                with_match_physical_numeric_polars_type!(dt.to_physical(), |$T| {
204                    let out: ChunkedArray<$T> = ca.to_physical_repr().apply_amortized_generic(|opt_s| {
205                            let s = opt_s?;
206                            let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
207                            ca.max()
208                    });
209                    // restore logical type
210                    unsafe { out.into_series().from_physical_unchecked(dt) }
211                })
212            },
213            dt => unsafe {
214                // SAFETY: `max_reduce` doesn't change the dtype
215                ca.try_apply_amortized_same_type(|s| {
216                    let s = s.as_ref();
217                    let sc = s.max_reduce()?;
218                    Ok(sc.into_series(s.name().clone()))
219                })?
220            }
221            .explode(ExplodeOptions {
222                empty_as_null: true,
223                keep_nulls: true,
224            })
225            .unwrap()
226            .into_series()
227            .cast(dt),
228        }
229    }
230
231    if has_inner_nulls(ca) {
232        return inner(ca);
233    };
234
235    match ca.inner_dtype() {
236        dt if dt.is_primitive_numeric() => Ok(max_list_numerical(ca, dt)),
237        _ => inner(ca),
238    }
239}