polars_ops/chunked_array/list/
min_max.rs
1use arrow::array::{Array, PrimitiveArray};
2use arrow::bitmap::Bitmap;
3use arrow::compute::utils::combine_validities_and;
4use arrow::types::NativeType;
5use polars_compute::min_max::MinMaxKernel;
6use polars_core::prelude::*;
7use polars_core::with_match_physical_numeric_polars_type;
8
9use crate::chunked_array::list::namespace::has_inner_nulls;
10
11fn min_between_offsets<T>(values: &[T], offset: &[i64]) -> PrimitiveArray<T>
12where
13 T: NativeType,
14 [T]: for<'a> MinMaxKernel<Scalar<'a> = T>,
15{
16 let mut running_offset = offset[0];
17
18 (offset[1..])
19 .iter()
20 .map(|end| {
21 let current_offset = running_offset;
22 running_offset = *end;
23 if current_offset == *end {
24 return None;
25 }
26
27 let slice = unsafe { values.get_unchecked(current_offset as usize..*end as usize) };
28 slice.min_ignore_nan_kernel()
29 })
30 .collect()
31}
32
33fn dispatch_min<T>(arr: &dyn Array, offsets: &[i64], validity: Option<&Bitmap>) -> ArrayRef
34where
35 T: NativeType,
36 [T]: for<'a> MinMaxKernel<Scalar<'a> = T>,
37{
38 let values = arr.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
39 let values = values.values().as_slice();
40 let out = min_between_offsets(values, offsets);
41 let new_validity = combine_validities_and(out.validity(), validity);
42 out.with_validity(new_validity).to_boxed()
43}
44
45fn min_list_numerical(ca: &ListChunked, inner_type: &DataType) -> Series {
46 use DataType::*;
47 let chunks = ca
48 .downcast_iter()
49 .map(|arr| {
50 let offsets = arr.offsets().as_slice();
51 let values = arr.values().as_ref();
52
53 match inner_type {
54 Int8 => dispatch_min::<i8>(values, offsets, arr.validity()),
55 Int16 => dispatch_min::<i16>(values, offsets, arr.validity()),
56 Int32 => dispatch_min::<i32>(values, offsets, arr.validity()),
57 Int64 => dispatch_min::<i64>(values, offsets, arr.validity()),
58 Int128 => dispatch_min::<i128>(values, offsets, arr.validity()),
59 UInt8 => dispatch_min::<u8>(values, offsets, arr.validity()),
60 UInt16 => dispatch_min::<u16>(values, offsets, arr.validity()),
61 UInt32 => dispatch_min::<u32>(values, offsets, arr.validity()),
62 UInt64 => dispatch_min::<u64>(values, offsets, arr.validity()),
63 Float32 => dispatch_min::<f32>(values, offsets, arr.validity()),
64 Float64 => dispatch_min::<f64>(values, offsets, arr.validity()),
65 _ => unimplemented!(),
66 }
67 })
68 .collect::<Vec<_>>();
69
70 Series::try_from((ca.name().clone(), chunks)).unwrap()
71}
72
73pub(super) fn list_min_function(ca: &ListChunked) -> PolarsResult<Series> {
74 fn inner(ca: &ListChunked) -> PolarsResult<Series> {
75 match ca.inner_dtype() {
76 DataType::Boolean => {
77 let out: BooleanChunked = ca
78 .apply_amortized_generic(|s| s.and_then(|s| s.as_ref().bool().unwrap().min()));
79 Ok(out.into_series())
80 },
81 dt if dt.to_physical().is_primitive_numeric() => {
82 with_match_physical_numeric_polars_type!(dt.to_physical(), |$T| {
83 let out: ChunkedArray<$T> = ca.to_physical_repr().apply_amortized_generic(|opt_s| {
84 let s = opt_s?;
85 let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
86 ca.min()
87 });
88 unsafe { out.into_series().from_physical_unchecked(dt) }
90 })
91 },
92 dt => ca
93 .try_apply_amortized(|s| {
94 let s = s.as_ref();
95 let sc = s.min_reduce()?;
96 Ok(sc.into_series(s.name().clone()))
97 })?
98 .explode()
99 .unwrap()
100 .into_series()
101 .cast(dt),
102 }
103 }
104
105 if has_inner_nulls(ca) {
106 return inner(ca);
107 };
108
109 match ca.inner_dtype() {
110 dt if dt.is_primitive_numeric() => Ok(min_list_numerical(ca, dt)),
111 _ => inner(ca),
112 }
113}
114
115fn max_between_offsets<T>(values: &[T], offset: &[i64]) -> PrimitiveArray<T>
116where
117 T: NativeType,
118 [T]: for<'a> MinMaxKernel<Scalar<'a> = T>,
119{
120 let mut running_offset = offset[0];
121
122 (offset[1..])
123 .iter()
124 .map(|end| {
125 let current_offset = running_offset;
126 running_offset = *end;
127 if current_offset == *end {
128 return None;
129 }
130
131 let slice = unsafe { values.get_unchecked(current_offset as usize..*end as usize) };
132 slice.max_ignore_nan_kernel()
133 })
134 .collect()
135}
136
137fn dispatch_max<T>(arr: &dyn Array, offsets: &[i64], validity: Option<&Bitmap>) -> ArrayRef
138where
139 T: NativeType,
140 [T]: for<'a> MinMaxKernel<Scalar<'a> = T>,
141{
142 let values = arr.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
143 let values = values.values().as_slice();
144 let mut out = max_between_offsets(values, offsets);
145
146 if let Some(validity) = validity {
147 if out.null_count() > 0 {
148 out.apply_validity(|other_validity| validity & &other_validity)
149 } else {
150 out = out.with_validity(Some(validity.clone()));
151 }
152 }
153 Box::new(out)
154}
155
156fn max_list_numerical(ca: &ListChunked, inner_type: &DataType) -> Series {
157 use DataType::*;
158 let chunks = ca
159 .downcast_iter()
160 .map(|arr| {
161 let offsets = arr.offsets().as_slice();
162 let values = arr.values().as_ref();
163
164 match inner_type {
165 Int8 => dispatch_max::<i8>(values, offsets, arr.validity()),
166 Int16 => dispatch_max::<i16>(values, offsets, arr.validity()),
167 Int32 => dispatch_max::<i32>(values, offsets, arr.validity()),
168 Int64 => dispatch_max::<i64>(values, offsets, arr.validity()),
169 Int128 => dispatch_max::<i128>(values, offsets, arr.validity()),
170 UInt8 => dispatch_max::<u8>(values, offsets, arr.validity()),
171 UInt16 => dispatch_max::<u16>(values, offsets, arr.validity()),
172 UInt32 => dispatch_max::<u32>(values, offsets, arr.validity()),
173 UInt64 => dispatch_max::<u64>(values, offsets, arr.validity()),
174 Float32 => dispatch_max::<f32>(values, offsets, arr.validity()),
175 Float64 => dispatch_max::<f64>(values, offsets, arr.validity()),
176 _ => unimplemented!(),
177 }
178 })
179 .collect::<Vec<_>>();
180
181 Series::try_from((ca.name().clone(), chunks)).unwrap()
182}
183
184pub(super) fn list_max_function(ca: &ListChunked) -> PolarsResult<Series> {
185 fn inner(ca: &ListChunked) -> PolarsResult<Series> {
186 match ca.inner_dtype() {
187 DataType::Boolean => {
188 let out: BooleanChunked = ca
189 .apply_amortized_generic(|s| s.and_then(|s| s.as_ref().bool().unwrap().max()));
190 Ok(out.into_series())
191 },
192 dt if dt.to_physical().is_primitive_numeric() => {
193 with_match_physical_numeric_polars_type!(dt.to_physical(), |$T| {
194 let out: ChunkedArray<$T> = ca.to_physical_repr().apply_amortized_generic(|opt_s| {
195 let s = opt_s?;
196 let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
197 ca.max()
198 });
199 unsafe { out.into_series().from_physical_unchecked(dt) }
201 })
202 },
203 dt => ca
204 .try_apply_amortized(|s| {
205 let s = s.as_ref();
206 let sc = s.max_reduce()?;
207 Ok(sc.into_series(s.name().clone()))
208 })?
209 .explode()
210 .unwrap()
211 .into_series()
212 .cast(dt),
213 }
214 }
215
216 if has_inner_nulls(ca) {
217 return inner(ca);
218 };
219
220 match ca.inner_dtype() {
221 dt if dt.is_primitive_numeric() => Ok(max_list_numerical(ca, dt)),
222 _ => inner(ca),
223 }
224}