polars_ops/chunked_array/list/
min_max.rs1use arrow::array::{Array, PrimitiveArray};
2use arrow::bitmap::Bitmap;
3use arrow::compute::utils::combine_validities_and;
4use arrow::types::NativeType;
5use polars_compute::min_max::MinMaxKernel;
6use polars_core::prelude::*;
7use polars_core::with_match_physical_numeric_polars_type;
8use polars_utils::float16::pf16;
9
10use crate::chunked_array::list::namespace::has_inner_nulls;
11
12fn min_between_offsets<T>(values: &[T], offset: &[i64]) -> PrimitiveArray<T>
13where
14 T: NativeType,
15 [T]: for<'a> MinMaxKernel<Scalar<'a> = T>,
16{
17 let mut running_offset = offset[0];
18
19 (offset[1..])
20 .iter()
21 .map(|end| {
22 let current_offset = running_offset;
23 running_offset = *end;
24 if current_offset == *end {
25 return None;
26 }
27
28 let slice = unsafe { values.get_unchecked(current_offset as usize..*end as usize) };
29 slice.min_ignore_nan_kernel()
30 })
31 .collect()
32}
33
34fn dispatch_min<T>(arr: &dyn Array, offsets: &[i64], validity: Option<&Bitmap>) -> ArrayRef
35where
36 T: NativeType,
37 [T]: for<'a> MinMaxKernel<Scalar<'a> = T>,
38{
39 let values = arr.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
40 let values = values.values().as_slice();
41 let out = min_between_offsets(values, offsets);
42 let new_validity = combine_validities_and(out.validity(), validity);
43 out.with_validity(new_validity).to_boxed()
44}
45
46fn min_list_numerical(ca: &ListChunked, inner_type: &DataType) -> Series {
47 use DataType::*;
48 let chunks = ca
49 .downcast_iter()
50 .map(|arr| {
51 let offsets = arr.offsets().as_slice();
52 let values = arr.values().as_ref();
53
54 match inner_type {
55 Int8 => dispatch_min::<i8>(values, offsets, arr.validity()),
56 Int16 => dispatch_min::<i16>(values, offsets, arr.validity()),
57 Int32 => dispatch_min::<i32>(values, offsets, arr.validity()),
58 Int64 => dispatch_min::<i64>(values, offsets, arr.validity()),
59 Int128 => dispatch_min::<i128>(values, offsets, arr.validity()),
60 UInt8 => dispatch_min::<u8>(values, offsets, arr.validity()),
61 UInt16 => dispatch_min::<u16>(values, offsets, arr.validity()),
62 UInt32 => dispatch_min::<u32>(values, offsets, arr.validity()),
63 UInt64 => dispatch_min::<u64>(values, offsets, arr.validity()),
64 UInt128 => dispatch_min::<u128>(values, offsets, arr.validity()),
65 Float16 => dispatch_min::<pf16>(values, offsets, arr.validity()),
66 Float32 => dispatch_min::<f32>(values, offsets, arr.validity()),
67 Float64 => dispatch_min::<f64>(values, offsets, arr.validity()),
68 _ => unimplemented!(),
69 }
70 })
71 .collect::<Vec<_>>();
72
73 Series::try_from((ca.name().clone(), chunks)).unwrap()
74}
75
/// Minimum of every list element of `ca`, as a Series of the inner dtype.
///
/// Chooses between a fast flat-buffer kernel (no inner nulls, primitive
/// numeric inner dtype) and a generic amortized per-list reduction.
pub(super) fn list_min_function(ca: &ListChunked) -> PolarsResult<Series> {
    // Generic slow path: reduce each list via an amortized Series view.
    fn inner(ca: &ListChunked) -> PolarsResult<Series> {
        match ca.inner_dtype() {
            DataType::Boolean => {
                let out: BooleanChunked = ca
                    .apply_amortized_generic(|s| s.and_then(|s| s.as_ref().bool().unwrap().min()));
                Ok(out.into_series())
            },
            // Logical types with a numeric physical repr (e.g. dates):
            // reduce on the physical values, then restore the logical dtype.
            dt if dt.to_physical().is_primitive_numeric() => {
                with_match_physical_numeric_polars_type!(dt.to_physical(), |$T| {
                    let out: ChunkedArray<$T> = ca.to_physical_repr().apply_amortized_generic(|opt_s| {
                        let s = opt_s?;
                        let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
                        ca.min()
                    });
                    // SAFETY: physical values came from `dt`, so casting back is valid.
                    unsafe { out.into_series().from_physical_unchecked(dt) }
                })
            },
            // Fallback for all other dtypes: scalar min_reduce per list, then
            // explode the single-element lists back into a flat Series.
            dt => unsafe {
                ca.try_apply_amortized_same_type(|s| {
                    let s = s.as_ref();
                    let sc = s.min_reduce()?;
                    Ok(sc.into_series(s.name().clone()))
                })?
            }
            .explode(ExplodeOptions {
                // Empty lists reduce to null, and null lists stay null.
                empty_as_null: true,
                keep_nulls: true,
            })
            .unwrap()
            .into_series()
            .cast(dt),
        }
    }

    // Inner nulls invalidate the flat-buffer kernel (it cannot skip them).
    if has_inner_nulls(ca) {
        return inner(ca);
    };

    match ca.inner_dtype() {
        dt if dt.is_primitive_numeric() => Ok(min_list_numerical(ca, dt)),
        _ => inner(ca),
    }
}
122
123fn max_between_offsets<T>(values: &[T], offset: &[i64]) -> PrimitiveArray<T>
124where
125 T: NativeType,
126 [T]: for<'a> MinMaxKernel<Scalar<'a> = T>,
127{
128 let mut running_offset = offset[0];
129
130 (offset[1..])
131 .iter()
132 .map(|end| {
133 let current_offset = running_offset;
134 running_offset = *end;
135 if current_offset == *end {
136 return None;
137 }
138
139 let slice = unsafe { values.get_unchecked(current_offset as usize..*end as usize) };
140 slice.max_ignore_nan_kernel()
141 })
142 .collect()
143}
144
145fn dispatch_max<T>(arr: &dyn Array, offsets: &[i64], validity: Option<&Bitmap>) -> ArrayRef
146where
147 T: NativeType,
148 [T]: for<'a> MinMaxKernel<Scalar<'a> = T>,
149{
150 let values = arr.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
151 let values = values.values().as_slice();
152 let mut out = max_between_offsets(values, offsets);
153
154 if let Some(validity) = validity {
155 if out.null_count() > 0 {
156 out.apply_validity(|other_validity| validity & &other_validity)
157 } else {
158 out = out.with_validity(Some(validity.clone()));
159 }
160 }
161 Box::new(out)
162}
163
164fn max_list_numerical(ca: &ListChunked, inner_type: &DataType) -> Series {
165 use DataType::*;
166 let chunks = ca
167 .downcast_iter()
168 .map(|arr| {
169 let offsets = arr.offsets().as_slice();
170 let values = arr.values().as_ref();
171
172 match inner_type {
173 Int8 => dispatch_max::<i8>(values, offsets, arr.validity()),
174 Int16 => dispatch_max::<i16>(values, offsets, arr.validity()),
175 Int32 => dispatch_max::<i32>(values, offsets, arr.validity()),
176 Int64 => dispatch_max::<i64>(values, offsets, arr.validity()),
177 Int128 => dispatch_max::<i128>(values, offsets, arr.validity()),
178 UInt8 => dispatch_max::<u8>(values, offsets, arr.validity()),
179 UInt16 => dispatch_max::<u16>(values, offsets, arr.validity()),
180 UInt32 => dispatch_max::<u32>(values, offsets, arr.validity()),
181 UInt64 => dispatch_max::<u64>(values, offsets, arr.validity()),
182 UInt128 => dispatch_max::<u128>(values, offsets, arr.validity()),
183 Float16 => dispatch_max::<pf16>(values, offsets, arr.validity()),
184 Float32 => dispatch_max::<f32>(values, offsets, arr.validity()),
185 Float64 => dispatch_max::<f64>(values, offsets, arr.validity()),
186 _ => unimplemented!(),
187 }
188 })
189 .collect::<Vec<_>>();
190
191 Series::try_from((ca.name().clone(), chunks)).unwrap()
192}
193
/// Maximum of every list element of `ca`, as a Series of the inner dtype.
///
/// Chooses between a fast flat-buffer kernel (no inner nulls, primitive
/// numeric inner dtype) and a generic amortized per-list reduction.
pub(super) fn list_max_function(ca: &ListChunked) -> PolarsResult<Series> {
    // Generic slow path: reduce each list via an amortized Series view.
    fn inner(ca: &ListChunked) -> PolarsResult<Series> {
        match ca.inner_dtype() {
            DataType::Boolean => {
                let out: BooleanChunked = ca
                    .apply_amortized_generic(|s| s.and_then(|s| s.as_ref().bool().unwrap().max()));
                Ok(out.into_series())
            },
            // Logical types with a numeric physical repr (e.g. dates):
            // reduce on the physical values, then restore the logical dtype.
            dt if dt.to_physical().is_primitive_numeric() => {
                with_match_physical_numeric_polars_type!(dt.to_physical(), |$T| {
                    let out: ChunkedArray<$T> = ca.to_physical_repr().apply_amortized_generic(|opt_s| {
                        let s = opt_s?;
                        let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
                        ca.max()
                    });
                    // SAFETY: physical values came from `dt`, so casting back is valid.
                    unsafe { out.into_series().from_physical_unchecked(dt) }
                })
            },
            // Fallback for all other dtypes: scalar max_reduce per list, then
            // explode the single-element lists back into a flat Series.
            dt => unsafe {
                ca.try_apply_amortized_same_type(|s| {
                    let s = s.as_ref();
                    let sc = s.max_reduce()?;
                    Ok(sc.into_series(s.name().clone()))
                })?
            }
            .explode(ExplodeOptions {
                // Empty lists reduce to null, and null lists stay null.
                empty_as_null: true,
                keep_nulls: true,
            })
            .unwrap()
            .into_series()
            .cast(dt),
        }
    }

    // Inner nulls invalidate the flat-buffer kernel (it cannot skip them).
    if has_inner_nulls(ca) {
        return inner(ca);
    };

    match ca.inner_dtype() {
        dt if dt.is_primitive_numeric() => Ok(max_list_numerical(ca, dt)),
        _ => inner(ca),
    }
}