polars_core/chunked_array/ops/
mod.rs

1//! Traits for miscellaneous operations on ChunkedArray
2use arrow::offset::OffsetsBuffer;
3use polars_compute::rolling::QuantileMethod;
4
5use crate::prelude::*;
6
7pub(crate) mod aggregate;
8pub(crate) mod any_value;
9pub(crate) mod append;
10mod apply;
11#[cfg(feature = "approx_unique")]
12mod approx_n_unique;
13pub mod arity;
14mod bit_repr;
15mod bits;
16#[cfg(feature = "bitwise")]
17mod bitwise_reduce;
18pub(crate) mod chunkops;
19pub(crate) mod compare_inner;
20#[cfg(feature = "dtype-decimal")]
21mod decimal;
22pub(crate) mod downcast;
23pub(crate) mod explode;
24mod explode_and_offsets;
25mod extend;
26pub mod fill_null;
27mod filter;
28pub mod float_sorted_arg_max;
29mod for_each;
30pub mod full;
31pub mod gather;
32mod nesting_utils;
33pub(crate) mod nulls;
34mod reverse;
35#[cfg(feature = "rolling_window")]
36pub(crate) mod rolling_window;
37pub mod row_encode;
38pub mod search_sorted;
39mod set;
40mod shift;
41pub mod sort;
42#[cfg(feature = "algorithm_group_by")]
43pub(crate) mod unique;
44#[cfg(feature = "zip_with")]
45pub mod zip;
46
47pub use chunkops::_set_check_length;
48pub use nesting_utils::ChunkNestingUtils;
49#[cfg(feature = "serde-lazy")]
50use serde::{Deserialize, Serialize};
51pub use sort::options::*;
52
53use crate::chunked_array::cast::CastOptions;
54use crate::series::{BitRepr, IsSorted};
55pub trait Reinterpret {
56    fn reinterpret_signed(&self) -> Series {
57        unimplemented!()
58    }
59
60    fn reinterpret_unsigned(&self) -> Series {
61        unimplemented!()
62    }
63}
64
65/// Transmute [`ChunkedArray`] to bit representation.
66/// This is useful in hashing context and reduces no.
67/// of compiled code paths.
68pub(crate) trait ToBitRepr {
69    fn to_bit_repr(&self) -> BitRepr;
70}
71
72pub trait ChunkAnyValue {
73    /// Get a single value. Beware this is slow.
74    /// If you need to use this slightly performant, cast Categorical to UInt32
75    ///
76    /// # Safety
77    /// Does not do any bounds checking.
78    unsafe fn get_any_value_unchecked(&self, index: usize) -> AnyValue<'_>;
79
80    /// Get a single value. Beware this is slow.
81    fn get_any_value(&self, index: usize) -> PolarsResult<AnyValue<'_>>;
82}
83
84/// Explode/flatten a List or String Series
85pub trait ChunkExplode {
86    fn explode(&self, skip_empty: bool) -> PolarsResult<Series> {
87        self.explode_and_offsets(skip_empty).map(|t| t.0)
88    }
89    fn offsets(&self) -> PolarsResult<OffsetsBuffer<i64>>;
90    fn explode_and_offsets(&self, skip_empty: bool) -> PolarsResult<(Series, OffsetsBuffer<i64>)>;
91}
92
/// Expose the contents of an array as borrowed byte slices.
pub trait ChunkBytes {
    /// Returns a vector of byte slices borrowed from `self`.
    fn to_byte_slices(&self) -> Vec<&[u8]>;
}
96
/// Rolling application of an arbitrary `Series -> Series` function.
///
/// Unlike ChunkWindowCustom and ChunkWindow this does not use a fold
/// aggregator; it reuses a `Series` wrapper and calls `Series` aggregators.
/// This is likely a bit slower than ChunkWindow.
#[cfg(feature = "rolling_window")]
pub trait ChunkRollApply: AsRefDataType {
    /// Apply `f` over rolling windows configured by `options`.
    ///
    /// The result, or any error, is returned as a [`PolarsResult`].
    fn rolling_map(
        &self,
        f: &dyn Fn(&Series) -> PolarsResult<Series>,
        options: RollingOptionsFixedWindow,
    ) -> PolarsResult<Series>
    where
        Self: Sized;
}
110
111pub trait ChunkTake<Idx: ?Sized>: ChunkTakeUnchecked<Idx> {
112    /// Gather values from ChunkedArray by index.
113    fn take(&self, indices: &Idx) -> PolarsResult<Self>
114    where
115        Self: Sized;
116}
117
/// Gather by index without validating the indices.
pub trait ChunkTakeUnchecked<Idx: ?Sized> {
    /// Gather values from ChunkedArray by index.
    ///
    /// # Safety
    /// The non-null indices must be valid.
    unsafe fn take_unchecked(&self, indices: &Idx) -> Self;
}
125
126/// Create a `ChunkedArray` with new values by index or by boolean mask.
127///
128/// Note that these operations clone data. This is however the only way we can modify at mask or
129/// index level as the underlying Arrow arrays are immutable.
130pub trait ChunkSet<'a, A, B> {
131    /// Set the values at indexes `idx` to some optional value `Option<T>`.
132    ///
133    /// # Example
134    ///
135    /// ```rust
136    /// # use polars_core::prelude::*;
137    /// let ca = UInt32Chunked::new("a".into(), &[1, 2, 3]);
138    /// let new = ca.scatter_single(vec![0, 1], Some(10)).unwrap();
139    ///
140    /// assert_eq!(Vec::from(&new), &[Some(10), Some(10), Some(3)]);
141    /// ```
142    fn scatter_single<I: IntoIterator<Item = IdxSize>>(
143        &'a self,
144        idx: I,
145        opt_value: Option<A>,
146    ) -> PolarsResult<Self>
147    where
148        Self: Sized;
149
150    /// Set the values at indexes `idx` by applying a closure to these values.
151    ///
152    /// # Example
153    ///
154    /// ```rust
155    /// # use polars_core::prelude::*;
156    /// let ca = Int32Chunked::new("a".into(), &[1, 2, 3]);
157    /// let new = ca.scatter_with(vec![0, 1], |opt_v| opt_v.map(|v| v - 5)).unwrap();
158    ///
159    /// assert_eq!(Vec::from(&new), &[Some(-4), Some(-3), Some(3)]);
160    /// ```
161    fn scatter_with<I: IntoIterator<Item = IdxSize>, F>(
162        &'a self,
163        idx: I,
164        f: F,
165    ) -> PolarsResult<Self>
166    where
167        Self: Sized,
168        F: Fn(Option<A>) -> Option<B>;
169    /// Set the values where the mask evaluates to `true` to some optional value `Option<T>`.
170    ///
171    /// # Example
172    ///
173    /// ```rust
174    /// # use polars_core::prelude::*;
175    /// let ca = Int32Chunked::new("a".into(), &[1, 2, 3]);
176    /// let mask = BooleanChunked::new("mask".into(), &[false, true, false]);
177    /// let new = ca.set(&mask, Some(5)).unwrap();
178    /// assert_eq!(Vec::from(&new), &[Some(1), Some(5), Some(3)]);
179    /// ```
180    fn set(&'a self, mask: &BooleanChunked, opt_value: Option<A>) -> PolarsResult<Self>
181    where
182        Self: Sized;
183}
184
185/// Cast `ChunkedArray<T>` to `ChunkedArray<N>`
186pub trait ChunkCast {
187    /// Cast a [`ChunkedArray`] to [`DataType`]
188    fn cast(&self, dtype: &DataType) -> PolarsResult<Series> {
189        self.cast_with_options(dtype, CastOptions::NonStrict)
190    }
191
192    /// Cast a [`ChunkedArray`] to [`DataType`]
193    fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series>;
194
195    /// Does not check if the cast is a valid one and may over/underflow
196    ///
197    /// # Safety
198    /// - This doesn't do utf8 validation checking when casting from binary
199    /// - This doesn't do categorical bound checking when casting from UInt32
200    unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Series>;
201}
202
/// Elementwise application of closures to a [`ChunkedArray<T>`].
///
/// This is the fastest way to do elementwise operations when the operation is
/// cheaper than the branching caused by null checking.
pub trait ChunkApply<'a, T> {
    /// Value type produced by the closures passed to the methods below.
    type FuncRet;

    /// Apply a closure elementwise. This is fastest when the null check
    /// branching is more expensive than the closure application. Often it is.
    ///
    /// Null values remain null.
    ///
    /// # Example
    ///
    /// ```
    /// use polars_core::prelude::*;
    /// fn double(ca: &UInt32Chunked) -> UInt32Chunked {
    ///     ca.apply_values(|v| v * 2)
    /// }
    /// ```
    #[must_use]
    fn apply_values<F>(&'a self, f: F) -> Self
    where
        F: Fn(T) -> Self::FuncRet + Copy;

    /// Apply a closure elementwise including null values.
    ///
    /// The closure receives `None` for nulls and may itself return `None`.
    #[must_use]
    fn apply<F>(&'a self, f: F) -> Self
    where
        F: Fn(Option<T>) -> Option<Self::FuncRet> + Copy;

    /// Apply a closure elementwise and write results to a mutable slice.
    ///
    /// The closure maps `(value of chunkedarray, value of slice)` to the new
    /// value of the slice.
    fn apply_to_slice<F, S>(&'a self, f: F, slice: &mut [S])
    where
        F: Fn(Option<T>, &S) -> S;
}
238
/// Aggregation operations.
///
/// Every method except [`_sum_as_f64`](Self::_sum_as_f64) has a default that
/// returns `None`.
pub trait ChunkAgg<T> {
    /// Aggregate the sum of the ChunkedArray.
    /// Returns `None` if not implemented for `T`.
    /// If the array is empty, `0` is returned.
    fn sum(&self) -> Option<T> {
        None
    }

    /// Sum of the array expressed as `f64`; every implementor must provide it.
    fn _sum_as_f64(&self) -> f64;

    /// Minimum counterpart of [`max`](Self::max). The default returns `None`.
    fn min(&self) -> Option<T> {
        None
    }

    /// Returns the maximum value in the array, according to the natural order.
    /// Returns `None` if the array is empty or only contains null values.
    fn max(&self) -> Option<T> {
        None
    }

    /// Returns `(min, max)` as a pair, or `None` as soon as either is `None`.
    ///
    /// `max` is not evaluated when `min` already returned `None`.
    fn min_max(&self) -> Option<(T, T)> {
        let lowest = self.min()?;
        let highest = self.max()?;
        Some((lowest, highest))
    }

    /// Returns the mean value in the array.
    /// Returns `None` if the array is empty or only contains null values.
    fn mean(&self) -> Option<f64> {
        None
    }
}
270
271/// Quantile and median aggregation.
272pub trait ChunkQuantile<T> {
273    /// Returns the mean value in the array.
274    /// Returns `None` if the array is empty or only contains null values.
275    fn median(&self) -> Option<T> {
276        None
277    }
278    /// Aggregate a given quantile of the ChunkedArray.
279    /// Returns `None` if the array is empty or only contains null values.
280    fn quantile(&self, _quantile: f64, _method: QuantileMethod) -> PolarsResult<Option<T>> {
281        Ok(None)
282    }
283}
284
/// Variance and standard deviation aggregation.
///
/// Both defaults return `None`; implementors override them where the
/// aggregation is supported.
pub trait ChunkVar {
    /// Compute the variance of this ChunkedArray/Series.
    ///
    /// `_ddof` is presumably the delta degrees of freedom (TODO confirm); the
    /// default implementation ignores it and returns `None`.
    fn var(&self, _ddof: u8) -> Option<f64> {
        None
    }

    /// Compute the standard deviation of this ChunkedArray/Series.
    ///
    /// `_ddof` is presumably the delta degrees of freedom (TODO confirm); the
    /// default implementation ignores it and returns `None`.
    fn std(&self, _ddof: u8) -> Option<f64> {
        None
    }
}
297
/// Bitwise Reduction Operations.
///
/// Each reduction folds the whole array into one optional value of type
/// [`Physical`](Self::Physical).
#[cfg(feature = "bitwise")]
pub trait ChunkBitwiseReduce {
    /// Value type produced by the reductions.
    type Physical;

    /// Reduce with bitwise AND.
    fn and_reduce(&self) -> Option<Self::Physical>;
    /// Reduce with bitwise OR.
    fn or_reduce(&self) -> Option<Self::Physical>;
    /// Reduce with bitwise XOR.
    fn xor_reduce(&self) -> Option<Self::Physical>;
}
307
/// Equality comparison of [`Series`] and [`ChunkedArray`]s, producing a
/// `boolean` mask that can be used to filter rows.
///
/// # Example
///
/// ```
/// use polars_core::prelude::*;
/// fn filter_all_ones(df: &DataFrame) -> PolarsResult<DataFrame> {
///     let mask = df
///     .column("column_a")?
///     .as_materialized_series()
///     .equal(1)?;
///
///     df.filter(&mask)
/// }
/// ```
pub trait ChunkCompareEq<Rhs> {
    /// Result type of every comparison.
    type Item;

    /// Check for equality.
    fn equal(&self, rhs: Rhs) -> Self::Item;

    /// Check for equality where `None == None`.
    fn equal_missing(&self, rhs: Rhs) -> Self::Item;

    /// Check for inequality.
    fn not_equal(&self, rhs: Rhs) -> Self::Item;

    /// Check for inequality where `None == None`.
    fn not_equal_missing(&self, rhs: Rhs) -> Self::Item;
}
339
/// Ordering comparison of [`Series`] and [`ChunkedArray`]s using inequality
/// operators (`<`, `>=`, etc.), producing a `boolean` mask that can be used to
/// filter rows.
pub trait ChunkCompareIneq<Rhs> {
    /// Result type of every comparison.
    type Item;

    /// Greater than comparison.
    fn gt(&self, rhs: Rhs) -> Self::Item;

    /// Greater than or equal comparison.
    fn gt_eq(&self, rhs: Rhs) -> Self::Item;

    /// Less than comparison.
    fn lt(&self, rhs: Rhs) -> Self::Item;

    /// Less than or equal comparison.
    fn lt_eq(&self, rhs: Rhs) -> Self::Item;
}
357
358/// Get unique values in a `ChunkedArray`
359pub trait ChunkUnique {
360    // We don't return Self to be able to use AutoRef specialization
361    /// Get unique values of a ChunkedArray
362    fn unique(&self) -> PolarsResult<Self>
363    where
364        Self: Sized;
365
366    /// Get first index of the unique values in a `ChunkedArray`.
367    /// This Vec is sorted.
368    fn arg_unique(&self) -> PolarsResult<IdxCa>;
369
370    /// Number of unique values in the `ChunkedArray`
371    fn n_unique(&self) -> PolarsResult<usize> {
372        self.arg_unique().map(|v| v.len())
373    }
374}
375
/// Approximate count of unique values.
#[cfg(feature = "approx_unique")]
pub trait ChunkApproxNUnique {
    /// Returns an approximation of the number of unique values.
    fn approx_n_unique(&self) -> IdxSize;
}
380
381/// Sort operations on `ChunkedArray`.
382pub trait ChunkSort<T: PolarsDataType> {
383    #[allow(unused_variables)]
384    fn sort_with(&self, options: SortOptions) -> ChunkedArray<T>;
385
386    /// Returned a sorted `ChunkedArray`.
387    fn sort(&self, descending: bool) -> ChunkedArray<T>;
388
389    /// Retrieve the indexes needed to sort this array.
390    fn arg_sort(&self, options: SortOptions) -> IdxCa;
391
392    /// Retrieve the indexes need to sort this and the other arrays.
393    #[allow(unused_variables)]
394    fn arg_sort_multiple(
395        &self,
396        by: &[Column],
397        _options: &SortMultipleOptions,
398    ) -> PolarsResult<IdxCa> {
399        polars_bail!(opq = arg_sort_multiple, T::get_static_dtype());
400    }
401}
402
/// Optional limit carried by the `Backward` and `Forward` fill strategies.
///
/// NOTE(review): presumably the maximum number of consecutive nulls to fill,
/// with `None` meaning "no limit" - confirm against the fill-null code.
pub type FillNullLimit = Option<IdxSize>;

// Strategy used to replace null values.
//
// NOTE(review): with the `dsl-schema` feature this enum derives
// `schemars::JsonSchema`, which is believed to copy `///` doc comments into
// the generated schema as descriptions - so the `///` lines below are
// presumably observable output; confirm before rewording them.
//
// NOTE(review): the `Backward`/`Forward` descriptions look swapped relative to
// the usual meaning (a forward fill propagates the previous value, a backward
// fill the next one) - confirm against the implementation.
#[derive(Copy, Clone, Debug, PartialEq, Hash)]
#[cfg_attr(feature = "serde-lazy", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum FillNullStrategy {
    /// previous value in array
    Backward(FillNullLimit),
    /// next value in array
    Forward(FillNullLimit),
    /// mean value of array
    Mean,
    /// minimal value in array
    Min,
    /// maximum value in array
    Max,
    /// replace with the value zero
    Zero,
    /// replace with the value one
    One,
}
424
425impl FillNullStrategy {
426    pub fn is_elementwise(&self) -> bool {
427        matches!(self, Self::One | Self::Zero)
428    }
429}
430
431/// Replace None values with a value
432pub trait ChunkFillNullValue<T> {
433    /// Replace None values with a give value `T`.
434    fn fill_null_with_values(&self, value: T) -> PolarsResult<Self>
435    where
436        Self: Sized;
437}
438
439/// Fill a ChunkedArray with one value.
440pub trait ChunkFull<T> {
441    /// Create a ChunkedArray with a single value.
442    fn full(name: PlSmallStr, value: T, length: usize) -> Self
443    where
444        Self: Sized;
445}
446
447pub trait ChunkFullNull {
448    fn full_null(_name: PlSmallStr, _length: usize) -> Self
449    where
450        Self: Sized;
451}
452
/// Reverse a [`ChunkedArray<T>`].
pub trait ChunkReverse {
    /// Return a reversed version of this array; `self` is only borrowed.
    fn reverse(&self) -> Self;
}
458
459/// Filter values by a boolean mask.
460pub trait ChunkFilter<T: PolarsDataType> {
461    /// Filter values in the ChunkedArray with a boolean mask.
462    ///
463    /// ```rust
464    /// # use polars_core::prelude::*;
465    /// let array = Int32Chunked::new("array".into(), &[1, 2, 3]);
466    /// let mask = BooleanChunked::new("mask".into(), &[true, false, true]);
467    ///
468    /// let filtered = array.filter(&mask).unwrap();
469    /// assert_eq!(Vec::from(&filtered), [Some(1), Some(3)])
470    /// ```
471    fn filter(&self, filter: &BooleanChunked) -> PolarsResult<ChunkedArray<T>>
472    where
473        Self: Sized;
474}
475
476/// Create a new ChunkedArray filled with values at that index.
477pub trait ChunkExpandAtIndex<T: PolarsDataType> {
478    /// Create a new ChunkedArray filled with values at that index.
479    fn new_from_index(&self, index: usize, length: usize) -> ChunkedArray<T>;
480}
481
// Shared body for `new_from_index` implementations.
//
// Expands to an expression yielding a `ChunkedArray` built with `full` (value
// present at `$index`) or `full_null` (value is null). The expansion contains
// a `return`, so it must be used directly inside a function that returns the
// same type as `$self.clone()`.
macro_rules! impl_chunk_expand {
    ($self:ident, $length:ident, $index:ident) => {{
        // An empty array is handed back unchanged, whatever `$length` is.
        if $self.is_empty() {
            return $self.clone();
        }
        let picked = $self.get($index);
        if let Some(val) = picked {
            ChunkedArray::full($self.name().clone(), val, $length)
        } else {
            ChunkedArray::full_null($self.name().clone(), $length)
        }
    }};
}
494
495impl<T: PolarsNumericType> ChunkExpandAtIndex<T> for ChunkedArray<T>
496where
497    ChunkedArray<T>: ChunkFull<T::Native>,
498{
499    fn new_from_index(&self, index: usize, length: usize) -> ChunkedArray<T> {
500        let mut out = impl_chunk_expand!(self, length, index);
501        out.set_sorted_flag(IsSorted::Ascending);
502        out
503    }
504}
505
506impl ChunkExpandAtIndex<BooleanType> for BooleanChunked {
507    fn new_from_index(&self, index: usize, length: usize) -> BooleanChunked {
508        let mut out = impl_chunk_expand!(self, length, index);
509        out.set_sorted_flag(IsSorted::Ascending);
510        out
511    }
512}
513
514impl ChunkExpandAtIndex<StringType> for StringChunked {
515    fn new_from_index(&self, index: usize, length: usize) -> StringChunked {
516        let mut out = impl_chunk_expand!(self, length, index);
517        out.set_sorted_flag(IsSorted::Ascending);
518        out
519    }
520}
521
522impl ChunkExpandAtIndex<BinaryType> for BinaryChunked {
523    fn new_from_index(&self, index: usize, length: usize) -> BinaryChunked {
524        let mut out = impl_chunk_expand!(self, length, index);
525        out.set_sorted_flag(IsSorted::Ascending);
526        out
527    }
528}
529
530impl ChunkExpandAtIndex<BinaryOffsetType> for BinaryOffsetChunked {
531    fn new_from_index(&self, index: usize, length: usize) -> BinaryOffsetChunked {
532        let mut out = impl_chunk_expand!(self, length, index);
533        out.set_sorted_flag(IsSorted::Ascending);
534        out
535    }
536}
537
538impl ChunkExpandAtIndex<ListType> for ListChunked {
539    fn new_from_index(&self, index: usize, length: usize) -> ListChunked {
540        let opt_val = self.get_as_series(index);
541        match opt_val {
542            Some(val) => {
543                let mut ca = ListChunked::full(self.name().clone(), &val, length);
544                unsafe { ca.to_logical(self.inner_dtype().clone()) };
545                ca
546            },
547            None => {
548                ListChunked::full_null_with_dtype(self.name().clone(), length, self.inner_dtype())
549            },
550        }
551    }
552}
553
/// Struct arrays: the row at `index` is expanded field by field.
#[cfg(feature = "dtype-struct")]
impl ChunkExpandAtIndex<StructType> for StructChunked {
    fn new_from_index(&self, index: usize, length: usize) -> ChunkedArray<StructType> {
        // Translate the flat index into (chunk, offset within that chunk).
        let (chunk_idx, idx) = self.index_to_chunked_index(index);
        // Panics when `chunk_idx` does not name an existing chunk.
        let source = self.downcast_chunks().get(chunk_idx).unwrap();

        let expanded = if source.is_null(idx) {
            // The selected row is null: the result is a null array of the
            // same arrow dtype.
            new_null_array(source.dtype().clone(), length)
        } else {
            // Expand each field array on its own, going through a temporary
            // unnamed `Series`, and keep the first chunk of each result.
            let fields: Vec<_> = source
                .values()
                .iter()
                .map(|field| {
                    let column = Series::try_from((PlSmallStr::EMPTY, field.clone())).unwrap();
                    let column = column.new_from_index(idx, length);
                    column.chunks()[0].clone()
                })
                .collect();

            StructArray::new(source.dtype().clone(), length, fields, None).boxed()
        };

        // SAFETY: chunks are from self.
        unsafe { self.copy_with_chunks(vec![expanded]) }
    }
}
579
/// Fixed-size-list arrays: the row at `index` is fetched as a `Series` and
/// used as the fill value.
#[cfg(feature = "dtype-array")]
impl ChunkExpandAtIndex<FixedSizeListType> for ArrayChunked {
    fn new_from_index(&self, index: usize, length: usize) -> ArrayChunked {
        if let Some(row) = self.get_as_series(index) {
            let mut expanded = ArrayChunked::full(self.name().clone(), &row, length);
            // SAFETY (NOTE(review)): the dtype handed over is `self`'s own
            // inner dtype and `row` was read from `self`; presumably that is
            // what `to_logical` requires - confirm against its contract.
            unsafe { expanded.to_logical(self.inner_dtype().clone()) };
            expanded
        } else {
            // No value at `index`: build the result with `full_null_with_dtype`,
            // passing along the inner dtype and width of `self`.
            ArrayChunked::full_null_with_dtype(
                self.name().clone(),
                length,
                self.inner_dtype(),
                self.width(),
            )
        }
    }
}
599
/// Object arrays: the object at `index` is cloned and used as the fill value.
#[cfg(feature = "object")]
impl<T: PolarsObject> ChunkExpandAtIndex<ObjectType<T>> for ObjectChunked<T> {
    fn new_from_index(&self, index: usize, length: usize) -> ObjectChunked<T> {
        if let Some(object) = self.get(index) {
            ObjectChunked::<T>::full(self.name().clone(), object.clone(), length)
        } else {
            ObjectChunked::<T>::full_null(self.name().clone(), length)
        }
    }
}
610
611/// Shift the values of a [`ChunkedArray`] by a number of periods.
612pub trait ChunkShiftFill<T: PolarsDataType, V> {
613    /// Shift the values by a given period and fill the parts that will be empty due to this operation
614    /// with `fill_value`.
615    fn shift_and_fill(&self, periods: i64, fill_value: V) -> ChunkedArray<T>;
616}
617
618pub trait ChunkShift<T: PolarsDataType> {
619    fn shift(&self, periods: i64) -> ChunkedArray<T>;
620}
621
622/// Combine two [`ChunkedArray`] based on some predicate.
623pub trait ChunkZip<T: PolarsDataType> {
624    /// Create a new ChunkedArray with values from self where the mask evaluates `true` and values
625    /// from `other` where the mask evaluates `false`
626    fn zip_with(
627        &self,
628        mask: &BooleanChunked,
629        other: &ChunkedArray<T>,
630    ) -> PolarsResult<ChunkedArray<T>>;
631}
632
633/// Apply kernels on the arrow array chunks in a ChunkedArray.
634pub trait ChunkApplyKernel<A: Array> {
635    /// Apply kernel and return result as a new ChunkedArray.
636    #[must_use]
637    fn apply_kernel(&self, f: &dyn Fn(&A) -> ArrayRef) -> Self;
638
639    /// Apply a kernel that outputs an array of different type.
640    fn apply_kernel_cast<S>(&self, f: &dyn Fn(&A) -> ArrayRef) -> ChunkedArray<S>
641    where
642        S: PolarsDataType;
643}
644
/// Mask the first unique values as `true`.
#[cfg(feature = "is_first_distinct")]
pub trait IsFirstDistinct<T: PolarsDataType> {
    /// Returns the boolean mask.
    ///
    /// The default implementation bails out with an error built from the
    /// static dtype of `T`.
    fn is_first_distinct(&self) -> PolarsResult<BooleanChunked> {
        polars_bail!(opq = is_first_distinct, T::get_static_dtype());
    }
}
652
/// Mask the last unique values as `true`.
#[cfg(feature = "is_last_distinct")]
pub trait IsLastDistinct<T: PolarsDataType> {
    /// Returns the boolean mask.
    ///
    /// The default implementation bails out with an error built from the
    /// static dtype of `T`.
    fn is_last_distinct(&self) -> PolarsResult<BooleanChunked> {
        polars_bail!(opq = is_last_distinct, T::get_static_dtype());
    }
}