polars_core/chunked_array/ops/
mod.rs

1//! Traits for miscellaneous operations on ChunkedArray
2use arrow::offset::OffsetsBuffer;
3use polars_compute::rolling::QuantileMethod;
4
5use crate::prelude::*;
6
7pub(crate) mod aggregate;
8pub(crate) mod any_value;
9pub(crate) mod append;
10mod apply;
11#[cfg(feature = "approx_unique")]
12mod approx_n_unique;
13pub mod arity;
14mod bit_repr;
15mod bits;
16#[cfg(feature = "bitwise")]
17mod bitwise_reduce;
18pub(crate) mod chunkops;
19pub(crate) mod compare_inner;
20#[cfg(feature = "dtype-decimal")]
21mod decimal;
22pub(crate) mod downcast;
23pub(crate) mod explode;
24mod explode_and_offsets;
25mod extend;
26pub mod fill_null;
27mod filter;
28pub mod float_sorted_arg_max;
29mod for_each;
30pub mod full;
31pub mod gather;
32pub(crate) mod nulls;
33mod reverse;
34#[cfg(feature = "rolling_window")]
35pub(crate) mod rolling_window;
36pub mod row_encode;
37pub mod search_sorted;
38mod set;
39mod shift;
40pub mod sort;
41#[cfg(feature = "algorithm_group_by")]
42pub(crate) mod unique;
43#[cfg(feature = "zip_with")]
44pub mod zip;
45
46pub use chunkops::_set_check_length;
47#[cfg(feature = "serde-lazy")]
48use serde::{Deserialize, Serialize};
49pub use sort::options::*;
50
51use crate::chunked_array::cast::CastOptions;
52use crate::series::{BitRepr, IsSorted};
/// Reinterpretation of a container's values as signed or unsigned data.
///
/// Both methods ship with a default body that panics through
/// `unimplemented!()`, so an implementor only overrides what it supports.
// NOTE(review): the exact target dtypes are decided by implementors that are
// not visible in this file.
#[cfg(feature = "reinterpret")]
pub trait Reinterpret {
    /// Produce the signed reinterpretation as a [`Series`].
    ///
    /// # Panics
    /// The default implementation always panics.
    fn reinterpret_signed(&self) -> Series {
        unimplemented!()
    }

    /// Produce the unsigned reinterpretation as a [`Series`].
    ///
    /// # Panics
    /// The default implementation always panics.
    fn reinterpret_unsigned(&self) -> Series {
        unimplemented!()
    }
}
63
64/// Transmute [`ChunkedArray`] to bit representation.
65/// This is useful in hashing context and reduces no.
66/// of compiled code paths.
67pub(crate) trait ToBitRepr {
68    fn to_bit_repr(&self) -> BitRepr;
69}
70
71pub trait ChunkAnyValue {
72    /// Get a single value. Beware this is slow.
73    /// If you need to use this slightly performant, cast Categorical to UInt32
74    ///
75    /// # Safety
76    /// Does not do any bounds checking.
77    unsafe fn get_any_value_unchecked(&self, index: usize) -> AnyValue;
78
79    /// Get a single value. Beware this is slow.
80    fn get_any_value(&self, index: usize) -> PolarsResult<AnyValue>;
81}
82
83/// Explode/flatten a List or String Series
84pub trait ChunkExplode {
85    fn explode(&self) -> PolarsResult<Series> {
86        self.explode_and_offsets().map(|t| t.0)
87    }
88    fn offsets(&self) -> PolarsResult<OffsetsBuffer<i64>>;
89    fn explode_and_offsets(&self) -> PolarsResult<(Series, OffsetsBuffer<i64>)>;
90}
91
/// Expose the contents as borrowed byte slices.
pub trait ChunkBytes {
    /// Return byte slices that borrow from `self`.
    fn to_byte_slices(&self) -> Vec<&[u8]>;
}
95
/// Rolling application of a `Series -> Series` function.
///
/// Unlike ChunkWindowCustom and ChunkWindow this does not use a fold
/// aggregator; it reuses a `Series` wrapper and calls `Series` aggregators.
/// It is likely a bit slower than ChunkWindow.
#[cfg(feature = "rolling_window")]
pub trait ChunkRollApply: AsRefDataType {
    /// Apply `_f` over rolling windows described by `_options`.
    ///
    /// The default implementation does not evaluate anything: it bails with
    /// an "operation not supported" error for this dtype.
    fn rolling_map(
        &self,
        _f: &dyn Fn(&Series) -> Series,
        _options: RollingOptionsFixedWindow,
    ) -> PolarsResult<Series>
    where
        Self: Sized,
    {
        polars_bail!(opq = rolling_map, self.as_ref_dtype());
    }
}
112
113pub trait ChunkTake<Idx: ?Sized>: ChunkTakeUnchecked<Idx> {
114    /// Gather values from ChunkedArray by index.
115    fn take(&self, indices: &Idx) -> PolarsResult<Self>
116    where
117        Self: Sized;
118}
119
/// Unchecked gather.
pub trait ChunkTakeUnchecked<Idx>
where
    Idx: ?Sized,
{
    /// Gather values from ChunkedArray by index.
    ///
    /// # Safety
    /// The non-null indices must be valid.
    unsafe fn take_unchecked(&self, indices: &Idx) -> Self;
}
127
128/// Create a `ChunkedArray` with new values by index or by boolean mask.
129///
130/// Note that these operations clone data. This is however the only way we can modify at mask or
131/// index level as the underlying Arrow arrays are immutable.
132pub trait ChunkSet<'a, A, B> {
133    /// Set the values at indexes `idx` to some optional value `Option<T>`.
134    ///
135    /// # Example
136    ///
137    /// ```rust
138    /// # use polars_core::prelude::*;
139    /// let ca = UInt32Chunked::new("a".into(), &[1, 2, 3]);
140    /// let new = ca.scatter_single(vec![0, 1], Some(10)).unwrap();
141    ///
142    /// assert_eq!(Vec::from(&new), &[Some(10), Some(10), Some(3)]);
143    /// ```
144    fn scatter_single<I: IntoIterator<Item = IdxSize>>(
145        &'a self,
146        idx: I,
147        opt_value: Option<A>,
148    ) -> PolarsResult<Self>
149    where
150        Self: Sized;
151
152    /// Set the values at indexes `idx` by applying a closure to these values.
153    ///
154    /// # Example
155    ///
156    /// ```rust
157    /// # use polars_core::prelude::*;
158    /// let ca = Int32Chunked::new("a".into(), &[1, 2, 3]);
159    /// let new = ca.scatter_with(vec![0, 1], |opt_v| opt_v.map(|v| v - 5)).unwrap();
160    ///
161    /// assert_eq!(Vec::from(&new), &[Some(-4), Some(-3), Some(3)]);
162    /// ```
163    fn scatter_with<I: IntoIterator<Item = IdxSize>, F>(
164        &'a self,
165        idx: I,
166        f: F,
167    ) -> PolarsResult<Self>
168    where
169        Self: Sized,
170        F: Fn(Option<A>) -> Option<B>;
171    /// Set the values where the mask evaluates to `true` to some optional value `Option<T>`.
172    ///
173    /// # Example
174    ///
175    /// ```rust
176    /// # use polars_core::prelude::*;
177    /// let ca = Int32Chunked::new("a".into(), &[1, 2, 3]);
178    /// let mask = BooleanChunked::new("mask".into(), &[false, true, false]);
179    /// let new = ca.set(&mask, Some(5)).unwrap();
180    /// assert_eq!(Vec::from(&new), &[Some(1), Some(5), Some(3)]);
181    /// ```
182    fn set(&'a self, mask: &BooleanChunked, opt_value: Option<A>) -> PolarsResult<Self>
183    where
184        Self: Sized;
185}
186
187/// Cast `ChunkedArray<T>` to `ChunkedArray<N>`
188pub trait ChunkCast {
189    /// Cast a [`ChunkedArray`] to [`DataType`]
190    fn cast(&self, dtype: &DataType) -> PolarsResult<Series> {
191        self.cast_with_options(dtype, CastOptions::NonStrict)
192    }
193
194    /// Cast a [`ChunkedArray`] to [`DataType`]
195    fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series>;
196
197    /// Does not check if the cast is a valid one and may over/underflow
198    ///
199    /// # Safety
200    /// - This doesn't do utf8 validation checking when casting from binary
201    /// - This doesn't do categorical bound checking when casting from UInt32
202    unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Series>;
203}
204
/// Elementwise application of closures on a [`ChunkedArray<T>`].
///
/// This is the fastest route when the operation itself is cheaper than the
/// branching caused by null checking.
pub trait ChunkApply<'a, T> {
    /// Value type produced by the applied closures.
    type FuncRet;

    /// Apply `f` to every value. This is fastest when the null check
    /// branching is more expensive than the closure application. Often it is.
    ///
    /// Null values remain null.
    ///
    /// # Example
    ///
    /// ```
    /// use polars_core::prelude::*;
    /// fn double(ca: &UInt32Chunked) -> UInt32Chunked {
    ///     ca.apply_values(|v| v * 2)
    /// }
    /// ```
    #[must_use]
    fn apply_values<F: Fn(T) -> Self::FuncRet + Copy>(&'a self, f: F) -> Self;

    /// Apply `f` to every element, null values included (`f` sees `None`
    /// for them).
    #[must_use]
    fn apply<F: Fn(Option<T>) -> Option<Self::FuncRet> + Copy>(&'a self, f: F) -> Self;

    /// Apply `f` elementwise and write the results into `slice`.
    ///
    /// `f` maps (value of chunkedarray, value of slice) -> value of slice.
    fn apply_to_slice<F: Fn(Option<T>, &S) -> S, S>(&'a self, f: F, slice: &mut [S]);
}
240
/// Aggregations over a ChunkedArray.
///
/// Every method except [`ChunkAgg::_sum_as_f64`] has a default body, so
/// implementors override only what they support.
pub trait ChunkAgg<T> {
    /// Aggregate the sum of the ChunkedArray.
    /// Returns `None` if not implemented for `T`.
    /// If the array is empty, `0` is returned.
    fn sum(&self) -> Option<T> {
        None
    }

    /// The sum as an `f64`; this is the only required method.
    fn _sum_as_f64(&self) -> f64;

    /// Minimum value; the default implementation returns `None`.
    fn min(&self) -> Option<T> {
        None
    }

    /// Returns the maximum value in the array, according to the natural order.
    /// Returns `None` if the array is empty or only contains null values.
    fn max(&self) -> Option<T> {
        None
    }

    /// Minimum and maximum as a pair.
    ///
    /// `min` is evaluated first; `max` is only evaluated when `min` yielded
    /// a value. `None` from either yields `None`.
    fn min_max(&self) -> Option<(T, T)> {
        let lowest = self.min()?;
        let highest = self.max()?;
        Some((lowest, highest))
    }

    /// Returns the mean value in the array.
    /// Returns `None` if the array is empty or only contains null values.
    fn mean(&self) -> Option<f64> {
        None
    }
}
272
273/// Quantile and median aggregation.
274pub trait ChunkQuantile<T> {
275    /// Returns the mean value in the array.
276    /// Returns `None` if the array is empty or only contains null values.
277    fn median(&self) -> Option<T> {
278        None
279    }
280    /// Aggregate a given quantile of the ChunkedArray.
281    /// Returns `None` if the array is empty or only contains null values.
282    fn quantile(&self, _quantile: f64, _method: QuantileMethod) -> PolarsResult<Option<T>> {
283        Ok(None)
284    }
285}
286
/// Variance and standard deviation aggregation.
///
/// Both defaults return `None`; implementors override them.
pub trait ChunkVar {
    /// Compute the standard deviation of this ChunkedArray/Series.
    fn std(&self, _ddof: u8) -> Option<f64> {
        None
    }

    /// Compute the variance of this ChunkedArray/Series.
    fn var(&self, _ddof: u8) -> Option<f64> {
        None
    }
}
299
/// Bitwise reductions (AND / OR / XOR) down to a single optional value.
#[cfg(feature = "bitwise")]
pub trait ChunkBitwiseReduce {
    /// Value type produced by the reductions.
    type Physical;

    /// Reduce with bitwise AND.
    fn and_reduce(&self) -> Option<Self::Physical>;

    /// Reduce with bitwise OR.
    fn or_reduce(&self) -> Option<Self::Physical>;

    /// Reduce with bitwise XOR.
    fn xor_reduce(&self) -> Option<Self::Physical>;
}
309
/// Equality comparisons between [`Series`] and [`ChunkedArray`]'s that yield
/// a `boolean` mask usable for filtering rows.
///
/// # Example
///
/// ```
/// use polars_core::prelude::*;
/// fn filter_all_ones(df: &DataFrame) -> PolarsResult<DataFrame> {
///     let mask = df
///     .column("column_a")?
///     .as_materialized_series()
///     .equal(1)?;
///
///     df.filter(&mask)
/// }
/// ```
pub trait ChunkCompareEq<Rhs> {
    /// Result type of every comparison in this trait.
    type Item;

    /// Check for equality.
    fn equal(&self, rhs: Rhs) -> Self::Item;

    /// Check for equality where `None == None`.
    fn equal_missing(&self, rhs: Rhs) -> Self::Item;

    /// Check for inequality.
    fn not_equal(&self, rhs: Rhs) -> Self::Item;

    /// Check for inequality where `None == None`.
    fn not_equal_missing(&self, rhs: Rhs) -> Self::Item;
}
341
/// Ordering comparisons (`<`, `>=`, etc.) between [`Series`] and
/// [`ChunkedArray`]'s that yield a `boolean` mask usable for filtering rows.
pub trait ChunkCompareIneq<Rhs> {
    /// Result type of every comparison in this trait.
    type Item;

    /// Greater than comparison.
    fn gt(&self, rhs: Rhs) -> Self::Item;

    /// Greater than or equal comparison.
    fn gt_eq(&self, rhs: Rhs) -> Self::Item;

    /// Less than comparison.
    fn lt(&self, rhs: Rhs) -> Self::Item;

    /// Less than or equal comparison.
    fn lt_eq(&self, rhs: Rhs) -> Self::Item;
}
359
360/// Get unique values in a `ChunkedArray`
361pub trait ChunkUnique {
362    // We don't return Self to be able to use AutoRef specialization
363    /// Get unique values of a ChunkedArray
364    fn unique(&self) -> PolarsResult<Self>
365    where
366        Self: Sized;
367
368    /// Get first index of the unique values in a `ChunkedArray`.
369    /// This Vec is sorted.
370    fn arg_unique(&self) -> PolarsResult<IdxCa>;
371
372    /// Number of unique values in the `ChunkedArray`
373    fn n_unique(&self) -> PolarsResult<usize> {
374        self.arg_unique().map(|v| v.len())
375    }
376}
377
/// Approximate distinct count.
#[cfg(feature = "approx_unique")]
pub trait ChunkApproxNUnique {
    /// Return the approximate number of unique values as an [`IdxSize`].
    fn approx_n_unique(&self) -> IdxSize;
}
382
383/// Sort operations on `ChunkedArray`.
384pub trait ChunkSort<T: PolarsDataType> {
385    #[allow(unused_variables)]
386    fn sort_with(&self, options: SortOptions) -> ChunkedArray<T>;
387
388    /// Returned a sorted `ChunkedArray`.
389    fn sort(&self, descending: bool) -> ChunkedArray<T>;
390
391    /// Retrieve the indexes needed to sort this array.
392    fn arg_sort(&self, options: SortOptions) -> IdxCa;
393
394    /// Retrieve the indexes need to sort this and the other arrays.
395    #[allow(unused_variables)]
396    fn arg_sort_multiple(
397        &self,
398        by: &[Column],
399        _options: &SortMultipleOptions,
400    ) -> PolarsResult<IdxCa> {
401        polars_bail!(opq = arg_sort_multiple, T::get_dtype());
402    }
403}
404
405pub type FillNullLimit = Option<IdxSize>;
406
407#[derive(Copy, Clone, Debug, PartialEq, Hash)]
408#[cfg_attr(feature = "serde-lazy", derive(Serialize, Deserialize))]
409pub enum FillNullStrategy {
410    /// previous value in array
411    Backward(FillNullLimit),
412    /// next value in array
413    Forward(FillNullLimit),
414    /// mean value of array
415    Mean,
416    /// minimal value in array
417    Min,
418    /// maximum value in array
419    Max,
420    /// replace with the value zero
421    Zero,
422    /// replace with the value one
423    One,
424    /// replace with the maximum value of that data type
425    MaxBound,
426    /// replace with the minimal value of that data type
427    MinBound,
428}
429
430impl FillNullStrategy {
431    pub fn is_elementwise(&self) -> bool {
432        matches!(self, Self::One | Self::Zero)
433    }
434}
435
436/// Replace None values with a value
437pub trait ChunkFillNullValue<T> {
438    /// Replace None values with a give value `T`.
439    fn fill_null_with_values(&self, value: T) -> PolarsResult<Self>
440    where
441        Self: Sized;
442}
443
444/// Fill a ChunkedArray with one value.
445pub trait ChunkFull<T> {
446    /// Create a ChunkedArray with a single value.
447    fn full(name: PlSmallStr, value: T, length: usize) -> Self
448    where
449        Self: Sized;
450}
451
452pub trait ChunkFullNull {
453    fn full_null(_name: PlSmallStr, _length: usize) -> Self
454    where
455        Self: Sized;
456}
457
/// Reversal of a [`ChunkedArray<T>`].
pub trait ChunkReverse {
    /// Return a reversed version of this array; `self` is only borrowed.
    fn reverse(&self) -> Self;
}
463
464/// Filter values by a boolean mask.
465pub trait ChunkFilter<T: PolarsDataType> {
466    /// Filter values in the ChunkedArray with a boolean mask.
467    ///
468    /// ```rust
469    /// # use polars_core::prelude::*;
470    /// let array = Int32Chunked::new("array".into(), &[1, 2, 3]);
471    /// let mask = BooleanChunked::new("mask".into(), &[true, false, true]);
472    ///
473    /// let filtered = array.filter(&mask).unwrap();
474    /// assert_eq!(Vec::from(&filtered), [Some(1), Some(3)])
475    /// ```
476    fn filter(&self, filter: &BooleanChunked) -> PolarsResult<ChunkedArray<T>>
477    where
478        Self: Sized;
479}
480
481/// Create a new ChunkedArray filled with values at that index.
482pub trait ChunkExpandAtIndex<T: PolarsDataType> {
483    /// Create a new ChunkedArray filled with values at that index.
484    fn new_from_index(&self, index: usize, length: usize) -> ChunkedArray<T>;
485}
486
// Shared body for the `new_from_index` implementations.
//
// An empty `$self` makes the *enclosing function* return a clone of `$self`
// right away (note the `return`). Otherwise the value at `$index` is looked
// up: a present value is repeated through `ChunkedArray::full`, a missing one
// through `ChunkedArray::full_null`; both reuse `$self`'s name and `$length`.
macro_rules! impl_chunk_expand {
    ($self:ident, $length:ident, $index:ident) => {{
        if $self.is_empty() {
            return $self.clone();
        }
        match $self.get($index) {
            None => ChunkedArray::full_null($self.name().clone(), $length),
            Some(val) => ChunkedArray::full($self.name().clone(), val, $length),
        }
    }};
}
499
500impl<T: PolarsNumericType> ChunkExpandAtIndex<T> for ChunkedArray<T>
501where
502    ChunkedArray<T>: ChunkFull<T::Native>,
503{
504    fn new_from_index(&self, index: usize, length: usize) -> ChunkedArray<T> {
505        let mut out = impl_chunk_expand!(self, length, index);
506        out.set_sorted_flag(IsSorted::Ascending);
507        out
508    }
509}
510
511impl ChunkExpandAtIndex<BooleanType> for BooleanChunked {
512    fn new_from_index(&self, index: usize, length: usize) -> BooleanChunked {
513        let mut out = impl_chunk_expand!(self, length, index);
514        out.set_sorted_flag(IsSorted::Ascending);
515        out
516    }
517}
518
519impl ChunkExpandAtIndex<StringType> for StringChunked {
520    fn new_from_index(&self, index: usize, length: usize) -> StringChunked {
521        let mut out = impl_chunk_expand!(self, length, index);
522        out.set_sorted_flag(IsSorted::Ascending);
523        out
524    }
525}
526
527impl ChunkExpandAtIndex<BinaryType> for BinaryChunked {
528    fn new_from_index(&self, index: usize, length: usize) -> BinaryChunked {
529        let mut out = impl_chunk_expand!(self, length, index);
530        out.set_sorted_flag(IsSorted::Ascending);
531        out
532    }
533}
534
535impl ChunkExpandAtIndex<BinaryOffsetType> for BinaryOffsetChunked {
536    fn new_from_index(&self, index: usize, length: usize) -> BinaryOffsetChunked {
537        let mut out = impl_chunk_expand!(self, length, index);
538        out.set_sorted_flag(IsSorted::Ascending);
539        out
540    }
541}
542
543impl ChunkExpandAtIndex<ListType> for ListChunked {
544    fn new_from_index(&self, index: usize, length: usize) -> ListChunked {
545        let opt_val = self.get_as_series(index);
546        match opt_val {
547            Some(val) => {
548                let mut ca = ListChunked::full(self.name().clone(), &val, length);
549                unsafe { ca.to_logical(self.inner_dtype().clone()) };
550                ca
551            },
552            None => {
553                ListChunked::full_null_with_dtype(self.name().clone(), length, self.inner_dtype())
554            },
555        }
556    }
557}
558
#[cfg(feature = "dtype-struct")]
impl ChunkExpandAtIndex<StructType> for StructChunked {
    /// Repeat the struct row (or null) found at `index`, `length` times.
    ///
    /// A null row becomes an all-null array of the chunk's dtype. Otherwise
    /// every field array is expanded on its own and the results are combined
    /// into a single `StructArray` without a validity mask.
    ///
    /// # Panics
    /// Panics when the chunk index computed for `index` is not present in
    /// `self` (the `unwrap` on the chunk lookup), or when a field array
    /// cannot be turned into a `Series`.
    fn new_from_index(&self, index: usize, length: usize) -> ChunkedArray<StructType> {
        let (chunk_idx, idx) = self.index_to_chunked_index(index);
        let source = self.downcast_chunks().get(chunk_idx).unwrap();

        let expanded = if !source.is_null(idx) {
            let fields: Vec<_> = source
                .values()
                .iter()
                .map(|arr| {
                    // Wrap the field in an unnamed Series so its own
                    // `new_from_index` can be reused, then take the first chunk.
                    let field = Series::try_from((PlSmallStr::EMPTY, arr.clone())).unwrap();
                    let field = field.new_from_index(idx, length);
                    field.chunks()[0].clone()
                })
                .collect();

            StructArray::new(source.dtype().clone(), length, fields, None).boxed()
        } else {
            new_null_array(source.dtype().clone(), length)
        };

        // SAFETY: chunks are from self.
        unsafe { self.copy_with_chunks(vec![expanded]) }
    }
}
584
#[cfg(feature = "dtype-array")]
impl ChunkExpandAtIndex<FixedSizeListType> for ArrayChunked {
    /// Repeat the fixed-size list (or null) found at `index`, `length` times.
    ///
    /// A missing row yields an all-null result that keeps `self`'s inner
    /// dtype and width; a present row is repeated and then given `self`'s
    /// inner dtype.
    fn new_from_index(&self, index: usize, length: usize) -> ArrayChunked {
        if let Some(row) = self.get_as_series(index) {
            let mut repeated = ArrayChunked::full(self.name().clone(), &row, length);
            // NOTE(review): `to_logical` is unsafe; presumably sound because the
            // dtype comes from `self`, which also supplied `row` — confirm.
            unsafe { repeated.to_logical(self.inner_dtype().clone()) };
            repeated
        } else {
            ArrayChunked::full_null_with_dtype(
                self.name().clone(),
                length,
                self.inner_dtype(),
                self.width(),
            )
        }
    }
}
604
#[cfg(feature = "object")]
impl<T> ChunkExpandAtIndex<ObjectType<T>> for ObjectChunked<T>
where
    T: PolarsObject,
{
    /// Repeat the object (or null) found at `index`, `length` times.
    ///
    /// A present object is cloned before being handed to `full`.
    fn new_from_index(&self, index: usize, length: usize) -> ObjectChunked<T> {
        match self.get(index) {
            None => ObjectChunked::<T>::full_null(self.name().clone(), length),
            Some(val) => ObjectChunked::<T>::full(self.name().clone(), val.clone(), length),
        }
    }
}
615
616/// Shift the values of a [`ChunkedArray`] by a number of periods.
617pub trait ChunkShiftFill<T: PolarsDataType, V> {
618    /// Shift the values by a given period and fill the parts that will be empty due to this operation
619    /// with `fill_value`.
620    fn shift_and_fill(&self, periods: i64, fill_value: V) -> ChunkedArray<T>;
621}
622
623pub trait ChunkShift<T: PolarsDataType> {
624    fn shift(&self, periods: i64) -> ChunkedArray<T>;
625}
626
627/// Combine two [`ChunkedArray`] based on some predicate.
628pub trait ChunkZip<T: PolarsDataType> {
629    /// Create a new ChunkedArray with values from self where the mask evaluates `true` and values
630    /// from `other` where the mask evaluates `false`
631    fn zip_with(
632        &self,
633        mask: &BooleanChunked,
634        other: &ChunkedArray<T>,
635    ) -> PolarsResult<ChunkedArray<T>>;
636}
637
638/// Apply kernels on the arrow array chunks in a ChunkedArray.
639pub trait ChunkApplyKernel<A: Array> {
640    /// Apply kernel and return result as a new ChunkedArray.
641    #[must_use]
642    fn apply_kernel(&self, f: &dyn Fn(&A) -> ArrayRef) -> Self;
643
644    /// Apply a kernel that outputs an array of different type.
645    fn apply_kernel_cast<S>(&self, f: &dyn Fn(&A) -> ArrayRef) -> ChunkedArray<S>
646    where
647        S: PolarsDataType;
648}
649
/// Mask the first unique values as `true`.
#[cfg(feature = "is_first_distinct")]
pub trait IsFirstDistinct<T>
where
    T: PolarsDataType,
{
    /// Return the boolean mask.
    ///
    /// The default implementation computes nothing: it bails with an
    /// "operation not supported" error for `T`'s dtype.
    fn is_first_distinct(&self) -> PolarsResult<BooleanChunked> {
        polars_bail!(opq = is_first_distinct, T::get_dtype());
    }
}
657
/// Mask the last unique values as `true`.
#[cfg(feature = "is_last_distinct")]
pub trait IsLastDistinct<T>
where
    T: PolarsDataType,
{
    /// Return the boolean mask.
    ///
    /// The default implementation computes nothing: it bails with an
    /// "operation not supported" error for `T`'s dtype.
    fn is_last_distinct(&self) -> PolarsResult<BooleanChunked> {
        polars_bail!(opq = is_last_distinct, T::get_dtype());
    }
}