polars_core/chunked_array/mod.rs

//! The typed heart of every Series column.
#![allow(unsafe_op_in_unsafe_fn)]
use std::iter::Map;
use std::sync::Arc;

use arrow::array::*;
use arrow::bitmap::Bitmap;
use arrow::compute::concatenate::concatenate_unchecked;
use polars_compute::filter::filter_with_bitmap;

use crate::prelude::*;

pub mod ops;
#[macro_use]
pub mod arithmetic;
pub mod builder;
pub mod cast;
pub mod collect;
pub mod comparison;
pub mod flags;
pub mod float;
pub mod iterator;
#[cfg(feature = "ndarray")]
pub(crate) mod ndarray;

#[cfg(feature = "dtype-array")]
pub(crate) mod array;
mod binary;
mod bitwise;
#[cfg(feature = "object")]
mod drop;
mod from;
mod from_iterator;
pub mod from_iterator_par;
pub(crate) mod list;
pub(crate) mod logical;
#[cfg(feature = "object")]
pub mod object;
#[cfg(feature = "random")]
mod random;
#[cfg(feature = "dtype-struct")]
mod struct_;
#[cfg(any(
    feature = "temporal",
    feature = "dtype-datetime",
    feature = "dtype-date"
))]
pub mod temporal;
mod to_vec;
mod trusted_len;

use std::slice::Iter;

use arrow::legacy::prelude::*;
#[cfg(feature = "dtype-struct")]
pub use struct_::StructChunked;

use self::flags::{StatisticsFlags, StatisticsFlagsIM};
use crate::series::IsSorted;
use crate::utils::{first_non_null, last_non_null};

#[cfg(not(feature = "dtype-categorical"))]
pub struct RevMapping {}

pub type ChunkLenIter<'a> = std::iter::Map<std::slice::Iter<'a, ArrayRef>, fn(&ArrayRef) -> usize>;

/// # ChunkedArray
///
/// Every Series contains a [`ChunkedArray<T>`]. Unlike [`Series`], [`ChunkedArray`]s are typed. This allows
/// us to apply closures to the data and collect the results to a [`ChunkedArray`] of the same type `T`.
/// Below we use `apply_values` to apply the cosine function to the values of a [`ChunkedArray`].
///
/// ```rust
/// # use polars_core::prelude::*;
/// fn apply_cosine_and_cast(ca: &Float32Chunked) -> Float32Chunked {
///     ca.apply_values(|v| v.cos())
/// }
/// ```
///
/// ## Conversion between Series and ChunkedArrays
/// Conversion from a [`Series`] to a [`ChunkedArray`] is effortless.
///
/// ```rust
/// # use polars_core::prelude::*;
/// fn to_chunked_array(series: &Series) -> PolarsResult<&Int32Chunked> {
///     series.i32()
/// }
///
/// fn to_series(ca: Int32Chunked) -> Series {
///     ca.into_series()
/// }
/// ```
///
/// # Iterators
///
/// [`ChunkedArray`]s fully support the Rust native [Iterator](https://doc.rust-lang.org/std/iter/trait.Iterator.html)
/// and [DoubleEndedIterator](https://doc.rust-lang.org/std/iter/trait.DoubleEndedIterator.html) traits, thereby
/// giving access to all the excellent methods available for [Iterators](https://doc.rust-lang.org/std/iter/trait.Iterator.html).
///
/// ```rust
/// # use polars_core::prelude::*;
///
/// fn iter_forward(ca: &Float32Chunked) {
///     ca.iter()
///         .for_each(|opt_v| println!("{:?}", opt_v))
/// }
///
/// fn iter_backward(ca: &Float32Chunked) {
///     ca.iter()
///         .rev()
///         .for_each(|opt_v| println!("{:?}", opt_v))
/// }
/// ```
///
/// # Memory layout
///
/// [`ChunkedArray`]s use [Apache Arrow](https://github.com/apache/arrow) as the backend for the memory layout.
/// Arrow's memory is immutable, which makes it possible to create multiple zero-copy (sub)views of a single array.
///
/// To be able to append data, Polars uses chunks to append new memory locations, hence the [`ChunkedArray<T>`] data structure.
/// Appends are cheap, because they do not lead to a full reallocation of the whole array (as could be the case with a Rust `Vec`).
///
/// However, multiple chunks in a [`ChunkedArray`] will slow down many operations that need random access, because there is an extra indirection
/// and indices need to be mapped to the proper chunk. Arithmetic may also be slowed down by this.
/// For instance, when multiplying two [`ChunkedArray`]s with different chunk sizes, they cannot utilize [SIMD](https://en.wikipedia.org/wiki/SIMD).
///
/// If you want predictable performance
/// (no unexpected re-allocation of memory), it is advised to call [`ChunkedArray::rechunk`] after
/// multiple append operations.
///
/// See also [`ChunkedArray::extend`] for appends within a chunk.
///
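/// A minimal sketch of appending and then rechunking (the names and values are illustrative only):
///
/// ```rust
/// # use polars_core::prelude::*;
/// let mut ca = UInt32Chunked::new(PlSmallStr::from_static("a"), &[1, 2, 3]);
/// let other = UInt32Chunked::new(PlSmallStr::from_static("a"), &[4, 5]);
///
/// // `append` adds the chunks of `other` as extra chunks instead of reallocating.
/// ca.append(&other).unwrap();
/// assert_eq!(ca.chunks().len(), 2);
///
/// // Rechunk to get a single contiguous chunk again.
/// let rechunked = ca.rechunk();
/// assert_eq!(rechunked.chunks().len(), 1);
/// ```
///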
/// # Invariants
/// - A [`ChunkedArray`] should always have at least a single [`ArrayRef`].
/// - The [`PolarsDataType`] `T` should always map to the correct [`ArrowDataType`] in the [`ArrayRef`]
///   chunks.
/// - Nested datatypes such as [`List`] and [`Array`] store the physical types instead of the
///   logical type given by the datatype.
///
/// [`List`]: crate::datatypes::DataType::List
pub struct ChunkedArray<T: PolarsDataType> {
    pub(crate) field: Arc<Field>,
    pub(crate) chunks: Vec<ArrayRef>,

    pub(crate) flags: StatisticsFlagsIM,

    length: usize,
    null_count: usize,
    _pd: std::marker::PhantomData<T>,
}

impl<T: PolarsDataType> ChunkedArray<T> {
    fn should_rechunk(&self) -> bool {
        self.chunks.len() > 1 && self.chunks.len() > self.len() / 3
    }

    fn optional_rechunk(mut self) -> Self {
        // Rechunk if we have many small chunks.
        if self.should_rechunk() {
            self.rechunk_mut()
        }
        self
    }

    pub(crate) fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    /// Unpack a [`Series`] to a [`ChunkedArray<T>`] of the same logical type as `self`.
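    ///
    /// A minimal usage sketch (the values are illustrative):
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let ca = Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 2, 3]);
    /// let s = Series::new(PlSmallStr::from_static("b"), &[4i32, 5, 6]);
    /// let unpacked: &Int32Chunked = ca.unpack_series_matching_type(&s).unwrap();
    /// assert_eq!(unpacked.len(), 3);
    /// ```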
    pub fn unpack_series_matching_type<'a>(
        &self,
        series: &'a Series,
    ) -> PolarsResult<&'a ChunkedArray<T>> {
        match self.dtype() {
            #[cfg(feature = "dtype-decimal")]
            DataType::Decimal(_, _) => {
                let logical = series.decimal()?;

                let ca = logical.physical();
                Ok(ca.as_any().downcast_ref::<ChunkedArray<T>>().unwrap())
            },
            dt => {
                polars_ensure!(
                    dt == series.dtype(),
                    SchemaMismatch: "cannot unpack series of type `{}` into `{}`",
                    series.dtype(),
                    dt,
                );

                // SAFETY:
                // dtype will be correct.
                Ok(unsafe { self.unpack_series_matching_physical_type(series) })
            },
        }
    }

    /// Create a new [`ChunkedArray`] and compute its `length` and `null_count`.
    ///
    /// If you want to explicitly set the `length` and `null_count`, look at
    /// [`ChunkedArray::new_with_dims`].
    fn new_with_compute_len(field: Arc<Field>, chunks: Vec<ArrayRef>) -> Self {
        unsafe {
            let mut chunked_arr = Self::new_with_dims(field, chunks, 0, 0);
            chunked_arr.compute_len();
            chunked_arr
        }
    }

    /// Create a new [`ChunkedArray`] and explicitly set its `length` and `null_count`.
    ///
    /// # Safety
    /// The `length` and `null_count` must be correct.
    pub unsafe fn new_with_dims(
        field: Arc<Field>,
        chunks: Vec<ArrayRef>,
        length: usize,
        null_count: usize,
    ) -> Self {
        Self {
            field,
            chunks,
            flags: StatisticsFlagsIM::empty(),

            _pd: Default::default(),
            length,
            null_count,
        }
    }

    pub(crate) fn is_sorted_ascending_flag(&self) -> bool {
        self.get_flags().is_sorted_ascending()
    }

    pub(crate) fn is_sorted_descending_flag(&self) -> bool {
        self.get_flags().is_sorted_descending()
    }

    /// Whether `self` is sorted in any direction.
    pub(crate) fn is_sorted_any(&self) -> bool {
        self.get_flags().is_sorted_any()
    }

    pub fn unset_fast_explode_list(&mut self) {
        self.set_fast_explode_list(false)
    }

    pub fn set_fast_explode_list(&mut self, value: bool) {
        let mut flags = self.flags.get_mut();
        flags.set(StatisticsFlags::CAN_FAST_EXPLODE_LIST, value);
        self.flags.set_mut(flags);
    }

    pub fn get_fast_explode_list(&self) -> bool {
        self.get_flags().can_fast_explode_list()
    }

    pub fn get_flags(&self) -> StatisticsFlags {
        self.flags.get()
    }

    /// Set the flags for this [`ChunkedArray`].
    pub(crate) fn set_flags(&mut self, flags: StatisticsFlags) {
        self.flags = StatisticsFlagsIM::new(flags);
    }

    pub fn is_sorted_flag(&self) -> IsSorted {
        self.get_flags().is_sorted()
    }

    pub fn retain_flags_from<U: PolarsDataType>(
        &mut self,
        from: &ChunkedArray<U>,
        retain_flags: StatisticsFlags,
    ) {
        let flags = from.flags.get();
        // Try to avoid write contention.
        if !flags.is_empty() {
            self.set_flags(flags & retain_flags)
        }
    }

    /// Set the 'sorted' bit meta info.
    pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
        let mut flags = self.flags.get_mut();
        flags.set_sorted(sorted);
        self.flags.set_mut(flags);
    }

    /// Return a copy of `self` with the 'sorted' bit meta info set.
    pub fn with_sorted_flag(&self, sorted: IsSorted) -> Self {
        let mut out = self.clone();
        out.set_sorted_flag(sorted);
        out
    }

    /// Get the index of the first non-null value in this [`ChunkedArray`].
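    ///
    /// A minimal sketch (the values are illustrative):
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let ca = Int32Chunked::new(PlSmallStr::from_static("a"), &[None, Some(2), Some(3)]);
    /// assert_eq!(ca.first_non_null(), Some(1));
    /// ```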
    pub fn first_non_null(&self) -> Option<usize> {
        if self.null_count() == self.len() {
            None
        }
        // We now know there is at least 1 non-null item in the array, and self.len() > 0
        else if self.null_count() == 0 {
            Some(0)
        } else if self.is_sorted_any() {
            let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
                // nulls are all at the start
                self.null_count()
            } else {
                // nulls are all at the end
                0
            };

            debug_assert!(
                // If we are lucky this catches something.
                unsafe { self.get_unchecked(out) }.is_some(),
                "incorrect sorted flag"
            );

            Some(out)
        } else {
            first_non_null(self.iter_validities())
        }
    }

    /// Get the index of the last non-null value in this [`ChunkedArray`].
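    ///
    /// A minimal sketch (the values are illustrative):
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let ca = Int32Chunked::new(PlSmallStr::from_static("a"), &[Some(1), Some(2), None]);
    /// assert_eq!(ca.last_non_null(), Some(1));
    /// ```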
    pub fn last_non_null(&self) -> Option<usize> {
        if self.null_count() == self.len() {
            None
        }
        // We now know there is at least 1 non-null item in the array, and self.len() > 0
        else if self.null_count() == 0 {
            Some(self.len() - 1)
        } else if self.is_sorted_any() {
            let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
                // nulls are all at the start
                self.len() - 1
            } else {
                // nulls are all at the end
                self.len() - self.null_count() - 1
            };

            debug_assert!(
                // If we are lucky this catches something.
                unsafe { self.get_unchecked(out) }.is_some(),
                "incorrect sorted flag"
            );

            Some(out)
        } else {
            last_non_null(self.iter_validities(), self.len())
        }
    }

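    /// Remove all null values from this [`ChunkedArray`], returning an array that only contains
    /// the valid values. A minimal sketch (the values are illustrative):
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let ca = Int32Chunked::new(PlSmallStr::from_static("a"), &[Some(1), None, Some(3)]);
    /// let dropped = ca.drop_nulls();
    /// assert_eq!(dropped.len(), 2);
    /// assert_eq!(dropped.null_count(), 0);
    /// ```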
    pub fn drop_nulls(&self) -> Self {
        if self.null_count() == 0 {
            self.clone()
        } else {
            let chunks = self
                .downcast_iter()
                .map(|arr| {
                    if arr.null_count() == 0 {
                        arr.to_boxed()
                    } else {
                        filter_with_bitmap(arr, arr.validity().unwrap())
                    }
                })
                .collect();
            unsafe {
                Self::new_with_dims(
                    self.field.clone(),
                    chunks,
                    self.len() - self.null_count(),
                    0,
                )
            }
        }
    }

    /// Get the buffers of bits representing the null values: one validity [`Bitmap`] per chunk.
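    ///
    /// A sketch of counting the nulls per chunk from the validity bitmaps (the values are
    /// illustrative):
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let ca = Int32Chunked::new(PlSmallStr::from_static("a"), &[Some(1), None, Some(3)]);
    /// let nulls_per_chunk: Vec<usize> = ca
    ///     .iter_validities()
    ///     .map(|validity| validity.map_or(0, |bitmap| bitmap.unset_bits()))
    ///     .collect();
    /// assert_eq!(nulls_per_chunk, vec![1]);
    /// ```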
    #[inline]
    #[allow(clippy::type_complexity)]
    pub fn iter_validities(&self) -> Map<Iter<'_, ArrayRef>, fn(&ArrayRef) -> Option<&Bitmap>> {
        fn to_validity(arr: &ArrayRef) -> Option<&Bitmap> {
            arr.validity()
        }
        self.chunks.iter().map(to_validity)
    }

    #[inline]
    /// Returns whether any of the chunks in this [`ChunkedArray`] contain null values.
    pub fn has_nulls(&self) -> bool {
        self.null_count > 0
    }

    /// Shrink the capacity of this array to fit its length.
    pub fn shrink_to_fit(&mut self) {
        self.chunks = vec![concatenate_unchecked(self.chunks.as_slice()).unwrap()];
    }

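    /// Return an empty [`ChunkedArray`] with this array's name and dtype. A minimal sketch
    /// (the values are illustrative):
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let ca = Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 2, 3]);
    /// let empty = ca.clear();
    /// assert_eq!(empty.len(), 0);
    /// assert_eq!(empty.dtype(), ca.dtype());
    /// assert_eq!(empty.name(), ca.name());
    /// ```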
    pub fn clear(&self) -> Self {
        // SAFETY: we keep the correct dtype.
        let mut ca = unsafe {
            self.copy_with_chunks(vec![new_empty_array(
                self.chunks.first().unwrap().dtype().clone(),
            )])
        };

        use StatisticsFlags as F;
        ca.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
        ca
    }

    /// Unpack a [`Series`] to the same physical type.
    ///
    /// # Safety
    ///
    /// This is unsafe as the dtype may be incorrect and
    /// is assumed to be correct in other safe code.
    pub(crate) unsafe fn unpack_series_matching_physical_type<'a>(
        &self,
        series: &'a Series,
    ) -> &'a ChunkedArray<T> {
        let series_trait = &**series;
        if self.dtype() == series.dtype() {
            &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
        } else {
            use DataType::*;
            match (self.dtype(), series.dtype()) {
                (Int64, Datetime(_, _)) | (Int64, Duration(_)) | (Int32, Date) => {
                    &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
                },
                _ => panic!(
                    "cannot unpack series {:?} into matching type {:?}",
                    series,
                    self.dtype()
                ),
            }
        }
    }

    /// Returns an iterator over the lengths of the chunks of the array.
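    ///
    /// A minimal sketch (the values are illustrative):
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut ca = UInt32Chunked::new(PlSmallStr::from_static("a"), &[1, 2, 3]);
    /// let other = UInt32Chunked::new(PlSmallStr::from_static("a"), &[4, 5]);
    /// ca.append(&other).unwrap();
    /// assert_eq!(ca.chunk_lengths().collect::<Vec<_>>(), vec![3, 2]);
    /// ```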
    pub fn chunk_lengths(&self) -> ChunkLenIter {
        self.chunks.iter().map(|chunk| chunk.len())
    }

    /// A reference to the chunks.
    #[inline]
    pub fn chunks(&self) -> &Vec<ArrayRef> {
        &self.chunks
    }

    /// A mutable reference to the chunks.
    ///
    /// # Safety
    /// The caller must ensure that the [`DataType`] and `length` of the chunks are not changed,
    /// and that the `null_count` remains correct.
    #[inline]
    pub unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
        &mut self.chunks
    }

    /// Returns true if this [`ChunkedArray`] contains a single chunk and has no null values.
    pub fn is_optimal_aligned(&self) -> bool {
        self.chunks.len() == 1 && self.null_count() == 0
    }

    /// Create a new [`ChunkedArray`] from self, where the chunks are replaced.
    ///
    /// # Safety
    /// The caller must ensure the dtypes of the chunks are correct.
    unsafe fn copy_with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
        Self::new_with_compute_len(self.field.clone(), chunks)
    }

    /// Get the data type of this [`ChunkedArray`].
    pub fn dtype(&self) -> &DataType {
        self.field.dtype()
    }

    pub(crate) unsafe fn set_dtype(&mut self, dtype: DataType) {
        self.field = Arc::new(Field::new(self.name().clone(), dtype))
    }

    /// Name of the [`ChunkedArray`].
    pub fn name(&self) -> &PlSmallStr {
        self.field.name()
    }

    /// Get a reference to the field.
    pub fn ref_field(&self) -> &Field {
        &self.field
    }

    /// Rename this [`ChunkedArray`].
    pub fn rename(&mut self, name: PlSmallStr) {
        self.field = Arc::new(Field::new(name, self.field.dtype().clone()));
    }

    /// Return this [`ChunkedArray`] with a new name.
    pub fn with_name(mut self, name: PlSmallStr) -> Self {
        self.rename(name);
        self
    }
}

impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
{
    /// Get a single value from this [`ChunkedArray`]. If the returned value is `None`, this
    /// indicates a NULL value.
    ///
    /// # Panics
    /// This function will panic if `idx` is out of bounds.
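    ///
    /// A minimal sketch (the values are illustrative):
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let ca = Int32Chunked::new(PlSmallStr::from_static("a"), &[Some(1), None, Some(3)]);
    /// assert_eq!(ca.get(0), Some(1));
    /// assert_eq!(ca.get(1), None);
    /// ```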
    #[inline]
    pub fn get(&self, idx: usize) -> Option<T::Physical<'_>> {
        let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
        assert!(
            chunk_idx < self.chunks().len(),
            "index: {} out of bounds for len: {}",
            idx,
            self.len()
        );
        unsafe {
            let arr = self.downcast_get_unchecked(chunk_idx);
            assert!(
                arr_idx < arr.len(),
                "index: {} out of bounds for len: {}",
                idx,
                self.len()
            );
            arr.get_unchecked(arr_idx)
        }
    }

    /// Get a single value from this [`ChunkedArray`]. If the returned value is `None`, this
    /// indicates a NULL value.
    ///
    /// # Safety
    /// It is the caller's responsibility to ensure that `idx < self.len()`.
    #[inline]
    pub unsafe fn get_unchecked(&self, idx: usize) -> Option<T::Physical<'_>> {
        let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);

        unsafe {
            // SAFETY: up to the caller to make sure the index is valid.
            self.downcast_get_unchecked(chunk_idx)
                .get_unchecked(arr_idx)
        }
    }

    /// Get a single value from this [`ChunkedArray`]. Null values are ignored and the returned
    /// value could be garbage if it was masked out by NULL. Note that the value is always initialized.
    ///
    /// # Safety
    /// It is the caller's responsibility to ensure that `idx < self.len()`.
    #[inline]
    pub unsafe fn value_unchecked(&self, idx: usize) -> T::Physical<'_> {
        let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);

        unsafe {
            // SAFETY: up to the caller to make sure the index is valid.
            self.downcast_get_unchecked(chunk_idx)
                .value_unchecked(arr_idx)
        }
    }

    #[inline]
    pub fn first(&self) -> Option<T::Physical<'_>> {
        unsafe {
            let arr = self.downcast_get_unchecked(0);
            arr.get_unchecked(0)
        }
    }

    #[inline]
    pub fn last(&self) -> Option<T::Physical<'_>> {
        unsafe {
            let arr = self.downcast_get_unchecked(self.chunks.len().checked_sub(1)?);
            arr.get_unchecked(arr.len().checked_sub(1)?)
        }
    }
}

impl ListChunked {
    #[inline]
    pub fn get_as_series(&self, idx: usize) -> Option<Series> {
        unsafe {
            Some(Series::from_chunks_and_dtype_unchecked(
                self.name().clone(),
                vec![self.get(idx)?],
                &self.inner_dtype().to_physical(),
            ))
        }
    }
}

#[cfg(feature = "dtype-array")]
impl ArrayChunked {
    #[inline]
    pub fn get_as_series(&self, idx: usize) -> Option<Series> {
        unsafe {
            Some(Series::from_chunks_and_dtype_unchecked(
                self.name().clone(),
                vec![self.get(idx)?],
                &self.inner_dtype().to_physical(),
            ))
        }
    }
}

impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
{
    /// Should be used to match the chunk_id of another [`ChunkedArray`].
    /// # Panics
    /// It is the caller's responsibility to ensure that this [`ChunkedArray`] has a single chunk.
    pub(crate) fn match_chunks<I>(&self, chunk_id: I) -> Self
    where
        I: Iterator<Item = usize>,
    {
        debug_assert!(self.chunks.len() == 1);
        // Takes a ChunkedArray containing a single chunk.
        let slice = |ca: &Self| {
            let array = &ca.chunks[0];

            let mut offset = 0;
            let chunks = chunk_id
                .map(|len| {
                    // SAFETY: within bounds.
                    debug_assert!((offset + len) <= array.len());
                    let out = unsafe { array.sliced_unchecked(offset, len) };
                    offset += len;
                    out
                })
                .collect();

            debug_assert_eq!(offset, array.len());

            // SAFETY: We just slice the original chunks, their type will not change.
            unsafe {
                Self::from_chunks_and_dtype(self.name().clone(), chunks, self.dtype().clone())
            }
        };

        if self.chunks.len() != 1 {
            let out = self.rechunk();
            slice(&out)
        } else {
            slice(self)
        }
    }
}

impl<T: PolarsDataType> AsRefDataType for ChunkedArray<T> {
    fn as_ref_dtype(&self) -> &DataType {
        self.dtype()
    }
}

pub(crate) trait AsSinglePtr: AsRefDataType {
    /// Rechunk and return a pointer to the start of the array.
    fn as_single_ptr(&mut self) -> PolarsResult<usize> {
        polars_bail!(opq = as_single_ptr, self.as_ref_dtype());
    }
}

impl<T> AsSinglePtr for ChunkedArray<T>
where
    T: PolarsNumericType,
{
    fn as_single_ptr(&mut self) -> PolarsResult<usize> {
        self.rechunk_mut();
        let a = self.data_views().next().unwrap();
        let ptr = a.as_ptr();
        Ok(ptr as usize)
    }
}

impl AsSinglePtr for BooleanChunked {}
impl AsSinglePtr for ListChunked {}
#[cfg(feature = "dtype-array")]
impl AsSinglePtr for ArrayChunked {}
impl AsSinglePtr for StringChunked {}
impl AsSinglePtr for BinaryChunked {}
#[cfg(feature = "object")]
impl<T: PolarsObject> AsSinglePtr for ObjectChunked<T> {}

pub enum ChunkedArrayLayout<'a, T: PolarsDataType> {
    SingleNoNull(&'a T::Array),
    Single(&'a T::Array),
    MultiNoNull(&'a ChunkedArray<T>),
    Multi(&'a ChunkedArray<T>),
}

impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
{
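    /// Classify the chunk/null layout of this [`ChunkedArray`]. A minimal sketch of matching on
    /// the result (the helper function is illustrative only):
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// use polars_core::chunked_array::ChunkedArrayLayout;
    ///
    /// fn describe<T: PolarsDataType>(ca: &ChunkedArray<T>) -> &'static str {
    ///     match ca.layout() {
    ///         ChunkedArrayLayout::SingleNoNull(_) => "single chunk, no nulls",
    ///         ChunkedArrayLayout::Single(_) => "single chunk, with nulls",
    ///         ChunkedArrayLayout::MultiNoNull(_) => "multiple chunks, no nulls",
    ///         ChunkedArrayLayout::Multi(_) => "multiple chunks, with nulls",
    ///     }
    /// }
    ///
    /// let ca = Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 2, 3]);
    /// assert_eq!(describe(&ca), "single chunk, no nulls");
    /// ```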
    pub fn layout(&self) -> ChunkedArrayLayout<'_, T> {
        if self.chunks.len() == 1 {
            let arr = self.downcast_iter().next().unwrap();
            return if arr.null_count() == 0 {
                ChunkedArrayLayout::SingleNoNull(arr)
            } else {
                ChunkedArrayLayout::Single(arr)
            };
        }

        if self.downcast_iter().all(|a| a.null_count() == 0) {
            ChunkedArrayLayout::MultiNoNull(self)
        } else {
            ChunkedArrayLayout::Multi(self)
        }
    }
}

impl<T> ChunkedArray<T>
where
    T: PolarsNumericType,
{
    /// Returns the values of the array as a contiguous slice. Errors if the array is not a
    /// single chunk without null values.
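    ///
    /// A minimal sketch (a freshly created array is a single chunk without nulls, so this
    /// succeeds):
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let ca = Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 2, 3]);
    /// assert_eq!(ca.cont_slice().unwrap(), &[1, 2, 3]);
    /// ```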
    pub fn cont_slice(&self) -> PolarsResult<&[T::Native]> {
        polars_ensure!(
            self.chunks.len() == 1 && self.chunks[0].null_count() == 0,
            ComputeError: "chunked array is not contiguous"
        );
        Ok(self.downcast_iter().next().map(|arr| arr.values()).unwrap())
    }

    /// Returns the values of the array as a contiguous mutable slice.
    pub(crate) fn cont_slice_mut(&mut self) -> Option<&mut [T::Native]> {
        if self.chunks.len() == 1 && self.chunks[0].null_count() == 0 {
            // SAFETY: we will not swap the PrimitiveArray.
            let arr = unsafe { self.downcast_iter_mut().next().unwrap() };
            arr.get_mut_values()
        } else {
            None
        }
    }

    /// Get slices of the underlying arrow data.
    /// NOTE: null values should be taken into account by the user of these slices, as they are
    /// handled separately.
    pub fn data_views(&self) -> impl DoubleEndedIterator<Item = &[T::Native]> {
        self.downcast_iter().map(|arr| arr.values().as_slice())
    }

    #[allow(clippy::wrong_self_convention)]
    pub fn into_no_null_iter(
        &self,
    ) -> impl '_ + Send + Sync + ExactSizeIterator<Item = T::Native> + DoubleEndedIterator + TrustedLen
    {
        // `.copied()` was significantly slower in benchmarks; the next call did not inline?
        #[allow(clippy::map_clone)]
        // We know the iterator's length.
        unsafe {
            self.data_views()
                .flatten()
                .map(|v| *v)
                .trust_my_length(self.len())
        }
    }
}

impl<T: PolarsDataType> Clone for ChunkedArray<T> {
    fn clone(&self) -> Self {
        ChunkedArray {
            field: self.field.clone(),
            chunks: self.chunks.clone(),
            flags: self.flags.clone(),

            _pd: Default::default(),
            length: self.length,
            null_count: self.null_count,
        }
    }
}

impl<T: PolarsDataType> AsRef<ChunkedArray<T>> for ChunkedArray<T> {
    fn as_ref(&self) -> &ChunkedArray<T> {
        self
    }
}

impl ValueSize for ListChunked {
    fn get_values_size(&self) -> usize {
        self.chunks
            .iter()
            .fold(0usize, |acc, arr| acc + arr.get_values_size())
    }
}

#[cfg(feature = "dtype-array")]
impl ValueSize for ArrayChunked {
    fn get_values_size(&self) -> usize {
        self.chunks
            .iter()
            .fold(0usize, |acc, arr| acc + arr.get_values_size())
    }
}

impl ValueSize for StringChunked {
    fn get_values_size(&self) -> usize {
        self.chunks
            .iter()
            .fold(0usize, |acc, arr| acc + arr.get_values_size())
    }
}

impl ValueSize for BinaryOffsetChunked {
    fn get_values_size(&self) -> usize {
        self.chunks
            .iter()
            .fold(0usize, |acc, arr| acc + arr.get_values_size())
    }
}

pub(crate) fn to_primitive<T: PolarsNumericType>(
    values: Vec<T::Native>,
    validity: Option<Bitmap>,
) -> PrimitiveArray<T::Native> {
    PrimitiveArray::new(
        T::get_dtype().to_arrow(CompatLevel::newest()),
        values.into(),
        validity,
    )
}

pub(crate) fn to_array<T: PolarsNumericType>(
    values: Vec<T::Native>,
    validity: Option<Bitmap>,
) -> ArrayRef {
    Box::new(to_primitive::<T>(values, validity))
}

impl<T: PolarsDataType> Default for ChunkedArray<T> {
    fn default() -> Self {
        let dtype = T::get_dtype();
        let arrow_dtype = dtype.to_physical().to_arrow(CompatLevel::newest());
        ChunkedArray {
            field: Arc::new(Field::new(PlSmallStr::EMPTY, dtype)),
            // Invariant: always has 1 chunk.
            chunks: vec![new_empty_array(arrow_dtype)],
            flags: StatisticsFlagsIM::empty(),

            _pd: Default::default(),
            length: 0,
            null_count: 0,
        }
    }
}

#[cfg(test)]
pub(crate) mod test {
    use crate::prelude::*;

    pub(crate) fn get_chunked_array() -> Int32Chunked {
        ChunkedArray::new(PlSmallStr::from_static("a"), &[1, 2, 3])
    }

    #[test]
    fn test_sort() {
        let a = Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 9, 3, 2]);
        let b = a
            .sort(false)
            .into_iter()
            .map(|opt| opt.unwrap())
            .collect::<Vec<_>>();
        assert_eq!(b, [1, 2, 3, 9]);
        let a = StringChunked::new(PlSmallStr::from_static("a"), &["b", "a", "c"]);
        let a = a.sort(false);
        let b = a.into_iter().collect::<Vec<_>>();
        assert_eq!(b, [Some("a"), Some("b"), Some("c")]);
        assert!(a.is_sorted_ascending_flag());
    }

    #[test]
    fn arithmetic() {
        let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 6, 40]);
        let b = &Int32Chunked::new(PlSmallStr::from_static("b"), &[-1, 2, 3, 4]);

        // Not really asserting anything here, but still making sure the code is exercised.
        // This (and more) is properly tested in the integration test suite and Python bindings.
        println!("{:?}", a + b);
        println!("{:?}", a - b);
        println!("{:?}", a * b);
        println!("{:?}", a / b);
    }

    #[test]
    fn iter() {
        let s1 = get_chunked_array();
        // sum
        assert_eq!(s1.into_iter().fold(0, |acc, val| { acc + val.unwrap() }), 6)
    }

    #[test]
    fn limit() {
        let a = get_chunked_array();
        let b = a.limit(2);
        println!("{:?}", b);
        assert_eq!(b.len(), 2)
    }

    #[test]
    fn filter() {
        let a = get_chunked_array();
        let b = a
            .filter(&BooleanChunked::new(
                PlSmallStr::from_static("filter"),
                &[true, false, false],
            ))
            .unwrap();
        assert_eq!(b.len(), 1);
        assert_eq!(b.into_iter().next(), Some(Some(1)));
    }

    #[test]
    fn aggregates() {
        let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 10, 9]);
        assert_eq!(a.max(), Some(100));
        assert_eq!(a.min(), Some(1));
        assert_eq!(a.sum(), Some(120))
    }

    #[test]
    fn take() {
        let a = get_chunked_array();
        let new = a.take(&[0 as IdxSize, 1]).unwrap();
        assert_eq!(new.len(), 2)
    }

    #[test]
    fn cast() {
        let a = get_chunked_array();
        let b = a.cast(&DataType::Int64).unwrap();
        assert_eq!(b.dtype(), &DataType::Int64)
    }

    fn assert_slice_equal<T>(ca: &ChunkedArray<T>, eq: &[T::Native])
    where
        T: PolarsNumericType,
    {
        assert_eq!(ca.iter().map(|opt| opt.unwrap()).collect::<Vec<_>>(), eq)
    }

    #[test]
    fn slice() {
        let mut first = UInt32Chunked::new(PlSmallStr::from_static("first"), &[0, 1, 2]);
        let second = UInt32Chunked::new(PlSmallStr::from_static("second"), &[3, 4, 5]);
        first.append(&second).unwrap();
        assert_slice_equal(&first.slice(0, 3), &[0, 1, 2]);
        assert_slice_equal(&first.slice(0, 4), &[0, 1, 2, 3]);
        assert_slice_equal(&first.slice(1, 4), &[1, 2, 3, 4]);
        assert_slice_equal(&first.slice(3, 2), &[3, 4]);
        assert_slice_equal(&first.slice(3, 3), &[3, 4, 5]);
        assert_slice_equal(&first.slice(-3, 3), &[3, 4, 5]);
        assert_slice_equal(&first.slice(-6, 6), &[0, 1, 2, 3, 4, 5]);

        assert_eq!(first.slice(-7, 2).len(), 1);
        assert_eq!(first.slice(-3, 4).len(), 3);
        assert_eq!(first.slice(3, 4).len(), 3);
        assert_eq!(first.slice(10, 4).len(), 0);
    }

    #[test]
    fn sorting() {
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[9, 2, 4]);
        let sorted = s.sort(false);
        assert_slice_equal(&sorted, &[2, 4, 9]);
        let sorted = s.sort(true);
        assert_slice_equal(&sorted, &[9, 4, 2]);

        let s: StringChunked = ["b", "a", "z"].iter().collect();
        let sorted = s.sort(false);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[Some("a"), Some("b"), Some("z")]
        );
        let sorted = s.sort(true);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[Some("z"), Some("b"), Some("a")]
        );
        let s: StringChunked = [Some("b"), None, Some("z")].iter().copied().collect();
        let sorted = s.sort(false);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[None, Some("b"), Some("z")]
        );
    }

    #[test]
    fn reverse() {
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3]);
        // path with continuous slice
        assert_slice_equal(&s.reverse(), &[3, 2, 1]);
        // path with options
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[Some(1), None, Some(3)]);
        assert_eq!(Vec::from(&s.reverse()), &[Some(3), None, Some(1)]);
        let s = BooleanChunked::new(PlSmallStr::EMPTY, &[true, false]);
        assert_eq!(Vec::from(&s.reverse()), &[Some(false), Some(true)]);

        let s = StringChunked::new(PlSmallStr::EMPTY, &["a", "b", "c"]);
        assert_eq!(Vec::from(&s.reverse()), &[Some("c"), Some("b"), Some("a")]);

        let s = StringChunked::new(PlSmallStr::EMPTY, &[Some("a"), None, Some("c")]);
        assert_eq!(Vec::from(&s.reverse()), &[Some("c"), None, Some("a")]);
    }

    #[test]
    #[cfg(feature = "dtype-categorical")]
    fn test_iter_categorical() {
        use crate::{SINGLE_LOCK, disable_string_cache};
        let _lock = SINGLE_LOCK.lock();
        disable_string_cache();
        let ca = StringChunked::new(
            PlSmallStr::EMPTY,
            &[Some("foo"), None, Some("bar"), Some("ham")],
        );
        let ca = ca
            .cast(&DataType::Categorical(None, Default::default()))
            .unwrap();
        let ca = ca.categorical().unwrap();
        let v: Vec<_> = ca.physical().into_iter().collect();
        assert_eq!(v, &[Some(0), None, Some(1), Some(2)]);
    }

    #[test]
    #[ignore]
    fn test_shrink_to_fit() {
        let mut builder = StringChunkedBuilder::new(PlSmallStr::from_static("foo"), 2048);
        builder.append_value("foo");
        let mut arr = builder.finish();
        let before = arr
            .chunks()
            .iter()
            .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
            .sum::<usize>();
        arr.shrink_to_fit();
        let after = arr
            .chunks()
            .iter()
            .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
            .sum::<usize>();
        assert!(before > after);
    }
}