polars_core/chunked_array/mod.rs

//! The typed heart of every Series column.
#![allow(unsafe_op_in_unsafe_fn)]
use std::iter::Map;
use std::sync::Arc;

use arrow::array::*;
use arrow::bitmap::Bitmap;
use arrow::compute::concatenate::concatenate_unchecked;
use polars_compute::filter::filter_with_bitmap;

use crate::prelude::*;

pub mod ops;
#[macro_use]
pub mod arithmetic;
pub mod builder;
pub mod cast;
pub mod collect;
pub mod comparison;
pub mod flags;
pub mod float;
pub mod iterator;
#[cfg(feature = "ndarray")]
pub(crate) mod ndarray;

#[cfg(feature = "dtype-array")]
pub(crate) mod array;
mod binary;
mod binary_offset;
mod bitwise;
#[cfg(feature = "object")]
mod drop;
mod from;
mod from_iterator;
pub mod from_iterator_par;
pub(crate) mod list;
pub(crate) mod logical;
#[cfg(feature = "object")]
pub mod object;
#[cfg(feature = "random")]
mod random;
#[cfg(feature = "dtype-struct")]
mod struct_;
#[cfg(any(
    feature = "temporal",
    feature = "dtype-datetime",
    feature = "dtype-date"
))]
pub mod temporal;
mod to_vec;
mod trusted_len;

use std::slice::Iter;

use arrow::legacy::prelude::*;
#[cfg(feature = "dtype-struct")]
pub use struct_::StructChunked;

use self::flags::{StatisticsFlags, StatisticsFlagsIM};
use crate::series::IsSorted;
use crate::utils::{first_non_null, last_non_null};

#[cfg(not(feature = "dtype-categorical"))]
pub struct RevMapping {}

pub type ChunkLenIter<'a> = std::iter::Map<std::slice::Iter<'a, ArrayRef>, fn(&ArrayRef) -> usize>;

/// # ChunkedArray
///
/// Every Series contains a [`ChunkedArray<T>`]. Unlike [`Series`], [`ChunkedArray`]s are typed. This allows
/// us to apply closures to the data and collect the results to a [`ChunkedArray`] of the same type `T`.
/// Below we use `apply_values` to apply the cosine function to the values of a [`ChunkedArray`].
///
/// ```rust
/// # use polars_core::prelude::*;
/// fn apply_cosine_and_cast(ca: &Float32Chunked) -> Float32Chunked {
///     ca.apply_values(|v| v.cos())
/// }
/// ```
///
/// ## Conversion between Series and ChunkedArrays
/// Conversion from a [`Series`] to a [`ChunkedArray`] is effortless.
///
/// ```rust
/// # use polars_core::prelude::*;
/// fn to_chunked_array(series: &Series) -> PolarsResult<&Int32Chunked> {
///     series.i32()
/// }
///
/// fn to_series(ca: Int32Chunked) -> Series {
///     ca.into_series()
/// }
/// ```
///
/// # Iterators
///
/// [`ChunkedArray`]s fully support Rust native [Iterator](https://doc.rust-lang.org/std/iter/trait.Iterator.html)
/// and [DoubleEndedIterator](https://doc.rust-lang.org/std/iter/trait.DoubleEndedIterator.html) traits, thereby
/// giving access to all the excellent methods available for [Iterators](https://doc.rust-lang.org/std/iter/trait.Iterator.html).
///
/// ```rust
/// # use polars_core::prelude::*;
///
/// fn iter_forward(ca: &Float32Chunked) {
///     ca.iter()
///         .for_each(|opt_v| println!("{:?}", opt_v))
/// }
///
/// fn iter_backward(ca: &Float32Chunked) {
///     ca.iter()
///         .rev()
///         .for_each(|opt_v| println!("{:?}", opt_v))
/// }
/// ```
///
/// # Memory layout
///
/// [`ChunkedArray`]s use [Apache Arrow](https://github.com/apache/arrow) as the backend for the memory layout.
/// Arrow's memory is immutable, which makes it possible to create multiple zero-copy (sub)-views of a single array.
///
/// To be able to append data, Polars uses chunks to append new memory locations, hence the [`ChunkedArray<T>`] data structure.
/// Appends are cheap because they do not lead to a full reallocation of the whole array (as could be the case with a Rust `Vec`).
///
/// However, multiple chunks in a [`ChunkedArray`] will slow down many operations that need random access, because there is an extra indirection
/// and indexes need to be mapped to the proper chunk. Arithmetic may also be slowed down by this.
/// When multiplying two [`ChunkedArray`]s with different chunk sizes, for instance, they cannot utilize [SIMD](https://en.wikipedia.org/wiki/SIMD).
///
/// If you want predictable performance
/// (no unexpected re-allocation of memory), it is advised to call [`ChunkedArray::rechunk`] after
/// multiple append operations.
///
/// See also [`ChunkedArray::extend`] for appends within a chunk.
///
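/// A rough sketch of the append-then-rechunk pattern (the function name is illustrative;
/// `append` and `rechunk` are the methods referenced above):
///
/// ```rust
/// # use polars_core::prelude::*;
/// fn append_then_rechunk(a: &mut Int32Chunked, b: &Int32Chunked) -> PolarsResult<()> {
///     // Appending only adds `b`'s chunks to `a`; the existing buffers are not copied.
///     a.append(b)?;
///     // Rechunking concatenates the chunks into a single contiguous chunk,
///     // restoring predictable random-access performance.
///     let _contiguous = a.rechunk();
///     Ok(())
/// }
/// ```
///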
/// # Invariants
/// - A [`ChunkedArray`] should always have at least a single [`ArrayRef`].
/// - The [`PolarsDataType`] `T` should always map to the correct [`ArrowDataType`] in the [`ArrayRef`]
///   chunks.
/// - Nested datatypes such as [`List`] and [`Array`] store the physical types instead of the
///   logical type given by the datatype.
///
/// [`List`]: crate::datatypes::DataType::List
pub struct ChunkedArray<T: PolarsDataType> {
    pub(crate) field: Arc<Field>,
    pub(crate) chunks: Vec<ArrayRef>,

    pub(crate) flags: StatisticsFlagsIM,

    length: usize,
    null_count: usize,
    _pd: std::marker::PhantomData<T>,
}

impl<T: PolarsDataType> ChunkedArray<T> {
    fn should_rechunk(&self) -> bool {
        self.chunks.len() > 1 && self.chunks.len() > self.len() / 3
    }

    fn optional_rechunk(mut self) -> Self {
        // Rechunk if we have many small chunks.
        if self.should_rechunk() {
            self.rechunk_mut()
        }
        self
    }

    pub(crate) fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    /// Unpack a [`Series`] to a [`ChunkedArray<T>`], erroring if the dtypes do not match.
    pub fn unpack_series_matching_type<'a>(
        &self,
        series: &'a Series,
    ) -> PolarsResult<&'a ChunkedArray<T>> {
        polars_ensure!(
            self.dtype() == series.dtype(),
            SchemaMismatch: "cannot unpack series of type `{}` into `{}`",
            series.dtype(),
            self.dtype(),
        );

        // SAFETY: dtype will be correct.
        Ok(unsafe { self.unpack_series_matching_physical_type(series) })
    }

    /// Create a new [`ChunkedArray`] and compute its `length` and `null_count`.
    ///
    /// If you want to set the `length` and `null_count` explicitly, look at
    /// [`ChunkedArray::new_with_dims`].
    fn new_with_compute_len(field: Arc<Field>, chunks: Vec<ArrayRef>) -> Self {
        unsafe {
            let mut chunked_arr = Self::new_with_dims(field, chunks, 0, 0);
            chunked_arr.compute_len();
            chunked_arr
        }
    }

    /// Create a new [`ChunkedArray`] and explicitly set its `length` and `null_count`.
    /// # Safety
    /// The length and null_count must be correct.
    pub unsafe fn new_with_dims(
        field: Arc<Field>,
        chunks: Vec<ArrayRef>,
        length: usize,
        null_count: usize,
    ) -> Self {
        Self {
            field,
            chunks,
            flags: StatisticsFlagsIM::empty(),

            _pd: Default::default(),
            length,
            null_count,
        }
    }

    pub(crate) fn is_sorted_ascending_flag(&self) -> bool {
        self.get_flags().is_sorted_ascending()
    }

    pub(crate) fn is_sorted_descending_flag(&self) -> bool {
        self.get_flags().is_sorted_descending()
    }

    /// Whether `self` is sorted in any direction.
    pub(crate) fn is_sorted_any(&self) -> bool {
        self.get_flags().is_sorted_any()
    }

    pub fn unset_fast_explode_list(&mut self) {
        self.set_fast_explode_list(false)
    }

    pub fn set_fast_explode_list(&mut self, value: bool) {
        let mut flags = self.flags.get_mut();
        flags.set(StatisticsFlags::CAN_FAST_EXPLODE_LIST, value);
        self.flags.set_mut(flags);
    }

    pub fn get_fast_explode_list(&self) -> bool {
        self.get_flags().can_fast_explode_list()
    }

    pub fn get_flags(&self) -> StatisticsFlags {
        self.flags.get()
    }

    /// Set flags for the [`ChunkedArray`].
    pub fn set_flags(&mut self, flags: StatisticsFlags) {
        self.flags = StatisticsFlagsIM::new(flags);
    }

    pub fn is_sorted_flag(&self) -> IsSorted {
        self.get_flags().is_sorted()
    }

    pub fn retain_flags_from<U: PolarsDataType>(
        &mut self,
        from: &ChunkedArray<U>,
        retain_flags: StatisticsFlags,
    ) {
        let flags = from.flags.get();
        // Try to avoid write contention.
        if !flags.is_empty() {
            self.set_flags(flags & retain_flags)
        }
    }

    /// Set the 'sorted' bit meta info.
    pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
        let mut flags = self.flags.get_mut();
        flags.set_sorted(sorted);
        self.flags.set_mut(flags);
    }

    /// Return a copy of this [`ChunkedArray`] with the 'sorted' bit meta info set accordingly.
    pub fn with_sorted_flag(&self, sorted: IsSorted) -> Self {
        let mut out = self.clone();
        out.set_sorted_flag(sorted);
        out
    }

    /// Get the index of the first non-null value in this [`ChunkedArray`].
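    ///
    /// A minimal illustrative example (values chosen arbitrarily):
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let ca = Int32Chunked::new(PlSmallStr::from_static("a"), &[None, Some(1), Some(2)]);
    /// assert_eq!(ca.first_non_null(), Some(1));
    /// ```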
    pub fn first_non_null(&self) -> Option<usize> {
        if self.null_count() == self.len() {
            None
        }
        // We now know there is at least 1 non-null item in the array, and self.len() > 0
        else if self.null_count() == 0 {
            Some(0)
        } else if self.is_sorted_any() {
            let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
                // nulls are all at the start
                self.null_count()
            } else {
                // nulls are all at the end
                0
            };

            debug_assert!(
                // If we are lucky this catches something.
                unsafe { self.get_unchecked(out) }.is_some(),
                "incorrect sorted flag"
            );

            Some(out)
        } else {
            first_non_null(self.iter_validities())
        }
    }

    /// Get the index of the last non-null value in this [`ChunkedArray`].
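    ///
    /// A minimal illustrative example (values chosen arbitrarily):
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let ca = Int32Chunked::new(PlSmallStr::from_static("a"), &[Some(1), Some(2), None]);
    /// assert_eq!(ca.last_non_null(), Some(1));
    /// ```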
    pub fn last_non_null(&self) -> Option<usize> {
        if self.null_count() == self.len() {
            None
        }
        // We now know there is at least 1 non-null item in the array, and self.len() > 0
        else if self.null_count() == 0 {
            Some(self.len() - 1)
        } else if self.is_sorted_any() {
            let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
                // nulls are all at the start
                self.len() - 1
            } else {
                // nulls are all at the end
                self.len() - self.null_count() - 1
            };

            debug_assert!(
                // If we are lucky this catches something.
                unsafe { self.get_unchecked(out) }.is_some(),
                "incorrect sorted flag"
            );

            Some(out)
        } else {
            last_non_null(self.iter_validities(), self.len())
        }
    }

    /// Returns a [`ChunkedArray`] with the null values removed.
    pub fn drop_nulls(&self) -> Self {
        if self.null_count() == 0 {
            self.clone()
        } else {
            let chunks = self
                .downcast_iter()
                .map(|arr| {
                    if arr.null_count() == 0 {
                        arr.to_boxed()
                    } else {
                        filter_with_bitmap(arr, arr.validity().unwrap())
                    }
                })
                .collect();
            unsafe {
                Self::new_with_dims(
                    self.field.clone(),
                    chunks,
                    self.len() - self.null_count(),
                    0,
                )
            }
        }
    }

    /// Get an iterator over the validity bitmaps (the buffers of bits representing null values) of the chunks.
    #[inline]
    #[allow(clippy::type_complexity)]
    pub fn iter_validities(&self) -> Map<Iter<'_, ArrayRef>, fn(&ArrayRef) -> Option<&Bitmap>> {
        fn to_validity(arr: &ArrayRef) -> Option<&Bitmap> {
            arr.validity()
        }
        self.chunks.iter().map(to_validity)
    }

    #[inline]
    /// Return whether any of the chunks in this [`ChunkedArray`] have nulls.
    pub fn has_nulls(&self) -> bool {
        self.null_count > 0
    }

    /// Shrink the capacity of this array to fit its length.
    pub fn shrink_to_fit(&mut self) {
        self.chunks = vec![concatenate_unchecked(self.chunks.as_slice()).unwrap()];
    }

    /// Return a new, empty [`ChunkedArray`] with this array's name and dtype.
    pub fn clear(&self) -> Self {
        // SAFETY: we keep the correct dtype
        let mut ca = unsafe {
            self.copy_with_chunks(vec![new_empty_array(
                self.chunks.first().unwrap().dtype().clone(),
            )])
        };

        use StatisticsFlags as F;
        ca.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
        ca
    }

    /// Unpack a [`Series`] to the same physical type.
    ///
    /// # Safety
    ///
    /// This is unsafe as the dtype may be incorrect and
    /// is assumed to be correct in other safe code.
    pub(crate) unsafe fn unpack_series_matching_physical_type<'a>(
        &self,
        series: &'a Series,
    ) -> &'a ChunkedArray<T> {
        let series_trait = &**series;
        if self.dtype() == series.dtype() {
            &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
        } else {
            use DataType::*;
            match (self.dtype(), series.dtype()) {
                (Int64, Datetime(_, _)) | (Int64, Duration(_)) | (Int32, Date) => {
                    &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
                },
                _ => panic!(
                    "cannot unpack series {:?} into matching type {:?}",
                    series,
                    self.dtype()
                ),
            }
        }
    }

    /// Returns an iterator over the lengths of the chunks of the array.
    pub fn chunk_lengths(&self) -> ChunkLenIter<'_> {
        self.chunks.iter().map(|chunk| chunk.len())
    }

    /// A reference to the chunks.
    #[inline]
    pub fn chunks(&self) -> &Vec<ArrayRef> {
        &self.chunks
    }

    /// A mutable reference to the chunks.
    ///
    /// # Safety
    /// The caller must ensure that the [`DataType`] and `length` of the chunks are not changed,
    /// and that the `null_count` remains correct.
    #[inline]
    pub unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
        &mut self.chunks
    }

    /// Returns true if this [`ChunkedArray`] contains a single chunk and has no null values.
    pub fn is_optimal_aligned(&self) -> bool {
        self.chunks.len() == 1 && self.null_count() == 0
    }

    /// Create a new [`ChunkedArray`] from self, where the chunks are replaced.
    ///
    /// # Safety
    /// The caller must ensure the dtypes of the chunks are correct.
    unsafe fn copy_with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
        Self::new_with_compute_len(self.field.clone(), chunks)
    }

    /// Get the data type of this [`ChunkedArray`].
    pub fn dtype(&self) -> &DataType {
        self.field.dtype()
    }

    pub(crate) unsafe fn set_dtype(&mut self, dtype: DataType) {
        self.field = Arc::new(Field::new(self.name().clone(), dtype))
    }

    /// Name of the [`ChunkedArray`].
    pub fn name(&self) -> &PlSmallStr {
        self.field.name()
    }

    /// Get a reference to the field.
    pub fn ref_field(&self) -> &Field {
        &self.field
    }

    /// Rename this [`ChunkedArray`].
    pub fn rename(&mut self, name: PlSmallStr) {
        self.field = Arc::new(Field::new(name, self.field.dtype().clone()));
    }

    /// Return this [`ChunkedArray`] with a new name.
    pub fn with_name(mut self, name: PlSmallStr) -> Self {
        self.rename(name);
        self
    }
}

impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
{
    /// Get a single value from this [`ChunkedArray`]. If the returned value is `None`, this
    /// indicates a NULL value.
    ///
    /// # Panics
    /// This function will panic if `idx` is out of bounds.
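    ///
    /// A minimal illustrative example (values chosen arbitrarily):
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let ca = Int32Chunked::new(PlSmallStr::from_static("a"), &[Some(1), None, Some(3)]);
    /// assert_eq!(ca.get(0), Some(1));
    /// assert_eq!(ca.get(1), None);
    /// ```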
    #[inline]
    pub fn get(&self, idx: usize) -> Option<T::Physical<'_>> {
        let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
        assert!(
            chunk_idx < self.chunks().len(),
            "index: {} out of bounds for len: {}",
            idx,
            self.len()
        );
        unsafe {
            let arr = self.downcast_get_unchecked(chunk_idx);
            assert!(
                arr_idx < arr.len(),
                "index: {} out of bounds for len: {}",
                idx,
                self.len()
            );
            arr.get_unchecked(arr_idx)
        }
    }

    /// Get a single value from this [`ChunkedArray`]. If the returned value is `None`, this
    /// indicates a NULL value.
    ///
    /// # Safety
    /// It is the caller's responsibility to ensure that `idx < self.len()`.
    #[inline]
    pub unsafe fn get_unchecked(&self, idx: usize) -> Option<T::Physical<'_>> {
        let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);

        unsafe {
            // SAFETY: up to the caller to make sure the index is valid.
            self.downcast_get_unchecked(chunk_idx)
                .get_unchecked(arr_idx)
        }
    }

    /// Get a single value from this [`ChunkedArray`]. Null values are ignored and the returned
    /// value could be garbage if it was masked out by NULL. Note that the value is always initialized.
    ///
    /// # Safety
    /// It is the caller's responsibility to ensure that `idx < self.len()`.
    #[inline]
    pub unsafe fn value_unchecked(&self, idx: usize) -> T::Physical<'_> {
        let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);

        unsafe {
            // SAFETY: up to the caller to make sure the index is valid.
            self.downcast_get_unchecked(chunk_idx)
                .value_unchecked(arr_idx)
        }
    }

    #[inline]
    pub fn first(&self) -> Option<T::Physical<'_>> {
        unsafe {
            let arr = self.downcast_get_unchecked(0);
            arr.get_unchecked(0)
        }
    }

    #[inline]
    pub fn last(&self) -> Option<T::Physical<'_>> {
        unsafe {
            let arr = self.downcast_get_unchecked(self.chunks.len().checked_sub(1)?);
            arr.get_unchecked(arr.len().checked_sub(1)?)
        }
    }
}

impl ListChunked {
    #[inline]
    pub fn get_as_series(&self, idx: usize) -> Option<Series> {
        unsafe {
            Some(Series::from_chunks_and_dtype_unchecked(
                self.name().clone(),
                vec![self.get(idx)?],
                &self.inner_dtype().to_physical(),
            ))
        }
    }
}

#[cfg(feature = "dtype-array")]
impl ArrayChunked {
    #[inline]
    pub fn get_as_series(&self, idx: usize) -> Option<Series> {
        unsafe {
            Some(Series::from_chunks_and_dtype_unchecked(
                self.name().clone(),
                vec![self.get(idx)?],
                &self.inner_dtype().to_physical(),
            ))
        }
    }
}

impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
{
    /// Should be used to match the chunk_id of another [`ChunkedArray`].
    /// # Panics
    /// It is the caller's responsibility to ensure that this [`ChunkedArray`] has a single chunk.
    pub fn match_chunks<I>(&self, chunk_id: I) -> Self
    where
        I: Iterator<Item = usize>,
    {
        debug_assert!(self.chunks.len() == 1);
        // Takes a ChunkedArray containing a single chunk.
        let slice = |ca: &Self| {
            let array = &ca.chunks[0];

            let mut offset = 0;
            let chunks = chunk_id
                .map(|len| {
                    // SAFETY: within bounds.
                    debug_assert!((offset + len) <= array.len());
                    let out = unsafe { array.sliced_unchecked(offset, len) };
                    offset += len;
                    out
                })
                .collect();

            debug_assert_eq!(offset, array.len());

            // SAFETY: We just slice the original chunks, their type will not change.
            unsafe {
                Self::from_chunks_and_dtype(self.name().clone(), chunks, self.dtype().clone())
            }
        };

        if self.chunks.len() != 1 {
            let out = self.rechunk();
            slice(&out)
        } else {
            slice(self)
        }
    }
}

impl<T: PolarsDataType> AsRefDataType for ChunkedArray<T> {
    fn as_ref_dtype(&self) -> &DataType {
        self.dtype()
    }
}

pub(crate) trait AsSinglePtr: AsRefDataType {
    /// Rechunk and return a ptr to the start of the array
    fn as_single_ptr(&mut self) -> PolarsResult<usize> {
        polars_bail!(opq = as_single_ptr, self.as_ref_dtype());
    }
}

impl<T> AsSinglePtr for ChunkedArray<T>
where
    T: PolarsNumericType,
{
    fn as_single_ptr(&mut self) -> PolarsResult<usize> {
        self.rechunk_mut();
        let a = self.data_views().next().unwrap();
        let ptr = a.as_ptr();
        Ok(ptr as usize)
    }
}

impl AsSinglePtr for BooleanChunked {}
impl AsSinglePtr for ListChunked {}
#[cfg(feature = "dtype-array")]
impl AsSinglePtr for ArrayChunked {}
impl AsSinglePtr for StringChunked {}
impl AsSinglePtr for BinaryChunked {}
#[cfg(feature = "object")]
impl<T: PolarsObject> AsSinglePtr for ObjectChunked<T> {}

pub enum ChunkedArrayLayout<'a, T: PolarsDataType> {
    SingleNoNull(&'a T::Array),
    Single(&'a T::Array),
    MultiNoNull(&'a ChunkedArray<T>),
    Multi(&'a ChunkedArray<T>),
}

impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
{
    /// Describe the chunk layout of this [`ChunkedArray`]: single or multiple chunks, with or without null values.
    pub fn layout(&self) -> ChunkedArrayLayout<'_, T> {
        if self.chunks.len() == 1 {
            let arr = self.downcast_iter().next().unwrap();
            return if arr.null_count() == 0 {
                ChunkedArrayLayout::SingleNoNull(arr)
            } else {
                ChunkedArrayLayout::Single(arr)
            };
        }

        if self.downcast_iter().all(|a| a.null_count() == 0) {
            ChunkedArrayLayout::MultiNoNull(self)
        } else {
            ChunkedArrayLayout::Multi(self)
        }
    }
}

impl<T> ChunkedArray<T>
where
    T: PolarsNumericType,
{
    /// Returns the values of the array as a contiguous slice.
    ///
    /// Errors if the array consists of more than one chunk or contains null values.
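    ///
    /// A minimal illustrative example (a freshly built array is a single chunk without nulls):
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let ca = Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 2, 3]);
    /// assert_eq!(ca.cont_slice().unwrap(), [1, 2, 3]);
    /// ```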
    pub fn cont_slice(&self) -> PolarsResult<&[T::Native]> {
        polars_ensure!(
            self.chunks.len() == 1 && self.chunks[0].null_count() == 0,
            ComputeError: "chunked array is not contiguous"
        );
        Ok(self.downcast_iter().next().map(|arr| arr.values()).unwrap())
    }

    /// Returns the values of the array as a contiguous mutable slice.
    pub(crate) fn cont_slice_mut(&mut self) -> Option<&mut [T::Native]> {
        if self.chunks.len() == 1 && self.chunks[0].null_count() == 0 {
            // SAFETY: we will not swap the PrimitiveArray.
            let arr = unsafe { self.downcast_iter_mut().next().unwrap() };
            arr.get_mut_values()
        } else {
            None
        }
    }

    /// Get slices of the underlying arrow data.
    /// NOTE: null values should be taken into account by the user of these slices, as they are handled
    /// separately.
    pub fn data_views(&self) -> impl DoubleEndedIterator<Item = &[T::Native]> {
        self.downcast_iter().map(|arr| arr.values().as_slice())
    }

    #[allow(clippy::wrong_self_convention)]
    pub fn into_no_null_iter(
        &self,
    ) -> impl '_ + Send + Sync + ExactSizeIterator<Item = T::Native> + DoubleEndedIterator + TrustedLen
    {
        // `.copied()` was significantly slower in benchmarks; the next call did not inline?
        #[allow(clippy::map_clone)]
        // We know the iterator's length.
        unsafe {
            self.data_views()
                .flatten()
                .map(|v| *v)
                .trust_my_length(self.len())
        }
    }
}

impl<T: PolarsDataType> Clone for ChunkedArray<T> {
    fn clone(&self) -> Self {
        ChunkedArray {
            field: self.field.clone(),
            chunks: self.chunks.clone(),
            flags: self.flags.clone(),

            _pd: Default::default(),
            length: self.length,
            null_count: self.null_count,
        }
    }
}

impl<T: PolarsDataType> AsRef<ChunkedArray<T>> for ChunkedArray<T> {
    fn as_ref(&self) -> &ChunkedArray<T> {
        self
    }
}

impl ValueSize for ListChunked {
    fn get_values_size(&self) -> usize {
        self.chunks
            .iter()
            .fold(0usize, |acc, arr| acc + arr.get_values_size())
    }
}

#[cfg(feature = "dtype-array")]
impl ValueSize for ArrayChunked {
    fn get_values_size(&self) -> usize {
        self.chunks
            .iter()
            .fold(0usize, |acc, arr| acc + arr.get_values_size())
    }
}
impl ValueSize for StringChunked {
    fn get_values_size(&self) -> usize {
        self.chunks
            .iter()
            .fold(0usize, |acc, arr| acc + arr.get_values_size())
    }
}

impl ValueSize for BinaryOffsetChunked {
    fn get_values_size(&self) -> usize {
        self.chunks
            .iter()
            .fold(0usize, |acc, arr| acc + arr.get_values_size())
    }
}

pub(crate) fn to_primitive<T: PolarsNumericType>(
    values: Vec<T::Native>,
    validity: Option<Bitmap>,
) -> PrimitiveArray<T::Native> {
    PrimitiveArray::new(
        T::get_static_dtype().to_arrow(CompatLevel::newest()),
        values.into(),
        validity,
    )
}

pub(crate) fn to_array<T: PolarsNumericType>(
    values: Vec<T::Native>,
    validity: Option<Bitmap>,
) -> ArrayRef {
    Box::new(to_primitive::<T>(values, validity))
}

impl<T: PolarsDataType> Default for ChunkedArray<T> {
    fn default() -> Self {
        let dtype = T::get_static_dtype();
        let arrow_dtype = dtype.to_physical().to_arrow(CompatLevel::newest());
        ChunkedArray {
            field: Arc::new(Field::new(PlSmallStr::EMPTY, dtype)),
            // Invariant: always has 1 chunk.
            chunks: vec![new_empty_array(arrow_dtype)],
            flags: StatisticsFlagsIM::empty(),

            _pd: Default::default(),
            length: 0,
            null_count: 0,
        }
    }
}

#[cfg(test)]
pub(crate) mod test {
    use crate::prelude::*;

    pub(crate) fn get_chunked_array() -> Int32Chunked {
        ChunkedArray::new(PlSmallStr::from_static("a"), &[1, 2, 3])
    }

    #[test]
    fn test_sort() {
        let a = Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 9, 3, 2]);
        let b = a
            .sort(false)
            .into_iter()
            .map(|opt| opt.unwrap())
            .collect::<Vec<_>>();
        assert_eq!(b, [1, 2, 3, 9]);
        let a = StringChunked::new(PlSmallStr::from_static("a"), &["b", "a", "c"]);
        let a = a.sort(false);
        let b = a.into_iter().collect::<Vec<_>>();
        assert_eq!(b, [Some("a"), Some("b"), Some("c")]);
        assert!(a.is_sorted_ascending_flag());
    }

    #[test]
    fn arithmetic() {
        let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 6, 40]);
        let b = &Int32Chunked::new(PlSmallStr::from_static("b"), &[-1, 2, 3, 4]);

        // Not really asserting anything here but still making sure the code is exercised
        // This (and more) is properly tested from the integration test suite and Python bindings.
        println!("{:?}", a + b);
        println!("{:?}", a - b);
        println!("{:?}", a * b);
        println!("{:?}", a / b);
    }

    #[test]
    fn iter() {
        let s1 = get_chunked_array();
        // sum
        assert_eq!(s1.into_iter().fold(0, |acc, val| { acc + val.unwrap() }), 6)
    }

    #[test]
    fn limit() {
        let a = get_chunked_array();
        let b = a.limit(2);
        println!("{b:?}");
        assert_eq!(b.len(), 2)
    }

    #[test]
    fn filter() {
        let a = get_chunked_array();
        let b = a
            .filter(&BooleanChunked::new(
                PlSmallStr::from_static("filter"),
                &[true, false, false],
            ))
            .unwrap();
        assert_eq!(b.len(), 1);
        assert_eq!(b.into_iter().next(), Some(Some(1)));
    }

    #[test]
    fn aggregates() {
        let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 10, 9]);
        assert_eq!(a.max(), Some(100));
        assert_eq!(a.min(), Some(1));
        assert_eq!(a.sum(), Some(120))
    }

    #[test]
    fn take() {
        let a = get_chunked_array();
        let new = a.take(&[0 as IdxSize, 1]).unwrap();
        assert_eq!(new.len(), 2)
    }

    #[test]
    fn cast() {
        let a = get_chunked_array();
        let b = a.cast(&DataType::Int64).unwrap();
        assert_eq!(b.dtype(), &DataType::Int64)
    }

    fn assert_slice_equal<T>(ca: &ChunkedArray<T>, eq: &[T::Native])
    where
        T: PolarsNumericType,
    {
        assert_eq!(ca.iter().map(|opt| opt.unwrap()).collect::<Vec<_>>(), eq)
    }

    #[test]
    fn slice() {
        let mut first = UInt32Chunked::new(PlSmallStr::from_static("first"), &[0, 1, 2]);
        let second = UInt32Chunked::new(PlSmallStr::from_static("second"), &[3, 4, 5]);
        first.append(&second).unwrap();
        assert_slice_equal(&first.slice(0, 3), &[0, 1, 2]);
        assert_slice_equal(&first.slice(0, 4), &[0, 1, 2, 3]);
        assert_slice_equal(&first.slice(1, 4), &[1, 2, 3, 4]);
        assert_slice_equal(&first.slice(3, 2), &[3, 4]);
        assert_slice_equal(&first.slice(3, 3), &[3, 4, 5]);
        assert_slice_equal(&first.slice(-3, 3), &[3, 4, 5]);
        assert_slice_equal(&first.slice(-6, 6), &[0, 1, 2, 3, 4, 5]);

        assert_eq!(first.slice(-7, 2).len(), 1);
        assert_eq!(first.slice(-3, 4).len(), 3);
        assert_eq!(first.slice(3, 4).len(), 3);
        assert_eq!(first.slice(10, 4).len(), 0);
    }

    #[test]
    fn sorting() {
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[9, 2, 4]);
        let sorted = s.sort(false);
        assert_slice_equal(&sorted, &[2, 4, 9]);
        let sorted = s.sort(true);
        assert_slice_equal(&sorted, &[9, 4, 2]);

        let s: StringChunked = ["b", "a", "z"].iter().collect();
        let sorted = s.sort(false);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[Some("a"), Some("b"), Some("z")]
        );
        let sorted = s.sort(true);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[Some("z"), Some("b"), Some("a")]
        );
        let s: StringChunked = [Some("b"), None, Some("z")].iter().copied().collect();
        let sorted = s.sort(false);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[None, Some("b"), Some("z")]
        );
    }

    #[test]
    fn reverse() {
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3]);
        // path with continuous slice
        assert_slice_equal(&s.reverse(), &[3, 2, 1]);
        // path with options
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[Some(1), None, Some(3)]);
        assert_eq!(Vec::from(&s.reverse()), &[Some(3), None, Some(1)]);
        let s = BooleanChunked::new(PlSmallStr::EMPTY, &[true, false]);
        assert_eq!(Vec::from(&s.reverse()), &[Some(false), Some(true)]);

        let s = StringChunked::new(PlSmallStr::EMPTY, &["a", "b", "c"]);
        assert_eq!(Vec::from(&s.reverse()), &[Some("c"), Some("b"), Some("a")]);

        let s = StringChunked::new(PlSmallStr::EMPTY, &[Some("a"), None, Some("c")]);
        assert_eq!(Vec::from(&s.reverse()), &[Some("c"), None, Some("a")]);
    }

    #[test]
    #[cfg(feature = "dtype-categorical")]
    fn test_iter_categorical() {
        let ca = StringChunked::new(
            PlSmallStr::EMPTY,
            &[Some("foo"), None, Some("bar"), Some("ham")],
        );
        let cats = Categories::new(
            PlSmallStr::EMPTY,
            PlSmallStr::EMPTY,
            CategoricalPhysical::U32,
        );
        let ca = ca.cast(&DataType::from_categories(cats)).unwrap();
        let ca = ca.cat32().unwrap();
        let v: Vec<_> = ca.physical().into_iter().collect();
        assert_eq!(v, &[Some(0), None, Some(1), Some(2)]);
    }

    #[test]
    #[ignore]
    fn test_shrink_to_fit() {
        let mut builder = StringChunkedBuilder::new(PlSmallStr::from_static("foo"), 2048);
        builder.append_value("foo");
        let mut arr = builder.finish();
        let before = arr
            .chunks()
            .iter()
            .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
            .sum::<usize>();
        arr.shrink_to_fit();
        let after = arr
            .chunks()
            .iter()
            .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
            .sum::<usize>();
        assert!(before > after);
    }
}