1#![allow(unsafe_op_in_unsafe_fn)]
3use std::sync::Arc;
4
5use arrow::array::*;
6use arrow::bitmap::Bitmap;
7use arrow::compute::concatenate::concatenate_unchecked;
8use arrow::compute::utils::combine_validities_and;
9use polars_compute::filter::filter_with_bitmap;
10
11use crate::prelude::{ChunkTakeUnchecked, *};
12
13pub mod ops;
14#[macro_use]
15pub mod arithmetic;
16pub mod builder;
17pub mod cast;
18pub mod collect;
19pub mod comparison;
20pub mod flags;
21pub mod float;
22pub mod iterator;
23#[cfg(feature = "ndarray")]
24pub(crate) mod ndarray;
25
26pub mod arg_min_max;
27#[cfg(feature = "dtype-array")]
28pub(crate) mod array;
29mod binary;
30mod binary_offset;
31mod bitwise;
32#[cfg(feature = "object")]
33mod drop;
34mod from;
35mod from_iterator;
36pub mod from_iterator_par;
37pub(crate) mod list;
38pub(crate) mod logical;
39#[cfg(feature = "object")]
40pub mod object;
41#[cfg(feature = "random")]
42mod random;
43#[cfg(feature = "dtype-struct")]
44mod struct_;
45#[cfg(any(
46 feature = "temporal",
47 feature = "dtype-datetime",
48 feature = "dtype-date"
49))]
50pub mod temporal;
51mod to_vec;
52mod trusted_len;
53pub(crate) use arg_min_max::*;
54#[cfg(feature = "dtype-struct")]
55pub use struct_::StructChunked;
56
57use self::flags::{StatisticsFlags, StatisticsFlagsIM};
58use crate::series::IsSorted;
59use crate::utils::{first_non_null, first_null, last_non_null};
60
61pub type ChunkLenIter<'a> = std::iter::Map<std::slice::Iter<'a, ArrayRef>, fn(&ArrayRef) -> usize>;
62
63pub struct ChunkedArray<T: PolarsDataType> {
138 pub(crate) field: Arc<Field>,
139 pub(crate) chunks: Vec<ArrayRef>,
140
141 pub(crate) flags: StatisticsFlagsIM,
142
143 length: usize,
144 null_count: usize,
145 _pd: std::marker::PhantomData<T>,
146}
147
148impl<T: PolarsDataType> ChunkedArray<T> {
149 fn should_rechunk(&self) -> bool {
150 self.chunks.len() > 1 && self.chunks.len() > self.len() / 3
151 }
152
153 fn optional_rechunk(mut self) -> Self {
154 if self.should_rechunk() {
156 self.rechunk_mut()
157 }
158 self
159 }
160
161 pub(crate) fn as_any(&self) -> &dyn std::any::Any {
162 self
163 }
164
165 pub fn unpack_series_matching_type<'a>(
167 &self,
168 series: &'a Series,
169 ) -> PolarsResult<&'a ChunkedArray<T>> {
170 polars_ensure!(
171 self.dtype() == series.dtype(),
172 SchemaMismatch: "cannot unpack series of type `{}` into `{}`",
173 series.dtype(),
174 self.dtype(),
175 );
176
177 Ok(unsafe { self.unpack_series_matching_physical_type(series) })
179 }
180
181 fn new_with_compute_len(field: Arc<Field>, chunks: Vec<ArrayRef>) -> Self {
186 unsafe {
187 let mut chunked_arr = Self::new_with_dims(field, chunks, 0, 0);
188 chunked_arr.compute_len();
189 chunked_arr
190 }
191 }
192
193 pub unsafe fn new_with_dims(
197 field: Arc<Field>,
198 chunks: Vec<ArrayRef>,
199 length: usize,
200 null_count: usize,
201 ) -> Self {
202 Self {
203 field,
204 chunks,
205 flags: StatisticsFlagsIM::empty(),
206
207 _pd: Default::default(),
208 length,
209 null_count,
210 }
211 }
212
213 pub(crate) fn is_sorted_ascending_flag(&self) -> bool {
214 self.get_flags().is_sorted_ascending()
215 }
216
217 pub(crate) fn is_sorted_descending_flag(&self) -> bool {
218 self.get_flags().is_sorted_descending()
219 }
220
221 pub(crate) fn is_sorted_any(&self) -> bool {
223 self.get_flags().is_sorted_any()
224 }
225
226 pub fn unset_fast_explode_list(&mut self) {
227 self.set_fast_explode_list(false)
228 }
229
230 pub fn set_fast_explode_list(&mut self, value: bool) {
231 let mut flags = self.flags.get_mut();
232 flags.set(StatisticsFlags::CAN_FAST_EXPLODE_LIST, value);
233 self.flags.set_mut(flags);
234 }
235
236 pub fn get_fast_explode_list(&self) -> bool {
237 self.get_flags().can_fast_explode_list()
238 }
239
240 pub fn get_flags(&self) -> StatisticsFlags {
241 self.flags.get()
242 }
243
244 pub fn set_flags(&mut self, flags: StatisticsFlags) {
246 self.flags = StatisticsFlagsIM::new(flags);
247 }
248
249 pub fn is_sorted_flag(&self) -> IsSorted {
250 self.get_flags().is_sorted()
251 }
252
253 pub fn retain_flags_from<U: PolarsDataType>(
254 &mut self,
255 from: &ChunkedArray<U>,
256 retain_flags: StatisticsFlags,
257 ) {
258 let flags = from.flags.get();
259 if !flags.is_empty() {
261 self.set_flags(flags & retain_flags)
262 }
263 }
264
265 pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
267 let mut flags = self.flags.get_mut();
268 flags.set_sorted(sorted);
269 self.flags.set_mut(flags);
270 }
271
272 pub fn with_sorted_flag(&self, sorted: IsSorted) -> Self {
274 let mut out = self.clone();
275 out.set_sorted_flag(sorted);
276 out
277 }
278
279 pub fn first_null(&self) -> Option<usize> {
280 if self.null_count() == 0 {
281 None
282 }
283 else if self.null_count() == self.len() {
285 Some(0)
286 } else if self.is_sorted_any() {
287 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
288 0
290 } else {
291 self.null_count()
293 };
294
295 debug_assert!(
296 unsafe { self.get_unchecked(out) }.is_some(),
298 "incorrect sorted flag"
299 );
300
301 Some(out)
302 } else {
303 first_null(self.chunks().iter().map(|arr| arr.as_ref()))
304 }
305 }
306
307 pub fn first_non_null(&self) -> Option<usize> {
309 if self.null_count() == self.len() {
310 None
311 }
312 else if self.null_count() == 0 {
314 Some(0)
315 } else if self.is_sorted_any() {
316 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
317 self.null_count()
319 } else {
320 0
322 };
323
324 debug_assert!(
325 unsafe { self.get_unchecked(out) }.is_some(),
327 "incorrect sorted flag"
328 );
329
330 Some(out)
331 } else {
332 first_non_null(self.chunks().iter().map(|arr| arr.as_ref()))
333 }
334 }
335
336 pub fn last_non_null(&self) -> Option<usize> {
338 if self.null_count() == self.len() {
339 None
340 }
341 else if self.null_count() == 0 {
343 Some(self.len() - 1)
344 } else if self.is_sorted_any() {
345 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
346 self.len() - 1
348 } else {
349 self.len() - self.null_count() - 1
351 };
352
353 debug_assert!(
354 unsafe { self.get_unchecked(out) }.is_some(),
356 "incorrect sorted flag"
357 );
358
359 Some(out)
360 } else {
361 last_non_null(self.chunks().iter().map(|arr| arr.as_ref()), self.len())
362 }
363 }
364
365 pub fn drop_nulls(&self) -> Self {
366 if self.null_count() == 0 {
367 self.clone()
368 } else {
369 let chunks = self
370 .downcast_iter()
371 .map(|arr| {
372 if arr.null_count() == 0 {
373 arr.to_boxed()
374 } else {
375 filter_with_bitmap(arr, arr.validity().unwrap())
376 }
377 })
378 .collect();
379 unsafe {
380 Self::new_with_dims(
381 self.field.clone(),
382 chunks,
383 self.len() - self.null_count(),
384 0,
385 )
386 }
387 }
388 }
389
390 #[inline]
392 #[allow(clippy::type_complexity)]
393 pub fn iter_validities(
394 &self,
395 ) -> impl ExactSizeIterator<Item = Option<&Bitmap>> + DoubleEndedIterator {
396 fn to_validity(arr: &ArrayRef) -> Option<&Bitmap> {
397 arr.validity()
398 }
399 self.chunks.iter().map(to_validity)
400 }
401
402 #[inline]
403 pub fn has_nulls(&self) -> bool {
405 self.null_count > 0
406 }
407
408 pub fn shrink_to_fit(&mut self) {
410 self.chunks = vec![concatenate_unchecked(self.chunks.as_slice()).unwrap()];
411 }
412
413 pub fn clear(&self) -> Self {
414 let mut ca = unsafe {
416 self.copy_with_chunks(vec![new_empty_array(
417 self.chunks.first().unwrap().dtype().clone(),
418 )])
419 };
420
421 use StatisticsFlags as F;
422 ca.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
423 ca
424 }
425
426 pub(crate) unsafe fn unpack_series_matching_physical_type<'a>(
433 &self,
434 series: &'a Series,
435 ) -> &'a ChunkedArray<T> {
436 let series_trait = &**series;
437 if self.dtype() == series.dtype() {
438 &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
439 } else {
440 use DataType::*;
441 match (self.dtype(), series.dtype()) {
442 (Int64, Datetime(_, _)) | (Int64, Duration(_)) | (Int32, Date) => {
443 &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
444 },
445 _ => panic!(
446 "cannot unpack series {:?} into matching type {:?}",
447 series,
448 self.dtype()
449 ),
450 }
451 }
452 }
453
454 pub fn chunk_lengths(&self) -> ChunkLenIter<'_> {
456 self.chunks.iter().map(|chunk| chunk.len())
457 }
458
459 #[inline]
461 pub fn chunks(&self) -> &Vec<ArrayRef> {
462 &self.chunks
463 }
464
465 #[inline]
471 pub unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
472 &mut self.chunks
473 }
474
475 pub fn is_optimal_aligned(&self) -> bool {
477 self.chunks.len() == 1 && self.null_count() == 0
478 }
479
480 unsafe fn copy_with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
485 Self::new_with_compute_len(self.field.clone(), chunks)
486 }
487
488 pub fn dtype(&self) -> &DataType {
490 self.field.dtype()
491 }
492
493 pub(crate) unsafe fn set_dtype(&mut self, dtype: DataType) {
494 self.field = Arc::new(Field::new(self.name().clone(), dtype))
495 }
496
497 pub fn name(&self) -> &PlSmallStr {
499 self.field.name()
500 }
501
502 pub fn ref_field(&self) -> &Field {
504 &self.field
505 }
506
507 pub fn rename(&mut self, name: PlSmallStr) {
509 self.field = Arc::new(Field::new(name, self.field.dtype().clone()));
510 }
511
512 pub fn with_name(mut self, name: PlSmallStr) -> Self {
514 self.rename(name);
515 self
516 }
517}
518
519impl<T> ChunkedArray<T>
520where
521 T: PolarsDataType,
522{
523 #[inline]
529 pub fn get(&self, idx: usize) -> Option<T::Physical<'_>> {
530 let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
531 assert!(
532 chunk_idx < self.chunks().len(),
533 "index: {} out of bounds for len: {}",
534 idx,
535 self.len()
536 );
537 unsafe {
538 let arr = self.downcast_get_unchecked(chunk_idx);
539 assert!(
540 arr_idx < arr.len(),
541 "index: {} out of bounds for len: {}",
542 idx,
543 self.len()
544 );
545 arr.get_unchecked(arr_idx)
546 }
547 }
548
549 #[inline]
555 pub unsafe fn get_unchecked(&self, idx: usize) -> Option<T::Physical<'_>> {
556 let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
557
558 unsafe {
559 self.downcast_get_unchecked(chunk_idx)
561 .get_unchecked(arr_idx)
562 }
563 }
564
565 #[inline]
571 pub unsafe fn value_unchecked(&self, idx: usize) -> T::Physical<'_> {
572 let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
573
574 unsafe {
575 self.downcast_get_unchecked(chunk_idx)
577 .value_unchecked(arr_idx)
578 }
579 }
580
581 #[inline]
584 pub fn first(&self) -> Option<T::Physical<'_>> {
585 self.iter().next().unwrap()
586 }
587
588 #[inline]
591 pub fn last(&self) -> Option<T::Physical<'_>> {
592 let arr = self
593 .downcast_iter()
594 .rev()
595 .find(|arr| !arr.is_empty())
596 .unwrap();
597 unsafe { arr.get_unchecked(arr.len() - 1) }
598 }
599
600 pub fn set_validity(&mut self, validity: Option<Bitmap>) {
601 assert!(
602 !self.dtype().is_struct(),
603 "set_outer_validity should be used for struct types"
604 );
605 if let Some(v) = &validity {
606 assert_eq!(self.len(), v.len());
607 }
608 let mut i = 0;
609 for chunk in unsafe { self.chunks_mut() } {
610 *chunk =
611 chunk.with_validity(validity.as_ref().map(|v| v.clone().sliced(i, chunk.len())));
612 i += chunk.len();
613 }
614 self.null_count = validity.map(|v| v.unset_bits()).unwrap_or(0);
615 self.set_fast_explode_list(false);
616 }
617
618 pub fn with_validity(mut self, validity: Option<Bitmap>) -> Self {
619 self.set_validity(validity);
620 self
621 }
622}
623
624impl<T> ChunkedArray<T>
625where
626 T: PolarsDataType,
627 ChunkedArray<T>: ChunkTakeUnchecked<[IdxSize]>,
628{
629 pub fn deposit(&self, validity: &Bitmap) -> Self {
631 let set_bits = validity.set_bits();
632
633 assert_eq!(self.len(), set_bits);
634
635 if set_bits == validity.len() {
636 return self.clone();
637 }
638
639 if set_bits == 0 {
640 return Self::full_null_like(self, validity.len());
641 }
642
643 let mut null_mask = validity.clone();
644
645 let mut gather_idxs = Vec::with_capacity(validity.len());
646 let leading_nulls = null_mask.take_leading_zeros();
647 gather_idxs.extend(std::iter::repeat_n(0, leading_nulls + 1));
648
649 let mut i = 0 as IdxSize;
650 gather_idxs.extend(null_mask.iter().skip(1).map(|v| {
651 i += IdxSize::from(v);
652 i
653 }));
654
655 let mut ca = unsafe { ChunkTakeUnchecked::take_unchecked(self, &gather_idxs) };
656 ca.set_validity(combine_validities_and(
657 Some(validity),
658 ca.rechunk_validity().as_ref(),
659 ));
660 ca
661 }
662}
663
664impl ListChunked {
665 #[inline]
666 pub fn get_as_series(&self, idx: usize) -> Option<Series> {
667 unsafe {
668 Some(Series::from_chunks_and_dtype_unchecked(
669 self.name().clone(),
670 vec![self.get(idx)?],
671 &self.inner_dtype().to_physical(),
672 ))
673 }
674 }
675
676 pub fn has_empty_lists(&self) -> bool {
677 for arr in self.downcast_iter() {
678 if arr.is_empty() {
679 continue;
680 }
681
682 if match arr.validity() {
683 None => arr.offsets().lengths().any(|l| l == 0),
684 Some(validity) => arr
685 .offsets()
686 .lengths()
687 .enumerate()
688 .any(|(i, l)| l == 0 && unsafe { validity.get_bit_unchecked(i) }),
689 } {
690 return true;
691 }
692 }
693
694 false
695 }
696
697 pub fn has_masked_out_values(&self) -> bool {
698 for arr in self.downcast_iter() {
699 if arr.is_empty() {
700 continue;
701 }
702
703 if *arr.offsets().first() != 0 || *arr.offsets().last() != arr.values().len() as i64 {
704 return true;
705 }
706
707 let Some(validity) = arr.validity() else {
708 continue;
709 };
710 if validity.set_bits() == 0 {
711 continue;
712 }
713
714 for i in (!validity).true_idx_iter() {
716 if arr.offsets().length_at(i) > 0 {
717 return true;
718 }
719 }
720 }
721
722 false
723 }
724}
725
726#[cfg(feature = "dtype-array")]
727impl ArrayChunked {
728 #[inline]
729 pub fn get_as_series(&self, idx: usize) -> Option<Series> {
730 unsafe {
731 Some(Series::from_chunks_and_dtype_unchecked(
732 self.name().clone(),
733 vec![self.get(idx)?],
734 &self.inner_dtype().to_physical(),
735 ))
736 }
737 }
738
739 pub fn from_aligned_values(
740 name: PlSmallStr,
741 inner_dtype: &DataType,
742 width: usize,
743 chunks: Vec<ArrayRef>,
744 length: usize,
745 ) -> Self {
746 let dtype = DataType::Array(Box::new(inner_dtype.clone()), width);
747 let arrow_dtype = dtype.to_arrow(CompatLevel::newest()).to_storage_recursive();
748 let field = Arc::new(Field::new(name, dtype));
749 if width == 0 {
750 use arrow::array::builder::{ArrayBuilder, make_builder};
751 let values = make_builder(&inner_dtype.to_arrow(CompatLevel::newest())).freeze();
752 return ArrayChunked::new_with_compute_len(
753 field,
754 vec![FixedSizeListArray::new(arrow_dtype, length, values, None).into_boxed()],
755 );
756 }
757 let mut total_len = 0;
758 let chunks = chunks
759 .iter()
760 .map(|chunk| {
761 debug_assert_eq!(chunk.len() % width, 0);
762 let chunk_len = chunk.len() / width;
763 total_len += chunk_len;
764 FixedSizeListArray::new(arrow_dtype.clone(), chunk_len, chunk.clone(), None)
765 .into_boxed()
766 })
767 .collect();
768 debug_assert_eq!(total_len, length);
769
770 unsafe { Self::new_with_dims(field, chunks, length, 0) }
771 }
772
773 pub fn to_list(&self) -> ListChunked {
777 let inner_dtype = self.inner_dtype();
778 let chunks = self
779 .downcast_iter()
780 .map(|chunk| {
781 use arrow::offset::OffsetsBuffer;
782
783 let inner_dtype = chunk.dtype().inner_dtype().unwrap();
784 let dtype = inner_dtype.clone().to_large_list(true);
785
786 let offsets = (0..=chunk.len())
787 .map(|i| (i * self.width()) as i64)
788 .collect::<Vec<i64>>();
789
790 let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets.into()) };
792
793 ListArray::<i64>::new(
794 dtype,
795 offsets,
796 chunk.values().clone(),
797 chunk.validity().cloned(),
798 )
799 .into_boxed()
800 })
801 .collect();
802
803 let mut ca = unsafe {
805 ListChunked::new_with_dims(
806 Arc::new(Field::new(
807 self.name().clone(),
808 DataType::List(Box::new(inner_dtype.clone())),
809 )),
810 chunks,
811 self.len(),
812 self.null_count(),
813 )
814 };
815 ca.set_fast_explode_list(!self.has_nulls());
816 ca
817 }
818}
819
820impl<T> ChunkedArray<T>
821where
822 T: PolarsDataType,
823{
824 pub fn match_chunks<I>(&self, chunk_id: I) -> Self
828 where
829 I: Iterator<Item = usize>,
830 {
831 debug_assert!(self.chunks.len() == 1);
832 let slice = |ca: &Self| {
834 let array = &ca.chunks[0];
835
836 let mut offset = 0;
837 let chunks = chunk_id
838 .map(|len| {
839 debug_assert!((offset + len) <= array.len());
841 let out = unsafe { array.sliced_unchecked(offset, len) };
842 offset += len;
843 out
844 })
845 .collect();
846
847 debug_assert_eq!(offset, array.len());
848
849 unsafe {
851 Self::from_chunks_and_dtype(self.name().clone(), chunks, self.dtype().clone())
852 }
853 };
854
855 if self.chunks.len() != 1 {
856 let out = self.rechunk();
857 slice(&out)
858 } else {
859 slice(self)
860 }
861 }
862}
863
864impl<T: PolarsDataType> AsRefDataType for ChunkedArray<T> {
865 fn as_ref_dtype(&self) -> &DataType {
866 self.dtype()
867 }
868}
869
870pub(crate) trait AsSinglePtr: AsRefDataType {
871 fn as_single_ptr(&mut self) -> PolarsResult<usize> {
873 polars_bail!(opq = as_single_ptr, self.as_ref_dtype());
874 }
875}
876
877impl<T> AsSinglePtr for ChunkedArray<T>
878where
879 T: PolarsNumericType,
880{
881 fn as_single_ptr(&mut self) -> PolarsResult<usize> {
882 self.rechunk_mut();
883 let a = self.data_views().next().unwrap();
884 let ptr = a.as_ptr();
885 Ok(ptr as usize)
886 }
887}
888
889impl AsSinglePtr for BooleanChunked {}
890impl AsSinglePtr for ListChunked {}
891#[cfg(feature = "dtype-array")]
892impl AsSinglePtr for ArrayChunked {}
893impl AsSinglePtr for StringChunked {}
894impl AsSinglePtr for BinaryChunked {}
895#[cfg(feature = "object")]
896impl<T: PolarsObject> AsSinglePtr for ObjectChunked<T> {}
897
898pub enum ChunkedArrayLayout<'a, T: PolarsDataType> {
899 SingleNoNull(&'a T::Array),
900 Single(&'a T::Array),
901 MultiNoNull(&'a ChunkedArray<T>),
902 Multi(&'a ChunkedArray<T>),
903}
904
905impl<T> ChunkedArray<T>
906where
907 T: PolarsDataType,
908{
909 pub fn layout(&self) -> ChunkedArrayLayout<'_, T> {
910 if self.chunks.len() == 1 {
911 let arr = self.downcast_iter().next().unwrap();
912 return if arr.null_count() == 0 {
913 ChunkedArrayLayout::SingleNoNull(arr)
914 } else {
915 ChunkedArrayLayout::Single(arr)
916 };
917 }
918
919 if self.downcast_iter().all(|a| a.null_count() == 0) {
920 ChunkedArrayLayout::MultiNoNull(self)
921 } else {
922 ChunkedArrayLayout::Multi(self)
923 }
924 }
925}
926
927impl<T> ChunkedArray<T>
928where
929 T: PolarsNumericType,
930{
931 pub fn cont_slice(&self) -> PolarsResult<&[T::Native]> {
933 polars_ensure!(
934 self.chunks.len() == 1 && self.chunks[0].null_count() == 0,
935 ComputeError: "chunked array is not contiguous"
936 );
937 Ok(self.downcast_iter().next().map(|arr| arr.values()).unwrap())
938 }
939
940 pub(crate) fn cont_slice_mut(&mut self) -> Option<&mut [T::Native]> {
942 if self.chunks.len() == 1 && self.chunks[0].null_count() == 0 {
943 let arr = unsafe { self.downcast_iter_mut().next().unwrap() };
945 arr.get_mut_values()
946 } else {
947 None
948 }
949 }
950
951 pub fn data_views(&self) -> impl DoubleEndedIterator<Item = &[T::Native]> {
955 self.downcast_iter().map(|arr| arr.values().as_slice())
956 }
957
958 #[allow(clippy::wrong_self_convention)]
959 pub fn into_no_null_iter(
960 &self,
961 ) -> impl '_ + Send + Sync + ExactSizeIterator<Item = T::Native> + DoubleEndedIterator + TrustedLen
962 {
963 #[allow(clippy::map_clone)]
965 unsafe {
967 self.data_views()
968 .flatten()
969 .map(|v| *v)
970 .trust_my_length(self.len())
971 }
972 }
973}
974
975impl<T: PolarsDataType> Clone for ChunkedArray<T> {
976 fn clone(&self) -> Self {
977 ChunkedArray {
978 field: self.field.clone(),
979 chunks: self.chunks.clone(),
980 flags: self.flags.clone(),
981
982 _pd: Default::default(),
983 length: self.length,
984 null_count: self.null_count,
985 }
986 }
987}
988
989impl<T: PolarsDataType> AsRef<ChunkedArray<T>> for ChunkedArray<T> {
990 fn as_ref(&self) -> &ChunkedArray<T> {
991 self
992 }
993}
994
995impl ValueSize for ListChunked {
996 fn get_values_size(&self) -> usize {
997 self.chunks
998 .iter()
999 .fold(0usize, |acc, arr| acc + arr.get_values_size())
1000 }
1001}
1002
1003#[cfg(feature = "dtype-array")]
1004impl ValueSize for ArrayChunked {
1005 fn get_values_size(&self) -> usize {
1006 self.chunks
1007 .iter()
1008 .fold(0usize, |acc, arr| acc + arr.get_values_size())
1009 }
1010}
1011impl ValueSize for StringChunked {
1012 fn get_values_size(&self) -> usize {
1013 self.chunks
1014 .iter()
1015 .fold(0usize, |acc, arr| acc + arr.get_values_size())
1016 }
1017}
1018
1019impl ValueSize for BinaryOffsetChunked {
1020 fn get_values_size(&self) -> usize {
1021 self.chunks
1022 .iter()
1023 .fold(0usize, |acc, arr| acc + arr.get_values_size())
1024 }
1025}
1026
1027pub(crate) fn to_primitive<T: PolarsNumericType>(
1028 values: Vec<T::Native>,
1029 validity: Option<Bitmap>,
1030) -> PrimitiveArray<T::Native> {
1031 PrimitiveArray::new(
1032 T::get_static_dtype().to_arrow(CompatLevel::newest()),
1033 values.into(),
1034 validity,
1035 )
1036}
1037
1038pub(crate) fn to_array<T: PolarsNumericType>(
1039 values: Vec<T::Native>,
1040 validity: Option<Bitmap>,
1041) -> ArrayRef {
1042 Box::new(to_primitive::<T>(values, validity))
1043}
1044
1045impl<T: PolarsDataType> Default for ChunkedArray<T> {
1046 fn default() -> Self {
1047 let dtype = T::get_static_dtype();
1048 let arrow_dtype = dtype.to_physical().to_arrow(CompatLevel::newest());
1049 ChunkedArray {
1050 field: Arc::new(Field::new(PlSmallStr::EMPTY, dtype)),
1051 chunks: vec![new_empty_array(arrow_dtype)],
1053 flags: StatisticsFlagsIM::empty(),
1054
1055 _pd: Default::default(),
1056 length: 0,
1057 null_count: 0,
1058 }
1059 }
1060}
1061
1062#[cfg(test)]
1063pub(crate) mod test {
1064 use crate::prelude::*;
1065
1066 pub(crate) fn get_chunked_array() -> Int32Chunked {
1067 ChunkedArray::new(PlSmallStr::from_static("a"), &[1, 2, 3])
1068 }
1069
1070 #[test]
1071 fn test_sort() {
1072 let a = Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 9, 3, 2]);
1073 let b = a
1074 .sort(false)
1075 .iter()
1076 .map(|opt| opt.unwrap())
1077 .collect::<Vec<_>>();
1078 assert_eq!(b, [1, 2, 3, 9]);
1079 let a = StringChunked::new(PlSmallStr::from_static("a"), &["b", "a", "c"]);
1080 let a = a.sort(false);
1081 let b = a.iter().collect::<Vec<_>>();
1082 assert_eq!(b, [Some("a"), Some("b"), Some("c")]);
1083 assert!(a.is_sorted_ascending_flag());
1084 }
1085
1086 #[test]
1087 fn arithmetic() {
1088 let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 6, 40]);
1089 let b = &Int32Chunked::new(PlSmallStr::from_static("b"), &[-1, 2, 3, 4]);
1090
1091 println!("{:?}", a + b);
1094 println!("{:?}", a - b);
1095 println!("{:?}", a * b);
1096 println!("{:?}", a / b);
1097 }
1098
1099 #[test]
1100 fn iter() {
1101 let s1 = get_chunked_array();
1102 assert_eq!(s1.iter().fold(0, |acc, val| { acc + val.unwrap() }), 6)
1104 }
1105
1106 #[test]
1107 fn limit() {
1108 let a = get_chunked_array();
1109 let b = a.limit(2);
1110 println!("{b:?}");
1111 assert_eq!(b.len(), 2)
1112 }
1113
1114 #[test]
1115 fn filter() {
1116 let a = get_chunked_array();
1117 let b = a
1118 .filter(&BooleanChunked::new(
1119 PlSmallStr::from_static("filter"),
1120 &[true, false, false],
1121 ))
1122 .unwrap();
1123 assert_eq!(b.len(), 1);
1124 assert_eq!(b.iter().next(), Some(Some(1)));
1125 }
1126
1127 #[test]
1128 fn aggregates() {
1129 let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 10, 9]);
1130 assert_eq!(a.max(), Some(100));
1131 assert_eq!(a.min(), Some(1));
1132 assert_eq!(a.sum(), Some(120))
1133 }
1134
1135 #[test]
1136 fn take() {
1137 let a = get_chunked_array();
1138 let new = a.take(&[0 as IdxSize, 1]).unwrap();
1139 assert_eq!(new.len(), 2)
1140 }
1141
1142 #[test]
1143 fn cast() {
1144 let a = get_chunked_array();
1145 let b = a.cast(&DataType::Int64).unwrap();
1146 assert_eq!(b.dtype(), &DataType::Int64)
1147 }
1148
1149 fn assert_slice_equal<T>(ca: &ChunkedArray<T>, eq: &[T::Native])
1150 where
1151 T: PolarsNumericType,
1152 {
1153 assert_eq!(ca.iter().map(|opt| opt.unwrap()).collect::<Vec<_>>(), eq)
1154 }
1155
1156 #[test]
1157 fn slice() {
1158 let mut first = UInt32Chunked::new(PlSmallStr::from_static("first"), &[0, 1, 2]);
1159 let second = UInt32Chunked::new(PlSmallStr::from_static("second"), &[3, 4, 5]);
1160 first.append(&second).unwrap();
1161 assert_slice_equal(&first.slice(0, 3), &[0, 1, 2]);
1162 assert_slice_equal(&first.slice(0, 4), &[0, 1, 2, 3]);
1163 assert_slice_equal(&first.slice(1, 4), &[1, 2, 3, 4]);
1164 assert_slice_equal(&first.slice(3, 2), &[3, 4]);
1165 assert_slice_equal(&first.slice(3, 3), &[3, 4, 5]);
1166 assert_slice_equal(&first.slice(-3, 3), &[3, 4, 5]);
1167 assert_slice_equal(&first.slice(-6, 6), &[0, 1, 2, 3, 4, 5]);
1168
1169 assert_eq!(first.slice(-7, 2).len(), 1);
1170 assert_eq!(first.slice(-3, 4).len(), 3);
1171 assert_eq!(first.slice(3, 4).len(), 3);
1172 assert_eq!(first.slice(10, 4).len(), 0);
1173 }
1174
1175 #[test]
1176 fn sorting() {
1177 let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[9, 2, 4]);
1178 let sorted = s.sort(false);
1179 assert_slice_equal(&sorted, &[2, 4, 9]);
1180 let sorted = s.sort(true);
1181 assert_slice_equal(&sorted, &[9, 4, 2]);
1182
1183 let s: StringChunked = ["b", "a", "z"].iter().collect();
1184 let sorted = s.sort(false);
1185 assert_eq!(
1186 sorted.iter().collect::<Vec<_>>(),
1187 &[Some("a"), Some("b"), Some("z")]
1188 );
1189 let sorted = s.sort(true);
1190 assert_eq!(
1191 sorted.iter().collect::<Vec<_>>(),
1192 &[Some("z"), Some("b"), Some("a")]
1193 );
1194 let s: StringChunked = [Some("b"), None, Some("z")].iter().copied().collect();
1195 let sorted = s.sort(false);
1196 assert_eq!(
1197 sorted.iter().collect::<Vec<_>>(),
1198 &[None, Some("b"), Some("z")]
1199 );
1200 }
1201
1202 #[test]
1203 fn reverse() {
1204 let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3]);
1205 assert_slice_equal(&s.reverse(), &[3, 2, 1]);
1207 let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[Some(1), None, Some(3)]);
1209 assert_eq!(Vec::from(&s.reverse()), &[Some(3), None, Some(1)]);
1210 let s = BooleanChunked::new(PlSmallStr::EMPTY, &[true, false]);
1211 assert_eq!(Vec::from(&s.reverse()), &[Some(false), Some(true)]);
1212
1213 let s = StringChunked::new(PlSmallStr::EMPTY, &["a", "b", "c"]);
1214 assert_eq!(Vec::from(&s.reverse()), &[Some("c"), Some("b"), Some("a")]);
1215
1216 let s = StringChunked::new(PlSmallStr::EMPTY, &[Some("a"), None, Some("c")]);
1217 assert_eq!(Vec::from(&s.reverse()), &[Some("c"), None, Some("a")]);
1218 }
1219
1220 #[test]
1221 #[cfg(feature = "dtype-categorical")]
1222 fn test_iter_categorical() {
1223 let ca = StringChunked::new(
1224 PlSmallStr::EMPTY,
1225 &[Some("foo"), None, Some("bar"), Some("ham")],
1226 );
1227 let cats = Categories::new(
1228 PlSmallStr::EMPTY,
1229 PlSmallStr::EMPTY,
1230 CategoricalPhysical::U32,
1231 );
1232 let ca = ca.cast(&DataType::from_categories(cats)).unwrap();
1233 let ca = ca.cat32().unwrap();
1234 let v: Vec<_> = ca.physical().iter().collect();
1235 assert_eq!(v, &[Some(0), None, Some(1), Some(2)]);
1236 }
1237
1238 #[test]
1239 #[ignore]
1240 fn test_shrink_to_fit() {
1241 let mut builder = StringChunkedBuilder::new(PlSmallStr::from_static("foo"), 2048);
1242 builder.append_value("foo");
1243 let mut arr = builder.finish();
1244 let before = arr
1245 .chunks()
1246 .iter()
1247 .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
1248 .sum::<usize>();
1249 arr.shrink_to_fit();
1250 let after = arr
1251 .chunks()
1252 .iter()
1253 .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
1254 .sum::<usize>();
1255 assert!(before > after);
1256 }
1257}