1#![allow(unsafe_op_in_unsafe_fn)]
3use std::sync::Arc;
4
5use arrow::array::*;
6use arrow::bitmap::Bitmap;
7use arrow::compute::concatenate::concatenate_unchecked;
8use arrow::compute::utils::combine_validities_and;
9use polars_compute::filter::filter_with_bitmap;
10
11use crate::prelude::{ChunkTakeUnchecked, *};
12
13pub mod ops;
14#[macro_use]
15pub mod arithmetic;
16pub mod builder;
17pub mod cast;
18pub mod collect;
19pub mod comparison;
20pub mod flags;
21pub mod float;
22pub mod iterator;
23#[cfg(feature = "ndarray")]
24pub(crate) mod ndarray;
25
26pub mod arg_min_max;
27#[cfg(feature = "dtype-array")]
28pub(crate) mod array;
29mod binary;
30mod binary_offset;
31mod bitwise;
32#[cfg(feature = "object")]
33mod drop;
34mod from;
35mod from_iterator;
36pub mod from_iterator_par;
37pub(crate) mod list;
38pub(crate) mod logical;
39#[cfg(feature = "object")]
40pub mod object;
41#[cfg(feature = "random")]
42mod random;
43#[cfg(feature = "dtype-struct")]
44mod struct_;
45#[cfg(any(
46 feature = "temporal",
47 feature = "dtype-datetime",
48 feature = "dtype-date"
49))]
50pub mod temporal;
51mod to_vec;
52mod trusted_len;
53pub(crate) use arg_min_max::*;
54use arrow::legacy::prelude::*;
55#[cfg(feature = "dtype-struct")]
56pub use struct_::StructChunked;
57
58use self::flags::{StatisticsFlags, StatisticsFlagsIM};
59use crate::series::IsSorted;
60use crate::utils::{first_non_null, first_null, last_non_null};
61
62pub type ChunkLenIter<'a> = std::iter::Map<std::slice::Iter<'a, ArrayRef>, fn(&ArrayRef) -> usize>;
63
64pub struct ChunkedArray<T: PolarsDataType> {
139 pub(crate) field: Arc<Field>,
140 pub(crate) chunks: Vec<ArrayRef>,
141
142 pub(crate) flags: StatisticsFlagsIM,
143
144 length: usize,
145 null_count: usize,
146 _pd: std::marker::PhantomData<T>,
147}
148
149impl<T: PolarsDataType> ChunkedArray<T> {
150 fn should_rechunk(&self) -> bool {
151 self.chunks.len() > 1 && self.chunks.len() > self.len() / 3
152 }
153
154 fn optional_rechunk(mut self) -> Self {
155 if self.should_rechunk() {
157 self.rechunk_mut()
158 }
159 self
160 }
161
162 pub(crate) fn as_any(&self) -> &dyn std::any::Any {
163 self
164 }
165
166 pub fn unpack_series_matching_type<'a>(
168 &self,
169 series: &'a Series,
170 ) -> PolarsResult<&'a ChunkedArray<T>> {
171 polars_ensure!(
172 self.dtype() == series.dtype(),
173 SchemaMismatch: "cannot unpack series of type `{}` into `{}`",
174 series.dtype(),
175 self.dtype(),
176 );
177
178 Ok(unsafe { self.unpack_series_matching_physical_type(series) })
180 }
181
182 fn new_with_compute_len(field: Arc<Field>, chunks: Vec<ArrayRef>) -> Self {
187 unsafe {
188 let mut chunked_arr = Self::new_with_dims(field, chunks, 0, 0);
189 chunked_arr.compute_len();
190 chunked_arr
191 }
192 }
193
194 pub unsafe fn new_with_dims(
198 field: Arc<Field>,
199 chunks: Vec<ArrayRef>,
200 length: usize,
201 null_count: usize,
202 ) -> Self {
203 Self {
204 field,
205 chunks,
206 flags: StatisticsFlagsIM::empty(),
207
208 _pd: Default::default(),
209 length,
210 null_count,
211 }
212 }
213
214 pub(crate) fn is_sorted_ascending_flag(&self) -> bool {
215 self.get_flags().is_sorted_ascending()
216 }
217
218 pub(crate) fn is_sorted_descending_flag(&self) -> bool {
219 self.get_flags().is_sorted_descending()
220 }
221
222 pub(crate) fn is_sorted_any(&self) -> bool {
224 self.get_flags().is_sorted_any()
225 }
226
227 pub fn unset_fast_explode_list(&mut self) {
228 self.set_fast_explode_list(false)
229 }
230
231 pub fn set_fast_explode_list(&mut self, value: bool) {
232 let mut flags = self.flags.get_mut();
233 flags.set(StatisticsFlags::CAN_FAST_EXPLODE_LIST, value);
234 self.flags.set_mut(flags);
235 }
236
237 pub fn get_fast_explode_list(&self) -> bool {
238 self.get_flags().can_fast_explode_list()
239 }
240
241 pub fn get_flags(&self) -> StatisticsFlags {
242 self.flags.get()
243 }
244
245 pub fn set_flags(&mut self, flags: StatisticsFlags) {
247 self.flags = StatisticsFlagsIM::new(flags);
248 }
249
250 pub fn is_sorted_flag(&self) -> IsSorted {
251 self.get_flags().is_sorted()
252 }
253
254 pub fn retain_flags_from<U: PolarsDataType>(
255 &mut self,
256 from: &ChunkedArray<U>,
257 retain_flags: StatisticsFlags,
258 ) {
259 let flags = from.flags.get();
260 if !flags.is_empty() {
262 self.set_flags(flags & retain_flags)
263 }
264 }
265
266 pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
268 let mut flags = self.flags.get_mut();
269 flags.set_sorted(sorted);
270 self.flags.set_mut(flags);
271 }
272
273 pub fn with_sorted_flag(&self, sorted: IsSorted) -> Self {
275 let mut out = self.clone();
276 out.set_sorted_flag(sorted);
277 out
278 }
279
280 pub fn first_null(&self) -> Option<usize> {
281 if self.null_count() == 0 {
282 None
283 }
284 else if self.null_count() == self.len() {
286 Some(0)
287 } else if self.is_sorted_any() {
288 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
289 0
291 } else {
292 self.null_count()
294 };
295
296 debug_assert!(
297 unsafe { self.get_unchecked(out) }.is_some(),
299 "incorrect sorted flag"
300 );
301
302 Some(out)
303 } else {
304 first_null(self.chunks().iter().map(|arr| arr.as_ref()))
305 }
306 }
307
308 pub fn first_non_null(&self) -> Option<usize> {
310 if self.null_count() == self.len() {
311 None
312 }
313 else if self.null_count() == 0 {
315 Some(0)
316 } else if self.is_sorted_any() {
317 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
318 self.null_count()
320 } else {
321 0
323 };
324
325 debug_assert!(
326 unsafe { self.get_unchecked(out) }.is_some(),
328 "incorrect sorted flag"
329 );
330
331 Some(out)
332 } else {
333 first_non_null(self.chunks().iter().map(|arr| arr.as_ref()))
334 }
335 }
336
337 pub fn last_non_null(&self) -> Option<usize> {
339 if self.null_count() == self.len() {
340 None
341 }
342 else if self.null_count() == 0 {
344 Some(self.len() - 1)
345 } else if self.is_sorted_any() {
346 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
347 self.len() - 1
349 } else {
350 self.len() - self.null_count() - 1
352 };
353
354 debug_assert!(
355 unsafe { self.get_unchecked(out) }.is_some(),
357 "incorrect sorted flag"
358 );
359
360 Some(out)
361 } else {
362 last_non_null(self.chunks().iter().map(|arr| arr.as_ref()), self.len())
363 }
364 }
365
366 pub fn drop_nulls(&self) -> Self {
367 if self.null_count() == 0 {
368 self.clone()
369 } else {
370 let chunks = self
371 .downcast_iter()
372 .map(|arr| {
373 if arr.null_count() == 0 {
374 arr.to_boxed()
375 } else {
376 filter_with_bitmap(arr, arr.validity().unwrap())
377 }
378 })
379 .collect();
380 unsafe {
381 Self::new_with_dims(
382 self.field.clone(),
383 chunks,
384 self.len() - self.null_count(),
385 0,
386 )
387 }
388 }
389 }
390
391 #[inline]
393 #[allow(clippy::type_complexity)]
394 pub fn iter_validities(
395 &self,
396 ) -> impl ExactSizeIterator<Item = Option<&Bitmap>> + DoubleEndedIterator {
397 fn to_validity(arr: &ArrayRef) -> Option<&Bitmap> {
398 arr.validity()
399 }
400 self.chunks.iter().map(to_validity)
401 }
402
403 #[inline]
404 pub fn has_nulls(&self) -> bool {
406 self.null_count > 0
407 }
408
409 pub fn shrink_to_fit(&mut self) {
411 self.chunks = vec![concatenate_unchecked(self.chunks.as_slice()).unwrap()];
412 }
413
414 pub fn clear(&self) -> Self {
415 let mut ca = unsafe {
417 self.copy_with_chunks(vec![new_empty_array(
418 self.chunks.first().unwrap().dtype().clone(),
419 )])
420 };
421
422 use StatisticsFlags as F;
423 ca.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
424 ca
425 }
426
427 pub(crate) unsafe fn unpack_series_matching_physical_type<'a>(
434 &self,
435 series: &'a Series,
436 ) -> &'a ChunkedArray<T> {
437 let series_trait = &**series;
438 if self.dtype() == series.dtype() {
439 &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
440 } else {
441 use DataType::*;
442 match (self.dtype(), series.dtype()) {
443 (Int64, Datetime(_, _)) | (Int64, Duration(_)) | (Int32, Date) => {
444 &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
445 },
446 _ => panic!(
447 "cannot unpack series {:?} into matching type {:?}",
448 series,
449 self.dtype()
450 ),
451 }
452 }
453 }
454
455 pub fn chunk_lengths(&self) -> ChunkLenIter<'_> {
457 self.chunks.iter().map(|chunk| chunk.len())
458 }
459
460 #[inline]
462 pub fn chunks(&self) -> &Vec<ArrayRef> {
463 &self.chunks
464 }
465
466 #[inline]
472 pub unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
473 &mut self.chunks
474 }
475
476 pub fn is_optimal_aligned(&self) -> bool {
478 self.chunks.len() == 1 && self.null_count() == 0
479 }
480
481 unsafe fn copy_with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
486 Self::new_with_compute_len(self.field.clone(), chunks)
487 }
488
489 pub fn dtype(&self) -> &DataType {
491 self.field.dtype()
492 }
493
494 pub(crate) unsafe fn set_dtype(&mut self, dtype: DataType) {
495 self.field = Arc::new(Field::new(self.name().clone(), dtype))
496 }
497
498 pub fn name(&self) -> &PlSmallStr {
500 self.field.name()
501 }
502
503 pub fn ref_field(&self) -> &Field {
505 &self.field
506 }
507
508 pub fn rename(&mut self, name: PlSmallStr) {
510 self.field = Arc::new(Field::new(name, self.field.dtype().clone()));
511 }
512
513 pub fn with_name(mut self, name: PlSmallStr) -> Self {
515 self.rename(name);
516 self
517 }
518}
519
520impl<T> ChunkedArray<T>
521where
522 T: PolarsDataType,
523{
524 #[inline]
530 pub fn get(&self, idx: usize) -> Option<T::Physical<'_>> {
531 let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
532 assert!(
533 chunk_idx < self.chunks().len(),
534 "index: {} out of bounds for len: {}",
535 idx,
536 self.len()
537 );
538 unsafe {
539 let arr = self.downcast_get_unchecked(chunk_idx);
540 assert!(
541 arr_idx < arr.len(),
542 "index: {} out of bounds for len: {}",
543 idx,
544 self.len()
545 );
546 arr.get_unchecked(arr_idx)
547 }
548 }
549
550 #[inline]
556 pub unsafe fn get_unchecked(&self, idx: usize) -> Option<T::Physical<'_>> {
557 let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
558
559 unsafe {
560 self.downcast_get_unchecked(chunk_idx)
562 .get_unchecked(arr_idx)
563 }
564 }
565
566 #[inline]
572 pub unsafe fn value_unchecked(&self, idx: usize) -> T::Physical<'_> {
573 let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
574
575 unsafe {
576 self.downcast_get_unchecked(chunk_idx)
578 .value_unchecked(arr_idx)
579 }
580 }
581
582 #[inline]
585 pub fn first(&self) -> Option<T::Physical<'_>> {
586 self.iter().next().unwrap()
587 }
588
589 #[inline]
592 pub fn last(&self) -> Option<T::Physical<'_>> {
593 let arr = self
594 .downcast_iter()
595 .rev()
596 .find(|arr| !arr.is_empty())
597 .unwrap();
598 unsafe { arr.get_unchecked(arr.len() - 1) }
599 }
600
601 pub fn set_validity(&mut self, validity: Option<Bitmap>) {
602 assert!(
603 !self.dtype().is_struct(),
604 "set_outer_validity should be used for struct types"
605 );
606 if let Some(v) = &validity {
607 assert_eq!(self.len(), v.len());
608 }
609 let mut i = 0;
610 for chunk in unsafe { self.chunks_mut() } {
611 *chunk =
612 chunk.with_validity(validity.as_ref().map(|v| v.clone().sliced(i, chunk.len())));
613 i += chunk.len();
614 }
615 self.null_count = validity.map(|v| v.unset_bits()).unwrap_or(0);
616 self.set_fast_explode_list(false);
617 }
618
619 pub fn with_validity(mut self, validity: Option<Bitmap>) -> Self {
620 self.set_validity(validity);
621 self
622 }
623}
624
625impl<T> ChunkedArray<T>
626where
627 T: PolarsDataType,
628 ChunkedArray<T>: ChunkTakeUnchecked<[IdxSize]>,
629{
630 pub fn deposit(&self, validity: &Bitmap) -> Self {
632 let set_bits = validity.set_bits();
633
634 assert_eq!(self.len(), set_bits);
635
636 if set_bits == validity.len() {
637 return self.clone();
638 }
639
640 if set_bits == 0 {
641 return Self::full_null_like(self, validity.len());
642 }
643
644 let mut null_mask = validity.clone();
645
646 let mut gather_idxs = Vec::with_capacity(validity.len());
647 let leading_nulls = null_mask.take_leading_zeros();
648 gather_idxs.extend(std::iter::repeat_n(0, leading_nulls + 1));
649
650 let mut i = 0 as IdxSize;
651 gather_idxs.extend(null_mask.iter().skip(1).map(|v| {
652 i += IdxSize::from(v);
653 i
654 }));
655
656 let mut ca = unsafe { ChunkTakeUnchecked::take_unchecked(self, &gather_idxs) };
657 ca.set_validity(combine_validities_and(
658 Some(validity),
659 ca.rechunk_validity().as_ref(),
660 ));
661 ca
662 }
663}
664
665impl ListChunked {
666 #[inline]
667 pub fn get_as_series(&self, idx: usize) -> Option<Series> {
668 unsafe {
669 Some(Series::from_chunks_and_dtype_unchecked(
670 self.name().clone(),
671 vec![self.get(idx)?],
672 &self.inner_dtype().to_physical(),
673 ))
674 }
675 }
676
677 pub fn has_empty_lists(&self) -> bool {
678 for arr in self.downcast_iter() {
679 if arr.is_empty() {
680 continue;
681 }
682
683 if match arr.validity() {
684 None => arr.offsets().lengths().any(|l| l == 0),
685 Some(validity) => arr
686 .offsets()
687 .lengths()
688 .enumerate()
689 .any(|(i, l)| l == 0 && unsafe { validity.get_bit_unchecked(i) }),
690 } {
691 return true;
692 }
693 }
694
695 false
696 }
697
698 pub fn has_masked_out_values(&self) -> bool {
699 for arr in self.downcast_iter() {
700 if arr.is_empty() {
701 continue;
702 }
703
704 if *arr.offsets().first() != 0 || *arr.offsets().last() != arr.values().len() as i64 {
705 return true;
706 }
707
708 let Some(validity) = arr.validity() else {
709 continue;
710 };
711 if validity.set_bits() == 0 {
712 continue;
713 }
714
715 for i in (!validity).true_idx_iter() {
717 if arr.offsets().length_at(i) > 0 {
718 return true;
719 }
720 }
721 }
722
723 false
724 }
725}
726
727#[cfg(feature = "dtype-array")]
728impl ArrayChunked {
729 #[inline]
730 pub fn get_as_series(&self, idx: usize) -> Option<Series> {
731 unsafe {
732 Some(Series::from_chunks_and_dtype_unchecked(
733 self.name().clone(),
734 vec![self.get(idx)?],
735 &self.inner_dtype().to_physical(),
736 ))
737 }
738 }
739
740 pub fn from_aligned_values(
741 name: PlSmallStr,
742 inner_dtype: &DataType,
743 width: usize,
744 chunks: Vec<ArrayRef>,
745 length: usize,
746 ) -> Self {
747 let dtype = DataType::Array(Box::new(inner_dtype.clone()), width);
748 let arrow_dtype = dtype.to_arrow(CompatLevel::newest()).to_storage_recursive();
749 let field = Arc::new(Field::new(name, dtype));
750 if width == 0 {
751 use arrow::array::builder::{ArrayBuilder, make_builder};
752 let values = make_builder(&inner_dtype.to_arrow(CompatLevel::newest())).freeze();
753 return ArrayChunked::new_with_compute_len(
754 field,
755 vec![FixedSizeListArray::new(arrow_dtype, length, values, None).into_boxed()],
756 );
757 }
758 let mut total_len = 0;
759 let chunks = chunks
760 .iter()
761 .map(|chunk| {
762 debug_assert_eq!(chunk.len() % width, 0);
763 let chunk_len = chunk.len() / width;
764 total_len += chunk_len;
765 FixedSizeListArray::new(arrow_dtype.clone(), chunk_len, chunk.clone(), None)
766 .into_boxed()
767 })
768 .collect();
769 debug_assert_eq!(total_len, length);
770
771 unsafe { Self::new_with_dims(field, chunks, length, 0) }
772 }
773
774 pub fn to_list(&self) -> ListChunked {
778 let inner_dtype = self.inner_dtype();
779 let chunks = self
780 .downcast_iter()
781 .map(|chunk| {
782 use arrow::offset::OffsetsBuffer;
783
784 let inner_dtype = chunk.dtype().inner_dtype().unwrap();
785 let dtype = inner_dtype.clone().to_large_list(true);
786
787 let offsets = (0..=chunk.len())
788 .map(|i| (i * self.width()) as i64)
789 .collect::<Vec<i64>>();
790
791 let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets.into()) };
793
794 ListArray::<i64>::new(
795 dtype,
796 offsets,
797 chunk.values().clone(),
798 chunk.validity().cloned(),
799 )
800 .into_boxed()
801 })
802 .collect();
803
804 let mut ca = unsafe {
806 ListChunked::new_with_dims(
807 Arc::new(Field::new(
808 self.name().clone(),
809 DataType::List(Box::new(inner_dtype.clone())),
810 )),
811 chunks,
812 self.len(),
813 self.null_count(),
814 )
815 };
816 ca.set_fast_explode_list(!self.has_nulls());
817 ca
818 }
819}
820
821impl<T> ChunkedArray<T>
822where
823 T: PolarsDataType,
824{
825 pub fn match_chunks<I>(&self, chunk_id: I) -> Self
829 where
830 I: Iterator<Item = usize>,
831 {
832 debug_assert!(self.chunks.len() == 1);
833 let slice = |ca: &Self| {
835 let array = &ca.chunks[0];
836
837 let mut offset = 0;
838 let chunks = chunk_id
839 .map(|len| {
840 debug_assert!((offset + len) <= array.len());
842 let out = unsafe { array.sliced_unchecked(offset, len) };
843 offset += len;
844 out
845 })
846 .collect();
847
848 debug_assert_eq!(offset, array.len());
849
850 unsafe {
852 Self::from_chunks_and_dtype(self.name().clone(), chunks, self.dtype().clone())
853 }
854 };
855
856 if self.chunks.len() != 1 {
857 let out = self.rechunk();
858 slice(&out)
859 } else {
860 slice(self)
861 }
862 }
863}
864
865impl<T: PolarsDataType> AsRefDataType for ChunkedArray<T> {
866 fn as_ref_dtype(&self) -> &DataType {
867 self.dtype()
868 }
869}
870
871pub(crate) trait AsSinglePtr: AsRefDataType {
872 fn as_single_ptr(&mut self) -> PolarsResult<usize> {
874 polars_bail!(opq = as_single_ptr, self.as_ref_dtype());
875 }
876}
877
878impl<T> AsSinglePtr for ChunkedArray<T>
879where
880 T: PolarsNumericType,
881{
882 fn as_single_ptr(&mut self) -> PolarsResult<usize> {
883 self.rechunk_mut();
884 let a = self.data_views().next().unwrap();
885 let ptr = a.as_ptr();
886 Ok(ptr as usize)
887 }
888}
889
890impl AsSinglePtr for BooleanChunked {}
891impl AsSinglePtr for ListChunked {}
892#[cfg(feature = "dtype-array")]
893impl AsSinglePtr for ArrayChunked {}
894impl AsSinglePtr for StringChunked {}
895impl AsSinglePtr for BinaryChunked {}
896#[cfg(feature = "object")]
897impl<T: PolarsObject> AsSinglePtr for ObjectChunked<T> {}
898
899pub enum ChunkedArrayLayout<'a, T: PolarsDataType> {
900 SingleNoNull(&'a T::Array),
901 Single(&'a T::Array),
902 MultiNoNull(&'a ChunkedArray<T>),
903 Multi(&'a ChunkedArray<T>),
904}
905
906impl<T> ChunkedArray<T>
907where
908 T: PolarsDataType,
909{
910 pub fn layout(&self) -> ChunkedArrayLayout<'_, T> {
911 if self.chunks.len() == 1 {
912 let arr = self.downcast_iter().next().unwrap();
913 return if arr.null_count() == 0 {
914 ChunkedArrayLayout::SingleNoNull(arr)
915 } else {
916 ChunkedArrayLayout::Single(arr)
917 };
918 }
919
920 if self.downcast_iter().all(|a| a.null_count() == 0) {
921 ChunkedArrayLayout::MultiNoNull(self)
922 } else {
923 ChunkedArrayLayout::Multi(self)
924 }
925 }
926}
927
928impl<T> ChunkedArray<T>
929where
930 T: PolarsNumericType,
931{
932 pub fn cont_slice(&self) -> PolarsResult<&[T::Native]> {
934 polars_ensure!(
935 self.chunks.len() == 1 && self.chunks[0].null_count() == 0,
936 ComputeError: "chunked array is not contiguous"
937 );
938 Ok(self.downcast_iter().next().map(|arr| arr.values()).unwrap())
939 }
940
941 pub(crate) fn cont_slice_mut(&mut self) -> Option<&mut [T::Native]> {
943 if self.chunks.len() == 1 && self.chunks[0].null_count() == 0 {
944 let arr = unsafe { self.downcast_iter_mut().next().unwrap() };
946 arr.get_mut_values()
947 } else {
948 None
949 }
950 }
951
952 pub fn data_views(&self) -> impl DoubleEndedIterator<Item = &[T::Native]> {
956 self.downcast_iter().map(|arr| arr.values().as_slice())
957 }
958
959 #[allow(clippy::wrong_self_convention)]
960 pub fn into_no_null_iter(
961 &self,
962 ) -> impl '_ + Send + Sync + ExactSizeIterator<Item = T::Native> + DoubleEndedIterator + TrustedLen
963 {
964 #[allow(clippy::map_clone)]
966 unsafe {
968 self.data_views()
969 .flatten()
970 .map(|v| *v)
971 .trust_my_length(self.len())
972 }
973 }
974}
975
976impl<T: PolarsDataType> Clone for ChunkedArray<T> {
977 fn clone(&self) -> Self {
978 ChunkedArray {
979 field: self.field.clone(),
980 chunks: self.chunks.clone(),
981 flags: self.flags.clone(),
982
983 _pd: Default::default(),
984 length: self.length,
985 null_count: self.null_count,
986 }
987 }
988}
989
990impl<T: PolarsDataType> AsRef<ChunkedArray<T>> for ChunkedArray<T> {
991 fn as_ref(&self) -> &ChunkedArray<T> {
992 self
993 }
994}
995
996impl ValueSize for ListChunked {
997 fn get_values_size(&self) -> usize {
998 self.chunks
999 .iter()
1000 .fold(0usize, |acc, arr| acc + arr.get_values_size())
1001 }
1002}
1003
1004#[cfg(feature = "dtype-array")]
1005impl ValueSize for ArrayChunked {
1006 fn get_values_size(&self) -> usize {
1007 self.chunks
1008 .iter()
1009 .fold(0usize, |acc, arr| acc + arr.get_values_size())
1010 }
1011}
1012impl ValueSize for StringChunked {
1013 fn get_values_size(&self) -> usize {
1014 self.chunks
1015 .iter()
1016 .fold(0usize, |acc, arr| acc + arr.get_values_size())
1017 }
1018}
1019
1020impl ValueSize for BinaryOffsetChunked {
1021 fn get_values_size(&self) -> usize {
1022 self.chunks
1023 .iter()
1024 .fold(0usize, |acc, arr| acc + arr.get_values_size())
1025 }
1026}
1027
1028pub(crate) fn to_primitive<T: PolarsNumericType>(
1029 values: Vec<T::Native>,
1030 validity: Option<Bitmap>,
1031) -> PrimitiveArray<T::Native> {
1032 PrimitiveArray::new(
1033 T::get_static_dtype().to_arrow(CompatLevel::newest()),
1034 values.into(),
1035 validity,
1036 )
1037}
1038
1039pub(crate) fn to_array<T: PolarsNumericType>(
1040 values: Vec<T::Native>,
1041 validity: Option<Bitmap>,
1042) -> ArrayRef {
1043 Box::new(to_primitive::<T>(values, validity))
1044}
1045
1046impl<T: PolarsDataType> Default for ChunkedArray<T> {
1047 fn default() -> Self {
1048 let dtype = T::get_static_dtype();
1049 let arrow_dtype = dtype.to_physical().to_arrow(CompatLevel::newest());
1050 ChunkedArray {
1051 field: Arc::new(Field::new(PlSmallStr::EMPTY, dtype)),
1052 chunks: vec![new_empty_array(arrow_dtype)],
1054 flags: StatisticsFlagsIM::empty(),
1055
1056 _pd: Default::default(),
1057 length: 0,
1058 null_count: 0,
1059 }
1060 }
1061}
1062
1063#[cfg(test)]
1064pub(crate) mod test {
1065 use crate::prelude::*;
1066
1067 pub(crate) fn get_chunked_array() -> Int32Chunked {
1068 ChunkedArray::new(PlSmallStr::from_static("a"), &[1, 2, 3])
1069 }
1070
1071 #[test]
1072 fn test_sort() {
1073 let a = Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 9, 3, 2]);
1074 let b = a
1075 .sort(false)
1076 .iter()
1077 .map(|opt| opt.unwrap())
1078 .collect::<Vec<_>>();
1079 assert_eq!(b, [1, 2, 3, 9]);
1080 let a = StringChunked::new(PlSmallStr::from_static("a"), &["b", "a", "c"]);
1081 let a = a.sort(false);
1082 let b = a.iter().collect::<Vec<_>>();
1083 assert_eq!(b, [Some("a"), Some("b"), Some("c")]);
1084 assert!(a.is_sorted_ascending_flag());
1085 }
1086
1087 #[test]
1088 fn arithmetic() {
1089 let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 6, 40]);
1090 let b = &Int32Chunked::new(PlSmallStr::from_static("b"), &[-1, 2, 3, 4]);
1091
1092 println!("{:?}", a + b);
1095 println!("{:?}", a - b);
1096 println!("{:?}", a * b);
1097 println!("{:?}", a / b);
1098 }
1099
1100 #[test]
1101 fn iter() {
1102 let s1 = get_chunked_array();
1103 assert_eq!(s1.iter().fold(0, |acc, val| { acc + val.unwrap() }), 6)
1105 }
1106
1107 #[test]
1108 fn limit() {
1109 let a = get_chunked_array();
1110 let b = a.limit(2);
1111 println!("{b:?}");
1112 assert_eq!(b.len(), 2)
1113 }
1114
1115 #[test]
1116 fn filter() {
1117 let a = get_chunked_array();
1118 let b = a
1119 .filter(&BooleanChunked::new(
1120 PlSmallStr::from_static("filter"),
1121 &[true, false, false],
1122 ))
1123 .unwrap();
1124 assert_eq!(b.len(), 1);
1125 assert_eq!(b.iter().next(), Some(Some(1)));
1126 }
1127
1128 #[test]
1129 fn aggregates() {
1130 let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 10, 9]);
1131 assert_eq!(a.max(), Some(100));
1132 assert_eq!(a.min(), Some(1));
1133 assert_eq!(a.sum(), Some(120))
1134 }
1135
1136 #[test]
1137 fn take() {
1138 let a = get_chunked_array();
1139 let new = a.take(&[0 as IdxSize, 1]).unwrap();
1140 assert_eq!(new.len(), 2)
1141 }
1142
1143 #[test]
1144 fn cast() {
1145 let a = get_chunked_array();
1146 let b = a.cast(&DataType::Int64).unwrap();
1147 assert_eq!(b.dtype(), &DataType::Int64)
1148 }
1149
1150 fn assert_slice_equal<T>(ca: &ChunkedArray<T>, eq: &[T::Native])
1151 where
1152 T: PolarsNumericType,
1153 {
1154 assert_eq!(ca.iter().map(|opt| opt.unwrap()).collect::<Vec<_>>(), eq)
1155 }
1156
1157 #[test]
1158 fn slice() {
1159 let mut first = UInt32Chunked::new(PlSmallStr::from_static("first"), &[0, 1, 2]);
1160 let second = UInt32Chunked::new(PlSmallStr::from_static("second"), &[3, 4, 5]);
1161 first.append(&second).unwrap();
1162 assert_slice_equal(&first.slice(0, 3), &[0, 1, 2]);
1163 assert_slice_equal(&first.slice(0, 4), &[0, 1, 2, 3]);
1164 assert_slice_equal(&first.slice(1, 4), &[1, 2, 3, 4]);
1165 assert_slice_equal(&first.slice(3, 2), &[3, 4]);
1166 assert_slice_equal(&first.slice(3, 3), &[3, 4, 5]);
1167 assert_slice_equal(&first.slice(-3, 3), &[3, 4, 5]);
1168 assert_slice_equal(&first.slice(-6, 6), &[0, 1, 2, 3, 4, 5]);
1169
1170 assert_eq!(first.slice(-7, 2).len(), 1);
1171 assert_eq!(first.slice(-3, 4).len(), 3);
1172 assert_eq!(first.slice(3, 4).len(), 3);
1173 assert_eq!(first.slice(10, 4).len(), 0);
1174 }
1175
1176 #[test]
1177 fn sorting() {
1178 let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[9, 2, 4]);
1179 let sorted = s.sort(false);
1180 assert_slice_equal(&sorted, &[2, 4, 9]);
1181 let sorted = s.sort(true);
1182 assert_slice_equal(&sorted, &[9, 4, 2]);
1183
1184 let s: StringChunked = ["b", "a", "z"].iter().collect();
1185 let sorted = s.sort(false);
1186 assert_eq!(
1187 sorted.iter().collect::<Vec<_>>(),
1188 &[Some("a"), Some("b"), Some("z")]
1189 );
1190 let sorted = s.sort(true);
1191 assert_eq!(
1192 sorted.iter().collect::<Vec<_>>(),
1193 &[Some("z"), Some("b"), Some("a")]
1194 );
1195 let s: StringChunked = [Some("b"), None, Some("z")].iter().copied().collect();
1196 let sorted = s.sort(false);
1197 assert_eq!(
1198 sorted.iter().collect::<Vec<_>>(),
1199 &[None, Some("b"), Some("z")]
1200 );
1201 }
1202
1203 #[test]
1204 fn reverse() {
1205 let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3]);
1206 assert_slice_equal(&s.reverse(), &[3, 2, 1]);
1208 let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[Some(1), None, Some(3)]);
1210 assert_eq!(Vec::from(&s.reverse()), &[Some(3), None, Some(1)]);
1211 let s = BooleanChunked::new(PlSmallStr::EMPTY, &[true, false]);
1212 assert_eq!(Vec::from(&s.reverse()), &[Some(false), Some(true)]);
1213
1214 let s = StringChunked::new(PlSmallStr::EMPTY, &["a", "b", "c"]);
1215 assert_eq!(Vec::from(&s.reverse()), &[Some("c"), Some("b"), Some("a")]);
1216
1217 let s = StringChunked::new(PlSmallStr::EMPTY, &[Some("a"), None, Some("c")]);
1218 assert_eq!(Vec::from(&s.reverse()), &[Some("c"), None, Some("a")]);
1219 }
1220
1221 #[test]
1222 #[cfg(feature = "dtype-categorical")]
1223 fn test_iter_categorical() {
1224 let ca = StringChunked::new(
1225 PlSmallStr::EMPTY,
1226 &[Some("foo"), None, Some("bar"), Some("ham")],
1227 );
1228 let cats = Categories::new(
1229 PlSmallStr::EMPTY,
1230 PlSmallStr::EMPTY,
1231 CategoricalPhysical::U32,
1232 );
1233 let ca = ca.cast(&DataType::from_categories(cats)).unwrap();
1234 let ca = ca.cat32().unwrap();
1235 let v: Vec<_> = ca.physical().iter().collect();
1236 assert_eq!(v, &[Some(0), None, Some(1), Some(2)]);
1237 }
1238
1239 #[test]
1240 #[ignore]
1241 fn test_shrink_to_fit() {
1242 let mut builder = StringChunkedBuilder::new(PlSmallStr::from_static("foo"), 2048);
1243 builder.append_value("foo");
1244 let mut arr = builder.finish();
1245 let before = arr
1246 .chunks()
1247 .iter()
1248 .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
1249 .sum::<usize>();
1250 arr.shrink_to_fit();
1251 let after = arr
1252 .chunks()
1253 .iter()
1254 .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
1255 .sum::<usize>();
1256 assert!(before > after);
1257 }
1258}