1#![allow(unsafe_op_in_unsafe_fn)]
3use std::sync::Arc;
4
5use arrow::array::*;
6use arrow::bitmap::Bitmap;
7use arrow::compute::concatenate::concatenate_unchecked;
8use arrow::compute::utils::combine_validities_and;
9use polars_compute::filter::filter_with_bitmap;
10
11use crate::prelude::{ChunkTakeUnchecked, *};
12
13pub mod ops;
14#[macro_use]
15pub mod arithmetic;
16pub mod builder;
17pub mod cast;
18pub mod collect;
19pub mod comparison;
20pub mod flags;
21pub mod float;
22pub mod iterator;
23#[cfg(feature = "ndarray")]
24pub(crate) mod ndarray;
25
26pub mod arg_min_max;
27#[cfg(feature = "dtype-array")]
28pub(crate) mod array;
29mod binary;
30mod binary_offset;
31mod bitwise;
32#[cfg(feature = "object")]
33mod drop;
34mod from;
35mod from_iterator;
36pub mod from_iterator_par;
37pub(crate) mod list;
38pub(crate) mod logical;
39#[cfg(feature = "object")]
40pub mod object;
41#[cfg(feature = "random")]
42mod random;
43#[cfg(feature = "dtype-struct")]
44mod struct_;
45#[cfg(any(
46 feature = "temporal",
47 feature = "dtype-datetime",
48 feature = "dtype-date"
49))]
50pub mod temporal;
51mod to_vec;
52mod trusted_len;
53pub(crate) use arg_min_max::*;
54use arrow::legacy::prelude::*;
55#[cfg(feature = "dtype-struct")]
56pub use struct_::StructChunked;
57
58use self::flags::{StatisticsFlags, StatisticsFlagsIM};
59use crate::series::IsSorted;
60use crate::utils::{first_non_null, first_null, last_non_null};
61
/// Iterator over the length of every chunk of a [`ChunkedArray`] (see `chunk_lengths`).
pub type ChunkLenIter<'a> = std::iter::Map<std::slice::Iter<'a, ArrayRef>, fn(&ArrayRef) -> usize>;
63
/// A typed column backed by one or more Arrow arrays ("chunks").
pub struct ChunkedArray<T: PolarsDataType> {
    /// Name and logical dtype of this column.
    pub(crate) field: Arc<Field>,
    /// The backing Arrow arrays. Code elsewhere in this file (e.g. `clear`,
    /// `last`) indexes the first chunk unconditionally, so there appears to be
    /// an invariant that at least one (possibly empty) chunk exists — TODO confirm.
    pub(crate) chunks: Vec<ArrayRef>,

    /// Cached statistics (sortedness, fast-explode) behind interior mutability.
    pub(crate) flags: StatisticsFlagsIM,

    // Total number of values across all chunks.
    length: usize,
    // Total number of null values across all chunks.
    null_count: usize,
    _pd: std::marker::PhantomData<T>,
}
148
149impl<T: PolarsDataType> ChunkedArray<T> {
150 fn should_rechunk(&self) -> bool {
151 self.chunks.len() > 1 && self.chunks.len() > self.len() / 3
152 }
153
154 fn optional_rechunk(mut self) -> Self {
155 if self.should_rechunk() {
157 self.rechunk_mut()
158 }
159 self
160 }
161
162 pub(crate) fn as_any(&self) -> &dyn std::any::Any {
163 self
164 }
165
166 pub fn unpack_series_matching_type<'a>(
168 &self,
169 series: &'a Series,
170 ) -> PolarsResult<&'a ChunkedArray<T>> {
171 polars_ensure!(
172 self.dtype() == series.dtype(),
173 SchemaMismatch: "cannot unpack series of type `{}` into `{}`",
174 series.dtype(),
175 self.dtype(),
176 );
177
178 Ok(unsafe { self.unpack_series_matching_physical_type(series) })
180 }
181
182 fn new_with_compute_len(field: Arc<Field>, chunks: Vec<ArrayRef>) -> Self {
187 unsafe {
188 let mut chunked_arr = Self::new_with_dims(field, chunks, 0, 0);
189 chunked_arr.compute_len();
190 chunked_arr
191 }
192 }
193
194 pub unsafe fn new_with_dims(
198 field: Arc<Field>,
199 chunks: Vec<ArrayRef>,
200 length: usize,
201 null_count: usize,
202 ) -> Self {
203 Self {
204 field,
205 chunks,
206 flags: StatisticsFlagsIM::empty(),
207
208 _pd: Default::default(),
209 length,
210 null_count,
211 }
212 }
213
214 pub(crate) fn is_sorted_ascending_flag(&self) -> bool {
215 self.get_flags().is_sorted_ascending()
216 }
217
218 pub(crate) fn is_sorted_descending_flag(&self) -> bool {
219 self.get_flags().is_sorted_descending()
220 }
221
222 pub(crate) fn is_sorted_any(&self) -> bool {
224 self.get_flags().is_sorted_any()
225 }
226
227 pub fn unset_fast_explode_list(&mut self) {
228 self.set_fast_explode_list(false)
229 }
230
231 pub fn set_fast_explode_list(&mut self, value: bool) {
232 let mut flags = self.flags.get_mut();
233 flags.set(StatisticsFlags::CAN_FAST_EXPLODE_LIST, value);
234 self.flags.set_mut(flags);
235 }
236
237 pub fn get_fast_explode_list(&self) -> bool {
238 self.get_flags().can_fast_explode_list()
239 }
240
241 pub fn get_flags(&self) -> StatisticsFlags {
242 self.flags.get()
243 }
244
245 pub fn set_flags(&mut self, flags: StatisticsFlags) {
247 self.flags = StatisticsFlagsIM::new(flags);
248 }
249
250 pub fn is_sorted_flag(&self) -> IsSorted {
251 self.get_flags().is_sorted()
252 }
253
254 pub fn retain_flags_from<U: PolarsDataType>(
255 &mut self,
256 from: &ChunkedArray<U>,
257 retain_flags: StatisticsFlags,
258 ) {
259 let flags = from.flags.get();
260 if !flags.is_empty() {
262 self.set_flags(flags & retain_flags)
263 }
264 }
265
266 pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
268 let mut flags = self.flags.get_mut();
269 flags.set_sorted(sorted);
270 self.flags.set_mut(flags);
271 }
272
273 pub fn with_sorted_flag(&self, sorted: IsSorted) -> Self {
275 let mut out = self.clone();
276 out.set_sorted_flag(sorted);
277 out
278 }
279
280 pub fn first_null(&self) -> Option<usize> {
281 if self.null_count() == 0 {
282 None
283 }
284 else if self.null_count() == self.len() {
286 Some(0)
287 } else if self.is_sorted_any() {
288 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
289 0
291 } else {
292 self.null_count()
294 };
295
296 debug_assert!(
297 unsafe { self.get_unchecked(out) }.is_some(),
299 "incorrect sorted flag"
300 );
301
302 Some(out)
303 } else {
304 first_null(self.chunks().iter().map(|arr| arr.as_ref()))
305 }
306 }
307
308 pub fn first_non_null(&self) -> Option<usize> {
310 if self.null_count() == self.len() {
311 None
312 }
313 else if self.null_count() == 0 {
315 Some(0)
316 } else if self.is_sorted_any() {
317 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
318 self.null_count()
320 } else {
321 0
323 };
324
325 debug_assert!(
326 unsafe { self.get_unchecked(out) }.is_some(),
328 "incorrect sorted flag"
329 );
330
331 Some(out)
332 } else {
333 first_non_null(self.chunks().iter().map(|arr| arr.as_ref()))
334 }
335 }
336
337 pub fn last_non_null(&self) -> Option<usize> {
339 if self.null_count() == self.len() {
340 None
341 }
342 else if self.null_count() == 0 {
344 Some(self.len() - 1)
345 } else if self.is_sorted_any() {
346 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
347 self.len() - 1
349 } else {
350 self.len() - self.null_count() - 1
352 };
353
354 debug_assert!(
355 unsafe { self.get_unchecked(out) }.is_some(),
357 "incorrect sorted flag"
358 );
359
360 Some(out)
361 } else {
362 last_non_null(self.chunks().iter().map(|arr| arr.as_ref()), self.len())
363 }
364 }
365
366 pub fn drop_nulls(&self) -> Self {
367 if self.null_count() == 0 {
368 self.clone()
369 } else {
370 let chunks = self
371 .downcast_iter()
372 .map(|arr| {
373 if arr.null_count() == 0 {
374 arr.to_boxed()
375 } else {
376 filter_with_bitmap(arr, arr.validity().unwrap())
377 }
378 })
379 .collect();
380 unsafe {
381 Self::new_with_dims(
382 self.field.clone(),
383 chunks,
384 self.len() - self.null_count(),
385 0,
386 )
387 }
388 }
389 }
390
391 #[inline]
393 #[allow(clippy::type_complexity)]
394 pub fn iter_validities(
395 &self,
396 ) -> impl ExactSizeIterator<Item = Option<&Bitmap>> + DoubleEndedIterator {
397 fn to_validity(arr: &ArrayRef) -> Option<&Bitmap> {
398 arr.validity()
399 }
400 self.chunks.iter().map(to_validity)
401 }
402
403 #[inline]
404 pub fn has_nulls(&self) -> bool {
406 self.null_count > 0
407 }
408
409 pub fn shrink_to_fit(&mut self) {
411 self.chunks = vec![concatenate_unchecked(self.chunks.as_slice()).unwrap()];
412 }
413
414 pub fn clear(&self) -> Self {
415 let mut ca = unsafe {
417 self.copy_with_chunks(vec![new_empty_array(
418 self.chunks.first().unwrap().dtype().clone(),
419 )])
420 };
421
422 use StatisticsFlags as F;
423 ca.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
424 ca
425 }
426
427 pub(crate) unsafe fn unpack_series_matching_physical_type<'a>(
434 &self,
435 series: &'a Series,
436 ) -> &'a ChunkedArray<T> {
437 let series_trait = &**series;
438 if self.dtype() == series.dtype() {
439 &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
440 } else {
441 use DataType::*;
442 match (self.dtype(), series.dtype()) {
443 (Int64, Datetime(_, _)) | (Int64, Duration(_)) | (Int32, Date) => {
444 &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
445 },
446 _ => panic!(
447 "cannot unpack series {:?} into matching type {:?}",
448 series,
449 self.dtype()
450 ),
451 }
452 }
453 }
454
455 pub fn chunk_lengths(&self) -> ChunkLenIter<'_> {
457 self.chunks.iter().map(|chunk| chunk.len())
458 }
459
460 #[inline]
462 pub fn chunks(&self) -> &Vec<ArrayRef> {
463 &self.chunks
464 }
465
466 #[inline]
472 pub unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
473 &mut self.chunks
474 }
475
476 pub fn is_optimal_aligned(&self) -> bool {
478 self.chunks.len() == 1 && self.null_count() == 0
479 }
480
481 unsafe fn copy_with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
486 Self::new_with_compute_len(self.field.clone(), chunks)
487 }
488
489 pub fn dtype(&self) -> &DataType {
491 self.field.dtype()
492 }
493
494 pub(crate) unsafe fn set_dtype(&mut self, dtype: DataType) {
495 self.field = Arc::new(Field::new(self.name().clone(), dtype))
496 }
497
498 pub fn name(&self) -> &PlSmallStr {
500 self.field.name()
501 }
502
503 pub fn ref_field(&self) -> &Field {
505 &self.field
506 }
507
508 pub fn rename(&mut self, name: PlSmallStr) {
510 self.field = Arc::new(Field::new(name, self.field.dtype().clone()));
511 }
512
513 pub fn with_name(mut self, name: PlSmallStr) -> Self {
515 self.rename(name);
516 self
517 }
518}
519
520impl<T> ChunkedArray<T>
521where
522 T: PolarsDataType,
523{
524 #[inline]
530 pub fn get(&self, idx: usize) -> Option<T::Physical<'_>> {
531 let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
532 assert!(
533 chunk_idx < self.chunks().len(),
534 "index: {} out of bounds for len: {}",
535 idx,
536 self.len()
537 );
538 unsafe {
539 let arr = self.downcast_get_unchecked(chunk_idx);
540 assert!(
541 arr_idx < arr.len(),
542 "index: {} out of bounds for len: {}",
543 idx,
544 self.len()
545 );
546 arr.get_unchecked(arr_idx)
547 }
548 }
549
550 #[inline]
556 pub unsafe fn get_unchecked(&self, idx: usize) -> Option<T::Physical<'_>> {
557 let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
558
559 unsafe {
560 self.downcast_get_unchecked(chunk_idx)
562 .get_unchecked(arr_idx)
563 }
564 }
565
566 #[inline]
572 pub unsafe fn value_unchecked(&self, idx: usize) -> T::Physical<'_> {
573 let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
574
575 unsafe {
576 self.downcast_get_unchecked(chunk_idx)
578 .value_unchecked(arr_idx)
579 }
580 }
581
582 #[inline]
585 pub fn first(&self) -> Option<T::Physical<'_>> {
586 self.iter().next().unwrap()
587 }
588
589 #[inline]
592 pub fn last(&self) -> Option<T::Physical<'_>> {
593 let arr = self
594 .downcast_iter()
595 .rev()
596 .find(|arr| !arr.is_empty())
597 .unwrap();
598 unsafe { arr.get_unchecked(arr.len() - 1) }
599 }
600
601 pub fn set_validity(&mut self, validity: &Bitmap) {
602 assert_eq!(self.len(), validity.len());
603 let mut i = 0;
604 for chunk in unsafe { self.chunks_mut() } {
605 *chunk = chunk.with_validity(Some(validity.clone().sliced(i, chunk.len())));
606 i += chunk.len();
607 }
608 self.null_count = validity.unset_bits();
609 self.set_fast_explode_list(false);
610 }
611}
612
impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
    ChunkedArray<T>: ChunkTakeUnchecked<[IdxSize]>,
{
    /// Scatter the values of `self` into the `true` slots of `validity`,
    /// producing an array of length `validity.len()` whose `false` slots are null.
    ///
    /// # Panics
    /// Panics if `self.len()` does not equal `validity.set_bits()`.
    pub fn deposit(&self, validity: &Bitmap) -> Self {
        let set_bits = validity.set_bits();

        // Exactly one source value per set bit.
        assert_eq!(self.len(), set_bits);

        if set_bits == validity.len() {
            // All bits set: nothing to insert.
            return self.clone();
        }

        if set_bits == 0 {
            return Self::full_null_like(self, validity.len());
        }

        let mut null_mask = validity.clone();

        // Build one gather index per output slot. Null slots repeat the
        // previous source index (an arbitrary but in-bounds value); they are
        // masked out by the validity combination below.
        let mut gather_idxs = Vec::with_capacity(validity.len());
        let leading_nulls = null_mask.take_leading_zeros();
        // Leading null slots plus the first set slot all gather source index 0.
        gather_idxs.extend(std::iter::repeat_n(0, leading_nulls + 1));

        // For each remaining slot, advance the source index only on set bits.
        let mut i = 0 as IdxSize;
        gather_idxs.extend(null_mask.iter().skip(1).map(|v| {
            i += IdxSize::from(v);
            i
        }));

        // SAFETY: by construction every gather index is < self.len().
        let mut ca = unsafe { ChunkTakeUnchecked::take_unchecked(self, &gather_idxs) };

        // AND the requested validity with any nulls already in the source so
        // both kinds of null survive.
        if let Some(combined) =
            combine_validities_and(Some(validity), ca.rechunk_validity().as_ref())
        {
            ca.set_validity(&combined);
        }

        ca
    }
}
655
656impl ListChunked {
657 #[inline]
658 pub fn get_as_series(&self, idx: usize) -> Option<Series> {
659 unsafe {
660 Some(Series::from_chunks_and_dtype_unchecked(
661 self.name().clone(),
662 vec![self.get(idx)?],
663 &self.inner_dtype().to_physical(),
664 ))
665 }
666 }
667
668 pub fn has_empty_lists(&self) -> bool {
669 for arr in self.downcast_iter() {
670 if arr.is_empty() {
671 continue;
672 }
673
674 if match arr.validity() {
675 None => arr.offsets().lengths().any(|l| l == 0),
676 Some(validity) => arr
677 .offsets()
678 .lengths()
679 .enumerate()
680 .any(|(i, l)| l == 0 && unsafe { validity.get_bit_unchecked(i) }),
681 } {
682 return true;
683 }
684 }
685
686 false
687 }
688
689 pub fn has_masked_out_values(&self) -> bool {
690 for arr in self.downcast_iter() {
691 if arr.is_empty() {
692 continue;
693 }
694
695 if *arr.offsets().first() != 0 || *arr.offsets().last() != arr.values().len() as i64 {
696 return true;
697 }
698
699 let Some(validity) = arr.validity() else {
700 continue;
701 };
702 if validity.set_bits() == 0 {
703 continue;
704 }
705
706 for i in (!validity).true_idx_iter() {
708 if arr.offsets().length_at(i) > 0 {
709 return true;
710 }
711 }
712 }
713
714 false
715 }
716}
717
#[cfg(feature = "dtype-array")]
impl ArrayChunked {
    /// Get the inner array at `idx` as a [`Series`], or `None` if `idx` is out
    /// of bounds or the entry is null.
    #[inline]
    pub fn get_as_series(&self, idx: usize) -> Option<Series> {
        // SAFETY: the chunk comes from `self`, so it matches the inner dtype.
        unsafe {
            Some(Series::from_chunks_and_dtype_unchecked(
                self.name().clone(),
                vec![self.get(idx)?],
                &self.inner_dtype().to_physical(),
            ))
        }
    }

    /// Build an `ArrayChunked` from chunks of flat values, where each
    /// consecutive run of `width` values forms one fixed-size array entry.
    /// No validity is attached.
    pub fn from_aligned_values(
        name: PlSmallStr,
        inner_dtype: &DataType,
        width: usize,
        chunks: Vec<ArrayRef>,
        length: usize,
    ) -> Self {
        let dtype = DataType::Array(Box::new(inner_dtype.clone()), width);
        let arrow_dtype = dtype.to_arrow(CompatLevel::newest());
        let field = Arc::new(Field::new(name, dtype));
        // Zero-width entries carry no values: build `length` empty entries
        // over an empty values array.
        if width == 0 {
            use arrow::array::builder::{ArrayBuilder, make_builder};
            let values = make_builder(&inner_dtype.to_arrow(CompatLevel::newest())).freeze();
            return ArrayChunked::new_with_compute_len(
                field,
                vec![FixedSizeListArray::new(arrow_dtype, length, values, None).into_boxed()],
            );
        }
        let mut total_len = 0;
        let chunks = chunks
            .into_iter()
            .map(|chunk| {
                // Each chunk must hold a whole number of width-sized entries.
                debug_assert_eq!(chunk.len() % width, 0);
                let chunk_len = chunk.len() / width;
                total_len += chunk_len;
                FixedSizeListArray::new(arrow_dtype.clone(), chunk_len, chunk, None).into_boxed()
            })
            .collect();
        debug_assert_eq!(total_len, length);

        // SAFETY: `length` matches the chunks; no validity, so null_count is 0.
        unsafe { Self::new_with_dims(field, chunks, length, 0) }
    }

    /// Convert this fixed-size-array column into a `List` column with the same
    /// inner values and validity (offsets are multiples of the width).
    pub fn to_list(&self) -> ListChunked {
        let inner_dtype = self.inner_dtype();
        let chunks = self
            .downcast_iter()
            .map(|chunk| {
                use arrow::offset::OffsetsBuffer;

                let inner_dtype = chunk.dtype().inner_dtype().unwrap();
                let dtype = inner_dtype.clone().to_large_list(true);

                // Every entry has exactly `width` values.
                let offsets = (0..=chunk.len())
                    .map(|i| (i * self.width()) as i64)
                    .collect::<Vec<i64>>();

                // SAFETY: the offsets above are monotonically non-decreasing.
                let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets.into()) };

                ListArray::<i64>::new(
                    dtype,
                    offsets,
                    chunk.values().clone(),
                    chunk.validity().cloned(),
                )
                .into_boxed()
            })
            .collect();

        // SAFETY: length and null_count are unchanged by the conversion.
        let mut ca = unsafe {
            ListChunked::new_with_dims(
                Arc::new(Field::new(
                    self.name().clone(),
                    DataType::List(Box::new(inner_dtype.clone())),
                )),
                chunks,
                self.len(),
                self.null_count(),
            )
        };
        // NOTE(review): fast-explode also requires no empty lists; with
        // width == 0 every list is empty — presumably callers handle that, TODO confirm.
        ca.set_fast_explode_list(!self.has_nulls());
        ca
    }
}
810
811impl<T> ChunkedArray<T>
812where
813 T: PolarsDataType,
814{
815 pub fn match_chunks<I>(&self, chunk_id: I) -> Self
819 where
820 I: Iterator<Item = usize>,
821 {
822 debug_assert!(self.chunks.len() == 1);
823 let slice = |ca: &Self| {
825 let array = &ca.chunks[0];
826
827 let mut offset = 0;
828 let chunks = chunk_id
829 .map(|len| {
830 debug_assert!((offset + len) <= array.len());
832 let out = unsafe { array.sliced_unchecked(offset, len) };
833 offset += len;
834 out
835 })
836 .collect();
837
838 debug_assert_eq!(offset, array.len());
839
840 unsafe {
842 Self::from_chunks_and_dtype(self.name().clone(), chunks, self.dtype().clone())
843 }
844 };
845
846 if self.chunks.len() != 1 {
847 let out = self.rechunk();
848 slice(&out)
849 } else {
850 slice(self)
851 }
852 }
853}
854
impl<T: PolarsDataType> AsRefDataType for ChunkedArray<T> {
    /// The logical [`DataType`] of this array.
    fn as_ref_dtype(&self) -> &DataType {
        self.dtype()
    }
}
860
/// Access to a raw pointer over a single contiguous data buffer.
pub(crate) trait AsSinglePtr: AsRefDataType {
    /// Rechunk and return the data pointer as a `usize`.
    ///
    /// The default implementation errors, as not every dtype is backed by a
    /// single flat buffer.
    fn as_single_ptr(&mut self) -> PolarsResult<usize> {
        polars_bail!(opq = as_single_ptr, self.as_ref_dtype());
    }
}
867
868impl<T> AsSinglePtr for ChunkedArray<T>
869where
870 T: PolarsNumericType,
871{
872 fn as_single_ptr(&mut self) -> PolarsResult<usize> {
873 self.rechunk_mut();
874 let a = self.data_views().next().unwrap();
875 let ptr = a.as_ptr();
876 Ok(ptr as usize)
877 }
878}
879
// These dtypes have no single contiguous native-value buffer, so they keep the
// erroring default implementation of `as_single_ptr`.
impl AsSinglePtr for BooleanChunked {}
impl AsSinglePtr for ListChunked {}
#[cfg(feature = "dtype-array")]
impl AsSinglePtr for ArrayChunked {}
impl AsSinglePtr for StringChunked {}
impl AsSinglePtr for BinaryChunked {}
#[cfg(feature = "object")]
impl<T: PolarsObject> AsSinglePtr for ObjectChunked<T> {}
888
/// The physical shape of a [`ChunkedArray`] — single vs. multiple chunks, with
/// or without nulls — so algorithms can choose a fast path per case.
pub enum ChunkedArrayLayout<'a, T: PolarsDataType> {
    /// Exactly one chunk, no null values.
    SingleNoNull(&'a T::Array),
    /// Exactly one chunk, possibly with null values.
    Single(&'a T::Array),
    /// Multiple chunks, no null values.
    MultiNoNull(&'a ChunkedArray<T>),
    /// Multiple chunks, possibly with null values.
    Multi(&'a ChunkedArray<T>),
}
895
896impl<T> ChunkedArray<T>
897where
898 T: PolarsDataType,
899{
900 pub fn layout(&self) -> ChunkedArrayLayout<'_, T> {
901 if self.chunks.len() == 1 {
902 let arr = self.downcast_iter().next().unwrap();
903 return if arr.null_count() == 0 {
904 ChunkedArrayLayout::SingleNoNull(arr)
905 } else {
906 ChunkedArrayLayout::Single(arr)
907 };
908 }
909
910 if self.downcast_iter().all(|a| a.null_count() == 0) {
911 ChunkedArrayLayout::MultiNoNull(self)
912 } else {
913 ChunkedArrayLayout::Multi(self)
914 }
915 }
916}
917
impl<T> ChunkedArray<T>
where
    T: PolarsNumericType,
{
    /// Contiguous slice of the values.
    ///
    /// # Errors
    /// Errors unless the array is exactly one chunk with no null values.
    pub fn cont_slice(&self) -> PolarsResult<&[T::Native]> {
        polars_ensure!(
            self.chunks.len() == 1 && self.chunks[0].null_count() == 0,
            ComputeError: "chunked array is not contiguous"
        );
        Ok(self.downcast_iter().next().map(|arr| arr.values()).unwrap())
    }

    /// Mutable contiguous slice of the values, or `None` when the array is not
    /// a single null-free chunk (or `get_mut_values` cannot hand out a mutable
    /// buffer — presumably when it is shared, TODO confirm).
    pub(crate) fn cont_slice_mut(&mut self) -> Option<&mut [T::Native]> {
        if self.chunks.len() == 1 && self.chunks[0].null_count() == 0 {
            // SAFETY: length/dtype invariants are untouched; only values may
            // be mutated through the returned slice.
            let arr = unsafe { self.downcast_iter_mut().next().unwrap() };
            arr.get_mut_values()
        } else {
            None
        }
    }

    /// Iterator over the raw value buffer of every chunk (validity is ignored).
    pub fn data_views(&self) -> impl DoubleEndedIterator<Item = &[T::Native]> {
        self.downcast_iter().map(|arr| arr.values().as_slice())
    }

    /// Iterator over the values as if there were no nulls.
    ///
    /// NOTE(review): this reads the raw buffers, so if nulls are present the
    /// masked-out slots are yielded as arbitrary values — callers must ensure
    /// the array is null-free.
    #[allow(clippy::wrong_self_convention)]
    pub fn into_no_null_iter(
        &self,
    ) -> impl '_ + Send + Sync + ExactSizeIterator<Item = T::Native> + DoubleEndedIterator + TrustedLen
    {
        #[allow(clippy::map_clone)]
        // SAFETY: the value buffers of all chunks together hold exactly
        // `self.len()` elements.
        unsafe {
            self.data_views()
                .flatten()
                .map(|v| *v)
                .trust_my_length(self.len())
        }
    }
}
965
966impl<T: PolarsDataType> Clone for ChunkedArray<T> {
967 fn clone(&self) -> Self {
968 ChunkedArray {
969 field: self.field.clone(),
970 chunks: self.chunks.clone(),
971 flags: self.flags.clone(),
972
973 _pd: Default::default(),
974 length: self.length,
975 null_count: self.null_count,
976 }
977 }
978}
979
// Identity `AsRef`, so generic code can accept `impl AsRef<ChunkedArray<T>>`.
impl<T: PolarsDataType> AsRef<ChunkedArray<T>> for ChunkedArray<T> {
    fn as_ref(&self) -> &ChunkedArray<T> {
        self
    }
}
985
986impl ValueSize for ListChunked {
987 fn get_values_size(&self) -> usize {
988 self.chunks
989 .iter()
990 .fold(0usize, |acc, arr| acc + arr.get_values_size())
991 }
992}
993
#[cfg(feature = "dtype-array")]
impl ValueSize for ArrayChunked {
    /// Total values size summed over all chunks.
    fn get_values_size(&self) -> usize {
        self.chunks.iter().map(|arr| arr.get_values_size()).sum()
    }
}
1002impl ValueSize for StringChunked {
1003 fn get_values_size(&self) -> usize {
1004 self.chunks
1005 .iter()
1006 .fold(0usize, |acc, arr| acc + arr.get_values_size())
1007 }
1008}
1009
1010impl ValueSize for BinaryOffsetChunked {
1011 fn get_values_size(&self) -> usize {
1012 self.chunks
1013 .iter()
1014 .fold(0usize, |acc, arr| acc + arr.get_values_size())
1015 }
1016}
1017
1018pub(crate) fn to_primitive<T: PolarsNumericType>(
1019 values: Vec<T::Native>,
1020 validity: Option<Bitmap>,
1021) -> PrimitiveArray<T::Native> {
1022 PrimitiveArray::new(
1023 T::get_static_dtype().to_arrow(CompatLevel::newest()),
1024 values.into(),
1025 validity,
1026 )
1027}
1028
1029pub(crate) fn to_array<T: PolarsNumericType>(
1030 values: Vec<T::Native>,
1031 validity: Option<Bitmap>,
1032) -> ArrayRef {
1033 Box::new(to_primitive::<T>(values, validity))
1034}
1035
1036impl<T: PolarsDataType> Default for ChunkedArray<T> {
1037 fn default() -> Self {
1038 let dtype = T::get_static_dtype();
1039 let arrow_dtype = dtype.to_physical().to_arrow(CompatLevel::newest());
1040 ChunkedArray {
1041 field: Arc::new(Field::new(PlSmallStr::EMPTY, dtype)),
1042 chunks: vec![new_empty_array(arrow_dtype)],
1044 flags: StatisticsFlagsIM::empty(),
1045
1046 _pd: Default::default(),
1047 length: 0,
1048 null_count: 0,
1049 }
1050 }
1051}
1052
#[cfg(test)]
/// Unit tests for basic `ChunkedArray` operations (sort, filter, slice, …).
pub(crate) mod test {
    use crate::prelude::*;

    // Shared fixture: a small single-chunk Int32 array.
    pub(crate) fn get_chunked_array() -> Int32Chunked {
        ChunkedArray::new(PlSmallStr::from_static("a"), &[1, 2, 3])
    }

    #[test]
    fn test_sort() {
        let a = Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 9, 3, 2]);
        let b = a
            .sort(false)
            .into_iter()
            .map(|opt| opt.unwrap())
            .collect::<Vec<_>>();
        assert_eq!(b, [1, 2, 3, 9]);
        let a = StringChunked::new(PlSmallStr::from_static("a"), &["b", "a", "c"]);
        let a = a.sort(false);
        let b = a.into_iter().collect::<Vec<_>>();
        assert_eq!(b, [Some("a"), Some("b"), Some("c")]);
        // Sorting must also set the ascending-sorted flag.
        assert!(a.is_sorted_ascending_flag());
    }

    #[test]
    fn arithmetic() {
        let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 6, 40]);
        let b = &Int32Chunked::new(PlSmallStr::from_static("b"), &[-1, 2, 3, 4]);

        // Smoke test: the operators must not panic.
        println!("{:?}", a + b);
        println!("{:?}", a - b);
        println!("{:?}", a * b);
        println!("{:?}", a / b);
    }

    #[test]
    fn iter() {
        let s1 = get_chunked_array();
        // 1 + 2 + 3 = 6
        assert_eq!(s1.into_iter().fold(0, |acc, val| { acc + val.unwrap() }), 6)
    }

    #[test]
    fn limit() {
        let a = get_chunked_array();
        let b = a.limit(2);
        println!("{b:?}");
        assert_eq!(b.len(), 2)
    }

    #[test]
    fn filter() {
        let a = get_chunked_array();
        let b = a
            .filter(&BooleanChunked::new(
                PlSmallStr::from_static("filter"),
                &[true, false, false],
            ))
            .unwrap();
        assert_eq!(b.len(), 1);
        assert_eq!(b.into_iter().next(), Some(Some(1)));
    }

    #[test]
    fn aggregates() {
        let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 10, 9]);
        assert_eq!(a.max(), Some(100));
        assert_eq!(a.min(), Some(1));
        assert_eq!(a.sum(), Some(120))
    }

    #[test]
    fn take() {
        let a = get_chunked_array();
        let new = a.take(&[0 as IdxSize, 1]).unwrap();
        assert_eq!(new.len(), 2)
    }

    #[test]
    fn cast() {
        let a = get_chunked_array();
        let b = a.cast(&DataType::Int64).unwrap();
        assert_eq!(b.dtype(), &DataType::Int64)
    }

    // Helper: compare a numeric ChunkedArray against an expected slice,
    // unwrapping every value (so nulls fail the test).
    fn assert_slice_equal<T>(ca: &ChunkedArray<T>, eq: &[T::Native])
    where
        T: PolarsNumericType,
    {
        assert_eq!(ca.iter().map(|opt| opt.unwrap()).collect::<Vec<_>>(), eq)
    }

    #[test]
    fn slice() {
        // Two appended arrays, so slicing crosses a chunk boundary.
        let mut first = UInt32Chunked::new(PlSmallStr::from_static("first"), &[0, 1, 2]);
        let second = UInt32Chunked::new(PlSmallStr::from_static("second"), &[3, 4, 5]);
        first.append(&second).unwrap();
        assert_slice_equal(&first.slice(0, 3), &[0, 1, 2]);
        assert_slice_equal(&first.slice(0, 4), &[0, 1, 2, 3]);
        assert_slice_equal(&first.slice(1, 4), &[1, 2, 3, 4]);
        assert_slice_equal(&first.slice(3, 2), &[3, 4]);
        assert_slice_equal(&first.slice(3, 3), &[3, 4, 5]);
        // Negative offsets count from the end.
        assert_slice_equal(&first.slice(-3, 3), &[3, 4, 5]);
        assert_slice_equal(&first.slice(-6, 6), &[0, 1, 2, 3, 4, 5]);

        // Out-of-range requests are clamped rather than panicking.
        assert_eq!(first.slice(-7, 2).len(), 1);
        assert_eq!(first.slice(-3, 4).len(), 3);
        assert_eq!(first.slice(3, 4).len(), 3);
        assert_eq!(first.slice(10, 4).len(), 0);
    }

    #[test]
    fn sorting() {
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[9, 2, 4]);
        let sorted = s.sort(false);
        assert_slice_equal(&sorted, &[2, 4, 9]);
        let sorted = s.sort(true);
        assert_slice_equal(&sorted, &[9, 4, 2]);

        let s: StringChunked = ["b", "a", "z"].iter().collect();
        let sorted = s.sort(false);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[Some("a"), Some("b"), Some("z")]
        );
        let sorted = s.sort(true);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[Some("z"), Some("b"), Some("a")]
        );
        // Nulls sort first for ascending order.
        let s: StringChunked = [Some("b"), None, Some("z")].iter().copied().collect();
        let sorted = s.sort(false);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[None, Some("b"), Some("z")]
        );
    }

    #[test]
    fn reverse() {
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3]);
        assert_slice_equal(&s.reverse(), &[3, 2, 1]);
        // Nulls must keep their (reversed) positions.
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[Some(1), None, Some(3)]);
        assert_eq!(Vec::from(&s.reverse()), &[Some(3), None, Some(1)]);
        let s = BooleanChunked::new(PlSmallStr::EMPTY, &[true, false]);
        assert_eq!(Vec::from(&s.reverse()), &[Some(false), Some(true)]);

        let s = StringChunked::new(PlSmallStr::EMPTY, &["a", "b", "c"]);
        assert_eq!(Vec::from(&s.reverse()), &[Some("c"), Some("b"), Some("a")]);

        let s = StringChunked::new(PlSmallStr::EMPTY, &[Some("a"), None, Some("c")]);
        assert_eq!(Vec::from(&s.reverse()), &[Some("c"), None, Some("a")]);
    }

    #[test]
    #[cfg(feature = "dtype-categorical")]
    fn test_iter_categorical() {
        let ca = StringChunked::new(
            PlSmallStr::EMPTY,
            &[Some("foo"), None, Some("bar"), Some("ham")],
        );
        let cats = Categories::new(
            PlSmallStr::EMPTY,
            PlSmallStr::EMPTY,
            CategoricalPhysical::U32,
        );
        let ca = ca.cast(&DataType::from_categories(cats)).unwrap();
        let ca = ca.cat32().unwrap();
        // Physical codes follow first-seen order; nulls stay null.
        let v: Vec<_> = ca.physical().into_iter().collect();
        assert_eq!(v, &[Some(0), None, Some(1), Some(2)]);
    }

    #[test]
    #[ignore]
    fn test_shrink_to_fit() {
        // Over-allocate via a large builder capacity, then check that
        // shrink_to_fit reduces the estimated byte size.
        let mut builder = StringChunkedBuilder::new(PlSmallStr::from_static("foo"), 2048);
        builder.append_value("foo");
        let mut arr = builder.finish();
        let before = arr
            .chunks()
            .iter()
            .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
            .sum::<usize>();
        arr.shrink_to_fit();
        let after = arr
            .chunks()
            .iter()
            .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
            .sum::<usize>();
        assert!(before > after);
    }
}
1248}