1#![allow(unsafe_op_in_unsafe_fn)]
3use std::sync::Arc;
4
5use arrow::array::*;
6use arrow::bitmap::Bitmap;
7use arrow::compute::concatenate::concatenate_unchecked;
8use polars_compute::filter::filter_with_bitmap;
9
10use crate::prelude::{ChunkTakeUnchecked, *};
11
12pub mod ops;
13#[macro_use]
14pub mod arithmetic;
15pub mod builder;
16pub mod cast;
17pub mod collect;
18pub mod comparison;
19pub mod flags;
20pub mod float;
21pub mod iterator;
22#[cfg(feature = "ndarray")]
23pub(crate) mod ndarray;
24
25#[cfg(feature = "dtype-array")]
26pub(crate) mod array;
27mod binary;
28mod binary_offset;
29mod bitwise;
30#[cfg(feature = "object")]
31mod drop;
32mod from;
33mod from_iterator;
34pub mod from_iterator_par;
35pub(crate) mod list;
36pub(crate) mod logical;
37#[cfg(feature = "object")]
38pub mod object;
39#[cfg(feature = "random")]
40mod random;
41#[cfg(feature = "dtype-struct")]
42mod struct_;
43#[cfg(any(
44 feature = "temporal",
45 feature = "dtype-datetime",
46 feature = "dtype-date"
47))]
48pub mod temporal;
49mod to_vec;
50mod trusted_len;
51
52use arrow::legacy::prelude::*;
53#[cfg(feature = "dtype-struct")]
54pub use struct_::StructChunked;
55
56use self::flags::{StatisticsFlags, StatisticsFlagsIM};
57use crate::series::IsSorted;
58use crate::utils::{first_non_null, first_null, last_non_null};
59
/// Stub so that `RevMapping` can still be named when the
/// "dtype-categorical" feature (which provides the real type) is disabled.
#[cfg(not(feature = "dtype-categorical"))]
pub struct RevMapping {}
62
/// Iterator yielding the length of each chunk in a [`ChunkedArray`].
pub type ChunkLenIter<'a> = std::iter::Map<std::slice::Iter<'a, ArrayRef>, fn(&ArrayRef) -> usize>;
64
/// A typed column backed by one or more Arrow arrays ("chunks") that are
/// treated as a single logical array.
pub struct ChunkedArray<T: PolarsDataType> {
    /// Name and logical datatype of this column.
    pub(crate) field: Arc<Field>,
    /// The underlying Arrow arrays, logically concatenated in order.
    pub(crate) chunks: Vec<ArrayRef>,

    /// Cached statistics flags (sortedness, fast-explode-list, ...).
    pub(crate) flags: StatisticsFlagsIM,

    // Cached totals across all chunks; kept consistent by the constructors
    // (`new_with_dims` trusts the caller, `new_with_compute_len` derives them).
    length: usize,
    null_count: usize,
    // Marker tying the type-erased `ArrayRef` chunks to the logical type `T`.
    _pd: std::marker::PhantomData<T>,
}
149
150impl<T: PolarsDataType> ChunkedArray<T> {
151 fn should_rechunk(&self) -> bool {
152 self.chunks.len() > 1 && self.chunks.len() > self.len() / 3
153 }
154
155 fn optional_rechunk(mut self) -> Self {
156 if self.should_rechunk() {
158 self.rechunk_mut()
159 }
160 self
161 }
162
163 pub(crate) fn as_any(&self) -> &dyn std::any::Any {
164 self
165 }
166
167 pub fn unpack_series_matching_type<'a>(
169 &self,
170 series: &'a Series,
171 ) -> PolarsResult<&'a ChunkedArray<T>> {
172 polars_ensure!(
173 self.dtype() == series.dtype(),
174 SchemaMismatch: "cannot unpack series of type `{}` into `{}`",
175 series.dtype(),
176 self.dtype(),
177 );
178
179 Ok(unsafe { self.unpack_series_matching_physical_type(series) })
181 }
182
183 fn new_with_compute_len(field: Arc<Field>, chunks: Vec<ArrayRef>) -> Self {
188 unsafe {
189 let mut chunked_arr = Self::new_with_dims(field, chunks, 0, 0);
190 chunked_arr.compute_len();
191 chunked_arr
192 }
193 }
194
195 pub unsafe fn new_with_dims(
199 field: Arc<Field>,
200 chunks: Vec<ArrayRef>,
201 length: usize,
202 null_count: usize,
203 ) -> Self {
204 Self {
205 field,
206 chunks,
207 flags: StatisticsFlagsIM::empty(),
208
209 _pd: Default::default(),
210 length,
211 null_count,
212 }
213 }
214
215 pub(crate) fn is_sorted_ascending_flag(&self) -> bool {
216 self.get_flags().is_sorted_ascending()
217 }
218
219 pub(crate) fn is_sorted_descending_flag(&self) -> bool {
220 self.get_flags().is_sorted_descending()
221 }
222
223 pub(crate) fn is_sorted_any(&self) -> bool {
225 self.get_flags().is_sorted_any()
226 }
227
228 pub fn unset_fast_explode_list(&mut self) {
229 self.set_fast_explode_list(false)
230 }
231
232 pub fn set_fast_explode_list(&mut self, value: bool) {
233 let mut flags = self.flags.get_mut();
234 flags.set(StatisticsFlags::CAN_FAST_EXPLODE_LIST, value);
235 self.flags.set_mut(flags);
236 }
237
238 pub fn get_fast_explode_list(&self) -> bool {
239 self.get_flags().can_fast_explode_list()
240 }
241
242 pub fn get_flags(&self) -> StatisticsFlags {
243 self.flags.get()
244 }
245
246 pub fn set_flags(&mut self, flags: StatisticsFlags) {
248 self.flags = StatisticsFlagsIM::new(flags);
249 }
250
251 pub fn is_sorted_flag(&self) -> IsSorted {
252 self.get_flags().is_sorted()
253 }
254
255 pub fn retain_flags_from<U: PolarsDataType>(
256 &mut self,
257 from: &ChunkedArray<U>,
258 retain_flags: StatisticsFlags,
259 ) {
260 let flags = from.flags.get();
261 if !flags.is_empty() {
263 self.set_flags(flags & retain_flags)
264 }
265 }
266
267 pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
269 let mut flags = self.flags.get_mut();
270 flags.set_sorted(sorted);
271 self.flags.set_mut(flags);
272 }
273
274 pub fn with_sorted_flag(&self, sorted: IsSorted) -> Self {
276 let mut out = self.clone();
277 out.set_sorted_flag(sorted);
278 out
279 }
280
281 pub fn first_null(&self) -> Option<usize> {
282 if self.null_count() == 0 {
283 None
284 }
285 else if self.null_count() == self.len() {
287 Some(0)
288 } else if self.is_sorted_any() {
289 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
290 0
292 } else {
293 self.null_count()
295 };
296
297 debug_assert!(
298 unsafe { self.get_unchecked(out) }.is_some(),
300 "incorrect sorted flag"
301 );
302
303 Some(out)
304 } else {
305 first_null(self.chunks().iter().map(|arr| arr.as_ref()))
306 }
307 }
308
309 pub fn first_non_null(&self) -> Option<usize> {
311 if self.null_count() == self.len() {
312 None
313 }
314 else if self.null_count() == 0 {
316 Some(0)
317 } else if self.is_sorted_any() {
318 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
319 self.null_count()
321 } else {
322 0
324 };
325
326 debug_assert!(
327 unsafe { self.get_unchecked(out) }.is_some(),
329 "incorrect sorted flag"
330 );
331
332 Some(out)
333 } else {
334 first_non_null(self.chunks().iter().map(|arr| arr.as_ref()))
335 }
336 }
337
338 pub fn last_non_null(&self) -> Option<usize> {
340 if self.null_count() == self.len() {
341 None
342 }
343 else if self.null_count() == 0 {
345 Some(self.len() - 1)
346 } else if self.is_sorted_any() {
347 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
348 self.len() - 1
350 } else {
351 self.len() - self.null_count() - 1
353 };
354
355 debug_assert!(
356 unsafe { self.get_unchecked(out) }.is_some(),
358 "incorrect sorted flag"
359 );
360
361 Some(out)
362 } else {
363 last_non_null(self.chunks().iter().map(|arr| arr.as_ref()), self.len())
364 }
365 }
366
367 pub fn drop_nulls(&self) -> Self {
368 if self.null_count() == 0 {
369 self.clone()
370 } else {
371 let chunks = self
372 .downcast_iter()
373 .map(|arr| {
374 if arr.null_count() == 0 {
375 arr.to_boxed()
376 } else {
377 filter_with_bitmap(arr, arr.validity().unwrap())
378 }
379 })
380 .collect();
381 unsafe {
382 Self::new_with_dims(
383 self.field.clone(),
384 chunks,
385 self.len() - self.null_count(),
386 0,
387 )
388 }
389 }
390 }
391
392 #[inline]
394 #[allow(clippy::type_complexity)]
395 pub fn iter_validities(
396 &self,
397 ) -> impl ExactSizeIterator<Item = Option<&Bitmap>> + DoubleEndedIterator {
398 fn to_validity(arr: &ArrayRef) -> Option<&Bitmap> {
399 arr.validity()
400 }
401 self.chunks.iter().map(to_validity)
402 }
403
404 #[inline]
405 pub fn has_nulls(&self) -> bool {
407 self.null_count > 0
408 }
409
410 pub fn shrink_to_fit(&mut self) {
412 self.chunks = vec![concatenate_unchecked(self.chunks.as_slice()).unwrap()];
413 }
414
415 pub fn clear(&self) -> Self {
416 let mut ca = unsafe {
418 self.copy_with_chunks(vec![new_empty_array(
419 self.chunks.first().unwrap().dtype().clone(),
420 )])
421 };
422
423 use StatisticsFlags as F;
424 ca.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
425 ca
426 }
427
428 pub(crate) unsafe fn unpack_series_matching_physical_type<'a>(
435 &self,
436 series: &'a Series,
437 ) -> &'a ChunkedArray<T> {
438 let series_trait = &**series;
439 if self.dtype() == series.dtype() {
440 &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
441 } else {
442 use DataType::*;
443 match (self.dtype(), series.dtype()) {
444 (Int64, Datetime(_, _)) | (Int64, Duration(_)) | (Int32, Date) => {
445 &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
446 },
447 _ => panic!(
448 "cannot unpack series {:?} into matching type {:?}",
449 series,
450 self.dtype()
451 ),
452 }
453 }
454 }
455
456 pub fn chunk_lengths(&self) -> ChunkLenIter<'_> {
458 self.chunks.iter().map(|chunk| chunk.len())
459 }
460
461 #[inline]
463 pub fn chunks(&self) -> &Vec<ArrayRef> {
464 &self.chunks
465 }
466
467 #[inline]
473 pub unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
474 &mut self.chunks
475 }
476
477 pub fn is_optimal_aligned(&self) -> bool {
479 self.chunks.len() == 1 && self.null_count() == 0
480 }
481
482 unsafe fn copy_with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
487 Self::new_with_compute_len(self.field.clone(), chunks)
488 }
489
490 pub fn dtype(&self) -> &DataType {
492 self.field.dtype()
493 }
494
495 pub(crate) unsafe fn set_dtype(&mut self, dtype: DataType) {
496 self.field = Arc::new(Field::new(self.name().clone(), dtype))
497 }
498
499 pub fn name(&self) -> &PlSmallStr {
501 self.field.name()
502 }
503
504 pub fn ref_field(&self) -> &Field {
506 &self.field
507 }
508
509 pub fn rename(&mut self, name: PlSmallStr) {
511 self.field = Arc::new(Field::new(name, self.field.dtype().clone()));
512 }
513
514 pub fn with_name(mut self, name: PlSmallStr) -> Self {
516 self.rename(name);
517 self
518 }
519}
520
impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
{
    /// Get the value at `idx`, or `None` when it is null.
    ///
    /// # Panics
    /// Panics when `idx` is out of bounds.
    #[inline]
    pub fn get(&self, idx: usize) -> Option<T::Physical<'_>> {
        let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
        assert!(
            chunk_idx < self.chunks().len(),
            "index: {} out of bounds for len: {}",
            idx,
            self.len()
        );
        unsafe {
            let arr = self.downcast_get_unchecked(chunk_idx);
            // Second bounds check: the in-chunk index must also be valid.
            assert!(
                arr_idx < arr.len(),
                "index: {} out of bounds for len: {}",
                idx,
                self.len()
            );
            arr.get_unchecked(arr_idx)
        }
    }

    /// Get the value at `idx`, or `None` when it is null, skipping bounds
    /// checks.
    ///
    /// # Safety
    /// `idx` must be in bounds.
    #[inline]
    pub unsafe fn get_unchecked(&self, idx: usize) -> Option<T::Physical<'_>> {
        let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);

        unsafe {
            self.downcast_get_unchecked(chunk_idx)
                .get_unchecked(arr_idx)
        }
    }

    /// Get the value at `idx` without checking bounds or validity.
    ///
    /// # Safety
    /// `idx` must be in bounds; the value at `idx` must not be null.
    #[inline]
    pub unsafe fn value_unchecked(&self, idx: usize) -> T::Physical<'_> {
        let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);

        unsafe {
            self.downcast_get_unchecked(chunk_idx)
                .value_unchecked(arr_idx)
        }
    }

    /// First value, or `None` when it is null.
    // NOTE(review): indexes chunk 0 / element 0 unconditionally; presumably
    // `ChunkedArray` guarantees at least one chunk — confirm for empty arrays.
    #[inline]
    pub fn first(&self) -> Option<T::Physical<'_>> {
        unsafe {
            let arr = self.downcast_get_unchecked(0);
            arr.get_unchecked(0)
        }
    }

    /// Last value, or `None` when it is null, when there are no chunks, or
    /// when the last chunk is empty (`checked_sub` short-circuits via `?`).
    #[inline]
    pub fn last(&self) -> Option<T::Physical<'_>> {
        unsafe {
            let arr = self.downcast_get_unchecked(self.chunks.len().checked_sub(1)?);
            arr.get_unchecked(arr.len().checked_sub(1)?)
        }
    }

    /// Replace the validity of every chunk with the matching slice of
    /// `validity` and refresh the cached null count.
    ///
    /// # Panics
    /// Panics when `validity.len() != self.len()`.
    pub fn set_validity(&mut self, validity: &Bitmap) {
        assert_eq!(self.len(), validity.len());
        let mut i = 0;
        for chunk in unsafe { self.chunks_mut() } {
            // Each chunk takes the bitmap slice covering its own range.
            *chunk = chunk.with_validity(Some(validity.clone().sliced(i, chunk.len())));
            i += chunk.len();
        }
        self.null_count = validity.unset_bits();
        // New nulls may have been introduced; the flag is no longer reliable.
        self.set_fast_explode_list(false);
    }
}
610
impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
    ChunkedArray<T>: ChunkTakeUnchecked<[IdxSize]>,
{
    /// Spread the values of `self` over a new array of `validity.len()` rows:
    /// row `i` takes the next value of `self` when `validity` is set at `i`,
    /// and is null otherwise. This is the inverse of filtering by `validity`.
    ///
    /// # Panics
    /// Panics when `self` contains nulls or when `self.len()` differs from
    /// the number of set bits in `validity`.
    pub fn deposit(&self, validity: &Bitmap) -> Self {
        let set_bits = validity.set_bits();

        assert_eq!(self.null_count(), 0);
        assert_eq!(self.len(), set_bits);

        // All bits set: nothing is masked, the result is `self` itself.
        if set_bits == validity.len() {
            return self.clone();
        }

        // No bits set: the result is entirely null.
        if set_bits == 0 {
            return Self::full_null_like(self, validity.len());
        }

        let mut null_mask = validity.clone();

        // Build one gather index per output row. Rows whose bit is unset
        // repeat the previous value's index; those rows are nulled out
        // afterwards by `set_validity`, so the repeated value never shows.
        let mut gather_idxs = Vec::with_capacity(validity.len());
        let leading_nulls = null_mask.take_leading_zeros();
        // The leading unset rows and the first set row all map to value 0.
        gather_idxs.extend(std::iter::repeat_n(0, leading_nulls + 1));

        let mut i = 0 as IdxSize;
        gather_idxs.extend(null_mask.iter().skip(1).map(|v| {
            // Advance to the next source value only on set bits.
            i += IdxSize::from(v);
            i
        }));

        // SAFETY: by construction every gather index is < self.len().
        let mut ca = unsafe { ChunkTakeUnchecked::take_unchecked(self, &gather_idxs) };
        ca.set_validity(validity);
        ca
    }
}
648
649impl ListChunked {
650 #[inline]
651 pub fn get_as_series(&self, idx: usize) -> Option<Series> {
652 unsafe {
653 Some(Series::from_chunks_and_dtype_unchecked(
654 self.name().clone(),
655 vec![self.get(idx)?],
656 &self.inner_dtype().to_physical(),
657 ))
658 }
659 }
660
661 pub fn has_empty_lists(&self) -> bool {
662 for arr in self.downcast_iter() {
663 if arr.is_empty() {
664 continue;
665 }
666
667 if match arr.validity() {
668 None => arr.offsets().lengths().any(|l| l == 0),
669 Some(validity) => arr
670 .offsets()
671 .lengths()
672 .enumerate()
673 .any(|(i, l)| l == 0 && unsafe { validity.get_bit_unchecked(i) }),
674 } {
675 return true;
676 }
677 }
678
679 false
680 }
681
682 pub fn has_masked_out_values(&self) -> bool {
683 for arr in self.downcast_iter() {
684 if arr.is_empty() {
685 continue;
686 }
687
688 if *arr.offsets().first() != 0 || *arr.offsets().last() != arr.values().len() as i64 {
689 return true;
690 }
691
692 let Some(validity) = arr.validity() else {
693 continue;
694 };
695 if validity.set_bits() == 0 {
696 continue;
697 }
698
699 for i in (!validity).true_idx_iter() {
701 if arr.offsets().length_at(i) > 0 {
702 return true;
703 }
704 }
705 }
706
707 false
708 }
709}
710
#[cfg(feature = "dtype-array")]
impl ArrayChunked {
    /// Get the fixed-size array at `idx` as a new [`Series`], or `None` when
    /// the entry is null or `idx` is out of bounds.
    #[inline]
    pub fn get_as_series(&self, idx: usize) -> Option<Series> {
        // SAFETY: the chunk comes from this array, so it matches the
        // physical inner dtype.
        unsafe {
            Some(Series::from_chunks_and_dtype_unchecked(
                self.name().clone(),
                vec![self.get(idx)?],
                &self.inner_dtype().to_physical(),
            ))
        }
    }

    /// Build an `ArrayChunked` from chunks of flat values, grouping every
    /// `width` consecutive values into one fixed-size array entry.
    ///
    /// Each chunk's length is expected to be divisible by `width`, and the
    /// resulting number of entries to equal `length` (both debug-asserted).
    pub fn from_aligned_values(
        name: PlSmallStr,
        inner_dtype: &DataType,
        width: usize,
        chunks: Vec<ArrayRef>,
        length: usize,
    ) -> Self {
        let dtype = DataType::Array(Box::new(inner_dtype.clone()), width);
        let arrow_dtype = dtype.to_arrow(CompatLevel::newest());
        let field = Arc::new(Field::new(name, dtype));
        // Zero-width entries carry no values: build one chunk of `length`
        // entries over an empty values array.
        if width == 0 {
            use arrow::array::builder::{ArrayBuilder, make_builder};
            let values = make_builder(&inner_dtype.to_arrow(CompatLevel::newest())).freeze();
            return ArrayChunked::new_with_compute_len(
                field,
                vec![FixedSizeListArray::new(arrow_dtype, length, values, None).into_boxed()],
            );
        }
        let mut total_len = 0;
        let chunks = chunks
            .into_iter()
            .map(|chunk| {
                debug_assert_eq!(chunk.len() % width, 0);
                let chunk_len = chunk.len() / width;
                total_len += chunk_len;
                FixedSizeListArray::new(arrow_dtype.clone(), chunk_len, chunk, None).into_boxed()
            })
            .collect();
        debug_assert_eq!(total_len, length);

        // SAFETY: no validity is attached, so the null count is 0; `length`
        // is debug-asserted against the chunks above.
        unsafe { Self::new_with_dims(field, chunks, length, 0) }
    }

    /// Convert to a [`ListChunked`] with the same entries; every resulting
    /// list has length `self.width()`.
    pub fn to_list(&self) -> ListChunked {
        let inner_dtype = self.inner_dtype();
        let chunks = self
            .downcast_iter()
            .map(|chunk| {
                use arrow::offset::OffsetsBuffer;

                let inner_dtype = chunk.dtype().inner_dtype().unwrap();
                let dtype = inner_dtype.clone().to_large_list(true);

                // Fixed-size entries translate to evenly spaced offsets.
                let offsets = (0..=chunk.len())
                    .map(|i| (i * self.width()) as i64)
                    .collect::<Vec<i64>>();

                // SAFETY: the offsets above are monotonically non-decreasing.
                let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets.into()) };

                ListArray::<i64>::new(
                    dtype,
                    offsets,
                    chunk.values().clone(),
                    chunk.validity().cloned(),
                )
                .into_boxed()
            })
            .collect();

        // SAFETY: length and null count carry over one-to-one.
        let mut ca = unsafe {
            ListChunked::new_with_dims(
                Arc::new(Field::new(
                    self.name().clone(),
                    DataType::List(Box::new(inner_dtype.clone())),
                )),
                chunks,
                self.len(),
                self.null_count(),
            )
        };
        // NOTE(review): fast-explode is set purely on null absence; confirm
        // this also holds when `self.width() == 0` (all lists empty).
        ca.set_fast_explode_list(!self.has_nulls());
        ca
    }
}
803
impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
{
    /// Re-slice this array so its chunk lengths match `chunk_id` (typically
    /// the chunk lengths of another array), aligning the two chunk-wise.
    pub fn match_chunks<I>(&self, chunk_id: I) -> Self
    where
        I: Iterator<Item = usize>,
    {
        // NOTE(review): this assert contradicts the multi-chunk fallback at
        // the bottom; presumably callers always pass a single chunk in
        // practice — confirm before relying on the rechunk path.
        debug_assert!(self.chunks.len() == 1);
        // Slices a single-chunk array into consecutive windows of the
        // requested lengths.
        let slice = |ca: &Self| {
            let array = &ca.chunks[0];

            let mut offset = 0;
            let chunks = chunk_id
                .map(|len| {
                    debug_assert!((offset + len) <= array.len());
                    // SAFETY: bounds are debug-asserted above.
                    let out = unsafe { array.sliced_unchecked(offset, len) };
                    offset += len;
                    out
                })
                .collect();

            // The windows must cover the chunk exactly.
            debug_assert_eq!(offset, array.len());

            // SAFETY: the slices keep the original dtype.
            unsafe {
                Self::from_chunks_and_dtype(self.name().clone(), chunks, self.dtype().clone())
            }
        };

        if self.chunks.len() != 1 {
            let out = self.rechunk();
            slice(&out)
        } else {
            slice(self)
        }
    }
}
847
impl<T: PolarsDataType> AsRefDataType for ChunkedArray<T> {
    /// The logical dtype of this array.
    fn as_ref_dtype(&self) -> &DataType {
        self.dtype()
    }
}
853
/// Rechunk to a single contiguous buffer and expose the start of that buffer
/// as a raw pointer (encoded as `usize`).
pub(crate) trait AsSinglePtr: AsRefDataType {
    /// Default implementation: the operation is unsupported for this dtype.
    fn as_single_ptr(&mut self) -> PolarsResult<usize> {
        polars_bail!(opq = as_single_ptr, self.as_ref_dtype());
    }
}
860
impl<T> AsSinglePtr for ChunkedArray<T>
where
    T: PolarsNumericType,
{
    /// Rechunk to a single contiguous values buffer and return the address
    /// of its first element.
    fn as_single_ptr(&mut self) -> PolarsResult<usize> {
        self.rechunk_mut();
        // After rechunking there is exactly one values buffer.
        let a = self.data_views().next().unwrap();
        let ptr = a.as_ptr();
        Ok(ptr as usize)
    }
}
872
// These dtypes keep the default (unsupported) `as_single_ptr` behavior.
impl AsSinglePtr for BooleanChunked {}
impl AsSinglePtr for ListChunked {}
#[cfg(feature = "dtype-array")]
impl AsSinglePtr for ArrayChunked {}
impl AsSinglePtr for StringChunked {}
impl AsSinglePtr for BinaryChunked {}
#[cfg(feature = "object")]
impl<T: PolarsObject> AsSinglePtr for ObjectChunked<T> {}
881
/// The chunk/null layout of a [`ChunkedArray`], so algorithms can dispatch
/// to the cheapest code path.
pub enum ChunkedArrayLayout<'a, T: PolarsDataType> {
    /// One chunk, no null values.
    SingleNoNull(&'a T::Array),
    /// One chunk, with null values.
    Single(&'a T::Array),
    /// Multiple chunks, no null values.
    MultiNoNull(&'a ChunkedArray<T>),
    /// Multiple chunks, with null values.
    Multi(&'a ChunkedArray<T>),
}
888
889impl<T> ChunkedArray<T>
890where
891 T: PolarsDataType,
892{
893 pub fn layout(&self) -> ChunkedArrayLayout<'_, T> {
894 if self.chunks.len() == 1 {
895 let arr = self.downcast_iter().next().unwrap();
896 return if arr.null_count() == 0 {
897 ChunkedArrayLayout::SingleNoNull(arr)
898 } else {
899 ChunkedArrayLayout::Single(arr)
900 };
901 }
902
903 if self.downcast_iter().all(|a| a.null_count() == 0) {
904 ChunkedArrayLayout::MultiNoNull(self)
905 } else {
906 ChunkedArrayLayout::Multi(self)
907 }
908 }
909}
910
impl<T> ChunkedArray<T>
where
    T: PolarsNumericType,
{
    /// The values as one contiguous slice.
    ///
    /// # Errors
    /// Fails unless the array is a single chunk without null values.
    pub fn cont_slice(&self) -> PolarsResult<&[T::Native]> {
        polars_ensure!(
            self.chunks.len() == 1 && self.chunks[0].null_count() == 0,
            ComputeError: "chunked array is not contiguous"
        );
        Ok(self.downcast_iter().next().map(|arr| arr.values()).unwrap())
    }

    /// Mutable variant of [`Self::cont_slice`]. Returns `None` when the
    /// array is not a single null-free chunk, or when `get_mut_values`
    /// cannot hand out mutable access.
    pub(crate) fn cont_slice_mut(&mut self) -> Option<&mut [T::Native]> {
        if self.chunks.len() == 1 && self.chunks[0].null_count() == 0 {
            // SAFETY: we only mutate values, not the chunk structure.
            let arr = unsafe { self.downcast_iter_mut().next().unwrap() };
            arr.get_mut_values()
        } else {
            None
        }
    }

    /// The raw values buffer of every chunk. Validity is ignored, so slots
    /// that are null still yield their (meaningless) underlying value.
    pub fn data_views(&self) -> impl DoubleEndedIterator<Item = &[T::Native]> {
        self.downcast_iter().map(|arr| arr.values().as_slice())
    }

    /// Iterate over all values, ignoring validity. Intended for arrays that
    /// are known to contain no nulls.
    #[allow(clippy::wrong_self_convention)]
    pub fn into_no_null_iter(
        &self,
    ) -> impl '_ + Send + Sync + ExactSizeIterator<Item = T::Native> + DoubleEndedIterator + TrustedLen
    {
        #[allow(clippy::map_clone)]
        // SAFETY: the value buffers together hold exactly `self.len()` items.
        unsafe {
            self.data_views()
                .flatten()
                .map(|v| *v)
                .trust_my_length(self.len())
        }
    }
}
958
959impl<T: PolarsDataType> Clone for ChunkedArray<T> {
960 fn clone(&self) -> Self {
961 ChunkedArray {
962 field: self.field.clone(),
963 chunks: self.chunks.clone(),
964 flags: self.flags.clone(),
965
966 _pd: Default::default(),
967 length: self.length,
968 null_count: self.null_count,
969 }
970 }
971}
972
impl<T: PolarsDataType> AsRef<ChunkedArray<T>> for ChunkedArray<T> {
    // Identity `AsRef`, for APIs generic over `AsRef<ChunkedArray<T>>`.
    fn as_ref(&self) -> &ChunkedArray<T> {
        self
    }
}
978
979impl ValueSize for ListChunked {
980 fn get_values_size(&self) -> usize {
981 self.chunks
982 .iter()
983 .fold(0usize, |acc, arr| acc + arr.get_values_size())
984 }
985}
986
#[cfg(feature = "dtype-array")]
impl ValueSize for ArrayChunked {
    /// Total value-buffer size, summed over all chunks.
    fn get_values_size(&self) -> usize {
        self.chunks.iter().map(|arr| arr.get_values_size()).sum()
    }
}
995impl ValueSize for StringChunked {
996 fn get_values_size(&self) -> usize {
997 self.chunks
998 .iter()
999 .fold(0usize, |acc, arr| acc + arr.get_values_size())
1000 }
1001}
1002
1003impl ValueSize for BinaryOffsetChunked {
1004 fn get_values_size(&self) -> usize {
1005 self.chunks
1006 .iter()
1007 .fold(0usize, |acc, arr| acc + arr.get_values_size())
1008 }
1009}
1010
1011pub(crate) fn to_primitive<T: PolarsNumericType>(
1012 values: Vec<T::Native>,
1013 validity: Option<Bitmap>,
1014) -> PrimitiveArray<T::Native> {
1015 PrimitiveArray::new(
1016 T::get_static_dtype().to_arrow(CompatLevel::newest()),
1017 values.into(),
1018 validity,
1019 )
1020}
1021
1022pub(crate) fn to_array<T: PolarsNumericType>(
1023 values: Vec<T::Native>,
1024 validity: Option<Bitmap>,
1025) -> ArrayRef {
1026 Box::new(to_primitive::<T>(values, validity))
1027}
1028
1029impl<T: PolarsDataType> Default for ChunkedArray<T> {
1030 fn default() -> Self {
1031 let dtype = T::get_static_dtype();
1032 let arrow_dtype = dtype.to_physical().to_arrow(CompatLevel::newest());
1033 ChunkedArray {
1034 field: Arc::new(Field::new(PlSmallStr::EMPTY, dtype)),
1035 chunks: vec![new_empty_array(arrow_dtype)],
1037 flags: StatisticsFlagsIM::empty(),
1038
1039 _pd: Default::default(),
1040 length: 0,
1041 null_count: 0,
1042 }
1043 }
1044}
1045
// Unit tests for the basic `ChunkedArray` operations (sort, slice, filter,
// arithmetic, casting, reversal, ...).
#[cfg(test)]
pub(crate) mod test {
    use crate::prelude::*;

    /// Small single-chunk Int32 fixture shared by several tests below.
    pub(crate) fn get_chunked_array() -> Int32Chunked {
        ChunkedArray::new(PlSmallStr::from_static("a"), &[1, 2, 3])
    }

    #[test]
    fn test_sort() {
        let a = Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 9, 3, 2]);
        let b = a
            .sort(false)
            .into_iter()
            .map(|opt| opt.unwrap())
            .collect::<Vec<_>>();
        assert_eq!(b, [1, 2, 3, 9]);
        let a = StringChunked::new(PlSmallStr::from_static("a"), &["b", "a", "c"]);
        let a = a.sort(false);
        let b = a.into_iter().collect::<Vec<_>>();
        assert_eq!(b, [Some("a"), Some("b"), Some("c")]);
        // Sorting must also record the sortedness flag.
        assert!(a.is_sorted_ascending_flag());
    }

    #[test]
    fn arithmetic() {
        let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 6, 40]);
        let b = &Int32Chunked::new(PlSmallStr::from_static("b"), &[-1, 2, 3, 4]);

        // Smoke test: none of the element-wise ops may panic.
        println!("{:?}", a + b);
        println!("{:?}", a - b);
        println!("{:?}", a * b);
        println!("{:?}", a / b);
    }

    #[test]
    fn iter() {
        let s1 = get_chunked_array();
        assert_eq!(s1.into_iter().fold(0, |acc, val| { acc + val.unwrap() }), 6)
    }

    #[test]
    fn limit() {
        let a = get_chunked_array();
        let b = a.limit(2);
        println!("{b:?}");
        assert_eq!(b.len(), 2)
    }

    #[test]
    fn filter() {
        let a = get_chunked_array();
        let b = a
            .filter(&BooleanChunked::new(
                PlSmallStr::from_static("filter"),
                &[true, false, false],
            ))
            .unwrap();
        assert_eq!(b.len(), 1);
        assert_eq!(b.into_iter().next(), Some(Some(1)));
    }

    #[test]
    fn aggregates() {
        let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 10, 9]);
        assert_eq!(a.max(), Some(100));
        assert_eq!(a.min(), Some(1));
        assert_eq!(a.sum(), Some(120))
    }

    #[test]
    fn take() {
        let a = get_chunked_array();
        let new = a.take(&[0 as IdxSize, 1]).unwrap();
        assert_eq!(new.len(), 2)
    }

    #[test]
    fn cast() {
        let a = get_chunked_array();
        let b = a.cast(&DataType::Int64).unwrap();
        assert_eq!(b.dtype(), &DataType::Int64)
    }

    /// Helper: compare a null-free numeric array against an expected slice.
    fn assert_slice_equal<T>(ca: &ChunkedArray<T>, eq: &[T::Native])
    where
        T: PolarsNumericType,
    {
        assert_eq!(ca.iter().map(|opt| opt.unwrap()).collect::<Vec<_>>(), eq)
    }

    #[test]
    fn slice() {
        let mut first = UInt32Chunked::new(PlSmallStr::from_static("first"), &[0, 1, 2]);
        let second = UInt32Chunked::new(PlSmallStr::from_static("second"), &[3, 4, 5]);
        // `first` now has two chunks; slicing must work across chunk borders.
        first.append(&second).unwrap();
        assert_slice_equal(&first.slice(0, 3), &[0, 1, 2]);
        assert_slice_equal(&first.slice(0, 4), &[0, 1, 2, 3]);
        assert_slice_equal(&first.slice(1, 4), &[1, 2, 3, 4]);
        assert_slice_equal(&first.slice(3, 2), &[3, 4]);
        assert_slice_equal(&first.slice(3, 3), &[3, 4, 5]);
        assert_slice_equal(&first.slice(-3, 3), &[3, 4, 5]);
        assert_slice_equal(&first.slice(-6, 6), &[0, 1, 2, 3, 4, 5]);

        // Out-of-range requests are clamped rather than panicking.
        assert_eq!(first.slice(-7, 2).len(), 1);
        assert_eq!(first.slice(-3, 4).len(), 3);
        assert_eq!(first.slice(3, 4).len(), 3);
        assert_eq!(first.slice(10, 4).len(), 0);
    }

    #[test]
    fn sorting() {
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[9, 2, 4]);
        let sorted = s.sort(false);
        assert_slice_equal(&sorted, &[2, 4, 9]);
        let sorted = s.sort(true);
        assert_slice_equal(&sorted, &[9, 4, 2]);

        let s: StringChunked = ["b", "a", "z"].iter().collect();
        let sorted = s.sort(false);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[Some("a"), Some("b"), Some("z")]
        );
        let sorted = s.sort(true);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[Some("z"), Some("b"), Some("a")]
        );
        // Nulls sort first by default.
        let s: StringChunked = [Some("b"), None, Some("z")].iter().copied().collect();
        let sorted = s.sort(false);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[None, Some("b"), Some("z")]
        );
    }

    #[test]
    fn reverse() {
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3]);
        assert_slice_equal(&s.reverse(), &[3, 2, 1]);
        // Nulls must keep their (reversed) positions.
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[Some(1), None, Some(3)]);
        assert_eq!(Vec::from(&s.reverse()), &[Some(3), None, Some(1)]);
        let s = BooleanChunked::new(PlSmallStr::EMPTY, &[true, false]);
        assert_eq!(Vec::from(&s.reverse()), &[Some(false), Some(true)]);

        let s = StringChunked::new(PlSmallStr::EMPTY, &["a", "b", "c"]);
        assert_eq!(Vec::from(&s.reverse()), &[Some("c"), Some("b"), Some("a")]);

        let s = StringChunked::new(PlSmallStr::EMPTY, &[Some("a"), None, Some("c")]);
        assert_eq!(Vec::from(&s.reverse()), &[Some("c"), None, Some("a")]);
    }

    #[test]
    #[cfg(feature = "dtype-categorical")]
    fn test_iter_categorical() {
        let ca = StringChunked::new(
            PlSmallStr::EMPTY,
            &[Some("foo"), None, Some("bar"), Some("ham")],
        );
        let cats = Categories::new(
            PlSmallStr::EMPTY,
            PlSmallStr::EMPTY,
            CategoricalPhysical::U32,
        );
        let ca = ca.cast(&DataType::from_categories(cats)).unwrap();
        let ca = ca.cat32().unwrap();
        // Physical codes are assigned in first-seen order.
        let v: Vec<_> = ca.physical().into_iter().collect();
        assert_eq!(v, &[Some(0), None, Some(1), Some(2)]);
    }

    #[test]
    #[ignore]
    fn test_shrink_to_fit() {
        let mut builder = StringChunkedBuilder::new(PlSmallStr::from_static("foo"), 2048);
        builder.append_value("foo");
        let mut arr = builder.finish();
        let before = arr
            .chunks()
            .iter()
            .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
            .sum::<usize>();
        arr.shrink_to_fit();
        let after = arr
            .chunks()
            .iter()
            .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
            .sum::<usize>();
        assert!(before > after);
    }
}