1#![allow(unsafe_op_in_unsafe_fn)]
3use std::sync::Arc;
4
5use arrow::array::*;
6use arrow::bitmap::Bitmap;
7use arrow::compute::concatenate::concatenate_unchecked;
8use polars_compute::filter::filter_with_bitmap;
9
10use crate::prelude::{ChunkTakeUnchecked, *};
11
12pub mod ops;
13#[macro_use]
14pub mod arithmetic;
15pub mod builder;
16pub mod cast;
17pub mod collect;
18pub mod comparison;
19pub mod flags;
20pub mod float;
21pub mod iterator;
22#[cfg(feature = "ndarray")]
23pub(crate) mod ndarray;
24
25pub mod arg_min_max;
26#[cfg(feature = "dtype-array")]
27pub(crate) mod array;
28mod binary;
29mod binary_offset;
30mod bitwise;
31#[cfg(feature = "object")]
32mod drop;
33mod from;
34mod from_iterator;
35pub mod from_iterator_par;
36pub(crate) mod list;
37pub(crate) mod logical;
38#[cfg(feature = "object")]
39pub mod object;
40#[cfg(feature = "random")]
41mod random;
42#[cfg(feature = "dtype-struct")]
43mod struct_;
44#[cfg(any(
45 feature = "temporal",
46 feature = "dtype-datetime",
47 feature = "dtype-date"
48))]
49pub mod temporal;
50mod to_vec;
51mod trusted_len;
52pub(crate) use arg_min_max::*;
53use arrow::legacy::prelude::*;
54#[cfg(feature = "dtype-struct")]
55pub use struct_::StructChunked;
56
57use self::flags::{StatisticsFlags, StatisticsFlagsIM};
58use crate::series::IsSorted;
59use crate::utils::{first_non_null, first_null, last_non_null};
60
61#[cfg(not(feature = "dtype-categorical"))]
62pub struct RevMapping {}
63
64pub type ChunkLenIter<'a> = std::iter::Map<std::slice::Iter<'a, ArrayRef>, fn(&ArrayRef) -> usize>;
65
66pub struct ChunkedArray<T: PolarsDataType> {
141 pub(crate) field: Arc<Field>,
142 pub(crate) chunks: Vec<ArrayRef>,
143
144 pub(crate) flags: StatisticsFlagsIM,
145
146 length: usize,
147 null_count: usize,
148 _pd: std::marker::PhantomData<T>,
149}
150
151impl<T: PolarsDataType> ChunkedArray<T> {
152 fn should_rechunk(&self) -> bool {
153 self.chunks.len() > 1 && self.chunks.len() > self.len() / 3
154 }
155
156 fn optional_rechunk(mut self) -> Self {
157 if self.should_rechunk() {
159 self.rechunk_mut()
160 }
161 self
162 }
163
164 pub(crate) fn as_any(&self) -> &dyn std::any::Any {
165 self
166 }
167
168 pub fn unpack_series_matching_type<'a>(
170 &self,
171 series: &'a Series,
172 ) -> PolarsResult<&'a ChunkedArray<T>> {
173 polars_ensure!(
174 self.dtype() == series.dtype(),
175 SchemaMismatch: "cannot unpack series of type `{}` into `{}`",
176 series.dtype(),
177 self.dtype(),
178 );
179
180 Ok(unsafe { self.unpack_series_matching_physical_type(series) })
182 }
183
184 fn new_with_compute_len(field: Arc<Field>, chunks: Vec<ArrayRef>) -> Self {
189 unsafe {
190 let mut chunked_arr = Self::new_with_dims(field, chunks, 0, 0);
191 chunked_arr.compute_len();
192 chunked_arr
193 }
194 }
195
196 pub unsafe fn new_with_dims(
200 field: Arc<Field>,
201 chunks: Vec<ArrayRef>,
202 length: usize,
203 null_count: usize,
204 ) -> Self {
205 Self {
206 field,
207 chunks,
208 flags: StatisticsFlagsIM::empty(),
209
210 _pd: Default::default(),
211 length,
212 null_count,
213 }
214 }
215
216 pub(crate) fn is_sorted_ascending_flag(&self) -> bool {
217 self.get_flags().is_sorted_ascending()
218 }
219
220 pub(crate) fn is_sorted_descending_flag(&self) -> bool {
221 self.get_flags().is_sorted_descending()
222 }
223
224 pub(crate) fn is_sorted_any(&self) -> bool {
226 self.get_flags().is_sorted_any()
227 }
228
229 pub fn unset_fast_explode_list(&mut self) {
230 self.set_fast_explode_list(false)
231 }
232
233 pub fn set_fast_explode_list(&mut self, value: bool) {
234 let mut flags = self.flags.get_mut();
235 flags.set(StatisticsFlags::CAN_FAST_EXPLODE_LIST, value);
236 self.flags.set_mut(flags);
237 }
238
239 pub fn get_fast_explode_list(&self) -> bool {
240 self.get_flags().can_fast_explode_list()
241 }
242
243 pub fn get_flags(&self) -> StatisticsFlags {
244 self.flags.get()
245 }
246
247 pub fn set_flags(&mut self, flags: StatisticsFlags) {
249 self.flags = StatisticsFlagsIM::new(flags);
250 }
251
252 pub fn is_sorted_flag(&self) -> IsSorted {
253 self.get_flags().is_sorted()
254 }
255
256 pub fn retain_flags_from<U: PolarsDataType>(
257 &mut self,
258 from: &ChunkedArray<U>,
259 retain_flags: StatisticsFlags,
260 ) {
261 let flags = from.flags.get();
262 if !flags.is_empty() {
264 self.set_flags(flags & retain_flags)
265 }
266 }
267
268 pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
270 let mut flags = self.flags.get_mut();
271 flags.set_sorted(sorted);
272 self.flags.set_mut(flags);
273 }
274
275 pub fn with_sorted_flag(&self, sorted: IsSorted) -> Self {
277 let mut out = self.clone();
278 out.set_sorted_flag(sorted);
279 out
280 }
281
282 pub fn first_null(&self) -> Option<usize> {
283 if self.null_count() == 0 {
284 None
285 }
286 else if self.null_count() == self.len() {
288 Some(0)
289 } else if self.is_sorted_any() {
290 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
291 0
293 } else {
294 self.null_count()
296 };
297
298 debug_assert!(
299 unsafe { self.get_unchecked(out) }.is_some(),
301 "incorrect sorted flag"
302 );
303
304 Some(out)
305 } else {
306 first_null(self.chunks().iter().map(|arr| arr.as_ref()))
307 }
308 }
309
310 pub fn first_non_null(&self) -> Option<usize> {
312 if self.null_count() == self.len() {
313 None
314 }
315 else if self.null_count() == 0 {
317 Some(0)
318 } else if self.is_sorted_any() {
319 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
320 self.null_count()
322 } else {
323 0
325 };
326
327 debug_assert!(
328 unsafe { self.get_unchecked(out) }.is_some(),
330 "incorrect sorted flag"
331 );
332
333 Some(out)
334 } else {
335 first_non_null(self.chunks().iter().map(|arr| arr.as_ref()))
336 }
337 }
338
339 pub fn last_non_null(&self) -> Option<usize> {
341 if self.null_count() == self.len() {
342 None
343 }
344 else if self.null_count() == 0 {
346 Some(self.len() - 1)
347 } else if self.is_sorted_any() {
348 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
349 self.len() - 1
351 } else {
352 self.len() - self.null_count() - 1
354 };
355
356 debug_assert!(
357 unsafe { self.get_unchecked(out) }.is_some(),
359 "incorrect sorted flag"
360 );
361
362 Some(out)
363 } else {
364 last_non_null(self.chunks().iter().map(|arr| arr.as_ref()), self.len())
365 }
366 }
367
368 pub fn drop_nulls(&self) -> Self {
369 if self.null_count() == 0 {
370 self.clone()
371 } else {
372 let chunks = self
373 .downcast_iter()
374 .map(|arr| {
375 if arr.null_count() == 0 {
376 arr.to_boxed()
377 } else {
378 filter_with_bitmap(arr, arr.validity().unwrap())
379 }
380 })
381 .collect();
382 unsafe {
383 Self::new_with_dims(
384 self.field.clone(),
385 chunks,
386 self.len() - self.null_count(),
387 0,
388 )
389 }
390 }
391 }
392
393 #[inline]
395 #[allow(clippy::type_complexity)]
396 pub fn iter_validities(
397 &self,
398 ) -> impl ExactSizeIterator<Item = Option<&Bitmap>> + DoubleEndedIterator {
399 fn to_validity(arr: &ArrayRef) -> Option<&Bitmap> {
400 arr.validity()
401 }
402 self.chunks.iter().map(to_validity)
403 }
404
405 #[inline]
406 pub fn has_nulls(&self) -> bool {
408 self.null_count > 0
409 }
410
411 pub fn shrink_to_fit(&mut self) {
413 self.chunks = vec![concatenate_unchecked(self.chunks.as_slice()).unwrap()];
414 }
415
416 pub fn clear(&self) -> Self {
417 let mut ca = unsafe {
419 self.copy_with_chunks(vec![new_empty_array(
420 self.chunks.first().unwrap().dtype().clone(),
421 )])
422 };
423
424 use StatisticsFlags as F;
425 ca.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
426 ca
427 }
428
429 pub(crate) unsafe fn unpack_series_matching_physical_type<'a>(
436 &self,
437 series: &'a Series,
438 ) -> &'a ChunkedArray<T> {
439 let series_trait = &**series;
440 if self.dtype() == series.dtype() {
441 &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
442 } else {
443 use DataType::*;
444 match (self.dtype(), series.dtype()) {
445 (Int64, Datetime(_, _)) | (Int64, Duration(_)) | (Int32, Date) => {
446 &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
447 },
448 _ => panic!(
449 "cannot unpack series {:?} into matching type {:?}",
450 series,
451 self.dtype()
452 ),
453 }
454 }
455 }
456
457 pub fn chunk_lengths(&self) -> ChunkLenIter<'_> {
459 self.chunks.iter().map(|chunk| chunk.len())
460 }
461
462 #[inline]
464 pub fn chunks(&self) -> &Vec<ArrayRef> {
465 &self.chunks
466 }
467
468 #[inline]
474 pub unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
475 &mut self.chunks
476 }
477
478 pub fn is_optimal_aligned(&self) -> bool {
480 self.chunks.len() == 1 && self.null_count() == 0
481 }
482
483 unsafe fn copy_with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
488 Self::new_with_compute_len(self.field.clone(), chunks)
489 }
490
491 pub fn dtype(&self) -> &DataType {
493 self.field.dtype()
494 }
495
496 pub(crate) unsafe fn set_dtype(&mut self, dtype: DataType) {
497 self.field = Arc::new(Field::new(self.name().clone(), dtype))
498 }
499
500 pub fn name(&self) -> &PlSmallStr {
502 self.field.name()
503 }
504
505 pub fn ref_field(&self) -> &Field {
507 &self.field
508 }
509
510 pub fn rename(&mut self, name: PlSmallStr) {
512 self.field = Arc::new(Field::new(name, self.field.dtype().clone()));
513 }
514
515 pub fn with_name(mut self, name: PlSmallStr) -> Self {
517 self.rename(name);
518 self
519 }
520}
521
522impl<T> ChunkedArray<T>
523where
524 T: PolarsDataType,
525{
526 #[inline]
532 pub fn get(&self, idx: usize) -> Option<T::Physical<'_>> {
533 let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
534 assert!(
535 chunk_idx < self.chunks().len(),
536 "index: {} out of bounds for len: {}",
537 idx,
538 self.len()
539 );
540 unsafe {
541 let arr = self.downcast_get_unchecked(chunk_idx);
542 assert!(
543 arr_idx < arr.len(),
544 "index: {} out of bounds for len: {}",
545 idx,
546 self.len()
547 );
548 arr.get_unchecked(arr_idx)
549 }
550 }
551
552 #[inline]
558 pub unsafe fn get_unchecked(&self, idx: usize) -> Option<T::Physical<'_>> {
559 let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
560
561 unsafe {
562 self.downcast_get_unchecked(chunk_idx)
564 .get_unchecked(arr_idx)
565 }
566 }
567
568 #[inline]
574 pub unsafe fn value_unchecked(&self, idx: usize) -> T::Physical<'_> {
575 let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
576
577 unsafe {
578 self.downcast_get_unchecked(chunk_idx)
580 .value_unchecked(arr_idx)
581 }
582 }
583
584 #[inline]
587 pub fn first(&self) -> Option<T::Physical<'_>> {
588 self.iter().next().unwrap()
589 }
590
591 #[inline]
594 pub fn last(&self) -> Option<T::Physical<'_>> {
595 let arr = self
596 .downcast_iter()
597 .rev()
598 .find(|arr| !arr.is_empty())
599 .unwrap();
600 unsafe { arr.get_unchecked(arr.len() - 1) }
601 }
602
603 pub fn set_validity(&mut self, validity: &Bitmap) {
604 assert_eq!(self.len(), validity.len());
605 let mut i = 0;
606 for chunk in unsafe { self.chunks_mut() } {
607 *chunk = chunk.with_validity(Some(validity.clone().sliced(i, chunk.len())));
608 i += chunk.len();
609 }
610 self.null_count = validity.unset_bits();
611 self.set_fast_explode_list(false);
612 }
613}
614
615impl<T> ChunkedArray<T>
616where
617 T: PolarsDataType,
618 ChunkedArray<T>: ChunkTakeUnchecked<[IdxSize]>,
619{
620 pub fn deposit(&self, validity: &Bitmap) -> Self {
622 let set_bits = validity.set_bits();
623
624 assert_eq!(self.null_count(), 0);
625 assert_eq!(self.len(), set_bits);
626
627 if set_bits == validity.len() {
628 return self.clone();
629 }
630
631 if set_bits == 0 {
632 return Self::full_null_like(self, validity.len());
633 }
634
635 let mut null_mask = validity.clone();
636
637 let mut gather_idxs = Vec::with_capacity(validity.len());
638 let leading_nulls = null_mask.take_leading_zeros();
639 gather_idxs.extend(std::iter::repeat_n(0, leading_nulls + 1));
640
641 let mut i = 0 as IdxSize;
642 gather_idxs.extend(null_mask.iter().skip(1).map(|v| {
643 i += IdxSize::from(v);
644 i
645 }));
646
647 let mut ca = unsafe { ChunkTakeUnchecked::take_unchecked(self, &gather_idxs) };
648 ca.set_validity(validity);
649 ca
650 }
651}
652
653impl ListChunked {
654 #[inline]
655 pub fn get_as_series(&self, idx: usize) -> Option<Series> {
656 unsafe {
657 Some(Series::from_chunks_and_dtype_unchecked(
658 self.name().clone(),
659 vec![self.get(idx)?],
660 &self.inner_dtype().to_physical(),
661 ))
662 }
663 }
664
665 pub fn has_empty_lists(&self) -> bool {
666 for arr in self.downcast_iter() {
667 if arr.is_empty() {
668 continue;
669 }
670
671 if match arr.validity() {
672 None => arr.offsets().lengths().any(|l| l == 0),
673 Some(validity) => arr
674 .offsets()
675 .lengths()
676 .enumerate()
677 .any(|(i, l)| l == 0 && unsafe { validity.get_bit_unchecked(i) }),
678 } {
679 return true;
680 }
681 }
682
683 false
684 }
685
686 pub fn has_masked_out_values(&self) -> bool {
687 for arr in self.downcast_iter() {
688 if arr.is_empty() {
689 continue;
690 }
691
692 if *arr.offsets().first() != 0 || *arr.offsets().last() != arr.values().len() as i64 {
693 return true;
694 }
695
696 let Some(validity) = arr.validity() else {
697 continue;
698 };
699 if validity.set_bits() == 0 {
700 continue;
701 }
702
703 for i in (!validity).true_idx_iter() {
705 if arr.offsets().length_at(i) > 0 {
706 return true;
707 }
708 }
709 }
710
711 false
712 }
713}
714
715#[cfg(feature = "dtype-array")]
716impl ArrayChunked {
717 #[inline]
718 pub fn get_as_series(&self, idx: usize) -> Option<Series> {
719 unsafe {
720 Some(Series::from_chunks_and_dtype_unchecked(
721 self.name().clone(),
722 vec![self.get(idx)?],
723 &self.inner_dtype().to_physical(),
724 ))
725 }
726 }
727
728 pub fn from_aligned_values(
729 name: PlSmallStr,
730 inner_dtype: &DataType,
731 width: usize,
732 chunks: Vec<ArrayRef>,
733 length: usize,
734 ) -> Self {
735 let dtype = DataType::Array(Box::new(inner_dtype.clone()), width);
736 let arrow_dtype = dtype.to_arrow(CompatLevel::newest());
737 let field = Arc::new(Field::new(name, dtype));
738 if width == 0 {
739 use arrow::array::builder::{ArrayBuilder, make_builder};
740 let values = make_builder(&inner_dtype.to_arrow(CompatLevel::newest())).freeze();
741 return ArrayChunked::new_with_compute_len(
742 field,
743 vec![FixedSizeListArray::new(arrow_dtype, length, values, None).into_boxed()],
744 );
745 }
746 let mut total_len = 0;
747 let chunks = chunks
748 .into_iter()
749 .map(|chunk| {
750 debug_assert_eq!(chunk.len() % width, 0);
751 let chunk_len = chunk.len() / width;
752 total_len += chunk_len;
753 FixedSizeListArray::new(arrow_dtype.clone(), chunk_len, chunk, None).into_boxed()
754 })
755 .collect();
756 debug_assert_eq!(total_len, length);
757
758 unsafe { Self::new_with_dims(field, chunks, length, 0) }
759 }
760
761 pub fn to_list(&self) -> ListChunked {
765 let inner_dtype = self.inner_dtype();
766 let chunks = self
767 .downcast_iter()
768 .map(|chunk| {
769 use arrow::offset::OffsetsBuffer;
770
771 let inner_dtype = chunk.dtype().inner_dtype().unwrap();
772 let dtype = inner_dtype.clone().to_large_list(true);
773
774 let offsets = (0..=chunk.len())
775 .map(|i| (i * self.width()) as i64)
776 .collect::<Vec<i64>>();
777
778 let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets.into()) };
780
781 ListArray::<i64>::new(
782 dtype,
783 offsets,
784 chunk.values().clone(),
785 chunk.validity().cloned(),
786 )
787 .into_boxed()
788 })
789 .collect();
790
791 let mut ca = unsafe {
793 ListChunked::new_with_dims(
794 Arc::new(Field::new(
795 self.name().clone(),
796 DataType::List(Box::new(inner_dtype.clone())),
797 )),
798 chunks,
799 self.len(),
800 self.null_count(),
801 )
802 };
803 ca.set_fast_explode_list(!self.has_nulls());
804 ca
805 }
806}
807
808impl<T> ChunkedArray<T>
809where
810 T: PolarsDataType,
811{
812 pub fn match_chunks<I>(&self, chunk_id: I) -> Self
816 where
817 I: Iterator<Item = usize>,
818 {
819 debug_assert!(self.chunks.len() == 1);
820 let slice = |ca: &Self| {
822 let array = &ca.chunks[0];
823
824 let mut offset = 0;
825 let chunks = chunk_id
826 .map(|len| {
827 debug_assert!((offset + len) <= array.len());
829 let out = unsafe { array.sliced_unchecked(offset, len) };
830 offset += len;
831 out
832 })
833 .collect();
834
835 debug_assert_eq!(offset, array.len());
836
837 unsafe {
839 Self::from_chunks_and_dtype(self.name().clone(), chunks, self.dtype().clone())
840 }
841 };
842
843 if self.chunks.len() != 1 {
844 let out = self.rechunk();
845 slice(&out)
846 } else {
847 slice(self)
848 }
849 }
850}
851
852impl<T: PolarsDataType> AsRefDataType for ChunkedArray<T> {
853 fn as_ref_dtype(&self) -> &DataType {
854 self.dtype()
855 }
856}
857
858pub(crate) trait AsSinglePtr: AsRefDataType {
859 fn as_single_ptr(&mut self) -> PolarsResult<usize> {
861 polars_bail!(opq = as_single_ptr, self.as_ref_dtype());
862 }
863}
864
865impl<T> AsSinglePtr for ChunkedArray<T>
866where
867 T: PolarsNumericType,
868{
869 fn as_single_ptr(&mut self) -> PolarsResult<usize> {
870 self.rechunk_mut();
871 let a = self.data_views().next().unwrap();
872 let ptr = a.as_ptr();
873 Ok(ptr as usize)
874 }
875}
876
877impl AsSinglePtr for BooleanChunked {}
878impl AsSinglePtr for ListChunked {}
879#[cfg(feature = "dtype-array")]
880impl AsSinglePtr for ArrayChunked {}
881impl AsSinglePtr for StringChunked {}
882impl AsSinglePtr for BinaryChunked {}
883#[cfg(feature = "object")]
884impl<T: PolarsObject> AsSinglePtr for ObjectChunked<T> {}
885
886pub enum ChunkedArrayLayout<'a, T: PolarsDataType> {
887 SingleNoNull(&'a T::Array),
888 Single(&'a T::Array),
889 MultiNoNull(&'a ChunkedArray<T>),
890 Multi(&'a ChunkedArray<T>),
891}
892
893impl<T> ChunkedArray<T>
894where
895 T: PolarsDataType,
896{
897 pub fn layout(&self) -> ChunkedArrayLayout<'_, T> {
898 if self.chunks.len() == 1 {
899 let arr = self.downcast_iter().next().unwrap();
900 return if arr.null_count() == 0 {
901 ChunkedArrayLayout::SingleNoNull(arr)
902 } else {
903 ChunkedArrayLayout::Single(arr)
904 };
905 }
906
907 if self.downcast_iter().all(|a| a.null_count() == 0) {
908 ChunkedArrayLayout::MultiNoNull(self)
909 } else {
910 ChunkedArrayLayout::Multi(self)
911 }
912 }
913}
914
915impl<T> ChunkedArray<T>
916where
917 T: PolarsNumericType,
918{
919 pub fn cont_slice(&self) -> PolarsResult<&[T::Native]> {
921 polars_ensure!(
922 self.chunks.len() == 1 && self.chunks[0].null_count() == 0,
923 ComputeError: "chunked array is not contiguous"
924 );
925 Ok(self.downcast_iter().next().map(|arr| arr.values()).unwrap())
926 }
927
928 pub(crate) fn cont_slice_mut(&mut self) -> Option<&mut [T::Native]> {
930 if self.chunks.len() == 1 && self.chunks[0].null_count() == 0 {
931 let arr = unsafe { self.downcast_iter_mut().next().unwrap() };
933 arr.get_mut_values()
934 } else {
935 None
936 }
937 }
938
939 pub fn data_views(&self) -> impl DoubleEndedIterator<Item = &[T::Native]> {
943 self.downcast_iter().map(|arr| arr.values().as_slice())
944 }
945
946 #[allow(clippy::wrong_self_convention)]
947 pub fn into_no_null_iter(
948 &self,
949 ) -> impl '_ + Send + Sync + ExactSizeIterator<Item = T::Native> + DoubleEndedIterator + TrustedLen
950 {
951 #[allow(clippy::map_clone)]
953 unsafe {
955 self.data_views()
956 .flatten()
957 .map(|v| *v)
958 .trust_my_length(self.len())
959 }
960 }
961}
962
963impl<T: PolarsDataType> Clone for ChunkedArray<T> {
964 fn clone(&self) -> Self {
965 ChunkedArray {
966 field: self.field.clone(),
967 chunks: self.chunks.clone(),
968 flags: self.flags.clone(),
969
970 _pd: Default::default(),
971 length: self.length,
972 null_count: self.null_count,
973 }
974 }
975}
976
977impl<T: PolarsDataType> AsRef<ChunkedArray<T>> for ChunkedArray<T> {
978 fn as_ref(&self) -> &ChunkedArray<T> {
979 self
980 }
981}
982
983impl ValueSize for ListChunked {
984 fn get_values_size(&self) -> usize {
985 self.chunks
986 .iter()
987 .fold(0usize, |acc, arr| acc + arr.get_values_size())
988 }
989}
990
991#[cfg(feature = "dtype-array")]
992impl ValueSize for ArrayChunked {
993 fn get_values_size(&self) -> usize {
994 self.chunks
995 .iter()
996 .fold(0usize, |acc, arr| acc + arr.get_values_size())
997 }
998}
999impl ValueSize for StringChunked {
1000 fn get_values_size(&self) -> usize {
1001 self.chunks
1002 .iter()
1003 .fold(0usize, |acc, arr| acc + arr.get_values_size())
1004 }
1005}
1006
1007impl ValueSize for BinaryOffsetChunked {
1008 fn get_values_size(&self) -> usize {
1009 self.chunks
1010 .iter()
1011 .fold(0usize, |acc, arr| acc + arr.get_values_size())
1012 }
1013}
1014
1015pub(crate) fn to_primitive<T: PolarsNumericType>(
1016 values: Vec<T::Native>,
1017 validity: Option<Bitmap>,
1018) -> PrimitiveArray<T::Native> {
1019 PrimitiveArray::new(
1020 T::get_static_dtype().to_arrow(CompatLevel::newest()),
1021 values.into(),
1022 validity,
1023 )
1024}
1025
1026pub(crate) fn to_array<T: PolarsNumericType>(
1027 values: Vec<T::Native>,
1028 validity: Option<Bitmap>,
1029) -> ArrayRef {
1030 Box::new(to_primitive::<T>(values, validity))
1031}
1032
1033impl<T: PolarsDataType> Default for ChunkedArray<T> {
1034 fn default() -> Self {
1035 let dtype = T::get_static_dtype();
1036 let arrow_dtype = dtype.to_physical().to_arrow(CompatLevel::newest());
1037 ChunkedArray {
1038 field: Arc::new(Field::new(PlSmallStr::EMPTY, dtype)),
1039 chunks: vec![new_empty_array(arrow_dtype)],
1041 flags: StatisticsFlagsIM::empty(),
1042
1043 _pd: Default::default(),
1044 length: 0,
1045 null_count: 0,
1046 }
1047 }
1048}
1049
1050#[cfg(test)]
1051pub(crate) mod test {
1052 use crate::prelude::*;
1053
1054 pub(crate) fn get_chunked_array() -> Int32Chunked {
1055 ChunkedArray::new(PlSmallStr::from_static("a"), &[1, 2, 3])
1056 }
1057
1058 #[test]
1059 fn test_sort() {
1060 let a = Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 9, 3, 2]);
1061 let b = a
1062 .sort(false)
1063 .into_iter()
1064 .map(|opt| opt.unwrap())
1065 .collect::<Vec<_>>();
1066 assert_eq!(b, [1, 2, 3, 9]);
1067 let a = StringChunked::new(PlSmallStr::from_static("a"), &["b", "a", "c"]);
1068 let a = a.sort(false);
1069 let b = a.into_iter().collect::<Vec<_>>();
1070 assert_eq!(b, [Some("a"), Some("b"), Some("c")]);
1071 assert!(a.is_sorted_ascending_flag());
1072 }
1073
1074 #[test]
1075 fn arithmetic() {
1076 let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 6, 40]);
1077 let b = &Int32Chunked::new(PlSmallStr::from_static("b"), &[-1, 2, 3, 4]);
1078
1079 println!("{:?}", a + b);
1082 println!("{:?}", a - b);
1083 println!("{:?}", a * b);
1084 println!("{:?}", a / b);
1085 }
1086
1087 #[test]
1088 fn iter() {
1089 let s1 = get_chunked_array();
1090 assert_eq!(s1.into_iter().fold(0, |acc, val| { acc + val.unwrap() }), 6)
1092 }
1093
1094 #[test]
1095 fn limit() {
1096 let a = get_chunked_array();
1097 let b = a.limit(2);
1098 println!("{b:?}");
1099 assert_eq!(b.len(), 2)
1100 }
1101
1102 #[test]
1103 fn filter() {
1104 let a = get_chunked_array();
1105 let b = a
1106 .filter(&BooleanChunked::new(
1107 PlSmallStr::from_static("filter"),
1108 &[true, false, false],
1109 ))
1110 .unwrap();
1111 assert_eq!(b.len(), 1);
1112 assert_eq!(b.into_iter().next(), Some(Some(1)));
1113 }
1114
1115 #[test]
1116 fn aggregates() {
1117 let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 10, 9]);
1118 assert_eq!(a.max(), Some(100));
1119 assert_eq!(a.min(), Some(1));
1120 assert_eq!(a.sum(), Some(120))
1121 }
1122
1123 #[test]
1124 fn take() {
1125 let a = get_chunked_array();
1126 let new = a.take(&[0 as IdxSize, 1]).unwrap();
1127 assert_eq!(new.len(), 2)
1128 }
1129
1130 #[test]
1131 fn cast() {
1132 let a = get_chunked_array();
1133 let b = a.cast(&DataType::Int64).unwrap();
1134 assert_eq!(b.dtype(), &DataType::Int64)
1135 }
1136
1137 fn assert_slice_equal<T>(ca: &ChunkedArray<T>, eq: &[T::Native])
1138 where
1139 T: PolarsNumericType,
1140 {
1141 assert_eq!(ca.iter().map(|opt| opt.unwrap()).collect::<Vec<_>>(), eq)
1142 }
1143
1144 #[test]
1145 fn slice() {
1146 let mut first = UInt32Chunked::new(PlSmallStr::from_static("first"), &[0, 1, 2]);
1147 let second = UInt32Chunked::new(PlSmallStr::from_static("second"), &[3, 4, 5]);
1148 first.append(&second).unwrap();
1149 assert_slice_equal(&first.slice(0, 3), &[0, 1, 2]);
1150 assert_slice_equal(&first.slice(0, 4), &[0, 1, 2, 3]);
1151 assert_slice_equal(&first.slice(1, 4), &[1, 2, 3, 4]);
1152 assert_slice_equal(&first.slice(3, 2), &[3, 4]);
1153 assert_slice_equal(&first.slice(3, 3), &[3, 4, 5]);
1154 assert_slice_equal(&first.slice(-3, 3), &[3, 4, 5]);
1155 assert_slice_equal(&first.slice(-6, 6), &[0, 1, 2, 3, 4, 5]);
1156
1157 assert_eq!(first.slice(-7, 2).len(), 1);
1158 assert_eq!(first.slice(-3, 4).len(), 3);
1159 assert_eq!(first.slice(3, 4).len(), 3);
1160 assert_eq!(first.slice(10, 4).len(), 0);
1161 }
1162
1163 #[test]
1164 fn sorting() {
1165 let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[9, 2, 4]);
1166 let sorted = s.sort(false);
1167 assert_slice_equal(&sorted, &[2, 4, 9]);
1168 let sorted = s.sort(true);
1169 assert_slice_equal(&sorted, &[9, 4, 2]);
1170
1171 let s: StringChunked = ["b", "a", "z"].iter().collect();
1172 let sorted = s.sort(false);
1173 assert_eq!(
1174 sorted.into_iter().collect::<Vec<_>>(),
1175 &[Some("a"), Some("b"), Some("z")]
1176 );
1177 let sorted = s.sort(true);
1178 assert_eq!(
1179 sorted.into_iter().collect::<Vec<_>>(),
1180 &[Some("z"), Some("b"), Some("a")]
1181 );
1182 let s: StringChunked = [Some("b"), None, Some("z")].iter().copied().collect();
1183 let sorted = s.sort(false);
1184 assert_eq!(
1185 sorted.into_iter().collect::<Vec<_>>(),
1186 &[None, Some("b"), Some("z")]
1187 );
1188 }
1189
1190 #[test]
1191 fn reverse() {
1192 let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3]);
1193 assert_slice_equal(&s.reverse(), &[3, 2, 1]);
1195 let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[Some(1), None, Some(3)]);
1197 assert_eq!(Vec::from(&s.reverse()), &[Some(3), None, Some(1)]);
1198 let s = BooleanChunked::new(PlSmallStr::EMPTY, &[true, false]);
1199 assert_eq!(Vec::from(&s.reverse()), &[Some(false), Some(true)]);
1200
1201 let s = StringChunked::new(PlSmallStr::EMPTY, &["a", "b", "c"]);
1202 assert_eq!(Vec::from(&s.reverse()), &[Some("c"), Some("b"), Some("a")]);
1203
1204 let s = StringChunked::new(PlSmallStr::EMPTY, &[Some("a"), None, Some("c")]);
1205 assert_eq!(Vec::from(&s.reverse()), &[Some("c"), None, Some("a")]);
1206 }
1207
1208 #[test]
1209 #[cfg(feature = "dtype-categorical")]
1210 fn test_iter_categorical() {
1211 let ca = StringChunked::new(
1212 PlSmallStr::EMPTY,
1213 &[Some("foo"), None, Some("bar"), Some("ham")],
1214 );
1215 let cats = Categories::new(
1216 PlSmallStr::EMPTY,
1217 PlSmallStr::EMPTY,
1218 CategoricalPhysical::U32,
1219 );
1220 let ca = ca.cast(&DataType::from_categories(cats)).unwrap();
1221 let ca = ca.cat32().unwrap();
1222 let v: Vec<_> = ca.physical().into_iter().collect();
1223 assert_eq!(v, &[Some(0), None, Some(1), Some(2)]);
1224 }
1225
1226 #[test]
1227 #[ignore]
1228 fn test_shrink_to_fit() {
1229 let mut builder = StringChunkedBuilder::new(PlSmallStr::from_static("foo"), 2048);
1230 builder.append_value("foo");
1231 let mut arr = builder.finish();
1232 let before = arr
1233 .chunks()
1234 .iter()
1235 .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
1236 .sum::<usize>();
1237 arr.shrink_to_fit();
1238 let after = arr
1239 .chunks()
1240 .iter()
1241 .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
1242 .sum::<usize>();
1243 assert!(before > after);
1244 }
1245}