1#![allow(unsafe_op_in_unsafe_fn)]
3use std::sync::Arc;
4
5use arrow::array::*;
6use arrow::bitmap::Bitmap;
7use arrow::compute::concatenate::concatenate_unchecked;
8use polars_compute::filter::filter_with_bitmap;
9
10use crate::prelude::{ChunkTakeUnchecked, *};
11
12pub mod ops;
13#[macro_use]
14pub mod arithmetic;
15pub mod builder;
16pub mod cast;
17pub mod collect;
18pub mod comparison;
19pub mod flags;
20pub mod float;
21pub mod iterator;
22#[cfg(feature = "ndarray")]
23pub(crate) mod ndarray;
24
25#[cfg(feature = "dtype-array")]
26pub(crate) mod array;
27mod binary;
28mod binary_offset;
29mod bitwise;
30#[cfg(feature = "object")]
31mod drop;
32mod from;
33mod from_iterator;
34pub mod from_iterator_par;
35pub(crate) mod list;
36pub(crate) mod logical;
37#[cfg(feature = "object")]
38pub mod object;
39#[cfg(feature = "random")]
40mod random;
41#[cfg(feature = "dtype-struct")]
42mod struct_;
43#[cfg(any(
44 feature = "temporal",
45 feature = "dtype-datetime",
46 feature = "dtype-date"
47))]
48pub mod temporal;
49mod to_vec;
50mod trusted_len;
51
52use arrow::legacy::prelude::*;
53#[cfg(feature = "dtype-struct")]
54pub use struct_::StructChunked;
55
56use self::flags::{StatisticsFlags, StatisticsFlagsIM};
57use crate::series::IsSorted;
58use crate::utils::{first_non_null, first_null, last_non_null};
59
// Stub so code can still name `RevMapping` when the "dtype-categorical"
// feature (which provides the real type) is disabled.
#[cfg(not(feature = "dtype-categorical"))]
pub struct RevMapping {}

// Iterator over the per-chunk lengths of a `ChunkedArray` (returned by `chunk_lengths`).
pub type ChunkLenIter<'a> = std::iter::Map<std::slice::Iter<'a, ArrayRef>, fn(&ArrayRef) -> usize>;
64
/// A typed array whose data is stored as one or more contiguous Arrow arrays
/// ("chunks"); the logical array is the concatenation of those chunks.
pub struct ChunkedArray<T: PolarsDataType> {
    // Name and dtype of this array.
    pub(crate) field: Arc<Field>,
    // The backing Arrow arrays.
    pub(crate) chunks: Vec<ArrayRef>,

    // Cached statistics (sortedness, fast-explode) — see the `flags` module.
    pub(crate) flags: StatisticsFlagsIM,

    // Total number of elements across all chunks.
    length: usize,
    // Total number of null values across all chunks.
    null_count: usize,
    // Ties this array to its `PolarsDataType` without storing a `T`.
    _pd: std::marker::PhantomData<T>,
}
149
150impl<T: PolarsDataType> ChunkedArray<T> {
151 fn should_rechunk(&self) -> bool {
152 self.chunks.len() > 1 && self.chunks.len() > self.len() / 3
153 }
154
155 fn optional_rechunk(mut self) -> Self {
156 if self.should_rechunk() {
158 self.rechunk_mut()
159 }
160 self
161 }
162
163 pub(crate) fn as_any(&self) -> &dyn std::any::Any {
164 self
165 }
166
167 pub fn unpack_series_matching_type<'a>(
169 &self,
170 series: &'a Series,
171 ) -> PolarsResult<&'a ChunkedArray<T>> {
172 polars_ensure!(
173 self.dtype() == series.dtype(),
174 SchemaMismatch: "cannot unpack series of type `{}` into `{}`",
175 series.dtype(),
176 self.dtype(),
177 );
178
179 Ok(unsafe { self.unpack_series_matching_physical_type(series) })
181 }
182
183 fn new_with_compute_len(field: Arc<Field>, chunks: Vec<ArrayRef>) -> Self {
188 unsafe {
189 let mut chunked_arr = Self::new_with_dims(field, chunks, 0, 0);
190 chunked_arr.compute_len();
191 chunked_arr
192 }
193 }
194
195 pub unsafe fn new_with_dims(
199 field: Arc<Field>,
200 chunks: Vec<ArrayRef>,
201 length: usize,
202 null_count: usize,
203 ) -> Self {
204 Self {
205 field,
206 chunks,
207 flags: StatisticsFlagsIM::empty(),
208
209 _pd: Default::default(),
210 length,
211 null_count,
212 }
213 }
214
215 pub(crate) fn is_sorted_ascending_flag(&self) -> bool {
216 self.get_flags().is_sorted_ascending()
217 }
218
219 pub(crate) fn is_sorted_descending_flag(&self) -> bool {
220 self.get_flags().is_sorted_descending()
221 }
222
223 pub(crate) fn is_sorted_any(&self) -> bool {
225 self.get_flags().is_sorted_any()
226 }
227
228 pub fn unset_fast_explode_list(&mut self) {
229 self.set_fast_explode_list(false)
230 }
231
232 pub fn set_fast_explode_list(&mut self, value: bool) {
233 let mut flags = self.flags.get_mut();
234 flags.set(StatisticsFlags::CAN_FAST_EXPLODE_LIST, value);
235 self.flags.set_mut(flags);
236 }
237
238 pub fn get_fast_explode_list(&self) -> bool {
239 self.get_flags().can_fast_explode_list()
240 }
241
242 pub fn get_flags(&self) -> StatisticsFlags {
243 self.flags.get()
244 }
245
246 pub fn set_flags(&mut self, flags: StatisticsFlags) {
248 self.flags = StatisticsFlagsIM::new(flags);
249 }
250
251 pub fn is_sorted_flag(&self) -> IsSorted {
252 self.get_flags().is_sorted()
253 }
254
255 pub fn retain_flags_from<U: PolarsDataType>(
256 &mut self,
257 from: &ChunkedArray<U>,
258 retain_flags: StatisticsFlags,
259 ) {
260 let flags = from.flags.get();
261 if !flags.is_empty() {
263 self.set_flags(flags & retain_flags)
264 }
265 }
266
267 pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
269 let mut flags = self.flags.get_mut();
270 flags.set_sorted(sorted);
271 self.flags.set_mut(flags);
272 }
273
274 pub fn with_sorted_flag(&self, sorted: IsSorted) -> Self {
276 let mut out = self.clone();
277 out.set_sorted_flag(sorted);
278 out
279 }
280
281 pub fn first_null(&self) -> Option<usize> {
282 if self.null_count() == 0 {
283 None
284 }
285 else if self.null_count() == self.len() {
287 Some(0)
288 } else if self.is_sorted_any() {
289 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
290 0
292 } else {
293 self.null_count()
295 };
296
297 debug_assert!(
298 unsafe { self.get_unchecked(out) }.is_some(),
300 "incorrect sorted flag"
301 );
302
303 Some(out)
304 } else {
305 first_null(self.chunks().iter().map(|arr| arr.as_ref()))
306 }
307 }
308
309 pub fn first_non_null(&self) -> Option<usize> {
311 if self.null_count() == self.len() {
312 None
313 }
314 else if self.null_count() == 0 {
316 Some(0)
317 } else if self.is_sorted_any() {
318 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
319 self.null_count()
321 } else {
322 0
324 };
325
326 debug_assert!(
327 unsafe { self.get_unchecked(out) }.is_some(),
329 "incorrect sorted flag"
330 );
331
332 Some(out)
333 } else {
334 first_non_null(self.chunks().iter().map(|arr| arr.as_ref()))
335 }
336 }
337
338 pub fn last_non_null(&self) -> Option<usize> {
340 if self.null_count() == self.len() {
341 None
342 }
343 else if self.null_count() == 0 {
345 Some(self.len() - 1)
346 } else if self.is_sorted_any() {
347 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
348 self.len() - 1
350 } else {
351 self.len() - self.null_count() - 1
353 };
354
355 debug_assert!(
356 unsafe { self.get_unchecked(out) }.is_some(),
358 "incorrect sorted flag"
359 );
360
361 Some(out)
362 } else {
363 last_non_null(self.chunks().iter().map(|arr| arr.as_ref()), self.len())
364 }
365 }
366
367 pub fn drop_nulls(&self) -> Self {
368 if self.null_count() == 0 {
369 self.clone()
370 } else {
371 let chunks = self
372 .downcast_iter()
373 .map(|arr| {
374 if arr.null_count() == 0 {
375 arr.to_boxed()
376 } else {
377 filter_with_bitmap(arr, arr.validity().unwrap())
378 }
379 })
380 .collect();
381 unsafe {
382 Self::new_with_dims(
383 self.field.clone(),
384 chunks,
385 self.len() - self.null_count(),
386 0,
387 )
388 }
389 }
390 }
391
392 #[inline]
394 #[allow(clippy::type_complexity)]
395 pub fn iter_validities(
396 &self,
397 ) -> impl ExactSizeIterator<Item = Option<&Bitmap>> + DoubleEndedIterator {
398 fn to_validity(arr: &ArrayRef) -> Option<&Bitmap> {
399 arr.validity()
400 }
401 self.chunks.iter().map(to_validity)
402 }
403
404 #[inline]
405 pub fn has_nulls(&self) -> bool {
407 self.null_count > 0
408 }
409
410 pub fn shrink_to_fit(&mut self) {
412 self.chunks = vec![concatenate_unchecked(self.chunks.as_slice()).unwrap()];
413 }
414
415 pub fn clear(&self) -> Self {
416 let mut ca = unsafe {
418 self.copy_with_chunks(vec![new_empty_array(
419 self.chunks.first().unwrap().dtype().clone(),
420 )])
421 };
422
423 use StatisticsFlags as F;
424 ca.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
425 ca
426 }
427
428 pub(crate) unsafe fn unpack_series_matching_physical_type<'a>(
435 &self,
436 series: &'a Series,
437 ) -> &'a ChunkedArray<T> {
438 let series_trait = &**series;
439 if self.dtype() == series.dtype() {
440 &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
441 } else {
442 use DataType::*;
443 match (self.dtype(), series.dtype()) {
444 (Int64, Datetime(_, _)) | (Int64, Duration(_)) | (Int32, Date) => {
445 &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
446 },
447 _ => panic!(
448 "cannot unpack series {:?} into matching type {:?}",
449 series,
450 self.dtype()
451 ),
452 }
453 }
454 }
455
456 pub fn chunk_lengths(&self) -> ChunkLenIter<'_> {
458 self.chunks.iter().map(|chunk| chunk.len())
459 }
460
461 #[inline]
463 pub fn chunks(&self) -> &Vec<ArrayRef> {
464 &self.chunks
465 }
466
467 #[inline]
473 pub unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
474 &mut self.chunks
475 }
476
477 pub fn is_optimal_aligned(&self) -> bool {
479 self.chunks.len() == 1 && self.null_count() == 0
480 }
481
482 unsafe fn copy_with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
487 Self::new_with_compute_len(self.field.clone(), chunks)
488 }
489
490 pub fn dtype(&self) -> &DataType {
492 self.field.dtype()
493 }
494
495 pub(crate) unsafe fn set_dtype(&mut self, dtype: DataType) {
496 self.field = Arc::new(Field::new(self.name().clone(), dtype))
497 }
498
499 pub fn name(&self) -> &PlSmallStr {
501 self.field.name()
502 }
503
504 pub fn ref_field(&self) -> &Field {
506 &self.field
507 }
508
509 pub fn rename(&mut self, name: PlSmallStr) {
511 self.field = Arc::new(Field::new(name, self.field.dtype().clone()));
512 }
513
514 pub fn with_name(mut self, name: PlSmallStr) -> Self {
516 self.rename(name);
517 self
518 }
519}
520
impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
{
    /// Get a single value by logical index, `None` if that value is null.
    ///
    /// # Panics
    /// Panics when `idx` is out of bounds.
    #[inline]
    pub fn get(&self, idx: usize) -> Option<T::Physical<'_>> {
        let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
        assert!(
            chunk_idx < self.chunks().len(),
            "index: {} out of bounds for len: {}",
            idx,
            self.len()
        );
        unsafe {
            // SAFETY: `chunk_idx` was bounds-checked above; `arr_idx` is
            // checked below before the unchecked element access.
            let arr = self.downcast_get_unchecked(chunk_idx);
            assert!(
                arr_idx < arr.len(),
                "index: {} out of bounds for len: {}",
                idx,
                self.len()
            );
            arr.get_unchecked(arr_idx)
        }
    }

    /// Get a single value by logical index, `None` if that value is null.
    ///
    /// # Safety
    /// `idx` must be in bounds.
    #[inline]
    pub unsafe fn get_unchecked(&self, idx: usize) -> Option<T::Physical<'_>> {
        let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);

        unsafe {
            self.downcast_get_unchecked(chunk_idx)
                .get_unchecked(arr_idx)
        }
    }

    /// Get a single value by logical index, ignoring the validity mask.
    ///
    /// # Safety
    /// `idx` must be in bounds; the value read at a null position is whatever
    /// sits in the values buffer there.
    #[inline]
    pub unsafe fn value_unchecked(&self, idx: usize) -> T::Physical<'_> {
        let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);

        unsafe {
            self.downcast_get_unchecked(chunk_idx)
                .value_unchecked(arr_idx)
        }
    }

    /// First element of the array, `None` when it is null.
    ///
    /// # Panics
    /// Panics when the array is empty (the iterator yields nothing to unwrap).
    #[inline]
    pub fn first(&self) -> Option<T::Physical<'_>> {
        self.iter().next().unwrap()
    }

    /// Last element of the array, `None` when it is null.
    ///
    /// # Panics
    /// Panics when no chunk contains any elements.
    #[inline]
    pub fn last(&self) -> Option<T::Physical<'_>> {
        let arr = self
            .downcast_iter()
            .rev()
            .find(|arr| !arr.is_empty())
            .unwrap();
        // SAFETY: the chunk was just checked to be non-empty.
        unsafe { arr.get_unchecked(arr.len() - 1) }
    }

    /// Replace the validity mask of the whole array, slicing `validity` to the
    /// individual chunks, and update the cached null count.
    ///
    /// # Panics
    /// Panics when `validity.len() != self.len()`.
    pub fn set_validity(&mut self, validity: &Bitmap) {
        assert_eq!(self.len(), validity.len());
        let mut i = 0;
        for chunk in unsafe { self.chunks_mut() } {
            *chunk = chunk.with_validity(Some(validity.clone().sliced(i, chunk.len())));
            i += chunk.len();
        }
        self.null_count = validity.unset_bits();
        // New nulls may have been introduced, so fast-explode no longer holds.
        self.set_fast_explode_list(false);
    }
}
613
impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
    ChunkedArray<T>: ChunkTakeUnchecked<[IdxSize]>,
{
    /// Scatter the values of `self` into the set positions of `validity`,
    /// producing an array of length `validity.len()` whose unset positions
    /// are null.
    ///
    /// # Panics
    /// Panics when `self` contains nulls or when `self.len()` does not equal
    /// the number of set bits in `validity`.
    pub fn deposit(&self, validity: &Bitmap) -> Self {
        let set_bits = validity.set_bits();

        assert_eq!(self.null_count(), 0);
        assert_eq!(self.len(), set_bits);

        // Fast path: every position is valid — the result equals `self`.
        if set_bits == validity.len() {
            return self.clone();
        }

        // Fast path: nothing is valid — an all-null array.
        if set_bits == 0 {
            return Self::full_null_like(self, validity.len());
        }

        let mut null_mask = validity.clone();

        // Build one gather index per output position. Positions that end up
        // null gather an arbitrary (repeated) index; `set_validity` below
        // masks them out.
        let mut gather_idxs = Vec::with_capacity(validity.len());
        let leading_nulls = null_mask.take_leading_zeros();
        // Index 0 for all leading nulls plus the first set position.
        gather_idxs.extend(std::iter::repeat_n(0, leading_nulls + 1));

        // For the remaining positions, advance the source index only when the
        // mask bit is set.
        let mut i = 0 as IdxSize;
        gather_idxs.extend(null_mask.iter().skip(1).map(|v| {
            i += IdxSize::from(v);
            i
        }));

        // SAFETY: every gather index is < self.len() by construction.
        let mut ca = unsafe { ChunkTakeUnchecked::take_unchecked(self, &gather_idxs) };
        ca.set_validity(validity);
        ca
    }
}
651
impl ListChunked {
    /// Get the list at `idx` as a [`Series`] (physical inner dtype), `None`
    /// when the value is null or out of bounds (via `self.get`).
    #[inline]
    pub fn get_as_series(&self, idx: usize) -> Option<Series> {
        unsafe {
            Some(Series::from_chunks_and_dtype_unchecked(
                self.name().clone(),
                vec![self.get(idx)?],
                &self.inner_dtype().to_physical(),
            ))
        }
    }

    /// Whether any *valid* (non-null) list in this array has length zero.
    pub fn has_empty_lists(&self) -> bool {
        for arr in self.downcast_iter() {
            if arr.is_empty() {
                continue;
            }

            if match arr.validity() {
                // No validity mask: every zero-length offset range counts.
                None => arr.offsets().lengths().any(|l| l == 0),
                // Only count zero-length lists that are valid.
                Some(validity) => arr
                    .offsets()
                    .lengths()
                    .enumerate()
                    .any(|(i, l)| l == 0 && unsafe { validity.get_bit_unchecked(i) }),
            } {
                return true;
            }
        }

        false
    }

    /// Whether the values buffers contain elements that are not reachable
    /// through the offsets/validity: leading/trailing slack outside the offset
    /// range, or non-empty lists masked out by a null bit.
    pub fn has_masked_out_values(&self) -> bool {
        for arr in self.downcast_iter() {
            if arr.is_empty() {
                continue;
            }

            // Values before the first offset or after the last are unreferenced.
            if *arr.offsets().first() != 0 || *arr.offsets().last() != arr.values().len() as i64 {
                return true;
            }

            let Some(validity) = arr.validity() else {
                continue;
            };
            if validity.set_bits() == 0 {
                continue;
            }

            // A null list entry with a non-zero length hides values.
            for i in (!validity).true_idx_iter() {
                if arr.offsets().length_at(i) > 0 {
                    return true;
                }
            }
        }

        false
    }
}
713
#[cfg(feature = "dtype-array")]
impl ArrayChunked {
    /// Get the fixed-size array at `idx` as a [`Series`] (physical inner
    /// dtype), `None` when the value is null or out of bounds (via `self.get`).
    #[inline]
    pub fn get_as_series(&self, idx: usize) -> Option<Series> {
        unsafe {
            Some(Series::from_chunks_and_dtype_unchecked(
                self.name().clone(),
                vec![self.get(idx)?],
                &self.inner_dtype().to_physical(),
            ))
        }
    }

    /// Build an [`ArrayChunked`] from flat value chunks, where each chunk's
    /// length is a multiple of `width` and the chunks together hold
    /// `length * width` values.
    pub fn from_aligned_values(
        name: PlSmallStr,
        inner_dtype: &DataType,
        width: usize,
        chunks: Vec<ArrayRef>,
        length: usize,
    ) -> Self {
        let dtype = DataType::Array(Box::new(inner_dtype.clone()), width);
        let arrow_dtype = dtype.to_arrow(CompatLevel::newest());
        let field = Arc::new(Field::new(name, dtype));
        // Width 0: no values exist; build a single chunk with an empty values
        // array of the right dtype.
        if width == 0 {
            use arrow::array::builder::{ArrayBuilder, make_builder};
            let values = make_builder(&inner_dtype.to_arrow(CompatLevel::newest())).freeze();
            return ArrayChunked::new_with_compute_len(
                field,
                vec![FixedSizeListArray::new(arrow_dtype, length, values, None).into_boxed()],
            );
        }
        let mut total_len = 0;
        let chunks = chunks
            .into_iter()
            .map(|chunk| {
                debug_assert_eq!(chunk.len() % width, 0);
                let chunk_len = chunk.len() / width;
                total_len += chunk_len;
                FixedSizeListArray::new(arrow_dtype.clone(), chunk_len, chunk, None).into_boxed()
            })
            .collect();
        debug_assert_eq!(total_len, length);

        // SAFETY: dims follow from the construction above; no validity mask
        // was attached, so the null count is 0.
        unsafe { Self::new_with_dims(field, chunks, length, 0) }
    }

    /// Convert this fixed-size array column into a [`ListChunked`] by
    /// synthesizing offsets at multiples of the width.
    pub fn to_list(&self) -> ListChunked {
        let inner_dtype = self.inner_dtype();
        let chunks = self
            .downcast_iter()
            .map(|chunk| {
                use arrow::offset::OffsetsBuffer;

                let inner_dtype = chunk.dtype().inner_dtype().unwrap();
                let dtype = inner_dtype.clone().to_large_list(true);

                // Every list has exactly `width` elements.
                let offsets = (0..=chunk.len())
                    .map(|i| (i * self.width()) as i64)
                    .collect::<Vec<i64>>();

                // SAFETY: offsets are monotonically non-decreasing by
                // construction.
                let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets.into()) };

                ListArray::<i64>::new(
                    dtype,
                    offsets,
                    chunk.values().clone(),
                    chunk.validity().cloned(),
                )
                .into_boxed()
            })
            .collect();

        // SAFETY: length and null count carry over one-to-one from `self`.
        let mut ca = unsafe {
            ListChunked::new_with_dims(
                Arc::new(Field::new(
                    self.name().clone(),
                    DataType::List(Box::new(inner_dtype.clone())),
                )),
                chunks,
                self.len(),
                self.null_count(),
            )
        };
        // All lists share the same width, so explode is trivially safe when
        // there are no nulls.
        ca.set_fast_explode_list(!self.has_nulls());
        ca
    }
}
806
impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
{
    /// Re-slice a single-chunk array into chunks of the lengths yielded by
    /// `chunk_id`, e.g. to mirror another array's chunking.
    pub fn match_chunks<I>(&self, chunk_id: I) -> Self
    where
        I: Iterator<Item = usize>,
    {
        // NOTE(review): this assert contradicts the `chunks.len() != 1`
        // fallback below — in debug builds a multi-chunk input trips the
        // assert before the rechunk branch can run. Confirm intent.
        debug_assert!(self.chunks.len() == 1);
        // Slices a single-chunk array into the requested chunk lengths.
        let slice = |ca: &Self| {
            let array = &ca.chunks[0];

            let mut offset = 0;
            let chunks = chunk_id
                .map(|len| {
                    // SAFETY: asserted in-bounds below (debug builds).
                    debug_assert!((offset + len) <= array.len());
                    let out = unsafe { array.sliced_unchecked(offset, len) };
                    offset += len;
                    out
                })
                .collect();

            // The chunk lengths must cover the array exactly.
            debug_assert_eq!(offset, array.len());

            // SAFETY: the chunks are slices of the original, so the dtype is
            // unchanged.
            unsafe {
                Self::from_chunks_and_dtype(self.name().clone(), chunks, self.dtype().clone())
            }
        };

        if self.chunks.len() != 1 {
            let out = self.rechunk();
            slice(&out)
        } else {
            slice(self)
        }
    }
}
850
851impl<T: PolarsDataType> AsRefDataType for ChunkedArray<T> {
852 fn as_ref_dtype(&self) -> &DataType {
853 self.dtype()
854 }
855}
856
// Expose the array's data as a single raw pointer (after rechunking).
pub(crate) trait AsSinglePtr: AsRefDataType {
    // Default implementation errors: the operation is only provided for
    // dtypes with an overriding impl (see the numeric impl below).
    fn as_single_ptr(&mut self) -> PolarsResult<usize> {
        polars_bail!(opq = as_single_ptr, self.as_ref_dtype());
    }
}
863
864impl<T> AsSinglePtr for ChunkedArray<T>
865where
866 T: PolarsNumericType,
867{
868 fn as_single_ptr(&mut self) -> PolarsResult<usize> {
869 self.rechunk_mut();
870 let a = self.data_views().next().unwrap();
871 let ptr = a.as_ptr();
872 Ok(ptr as usize)
873 }
874}
875
// These dtypes keep the trait's default (erroring) behavior.
impl AsSinglePtr for BooleanChunked {}
impl AsSinglePtr for ListChunked {}
#[cfg(feature = "dtype-array")]
impl AsSinglePtr for ArrayChunked {}
impl AsSinglePtr for StringChunked {}
impl AsSinglePtr for BinaryChunked {}
#[cfg(feature = "object")]
impl<T: PolarsObject> AsSinglePtr for ObjectChunked<T> {}
884
/// Chunk/null layout of a [`ChunkedArray`], as classified by
/// `ChunkedArray::layout`, so callers can dispatch to the cheapest code path.
pub enum ChunkedArrayLayout<'a, T: PolarsDataType> {
    /// A single chunk without null values.
    SingleNoNull(&'a T::Array),
    /// A single chunk with at least one null value.
    Single(&'a T::Array),
    /// Multiple chunks, none of which contain null values.
    MultiNoNull(&'a ChunkedArray<T>),
    /// Multiple chunks, at least one containing null values.
    Multi(&'a ChunkedArray<T>),
}
891
892impl<T> ChunkedArray<T>
893where
894 T: PolarsDataType,
895{
896 pub fn layout(&self) -> ChunkedArrayLayout<'_, T> {
897 if self.chunks.len() == 1 {
898 let arr = self.downcast_iter().next().unwrap();
899 return if arr.null_count() == 0 {
900 ChunkedArrayLayout::SingleNoNull(arr)
901 } else {
902 ChunkedArrayLayout::Single(arr)
903 };
904 }
905
906 if self.downcast_iter().all(|a| a.null_count() == 0) {
907 ChunkedArrayLayout::MultiNoNull(self)
908 } else {
909 ChunkedArrayLayout::Multi(self)
910 }
911 }
912}
913
impl<T> ChunkedArray<T>
where
    T: PolarsNumericType,
{
    /// View the values as a single contiguous slice.
    ///
    /// # Errors
    /// Errors unless the array is exactly one chunk with no nulls.
    pub fn cont_slice(&self) -> PolarsResult<&[T::Native]> {
        polars_ensure!(
            self.chunks.len() == 1 && self.chunks[0].null_count() == 0,
            ComputeError: "chunked array is not contiguous"
        );
        Ok(self.downcast_iter().next().map(|arr| arr.values()).unwrap())
    }

    /// Mutable view of the values as a single contiguous slice; `None` when
    /// the array is not a single null-free chunk or the buffer is shared.
    pub(crate) fn cont_slice_mut(&mut self) -> Option<&mut [T::Native]> {
        if self.chunks.len() == 1 && self.chunks[0].null_count() == 0 {
            // SAFETY: an exclusive borrow of self guards the mutation.
            let arr = unsafe { self.downcast_iter_mut().next().unwrap() };
            arr.get_mut_values()
        } else {
            None
        }
    }

    /// Iterate over the per-chunk value slices (validity is ignored: null
    /// positions yield whatever sits in the values buffer).
    pub fn data_views(&self) -> impl DoubleEndedIterator<Item = &[T::Native]> {
        self.downcast_iter().map(|arr| arr.values().as_slice())
    }

    /// Iterate over the raw values of every chunk, ignoring validity.
    #[allow(clippy::wrong_self_convention)]
    pub fn into_no_null_iter(
        &self,
    ) -> impl '_ + Send + Sync + ExactSizeIterator<Item = T::Native> + DoubleEndedIterator + TrustedLen
    {
        #[allow(clippy::map_clone)]
        // SAFETY: the flattened value slices contain exactly `self.len()`
        // elements (values buffers are full-length regardless of validity),
        // so `trust_my_length` is sound.
        unsafe {
            self.data_views()
                .flatten()
                .map(|v| *v)
                .trust_my_length(self.len())
        }
    }
}
961
962impl<T: PolarsDataType> Clone for ChunkedArray<T> {
963 fn clone(&self) -> Self {
964 ChunkedArray {
965 field: self.field.clone(),
966 chunks: self.chunks.clone(),
967 flags: self.flags.clone(),
968
969 _pd: Default::default(),
970 length: self.length,
971 null_count: self.null_count,
972 }
973 }
974}
975
// Identity `AsRef`, so generic code can accept `impl AsRef<ChunkedArray<T>>`.
impl<T: PolarsDataType> AsRef<ChunkedArray<T>> for ChunkedArray<T> {
    fn as_ref(&self) -> &ChunkedArray<T> {
        self
    }
}
981
982impl ValueSize for ListChunked {
983 fn get_values_size(&self) -> usize {
984 self.chunks
985 .iter()
986 .fold(0usize, |acc, arr| acc + arr.get_values_size())
987 }
988}
989
#[cfg(feature = "dtype-array")]
impl ValueSize for ArrayChunked {
    /// Total values size, summed over all chunks.
    fn get_values_size(&self) -> usize {
        self.chunks.iter().map(|arr| arr.get_values_size()).sum()
    }
}
998impl ValueSize for StringChunked {
999 fn get_values_size(&self) -> usize {
1000 self.chunks
1001 .iter()
1002 .fold(0usize, |acc, arr| acc + arr.get_values_size())
1003 }
1004}
1005
1006impl ValueSize for BinaryOffsetChunked {
1007 fn get_values_size(&self) -> usize {
1008 self.chunks
1009 .iter()
1010 .fold(0usize, |acc, arr| acc + arr.get_values_size())
1011 }
1012}
1013
1014pub(crate) fn to_primitive<T: PolarsNumericType>(
1015 values: Vec<T::Native>,
1016 validity: Option<Bitmap>,
1017) -> PrimitiveArray<T::Native> {
1018 PrimitiveArray::new(
1019 T::get_static_dtype().to_arrow(CompatLevel::newest()),
1020 values.into(),
1021 validity,
1022 )
1023}
1024
1025pub(crate) fn to_array<T: PolarsNumericType>(
1026 values: Vec<T::Native>,
1027 validity: Option<Bitmap>,
1028) -> ArrayRef {
1029 Box::new(to_primitive::<T>(values, validity))
1030}
1031
1032impl<T: PolarsDataType> Default for ChunkedArray<T> {
1033 fn default() -> Self {
1034 let dtype = T::get_static_dtype();
1035 let arrow_dtype = dtype.to_physical().to_arrow(CompatLevel::newest());
1036 ChunkedArray {
1037 field: Arc::new(Field::new(PlSmallStr::EMPTY, dtype)),
1038 chunks: vec![new_empty_array(arrow_dtype)],
1040 flags: StatisticsFlagsIM::empty(),
1041
1042 _pd: Default::default(),
1043 length: 0,
1044 null_count: 0,
1045 }
1046 }
1047}
1048
#[cfg(test)]
pub(crate) mod test {
    use crate::prelude::*;

    // Small fixed Int32 fixture used by several tests below.
    pub(crate) fn get_chunked_array() -> Int32Chunked {
        ChunkedArray::new(PlSmallStr::from_static("a"), &[1, 2, 3])
    }

    // Ascending sort for numeric and string arrays; also checks that the
    // sorted-ascending flag is set on the result.
    #[test]
    fn test_sort() {
        let a = Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 9, 3, 2]);
        let b = a
            .sort(false)
            .into_iter()
            .map(|opt| opt.unwrap())
            .collect::<Vec<_>>();
        assert_eq!(b, [1, 2, 3, 9]);
        let a = StringChunked::new(PlSmallStr::from_static("a"), &["b", "a", "c"]);
        let a = a.sort(false);
        let b = a.into_iter().collect::<Vec<_>>();
        assert_eq!(b, [Some("a"), Some("b"), Some("c")]);
        assert!(a.is_sorted_ascending_flag());
    }

    // Smoke test: the arithmetic ops must not panic.
    #[test]
    fn arithmetic() {
        let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 6, 40]);
        let b = &Int32Chunked::new(PlSmallStr::from_static("b"), &[-1, 2, 3, 4]);

        println!("{:?}", a + b);
        println!("{:?}", a - b);
        println!("{:?}", a * b);
        println!("{:?}", a / b);
    }

    #[test]
    fn iter() {
        let s1 = get_chunked_array();
        // 1 + 2 + 3 = 6
        assert_eq!(s1.into_iter().fold(0, |acc, val| { acc + val.unwrap() }), 6)
    }

    #[test]
    fn limit() {
        let a = get_chunked_array();
        let b = a.limit(2);
        println!("{b:?}");
        assert_eq!(b.len(), 2)
    }

    #[test]
    fn filter() {
        let a = get_chunked_array();
        let b = a
            .filter(&BooleanChunked::new(
                PlSmallStr::from_static("filter"),
                &[true, false, false],
            ))
            .unwrap();
        assert_eq!(b.len(), 1);
        assert_eq!(b.into_iter().next(), Some(Some(1)));
    }

    #[test]
    fn aggregates() {
        let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 10, 9]);
        assert_eq!(a.max(), Some(100));
        assert_eq!(a.min(), Some(1));
        assert_eq!(a.sum(), Some(120))
    }

    #[test]
    fn take() {
        let a = get_chunked_array();
        let new = a.take(&[0 as IdxSize, 1]).unwrap();
        assert_eq!(new.len(), 2)
    }

    #[test]
    fn cast() {
        let a = get_chunked_array();
        let b = a.cast(&DataType::Int64).unwrap();
        assert_eq!(b.dtype(), &DataType::Int64)
    }

    // Helper: compare a (possibly multi-chunk) numeric array against a slice.
    fn assert_slice_equal<T>(ca: &ChunkedArray<T>, eq: &[T::Native])
    where
        T: PolarsNumericType,
    {
        assert_eq!(ca.iter().map(|opt| opt.unwrap()).collect::<Vec<_>>(), eq)
    }

    // Slicing across chunk boundaries, with negative offsets and
    // out-of-range clamping.
    #[test]
    fn slice() {
        let mut first = UInt32Chunked::new(PlSmallStr::from_static("first"), &[0, 1, 2]);
        let second = UInt32Chunked::new(PlSmallStr::from_static("second"), &[3, 4, 5]);
        first.append(&second).unwrap();
        assert_slice_equal(&first.slice(0, 3), &[0, 1, 2]);
        assert_slice_equal(&first.slice(0, 4), &[0, 1, 2, 3]);
        assert_slice_equal(&first.slice(1, 4), &[1, 2, 3, 4]);
        assert_slice_equal(&first.slice(3, 2), &[3, 4]);
        assert_slice_equal(&first.slice(3, 3), &[3, 4, 5]);
        assert_slice_equal(&first.slice(-3, 3), &[3, 4, 5]);
        assert_slice_equal(&first.slice(-6, 6), &[0, 1, 2, 3, 4, 5]);

        assert_eq!(first.slice(-7, 2).len(), 1);
        assert_eq!(first.slice(-3, 4).len(), 3);
        assert_eq!(first.slice(3, 4).len(), 3);
        assert_eq!(first.slice(10, 4).len(), 0);
    }

    // Ascending/descending sorts; nulls sort first in ascending order.
    #[test]
    fn sorting() {
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[9, 2, 4]);
        let sorted = s.sort(false);
        assert_slice_equal(&sorted, &[2, 4, 9]);
        let sorted = s.sort(true);
        assert_slice_equal(&sorted, &[9, 4, 2]);

        let s: StringChunked = ["b", "a", "z"].iter().collect();
        let sorted = s.sort(false);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[Some("a"), Some("b"), Some("z")]
        );
        let sorted = s.sort(true);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[Some("z"), Some("b"), Some("a")]
        );
        let s: StringChunked = [Some("b"), None, Some("z")].iter().copied().collect();
        let sorted = s.sort(false);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[None, Some("b"), Some("z")]
        );
    }

    // Reverse must preserve null positions (mirrored) for every dtype.
    #[test]
    fn reverse() {
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3]);
        assert_slice_equal(&s.reverse(), &[3, 2, 1]);
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[Some(1), None, Some(3)]);
        assert_eq!(Vec::from(&s.reverse()), &[Some(3), None, Some(1)]);
        let s = BooleanChunked::new(PlSmallStr::EMPTY, &[true, false]);
        assert_eq!(Vec::from(&s.reverse()), &[Some(false), Some(true)]);

        let s = StringChunked::new(PlSmallStr::EMPTY, &["a", "b", "c"]);
        assert_eq!(Vec::from(&s.reverse()), &[Some("c"), Some("b"), Some("a")]);

        let s = StringChunked::new(PlSmallStr::EMPTY, &[Some("a"), None, Some("c")]);
        assert_eq!(Vec::from(&s.reverse()), &[Some("c"), None, Some("a")]);
    }

    // Casting strings to a categorical dtype assigns physical codes in order
    // of first appearance.
    #[test]
    #[cfg(feature = "dtype-categorical")]
    fn test_iter_categorical() {
        let ca = StringChunked::new(
            PlSmallStr::EMPTY,
            &[Some("foo"), None, Some("bar"), Some("ham")],
        );
        let cats = Categories::new(
            PlSmallStr::EMPTY,
            PlSmallStr::EMPTY,
            CategoricalPhysical::U32,
        );
        let ca = ca.cast(&DataType::from_categories(cats)).unwrap();
        let ca = ca.cat32().unwrap();
        let v: Vec<_> = ca.physical().into_iter().collect();
        assert_eq!(v, &[Some(0), None, Some(1), Some(2)]);
    }

    // Ignored: depends on allocator behavior, so it is not run by default.
    #[test]
    #[ignore]
    fn test_shrink_to_fit() {
        let mut builder = StringChunkedBuilder::new(PlSmallStr::from_static("foo"), 2048);
        builder.append_value("foo");
        let mut arr = builder.finish();
        let before = arr
            .chunks()
            .iter()
            .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
            .sum::<usize>();
        arr.shrink_to_fit();
        let after = arr
            .chunks()
            .iter()
            .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
            .sum::<usize>();
        assert!(before > after);
    }
}
1244}