1#![allow(unsafe_op_in_unsafe_fn)]
3use std::iter::Map;
4use std::sync::Arc;
5
6use arrow::array::*;
7use arrow::bitmap::Bitmap;
8use arrow::compute::concatenate::concatenate_unchecked;
9use polars_compute::filter::filter_with_bitmap;
10
11use crate::prelude::{ChunkTakeUnchecked, *};
12
13pub mod ops;
14#[macro_use]
15pub mod arithmetic;
16pub mod builder;
17pub mod cast;
18pub mod collect;
19pub mod comparison;
20pub mod flags;
21pub mod float;
22pub mod iterator;
23#[cfg(feature = "ndarray")]
24pub(crate) mod ndarray;
25
26#[cfg(feature = "dtype-array")]
27pub(crate) mod array;
28mod binary;
29mod binary_offset;
30mod bitwise;
31#[cfg(feature = "object")]
32mod drop;
33mod from;
34mod from_iterator;
35pub mod from_iterator_par;
36pub(crate) mod list;
37pub(crate) mod logical;
38#[cfg(feature = "object")]
39pub mod object;
40#[cfg(feature = "random")]
41mod random;
42#[cfg(feature = "dtype-struct")]
43mod struct_;
44#[cfg(any(
45 feature = "temporal",
46 feature = "dtype-datetime",
47 feature = "dtype-date"
48))]
49pub mod temporal;
50mod to_vec;
51mod trusted_len;
52
53use std::slice::Iter;
54
55use arrow::legacy::prelude::*;
56#[cfg(feature = "dtype-struct")]
57pub use struct_::StructChunked;
58
59use self::flags::{StatisticsFlags, StatisticsFlagsIM};
60use crate::series::IsSorted;
61use crate::utils::{first_non_null, last_non_null};
62
/// Empty placeholder type; it is only compiled when the `dtype-categorical`
/// feature is disabled.
63#[cfg(not(feature = "dtype-categorical"))]
64pub struct RevMapping {}
65
66pub type ChunkLenIter<'a> = std::iter::Map<std::slice::Iter<'a, ArrayRef>, fn(&ArrayRef) -> usize>;
67
68pub struct ChunkedArray<T: PolarsDataType> {
143 pub(crate) field: Arc<Field>,
144 pub(crate) chunks: Vec<ArrayRef>,
145
146 pub(crate) flags: StatisticsFlagsIM,
147
148 length: usize,
149 null_count: usize,
150 _pd: std::marker::PhantomData<T>,
151}
152
153impl<T: PolarsDataType> ChunkedArray<T> {
154 fn should_rechunk(&self) -> bool {
155 self.chunks.len() > 1 && self.chunks.len() > self.len() / 3
156 }
157
158 fn optional_rechunk(mut self) -> Self {
159 if self.should_rechunk() {
161 self.rechunk_mut()
162 }
163 self
164 }
165
166 pub(crate) fn as_any(&self) -> &dyn std::any::Any {
167 self
168 }
169
170 pub fn unpack_series_matching_type<'a>(
172 &self,
173 series: &'a Series,
174 ) -> PolarsResult<&'a ChunkedArray<T>> {
175 polars_ensure!(
176 self.dtype() == series.dtype(),
177 SchemaMismatch: "cannot unpack series of type `{}` into `{}`",
178 series.dtype(),
179 self.dtype(),
180 );
181
182 Ok(unsafe { self.unpack_series_matching_physical_type(series) })
184 }
185
186 fn new_with_compute_len(field: Arc<Field>, chunks: Vec<ArrayRef>) -> Self {
191 unsafe {
192 let mut chunked_arr = Self::new_with_dims(field, chunks, 0, 0);
193 chunked_arr.compute_len();
194 chunked_arr
195 }
196 }
197
198 pub unsafe fn new_with_dims(
202 field: Arc<Field>,
203 chunks: Vec<ArrayRef>,
204 length: usize,
205 null_count: usize,
206 ) -> Self {
207 Self {
208 field,
209 chunks,
210 flags: StatisticsFlagsIM::empty(),
211
212 _pd: Default::default(),
213 length,
214 null_count,
215 }
216 }
217
218 pub(crate) fn is_sorted_ascending_flag(&self) -> bool {
219 self.get_flags().is_sorted_ascending()
220 }
221
222 pub(crate) fn is_sorted_descending_flag(&self) -> bool {
223 self.get_flags().is_sorted_descending()
224 }
225
226 pub(crate) fn is_sorted_any(&self) -> bool {
228 self.get_flags().is_sorted_any()
229 }
230
231 pub fn unset_fast_explode_list(&mut self) {
232 self.set_fast_explode_list(false)
233 }
234
235 pub fn set_fast_explode_list(&mut self, value: bool) {
236 let mut flags = self.flags.get_mut();
237 flags.set(StatisticsFlags::CAN_FAST_EXPLODE_LIST, value);
238 self.flags.set_mut(flags);
239 }
240
241 pub fn get_fast_explode_list(&self) -> bool {
242 self.get_flags().can_fast_explode_list()
243 }
244
245 pub fn get_flags(&self) -> StatisticsFlags {
246 self.flags.get()
247 }
248
249 pub fn set_flags(&mut self, flags: StatisticsFlags) {
251 self.flags = StatisticsFlagsIM::new(flags);
252 }
253
254 pub fn is_sorted_flag(&self) -> IsSorted {
255 self.get_flags().is_sorted()
256 }
257
258 pub fn retain_flags_from<U: PolarsDataType>(
259 &mut self,
260 from: &ChunkedArray<U>,
261 retain_flags: StatisticsFlags,
262 ) {
263 let flags = from.flags.get();
264 if !flags.is_empty() {
266 self.set_flags(flags & retain_flags)
267 }
268 }
269
270 pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
272 let mut flags = self.flags.get_mut();
273 flags.set_sorted(sorted);
274 self.flags.set_mut(flags);
275 }
276
277 pub fn with_sorted_flag(&self, sorted: IsSorted) -> Self {
279 let mut out = self.clone();
280 out.set_sorted_flag(sorted);
281 out
282 }
283
284 pub fn first_non_null(&self) -> Option<usize> {
286 if self.null_count() == self.len() {
287 None
288 }
289 else if self.null_count() == 0 {
291 Some(0)
292 } else if self.is_sorted_any() {
293 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
294 self.null_count()
296 } else {
297 0
299 };
300
301 debug_assert!(
302 unsafe { self.get_unchecked(out) }.is_some(),
304 "incorrect sorted flag"
305 );
306
307 Some(out)
308 } else {
309 first_non_null(self.iter_validities())
310 }
311 }
312
313 pub fn last_non_null(&self) -> Option<usize> {
315 if self.null_count() == self.len() {
316 None
317 }
318 else if self.null_count() == 0 {
320 Some(self.len() - 1)
321 } else if self.is_sorted_any() {
322 let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
323 self.len() - 1
325 } else {
326 self.len() - self.null_count() - 1
328 };
329
330 debug_assert!(
331 unsafe { self.get_unchecked(out) }.is_some(),
333 "incorrect sorted flag"
334 );
335
336 Some(out)
337 } else {
338 last_non_null(self.iter_validities(), self.len())
339 }
340 }
341
342 pub fn drop_nulls(&self) -> Self {
343 if self.null_count() == 0 {
344 self.clone()
345 } else {
346 let chunks = self
347 .downcast_iter()
348 .map(|arr| {
349 if arr.null_count() == 0 {
350 arr.to_boxed()
351 } else {
352 filter_with_bitmap(arr, arr.validity().unwrap())
353 }
354 })
355 .collect();
356 unsafe {
357 Self::new_with_dims(
358 self.field.clone(),
359 chunks,
360 self.len() - self.null_count(),
361 0,
362 )
363 }
364 }
365 }
366
367 #[inline]
369 #[allow(clippy::type_complexity)]
370 pub fn iter_validities(&self) -> Map<Iter<'_, ArrayRef>, fn(&ArrayRef) -> Option<&Bitmap>> {
371 fn to_validity(arr: &ArrayRef) -> Option<&Bitmap> {
372 arr.validity()
373 }
374 self.chunks.iter().map(to_validity)
375 }
376
377 #[inline]
378 pub fn has_nulls(&self) -> bool {
380 self.null_count > 0
381 }
382
383 pub fn shrink_to_fit(&mut self) {
385 self.chunks = vec![concatenate_unchecked(self.chunks.as_slice()).unwrap()];
386 }
387
388 pub fn clear(&self) -> Self {
389 let mut ca = unsafe {
391 self.copy_with_chunks(vec![new_empty_array(
392 self.chunks.first().unwrap().dtype().clone(),
393 )])
394 };
395
396 use StatisticsFlags as F;
397 ca.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
398 ca
399 }
400
401 pub(crate) unsafe fn unpack_series_matching_physical_type<'a>(
408 &self,
409 series: &'a Series,
410 ) -> &'a ChunkedArray<T> {
411 let series_trait = &**series;
412 if self.dtype() == series.dtype() {
413 &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
414 } else {
415 use DataType::*;
416 match (self.dtype(), series.dtype()) {
417 (Int64, Datetime(_, _)) | (Int64, Duration(_)) | (Int32, Date) => {
418 &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
419 },
420 _ => panic!(
421 "cannot unpack series {:?} into matching type {:?}",
422 series,
423 self.dtype()
424 ),
425 }
426 }
427 }
428
429 pub fn chunk_lengths(&self) -> ChunkLenIter<'_> {
431 self.chunks.iter().map(|chunk| chunk.len())
432 }
433
434 #[inline]
436 pub fn chunks(&self) -> &Vec<ArrayRef> {
437 &self.chunks
438 }
439
440 #[inline]
446 pub unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
447 &mut self.chunks
448 }
449
450 pub fn is_optimal_aligned(&self) -> bool {
452 self.chunks.len() == 1 && self.null_count() == 0
453 }
454
455 unsafe fn copy_with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
460 Self::new_with_compute_len(self.field.clone(), chunks)
461 }
462
463 pub fn dtype(&self) -> &DataType {
465 self.field.dtype()
466 }
467
468 pub(crate) unsafe fn set_dtype(&mut self, dtype: DataType) {
469 self.field = Arc::new(Field::new(self.name().clone(), dtype))
470 }
471
472 pub fn name(&self) -> &PlSmallStr {
474 self.field.name()
475 }
476
477 pub fn ref_field(&self) -> &Field {
479 &self.field
480 }
481
482 pub fn rename(&mut self, name: PlSmallStr) {
484 self.field = Arc::new(Field::new(name, self.field.dtype().clone()));
485 }
486
487 pub fn with_name(mut self, name: PlSmallStr) -> Self {
489 self.rename(name);
490 self
491 }
492}
493
494impl<T> ChunkedArray<T>
495where
496 T: PolarsDataType,
497{
498 #[inline]
504 pub fn get(&self, idx: usize) -> Option<T::Physical<'_>> {
505 let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
506 assert!(
507 chunk_idx < self.chunks().len(),
508 "index: {} out of bounds for len: {}",
509 idx,
510 self.len()
511 );
512 unsafe {
513 let arr = self.downcast_get_unchecked(chunk_idx);
514 assert!(
515 arr_idx < arr.len(),
516 "index: {} out of bounds for len: {}",
517 idx,
518 self.len()
519 );
520 arr.get_unchecked(arr_idx)
521 }
522 }
523
524 #[inline]
530 pub unsafe fn get_unchecked(&self, idx: usize) -> Option<T::Physical<'_>> {
531 let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
532
533 unsafe {
534 self.downcast_get_unchecked(chunk_idx)
536 .get_unchecked(arr_idx)
537 }
538 }
539
540 #[inline]
546 pub unsafe fn value_unchecked(&self, idx: usize) -> T::Physical<'_> {
547 let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
548
549 unsafe {
550 self.downcast_get_unchecked(chunk_idx)
552 .value_unchecked(arr_idx)
553 }
554 }
555
556 #[inline]
557 pub fn first(&self) -> Option<T::Physical<'_>> {
558 unsafe {
559 let arr = self.downcast_get_unchecked(0);
560 arr.get_unchecked(0)
561 }
562 }
563
564 #[inline]
565 pub fn last(&self) -> Option<T::Physical<'_>> {
566 unsafe {
567 let arr = self.downcast_get_unchecked(self.chunks.len().checked_sub(1)?);
568 arr.get_unchecked(arr.len().checked_sub(1)?)
569 }
570 }
571
572 pub fn set_validity(&mut self, validity: &Bitmap) {
573 assert_eq!(self.len(), validity.len());
574 let mut i = 0;
575 for chunk in unsafe { self.chunks_mut() } {
576 *chunk = chunk.with_validity(Some(validity.clone().sliced(i, chunk.len())));
577 i += chunk.len();
578 }
579 self.null_count = validity.unset_bits();
580 self.set_fast_explode_list(false);
581 }
582}
583
584impl<T> ChunkedArray<T>
585where
586 T: PolarsDataType,
587 ChunkedArray<T>: ChunkTakeUnchecked<[IdxSize]>,
588{
589 pub fn deposit(&self, validity: &Bitmap) -> Self {
591 let set_bits = validity.set_bits();
592
593 assert_eq!(self.len(), set_bits);
594
595 if set_bits == validity.len() {
596 return self.clone();
597 }
598
599 if set_bits == 0 {
600 return Self::full_null_like(self, validity.len());
601 }
602
603 let mut null_mask = validity.clone();
604
605 let mut gather_idxs = Vec::with_capacity(validity.len());
606 let leading_nulls = null_mask.take_leading_zeros();
607 gather_idxs.extend(std::iter::repeat_n(0, leading_nulls + 1));
608
609 let mut i = 0 as IdxSize;
610 gather_idxs.extend(null_mask.iter().skip(1).map(|v| {
611 i += IdxSize::from(v);
612 i
613 }));
614
615 let mut ca = unsafe { ChunkTakeUnchecked::take_unchecked(self, &gather_idxs) };
616 ca.set_validity(validity);
617 ca
618 }
619}
620
621impl ListChunked {
622 #[inline]
623 pub fn get_as_series(&self, idx: usize) -> Option<Series> {
624 unsafe {
625 Some(Series::from_chunks_and_dtype_unchecked(
626 self.name().clone(),
627 vec![self.get(idx)?],
628 &self.inner_dtype().to_physical(),
629 ))
630 }
631 }
632}
633
634#[cfg(feature = "dtype-array")]
635impl ArrayChunked {
636 #[inline]
637 pub fn get_as_series(&self, idx: usize) -> Option<Series> {
638 unsafe {
639 Some(Series::from_chunks_and_dtype_unchecked(
640 self.name().clone(),
641 vec![self.get(idx)?],
642 &self.inner_dtype().to_physical(),
643 ))
644 }
645 }
646
647 pub fn from_aligned_values(
648 name: PlSmallStr,
649 inner_dtype: &DataType,
650 width: usize,
651 chunks: Vec<ArrayRef>,
652 length: usize,
653 ) -> Self {
654 let dtype = DataType::Array(Box::new(inner_dtype.clone()), width);
655 let arrow_dtype = dtype.to_arrow(CompatLevel::newest());
656 let field = Arc::new(Field::new(name, dtype));
657 if width == 0 {
658 use arrow::array::builder::{ArrayBuilder, make_builder};
659 let values = make_builder(&inner_dtype.to_arrow(CompatLevel::newest())).freeze();
660 return ArrayChunked::new_with_compute_len(
661 field,
662 vec![FixedSizeListArray::new(arrow_dtype, length, values, None).into_boxed()],
663 );
664 }
665
666 let chunks = chunks
667 .into_iter()
668 .map(|chunk| {
669 debug_assert_eq!(chunk.len() % width, 0);
670 FixedSizeListArray::new(arrow_dtype.clone(), length, chunk, None).into_boxed()
671 })
672 .collect();
673
674 unsafe { Self::new_with_dims(field, chunks, length, 0) }
675 }
676
677 pub fn to_list(&self) -> ListChunked {
681 let inner_dtype = self.inner_dtype();
682 let chunks = self
683 .downcast_iter()
684 .map(|chunk| {
685 use arrow::offset::OffsetsBuffer;
686
687 let inner_dtype = chunk.dtype().inner_dtype().unwrap();
688 let dtype = inner_dtype.clone().to_large_list(true);
689
690 let offsets = (0..=chunk.len())
691 .map(|i| (i * self.width()) as i64)
692 .collect::<Vec<i64>>();
693
694 let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets.into()) };
696
697 ListArray::<i64>::new(
698 dtype,
699 offsets,
700 chunk.values().clone(),
701 chunk.validity().cloned(),
702 )
703 .into_boxed()
704 })
705 .collect();
706
707 let mut ca = unsafe {
709 ListChunked::new_with_dims(
710 Arc::new(Field::new(
711 self.name().clone(),
712 DataType::List(Box::new(inner_dtype.clone())),
713 )),
714 chunks,
715 self.len(),
716 self.null_count(),
717 )
718 };
719 ca.set_fast_explode_list(!self.has_nulls());
720 ca
721 }
722}
723
724impl<T> ChunkedArray<T>
725where
726 T: PolarsDataType,
727{
728 pub fn match_chunks<I>(&self, chunk_id: I) -> Self
732 where
733 I: Iterator<Item = usize>,
734 {
735 debug_assert!(self.chunks.len() == 1);
736 let slice = |ca: &Self| {
738 let array = &ca.chunks[0];
739
740 let mut offset = 0;
741 let chunks = chunk_id
742 .map(|len| {
743 debug_assert!((offset + len) <= array.len());
745 let out = unsafe { array.sliced_unchecked(offset, len) };
746 offset += len;
747 out
748 })
749 .collect();
750
751 debug_assert_eq!(offset, array.len());
752
753 unsafe {
755 Self::from_chunks_and_dtype(self.name().clone(), chunks, self.dtype().clone())
756 }
757 };
758
759 if self.chunks.len() != 1 {
760 let out = self.rechunk();
761 slice(&out)
762 } else {
763 slice(self)
764 }
765 }
766}
767
768impl<T: PolarsDataType> AsRefDataType for ChunkedArray<T> {
769 fn as_ref_dtype(&self) -> &DataType {
770 self.dtype()
771 }
772}
773
774pub(crate) trait AsSinglePtr: AsRefDataType {
775 fn as_single_ptr(&mut self) -> PolarsResult<usize> {
777 polars_bail!(opq = as_single_ptr, self.as_ref_dtype());
778 }
779}
780
781impl<T> AsSinglePtr for ChunkedArray<T>
782where
783 T: PolarsNumericType,
784{
785 fn as_single_ptr(&mut self) -> PolarsResult<usize> {
786 self.rechunk_mut();
787 let a = self.data_views().next().unwrap();
788 let ptr = a.as_ptr();
789 Ok(ptr as usize)
790 }
791}
792
// The types below implement `AsSinglePtr` without overriding anything, so they use the
// trait's provided `as_single_ptr`, which always bails via `polars_bail!(opq = ...)`.
793impl AsSinglePtr for BooleanChunked {}
794impl AsSinglePtr for ListChunked {}
795#[cfg(feature = "dtype-array")]
796impl AsSinglePtr for ArrayChunked {}
797impl AsSinglePtr for StringChunked {}
798impl AsSinglePtr for BinaryChunked {}
799#[cfg(feature = "object")]
800impl<T: PolarsObject> AsSinglePtr for ObjectChunked<T> {}
801
802pub enum ChunkedArrayLayout<'a, T: PolarsDataType> {
803 SingleNoNull(&'a T::Array),
804 Single(&'a T::Array),
805 MultiNoNull(&'a ChunkedArray<T>),
806 Multi(&'a ChunkedArray<T>),
807}
808
809impl<T> ChunkedArray<T>
810where
811 T: PolarsDataType,
812{
813 pub fn layout(&self) -> ChunkedArrayLayout<'_, T> {
814 if self.chunks.len() == 1 {
815 let arr = self.downcast_iter().next().unwrap();
816 return if arr.null_count() == 0 {
817 ChunkedArrayLayout::SingleNoNull(arr)
818 } else {
819 ChunkedArrayLayout::Single(arr)
820 };
821 }
822
823 if self.downcast_iter().all(|a| a.null_count() == 0) {
824 ChunkedArrayLayout::MultiNoNull(self)
825 } else {
826 ChunkedArrayLayout::Multi(self)
827 }
828 }
829}
830
831impl<T> ChunkedArray<T>
832where
833 T: PolarsNumericType,
834{
835 pub fn cont_slice(&self) -> PolarsResult<&[T::Native]> {
837 polars_ensure!(
838 self.chunks.len() == 1 && self.chunks[0].null_count() == 0,
839 ComputeError: "chunked array is not contiguous"
840 );
841 Ok(self.downcast_iter().next().map(|arr| arr.values()).unwrap())
842 }
843
844 pub(crate) fn cont_slice_mut(&mut self) -> Option<&mut [T::Native]> {
846 if self.chunks.len() == 1 && self.chunks[0].null_count() == 0 {
847 let arr = unsafe { self.downcast_iter_mut().next().unwrap() };
849 arr.get_mut_values()
850 } else {
851 None
852 }
853 }
854
855 pub fn data_views(&self) -> impl DoubleEndedIterator<Item = &[T::Native]> {
859 self.downcast_iter().map(|arr| arr.values().as_slice())
860 }
861
862 #[allow(clippy::wrong_self_convention)]
863 pub fn into_no_null_iter(
864 &self,
865 ) -> impl '_ + Send + Sync + ExactSizeIterator<Item = T::Native> + DoubleEndedIterator + TrustedLen
866 {
867 #[allow(clippy::map_clone)]
869 unsafe {
871 self.data_views()
872 .flatten()
873 .map(|v| *v)
874 .trust_my_length(self.len())
875 }
876 }
877}
878
879impl<T: PolarsDataType> Clone for ChunkedArray<T> {
880 fn clone(&self) -> Self {
881 ChunkedArray {
882 field: self.field.clone(),
883 chunks: self.chunks.clone(),
884 flags: self.flags.clone(),
885
886 _pd: Default::default(),
887 length: self.length,
888 null_count: self.null_count,
889 }
890 }
891}
892
893impl<T: PolarsDataType> AsRef<ChunkedArray<T>> for ChunkedArray<T> {
894 fn as_ref(&self) -> &ChunkedArray<T> {
895 self
896 }
897}
898
899impl ValueSize for ListChunked {
900 fn get_values_size(&self) -> usize {
901 self.chunks
902 .iter()
903 .fold(0usize, |acc, arr| acc + arr.get_values_size())
904 }
905}
906
#[cfg(feature = "dtype-array")]
impl ValueSize for ArrayChunked {
    /// Sum of `get_values_size` over all chunks.
    fn get_values_size(&self) -> usize {
        self.chunks
            .iter()
            .map(|chunk| chunk.get_values_size())
            .sum::<usize>()
    }
}
915impl ValueSize for StringChunked {
916 fn get_values_size(&self) -> usize {
917 self.chunks
918 .iter()
919 .fold(0usize, |acc, arr| acc + arr.get_values_size())
920 }
921}
922
923impl ValueSize for BinaryOffsetChunked {
924 fn get_values_size(&self) -> usize {
925 self.chunks
926 .iter()
927 .fold(0usize, |acc, arr| acc + arr.get_values_size())
928 }
929}
930
931pub(crate) fn to_primitive<T: PolarsNumericType>(
932 values: Vec<T::Native>,
933 validity: Option<Bitmap>,
934) -> PrimitiveArray<T::Native> {
935 PrimitiveArray::new(
936 T::get_static_dtype().to_arrow(CompatLevel::newest()),
937 values.into(),
938 validity,
939 )
940}
941
942pub(crate) fn to_array<T: PolarsNumericType>(
943 values: Vec<T::Native>,
944 validity: Option<Bitmap>,
945) -> ArrayRef {
946 Box::new(to_primitive::<T>(values, validity))
947}
948
949impl<T: PolarsDataType> Default for ChunkedArray<T> {
950 fn default() -> Self {
951 let dtype = T::get_static_dtype();
952 let arrow_dtype = dtype.to_physical().to_arrow(CompatLevel::newest());
953 ChunkedArray {
954 field: Arc::new(Field::new(PlSmallStr::EMPTY, dtype)),
955 chunks: vec![new_empty_array(arrow_dtype)],
957 flags: StatisticsFlagsIM::empty(),
958
959 _pd: Default::default(),
960 length: 0,
961 null_count: 0,
962 }
963 }
964}
965
966#[cfg(test)]
967pub(crate) mod test {
968 use crate::prelude::*;
969
970 pub(crate) fn get_chunked_array() -> Int32Chunked {
971 ChunkedArray::new(PlSmallStr::from_static("a"), &[1, 2, 3])
972 }
973
974 #[test]
975 fn test_sort() {
976 let a = Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 9, 3, 2]);
977 let b = a
978 .sort(false)
979 .into_iter()
980 .map(|opt| opt.unwrap())
981 .collect::<Vec<_>>();
982 assert_eq!(b, [1, 2, 3, 9]);
983 let a = StringChunked::new(PlSmallStr::from_static("a"), &["b", "a", "c"]);
984 let a = a.sort(false);
985 let b = a.into_iter().collect::<Vec<_>>();
986 assert_eq!(b, [Some("a"), Some("b"), Some("c")]);
987 assert!(a.is_sorted_ascending_flag());
988 }
989
990 #[test]
991 fn arithmetic() {
992 let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 6, 40]);
993 let b = &Int32Chunked::new(PlSmallStr::from_static("b"), &[-1, 2, 3, 4]);
994
995 println!("{:?}", a + b);
998 println!("{:?}", a - b);
999 println!("{:?}", a * b);
1000 println!("{:?}", a / b);
1001 }
1002
1003 #[test]
1004 fn iter() {
1005 let s1 = get_chunked_array();
1006 assert_eq!(s1.into_iter().fold(0, |acc, val| { acc + val.unwrap() }), 6)
1008 }
1009
1010 #[test]
1011 fn limit() {
1012 let a = get_chunked_array();
1013 let b = a.limit(2);
1014 println!("{b:?}");
1015 assert_eq!(b.len(), 2)
1016 }
1017
1018 #[test]
1019 fn filter() {
1020 let a = get_chunked_array();
1021 let b = a
1022 .filter(&BooleanChunked::new(
1023 PlSmallStr::from_static("filter"),
1024 &[true, false, false],
1025 ))
1026 .unwrap();
1027 assert_eq!(b.len(), 1);
1028 assert_eq!(b.into_iter().next(), Some(Some(1)));
1029 }
1030
1031 #[test]
1032 fn aggregates() {
1033 let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 10, 9]);
1034 assert_eq!(a.max(), Some(100));
1035 assert_eq!(a.min(), Some(1));
1036 assert_eq!(a.sum(), Some(120))
1037 }
1038
1039 #[test]
1040 fn take() {
1041 let a = get_chunked_array();
1042 let new = a.take(&[0 as IdxSize, 1]).unwrap();
1043 assert_eq!(new.len(), 2)
1044 }
1045
1046 #[test]
1047 fn cast() {
1048 let a = get_chunked_array();
1049 let b = a.cast(&DataType::Int64).unwrap();
1050 assert_eq!(b.dtype(), &DataType::Int64)
1051 }
1052
1053 fn assert_slice_equal<T>(ca: &ChunkedArray<T>, eq: &[T::Native])
1054 where
1055 T: PolarsNumericType,
1056 {
1057 assert_eq!(ca.iter().map(|opt| opt.unwrap()).collect::<Vec<_>>(), eq)
1058 }
1059
1060 #[test]
1061 fn slice() {
1062 let mut first = UInt32Chunked::new(PlSmallStr::from_static("first"), &[0, 1, 2]);
1063 let second = UInt32Chunked::new(PlSmallStr::from_static("second"), &[3, 4, 5]);
1064 first.append(&second).unwrap();
1065 assert_slice_equal(&first.slice(0, 3), &[0, 1, 2]);
1066 assert_slice_equal(&first.slice(0, 4), &[0, 1, 2, 3]);
1067 assert_slice_equal(&first.slice(1, 4), &[1, 2, 3, 4]);
1068 assert_slice_equal(&first.slice(3, 2), &[3, 4]);
1069 assert_slice_equal(&first.slice(3, 3), &[3, 4, 5]);
1070 assert_slice_equal(&first.slice(-3, 3), &[3, 4, 5]);
1071 assert_slice_equal(&first.slice(-6, 6), &[0, 1, 2, 3, 4, 5]);
1072
1073 assert_eq!(first.slice(-7, 2).len(), 1);
1074 assert_eq!(first.slice(-3, 4).len(), 3);
1075 assert_eq!(first.slice(3, 4).len(), 3);
1076 assert_eq!(first.slice(10, 4).len(), 0);
1077 }
1078
1079 #[test]
1080 fn sorting() {
1081 let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[9, 2, 4]);
1082 let sorted = s.sort(false);
1083 assert_slice_equal(&sorted, &[2, 4, 9]);
1084 let sorted = s.sort(true);
1085 assert_slice_equal(&sorted, &[9, 4, 2]);
1086
1087 let s: StringChunked = ["b", "a", "z"].iter().collect();
1088 let sorted = s.sort(false);
1089 assert_eq!(
1090 sorted.into_iter().collect::<Vec<_>>(),
1091 &[Some("a"), Some("b"), Some("z")]
1092 );
1093 let sorted = s.sort(true);
1094 assert_eq!(
1095 sorted.into_iter().collect::<Vec<_>>(),
1096 &[Some("z"), Some("b"), Some("a")]
1097 );
1098 let s: StringChunked = [Some("b"), None, Some("z")].iter().copied().collect();
1099 let sorted = s.sort(false);
1100 assert_eq!(
1101 sorted.into_iter().collect::<Vec<_>>(),
1102 &[None, Some("b"), Some("z")]
1103 );
1104 }
1105
1106 #[test]
1107 fn reverse() {
1108 let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3]);
1109 assert_slice_equal(&s.reverse(), &[3, 2, 1]);
1111 let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[Some(1), None, Some(3)]);
1113 assert_eq!(Vec::from(&s.reverse()), &[Some(3), None, Some(1)]);
1114 let s = BooleanChunked::new(PlSmallStr::EMPTY, &[true, false]);
1115 assert_eq!(Vec::from(&s.reverse()), &[Some(false), Some(true)]);
1116
1117 let s = StringChunked::new(PlSmallStr::EMPTY, &["a", "b", "c"]);
1118 assert_eq!(Vec::from(&s.reverse()), &[Some("c"), Some("b"), Some("a")]);
1119
1120 let s = StringChunked::new(PlSmallStr::EMPTY, &[Some("a"), None, Some("c")]);
1121 assert_eq!(Vec::from(&s.reverse()), &[Some("c"), None, Some("a")]);
1122 }
1123
1124 #[test]
1125 #[cfg(feature = "dtype-categorical")]
1126 fn test_iter_categorical() {
1127 let ca = StringChunked::new(
1128 PlSmallStr::EMPTY,
1129 &[Some("foo"), None, Some("bar"), Some("ham")],
1130 );
1131 let cats = Categories::new(
1132 PlSmallStr::EMPTY,
1133 PlSmallStr::EMPTY,
1134 CategoricalPhysical::U32,
1135 );
1136 let ca = ca.cast(&DataType::from_categories(cats)).unwrap();
1137 let ca = ca.cat32().unwrap();
1138 let v: Vec<_> = ca.physical().into_iter().collect();
1139 assert_eq!(v, &[Some(0), None, Some(1), Some(2)]);
1140 }
1141
1142 #[test]
1143 #[ignore]
1144 fn test_shrink_to_fit() {
1145 let mut builder = StringChunkedBuilder::new(PlSmallStr::from_static("foo"), 2048);
1146 builder.append_value("foo");
1147 let mut arr = builder.finish();
1148 let before = arr
1149 .chunks()
1150 .iter()
1151 .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
1152 .sum::<usize>();
1153 arr.shrink_to_fit();
1154 let after = arr
1155 .chunks()
1156 .iter()
1157 .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
1158 .sum::<usize>();
1159 assert!(before > after);
1160 }
1161}