1mod any_value;
2use arrow::compute::concatenate::concatenate_validities;
3use arrow::compute::utils::combine_validities_and;
4pub mod flatten;
5pub(crate) mod series;
6mod supertype;
7use std::borrow::Cow;
8use std::ops::{Deref, DerefMut};
9mod schema;
10
11pub use any_value::*;
12use arrow::bitmap::Bitmap;
13pub use arrow::legacy::utils::*;
14pub use arrow::trusted_len::TrustMyLength;
15use flatten::*;
16use num_traits::{One, Zero};
17use rayon::prelude::*;
18pub use schema::*;
19pub use series::*;
20pub use supertype::*;
21pub use {arrow, rayon};
22
23use crate::POOL;
24use crate::prelude::*;
25
/// Transparent single-field newtype around a `T`.
///
/// The wrapped value is public (`.0`) and is also reachable through `Deref`.
#[repr(transparent)]
pub struct Wrap<T>(pub T);

impl<T> Deref for Wrap<T> {
    type Target = T;

    /// Borrows the wrapped value.
    fn deref(&self) -> &T {
        let Wrap(inner) = self;
        inner
    }
}
35
36#[inline(always)]
37pub fn _set_partition_size() -> usize {
38 POOL.current_num_threads()
39}
40
/// Owning wrapper around a `T` that dereferences (immutably and mutably) to the wrapped value.
///
/// NOTE(review): the name suggests the wrapped value is known to contain no nulls; nothing in
/// this type enforces that — confirm against the call sites.
pub struct NoNull<T> {
    inner: T,
}

impl<T> NoNull<T> {
    /// Wraps `inner`.
    pub fn new(inner: T) -> Self {
        Self { inner }
    }

    /// Consumes the wrapper and hands back the wrapped value.
    pub fn into_inner(self) -> T {
        let Self { inner } = self;
        inner
    }
}

impl<T> Deref for NoNull<T> {
    type Target = T;

    /// Shared access to the wrapped value.
    fn deref(&self) -> &T {
        &self.inner
    }
}

impl<T> DerefMut for NoNull<T> {
    /// Exclusive access to the wrapped value.
    fn deref_mut(&mut self) -> &mut T {
        &mut self.inner
    }
}
74
/// Derives a capacity to reserve for collecting `iter`, based on its `size_hint`.
///
/// * If an upper bound is reported, that upper bound is used.
/// * Without an upper bound, the lower bound is used, unless it is `0`, in which case `1024`
///   is returned.
pub(crate) fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
    let (lower, upper) = iter.size_hint();
    match upper {
        Some(bound) => bound,
        None if lower == 0 => 1024,
        None => lower,
    }
}
82
/// Splits the range `0..len` into `n` contiguous `(offset, length)` pairs.
///
/// Every partition holds `len / n` elements, except the last one, which additionally takes the
/// remainder of that division so that the partitions always cover the whole range.
///
/// `n == 0` is treated like `n == 1` and yields a single partition spanning everything, instead
/// of dividing by zero.
#[doc(hidden)]
pub fn _split_offsets(len: usize, n: usize) -> Vec<(usize, usize)> {
    if n <= 1 {
        return vec![(0, len)];
    }

    let chunk_size = len / n;
    let last = n - 1;

    // `Range<usize>` mapped through a closure reports an exact length, so a plain `collect`
    // allocates exactly once.
    (0..n)
        .map(|partition| {
            let offset = partition * chunk_size;
            let partition_len = if partition == last {
                // The final partition absorbs whatever the integer division left over.
                len - offset
            } else {
                chunk_size
            };
            (offset, partition_len)
        })
        .collect()
}
105
/// Common interface of chunked containers that can be measured, sliced and split.
///
/// The splitting helpers in this module (`split`, `split_and_flatten`) are written against
/// this trait.
// The trait only declares `len`; no `is_empty` companion is part of the interface.
#[allow(clippy::len_without_is_empty)]
pub trait Container: Clone {
    /// Total number of elements (rows) in the container.
    fn len(&self) -> usize;

    /// Number of chunks the container currently consists of.
    fn n_chunks(&self) -> usize;

    /// Lengths of the individual chunks.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize>;

    /// Yields the chunks, each wrapped as a container of its own.
    fn iter_chunks(&self) -> impl Iterator<Item = Self>;

    /// Returns `true` when the current chunk layout should not be reused as-is by `split`.
    fn should_rechunk(&self) -> bool;

    /// Returns the part of the container selected by `offset` and `len`.
    fn slice(&self, offset: i64, len: usize) -> Self;

    /// Splits the container in two at `offset`.
    fn split_at(&self, offset: i64) -> (Self, Self);
}
122
123impl Container for DataFrame {
124 fn slice(&self, offset: i64, len: usize) -> Self {
125 DataFrame::slice(self, offset, len)
126 }
127
128 fn split_at(&self, offset: i64) -> (Self, Self) {
129 DataFrame::split_at(self, offset)
130 }
131
132 fn len(&self) -> usize {
133 self.height()
134 }
135
136 fn iter_chunks(&self) -> impl Iterator<Item = Self> {
137 flatten_df_iter(self)
138 }
139
140 fn should_rechunk(&self) -> bool {
141 self.should_rechunk()
142 }
143
144 fn n_chunks(&self) -> usize {
145 DataFrame::first_col_n_chunks(self)
146 }
147
148 fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
149 self.columns[0].as_materialized_series().chunk_lengths()
151 }
152}
153
154impl<T: PolarsDataType> Container for ChunkedArray<T> {
155 fn slice(&self, offset: i64, len: usize) -> Self {
156 ChunkedArray::slice(self, offset, len)
157 }
158
159 fn split_at(&self, offset: i64) -> (Self, Self) {
160 ChunkedArray::split_at(self, offset)
161 }
162
163 fn len(&self) -> usize {
164 ChunkedArray::len(self)
165 }
166
167 fn iter_chunks(&self) -> impl Iterator<Item = Self> {
168 self.downcast_iter()
169 .map(|arr| Self::with_chunk(self.name().clone(), arr.clone()))
170 }
171
172 fn should_rechunk(&self) -> bool {
173 false
174 }
175
176 fn n_chunks(&self) -> usize {
177 self.chunks().len()
178 }
179
180 fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
181 ChunkedArray::chunk_lengths(self)
182 }
183}
184
185impl Container for Series {
186 fn slice(&self, offset: i64, len: usize) -> Self {
187 self.0.slice(offset, len)
188 }
189
190 fn split_at(&self, offset: i64) -> (Self, Self) {
191 self.0.split_at(offset)
192 }
193
194 fn len(&self) -> usize {
195 self.0.len()
196 }
197
198 fn iter_chunks(&self) -> impl Iterator<Item = Self> {
199 (0..self.0.n_chunks()).map(|i| self.select_chunk(i))
200 }
201
202 fn should_rechunk(&self) -> bool {
203 false
204 }
205
206 fn n_chunks(&self) -> usize {
207 self.chunks().len()
208 }
209
210 fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
211 self.0.chunk_lengths()
212 }
213}
214
215fn split_impl<C: Container>(container: &C, target: usize, chunk_size: usize) -> Vec<C> {
216 if target == 1 {
217 return vec![container.clone()];
218 }
219 let mut out = Vec::with_capacity(target);
220 let chunk_size = chunk_size as i64;
221
222 let (chunk, mut remainder) = container.split_at(chunk_size);
224 out.push(chunk);
225
226 for _ in 1..target - 1 {
228 let (a, b) = remainder.split_at(chunk_size);
229 out.push(a);
230 remainder = b
231 }
232 out.push(remainder);
234 out
235}
236
237pub fn split<C: Container>(container: &C, target: usize) -> Vec<C> {
239 let total_len = container.len();
240 if total_len == 0 {
241 return vec![container.clone()];
242 }
243
244 let chunk_size = std::cmp::max(total_len / target, 1);
245
246 if container.n_chunks() == target
247 && container
248 .chunk_lengths()
249 .all(|len| len.abs_diff(chunk_size) < 100)
250 && !container.should_rechunk()
252 {
253 return container.iter_chunks().collect();
254 }
255 split_impl(container, target, chunk_size)
256}
257
258pub fn split_and_flatten<C: Container>(container: &C, target: usize) -> Vec<C> {
261 let total_len = container.len();
262 if total_len == 0 {
263 return vec![container.clone()];
264 }
265
266 let chunk_size = std::cmp::max(total_len / target, 1);
267
268 if container.n_chunks() == target
269 && container
270 .chunk_lengths()
271 .all(|len| len.abs_diff(chunk_size) < 100)
272 && !container.should_rechunk()
274 {
275 return container.iter_chunks().collect();
276 }
277
278 if container.n_chunks() == 1 {
279 split_impl(container, target, chunk_size)
280 } else {
281 let mut out = Vec::with_capacity(target);
282 let chunks = container.iter_chunks();
283
284 'new_chunk: for mut chunk in chunks {
285 loop {
286 let h = chunk.len();
287 if h < chunk_size {
288 out.push(chunk);
290 continue 'new_chunk;
291 }
292
293 if ((h - chunk_size) as f64 / chunk_size as f64) < 0.3 {
295 out.push(chunk);
296 continue 'new_chunk;
297 }
298
299 let (a, b) = chunk.split_at(chunk_size as i64);
300 out.push(a);
301 chunk = b;
302 }
303 }
304 out
305 }
306}
307
308pub fn split_df_as_ref(df: &DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
314 if strict {
315 split(df, target)
316 } else {
317 split_and_flatten(df, target)
318 }
319}
320
321#[doc(hidden)]
322pub fn split_df(df: &mut DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
325 if target == 0 || df.is_empty() {
326 return vec![df.clone()];
327 }
328 df.align_chunks_par();
330 split_df_as_ref(df, target, strict)
331}
332
333pub fn slice_slice<T>(vals: &[T], offset: i64, len: usize) -> &[T] {
334 let (raw_offset, slice_len) = slice_offsets(offset, len, vals.len());
335 &vals[raw_offset..raw_offset + slice_len]
336}
337
/// Translates a signed `offset` and a `length` into an in-bounds `(start, len)` pair for an
/// array of `array_len` elements.
///
/// A negative `offset` counts from the end of the array. Both the start and the stop position
/// are clamped to `0..=array_len`, so the returned range never exceeds the array.
///
/// # Panics
///
/// Panics if `array_len` does not fit in an `i64`.
#[inline]
pub fn slice_offsets(offset: i64, length: usize, array_len: usize) -> (usize, usize) {
    let upper = i64::try_from(array_len).expect("array length larger than i64::MAX");

    // Resolve a negative offset relative to the end; saturating arithmetic avoids overflow.
    let raw_start = match offset {
        from_end if from_end < 0 => from_end.saturating_add_unsigned(array_len as u64),
        from_start => from_start,
    };
    let raw_stop = raw_start.saturating_add_unsigned(length as u64);

    let start = raw_start.max(0).min(upper);
    let stop = raw_stop.max(0).min(upper);

    (start as usize, (stop - start) as usize)
}
357
/// Dispatches on a `DataType` and invokes one of the supplied macros.
///
/// * `String` invokes `$macro_string!` with the optional arguments.
/// * `Boolean` invokes `$macro_bool!` with the optional arguments.
/// * Numeric dtypes invoke `$macro!` with the matching primitive type (`u8`, `i32`, `f64`, …)
///   followed by the optional arguments.
///
/// The optional arguments are always forwarded comma-separated.
///
/// # Panics
///
/// The expansion panics for any dtype without an arm.
#[macro_export]
macro_rules! match_dtype_to_physical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            // Separate the forwarded arguments with commas, like the numeric arms do; without
            // the separator two or more arguments would be emitted back to back.
            DataType::String => $macro_string!($($opt_args),*),
            DataType::Boolean => $macro_bool!($($opt_args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(u8 $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(u16 $(, $opt_args)*),
            DataType::UInt32 => $macro!(u32 $(, $opt_args)*),
            DataType::UInt64 => $macro!(u64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(i8 $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(i16 $(, $opt_args)*),
            DataType::Int32 => $macro!(i32 $(, $opt_args)*),
            DataType::Int64 => $macro!(i64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(i128 $(, $opt_args)*),
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $macro!(pf16 $(, $opt_args)*),
            DataType::Float32 => $macro!(f32 $(, $opt_args)*),
            DataType::Float64 => $macro!(f64 $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
387
/// Dispatches on a `DataType` and invokes one of the supplied macros.
///
/// * `String`, `Binary` and `Boolean` invoke `$macro_string!`, `$macro_binary!` and
///   `$macro_bool!` respectively, with the optional arguments.
/// * Numeric dtypes invoke `$macro!` with the matching polars type (`UInt8Type`, `Int32Type`,
///   `Float64Type`, …) followed by the optional arguments.
///
/// The optional arguments are always forwarded comma-separated.
///
/// # Panics
///
/// The expansion panics for any dtype without an arm.
#[macro_export]
macro_rules! match_dtype_to_logical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_binary:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            // Separate the forwarded arguments with commas, like the numeric arms do; without
            // the separator two or more arguments would be emitted back to back.
            DataType::String => $macro_string!($($opt_args),*),
            DataType::Binary => $macro_binary!($($opt_args),*),
            DataType::Boolean => $macro_bool!($($opt_args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(UInt8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(UInt16Type $(, $opt_args)*),
            DataType::UInt32 => $macro!(UInt32Type $(, $opt_args)*),
            DataType::UInt64 => $macro!(UInt64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $macro!(UInt128Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(Int8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(Int16Type $(, $opt_args)*),
            DataType::Int32 => $macro!(Int32Type $(, $opt_args)*),
            DataType::Int64 => $macro!(Int64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(Int128Type $(, $opt_args)*),
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $macro!(Float16Type $(, $opt_args)*),
            DataType::Float32 => $macro!(Float32Type $(, $opt_args)*),
            DataType::Float64 => $macro!(Float64Type $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
420
/// Dispatches on the dtype of the first argument and invokes a macro on the downcast array.
///
/// The chosen macro receives the unwrapped result of the matching accessor (`str()`, `bool()`,
/// `u8()`, …) as its first argument, followed by the optional arguments. String data goes to
/// `$macro_string!`, boolean data to `$macro_bool!`, and all numeric data to `$macro!`.
///
/// # Panics
///
/// The expansion panics for any dtype without an arm.
#[macro_export]
macro_rules! match_arrow_dtype_apply_macro_ca {
    ($this:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $extra:expr)*) => {{
        match $this.dtype() {
            // Non-numeric data.
            DataType::Boolean => $macro_bool!($this.bool().unwrap() $(, $extra)*),
            DataType::String => $macro_string!($this.str().unwrap() $(, $extra)*),
            // Signed integers.
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($this.i8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($this.i16().unwrap() $(, $extra)*),
            DataType::Int32 => $macro!($this.i32().unwrap() $(, $extra)*),
            DataType::Int64 => $macro!($this.i64().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($this.i128().unwrap() $(, $extra)*),
            // Unsigned integers.
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($this.u8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($this.u16().unwrap() $(, $extra)*),
            DataType::UInt32 => $macro!($this.u32().unwrap() $(, $extra)*),
            DataType::UInt64 => $macro!($this.u64().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $macro!($this.u128().unwrap() $(, $extra)*),
            // Floats.
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $macro!($this.f16().unwrap() $(, $extra)*),
            DataType::Float32 => $macro!($this.f32().unwrap() $(, $extra)*),
            DataType::Float64 => $macro!($this.f64().unwrap() $(, $extra)*),
            other => panic!("not implemented for dtype {:?}", other),
        }
    }};
}
452
/// Runs a body once with a type identifier bound to the primitive type of a numeric dtype.
///
/// Invoked as `with_match_physical_numeric_type!(dtype, |$T| body)`: the identifier written
/// after the `$` is substituted by `i8`, `u32`, `f64`, … (or `pf16` for `Float16`) inside `body`,
/// depending on which `DataType` variant `dtype` matches.
///
/// # Panics
///
/// The expansion panics for any dtype without an arm.
#[macro_export]
macro_rules! with_match_physical_numeric_type {(
    $data_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Helper macro that re-emits `body` with the chosen type in place of the caller's `$T`.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    #[cfg(feature = "dtype-f16")]
    use polars_utils::float16::pf16;
    match $data_type {
        // Unsigned integers.
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { u128 },
        // Signed integers.
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        // Floats.
        #[cfg(feature = "dtype-f16")]
        Float16 => __with_ty__! { pf16 },
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        other => panic!("not implemented for dtype {:?}", other),
    }
})}
485
/// Runs a body once with a type identifier bound to the primitive type of an integer dtype.
///
/// Invoked as `with_match_physical_integer_type!(dtype, |$T| body)`: the identifier written
/// after the `$` is substituted by `i8`, `i64`, `u32`, … inside `body`, depending on which
/// integer `DataType` variant `dtype` matches.
///
/// # Panics
///
/// The expansion panics for any dtype without an arm.
#[macro_export]
macro_rules! with_match_physical_integer_type {(
    $data_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Helper macro that re-emits `body` with the chosen type in place of the caller's `$T`.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    #[cfg(feature = "dtype-f16")]
    use polars_utils::float16::pf16;
    match $data_type {
        // Unsigned integers.
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { u128 },
        // Signed integers.
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        other => panic!("not implemented for dtype {:?}", other),
    }
})}
514
/// Runs a body once with a type identifier bound to the primitive type of a float dtype.
///
/// Invoked as `with_match_physical_float_type!(dtype, |$T| body)`: the identifier written after
/// the `$` is substituted by `f32`, `f64` (or `pf16` for `Float16`) inside `body`, depending on
/// which float `DataType` variant `dtype` matches.
///
/// # Panics
///
/// The expansion panics for any dtype without an arm.
#[macro_export]
macro_rules! with_match_physical_float_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Helper macro that re-emits `body` with the chosen type in place of the caller's `$T`.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    // `pf16` is only named by the `Float16` arm, so the import carries the same feature gate
    // (as in `with_match_physical_numeric_type`).
    #[cfg(feature = "dtype-f16")]
    use polars_utils::float16::pf16;
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-f16")]
        Float16 => __with_ty__! { pf16 },
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
530
/// Runs a body once with a type identifier bound to the polars type of a float dtype.
///
/// Invoked as `with_match_physical_float_polars_type!(dtype, |$T| body)`: the identifier written
/// after the `$` is substituted by `Float16Type`, `Float32Type` or `Float64Type` inside `body`.
///
/// # Panics
///
/// The expansion panics for any dtype without an arm.
#[macro_export]
macro_rules! with_match_physical_float_polars_type {(
    $data_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Helper macro that re-emits `body` with the chosen type in place of the caller's `$T`.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $data_type {
        Float64 => __with_ty__! { Float64Type },
        Float32 => __with_ty__! { Float32Type },
        #[cfg(feature = "dtype-f16")]
        Float16 => __with_ty__! { Float16Type },
        other => panic!("not implemented for dtype {:?}", other),
    }
})}
545
/// Runs a body once with a type identifier bound to the polars type of a numeric dtype.
///
/// Invoked as `with_match_physical_numeric_polars_type!(dtype, |$T| body)`: the identifier
/// written after the `$` is substituted by `Int8Type`, `UInt32Type`, `Float64Type`, … inside
/// `body`, depending on which `DataType` variant `dtype` matches.
///
/// # Panics
///
/// The expansion panics for any dtype without an arm.
#[macro_export]
macro_rules! with_match_physical_numeric_polars_type {(
    $data_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Helper macro that re-emits `body` with the chosen type in place of the caller's `$T`.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $data_type {
        // Unsigned integers.
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { UInt128Type },
        // Signed integers.
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        // Floats.
        #[cfg(feature = "dtype-f16")]
        Float16 => __with_ty__! { Float16Type },
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        other => panic!("not implemented for dtype {:?}", other),
    }
})}
576
/// Runs a body once with a type identifier bound to the polars type of an integer dtype.
///
/// Invoked as `with_match_physical_integer_polars_type!(dtype, |$T| body)`: the identifier
/// written after the `$` is substituted by `Int8Type`, `UInt64Type`, … inside `body`, depending
/// on which integer `DataType` variant `dtype` matches. The expansion brings
/// `$crate::datatypes::*` into scope, so the type names resolve without caller imports.
///
/// # Panics
///
/// The expansion panics for any dtype without an arm.
#[macro_export]
macro_rules! with_match_physical_integer_polars_type {(
    $data_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Helper macro that re-emits `body` with the chosen type in place of the caller's `$T`.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::*;
    use $crate::datatypes::DataType::*;
    match $data_type {
        // Unsigned integers.
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { UInt128Type },
        // Signed integers.
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        other => panic!("not implemented for dtype {:?}", other),
    }
})}
604
/// Runs a body once with a type identifier bound to the categorical type of a physical width.
///
/// Invoked as `with_match_categorical_physical_type!(physical, |$T| body)`: the identifier
/// written after the `$` is substituted by `Categorical8Type`, `Categorical16Type` or
/// `Categorical32Type` inside `body`, for `CategoricalPhysical::U8`, `U16` and `U32`
/// respectively.
#[macro_export]
macro_rules! with_match_categorical_physical_type {(
    $physical:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Helper macro that re-emits `body` with the chosen type in place of the caller's `$T`.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    match $physical {
        CategoricalPhysical::U32 => __with_ty__! { Categorical32Type },
        CategoricalPhysical::U16 => __with_ty__! { Categorical16Type },
        CategoricalPhysical::U8 => __with_ty__! { Categorical8Type },
    }
})}
616
/// Dispatches on the numeric dtype of the first argument and invokes `$macro!` on the downcast
/// array.
///
/// `$macro!` receives the unwrapped result of the matching accessor (`u8()`, `i32()`, `f64()`,
/// …) as its first argument, followed by the optional arguments.
///
/// # Panics
///
/// The expansion panics for any dtype without an arm.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical {
    ($this:expr, $macro:ident $(, $extra:expr)*) => {{
        match $this.dtype() {
            // Signed integers.
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($this.i8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($this.i16().unwrap() $(, $extra)*),
            DataType::Int32 => $macro!($this.i32().unwrap() $(, $extra)*),
            DataType::Int64 => $macro!($this.i64().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($this.i128().unwrap() $(, $extra)*),
            // Unsigned integers.
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($this.u8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($this.u16().unwrap() $(, $extra)*),
            DataType::UInt32 => $macro!($this.u32().unwrap() $(, $extra)*),
            DataType::UInt64 => $macro!($this.u64().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $macro!($this.u128().unwrap() $(, $extra)*),
            // Floats.
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $macro!($this.f16().unwrap() $(, $extra)*),
            DataType::Float32 => $macro!($this.f32().unwrap() $(, $extra)*),
            DataType::Float64 => $macro!($this.f64().unwrap() $(, $extra)*),
            other => panic!("not implemented for {:?}", other),
        }
    }};
}
647
/// Dispatches on the numeric dtype of the first argument and invokes `$macro!` with a mutable
/// reference to the typed chunked array.
///
/// For every numeric dtype the first argument is converted with `as_mut()` into a
/// `&mut <Type>Chunked`, and `$macro!` is invoked with the polars type (`UInt8Type`, …), that
/// mutable reference, and the optional arguments. The dtype is cloned before matching.
///
/// # Panics
///
/// The expansion panics for any dtype without an arm.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical_mut {
    ($this:expr, $macro:ident $(, $extra:expr)*) => {{
        match $this.dtype().clone() {
            // Signed integers.
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => {
                let typed: &mut Int8Chunked = $this.as_mut();
                $macro!(Int8Type, typed $(, $extra)*)
            },
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => {
                let typed: &mut Int16Chunked = $this.as_mut();
                $macro!(Int16Type, typed $(, $extra)*)
            },
            DataType::Int32 => {
                let typed: &mut Int32Chunked = $this.as_mut();
                $macro!(Int32Type, typed $(, $extra)*)
            },
            DataType::Int64 => {
                let typed: &mut Int64Chunked = $this.as_mut();
                $macro!(Int64Type, typed $(, $extra)*)
            },
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => {
                let typed: &mut Int128Chunked = $this.as_mut();
                $macro!(Int128Type, typed $(, $extra)*)
            },
            // Unsigned integers.
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => {
                let typed: &mut UInt8Chunked = $this.as_mut();
                $macro!(UInt8Type, typed $(, $extra)*)
            },
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => {
                let typed: &mut UInt16Chunked = $this.as_mut();
                $macro!(UInt16Type, typed $(, $extra)*)
            },
            DataType::UInt32 => {
                let typed: &mut UInt32Chunked = $this.as_mut();
                $macro!(UInt32Type, typed $(, $extra)*)
            },
            DataType::UInt64 => {
                let typed: &mut UInt64Chunked = $this.as_mut();
                $macro!(UInt64Type, typed $(, $extra)*)
            },
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => {
                let typed: &mut UInt128Chunked = $this.as_mut();
                $macro!(UInt128Type, typed $(, $extra)*)
            },
            // Floats.
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => {
                let typed: &mut Float16Chunked = $this.as_mut();
                $macro!(Float16Type, typed $(, $extra)*)
            },
            DataType::Float32 => {
                let typed: &mut Float32Chunked = $this.as_mut();
                $macro!(Float32Type, typed $(, $extra)*)
            },
            DataType::Float64 => {
                let typed: &mut Float64Chunked = $this.as_mut();
                $macro!(Float64Type, typed $(, $extra)*)
            },
            other => panic!("not implemented for {:?}", other),
        }
    }};
}
718
/// Calls `$method` with `$args` on the typed chunked array behind the first argument.
///
/// The dtype of the first argument selects the accessor (`bool()`, `str()`, `u8()`, `date()`,
/// `list()`, `struct_()`, …); the accessor result is unwrapped and the method is invoked on it.
/// Note that the comma after the method name is mandatory, even without arguments.
///
/// # Panics
///
/// The expansion panics for any dtype without an arm.
#[macro_export]
macro_rules! apply_method_all_arrow_series {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Boolean => $self.bool().unwrap().$method($($args),*),
            DataType::String => $self.str().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            // This arm must use the `$method` metavariable like every other arm; a misspelled
            // name is not bound by the matcher and cannot be expanded.
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $self.u128().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $self.f16().unwrap().$method($($args),*),
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            DataType::Time => $self.time().unwrap().$method($($args),*),
            DataType::Date => $self.date().unwrap().$method($($args),*),
            DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
            DataType::List(_) => $self.list().unwrap().$method($($args),*),
            DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
            dt => panic!("dtype {:?} not supported", dt)
        }
    }
}
754
/// Calls `$method` with the given arguments on the typed integer array behind the first
/// argument.
///
/// The dtype of the first argument selects the accessor (`u8()`, `i64()`, …); the accessor
/// result is unwrapped and the method is invoked on it. The comma after the method name is
/// mandatory, even without arguments.
///
/// # Panics
///
/// The expansion panics for any non-integer dtype.
#[macro_export]
macro_rules! apply_method_physical_integer {
    ($this:expr, $method:ident, $($arg:expr),*) => {
        match $this.dtype() {
            // Signed integers.
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $this.i8().unwrap().$method($($arg),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $this.i16().unwrap().$method($($arg),*),
            DataType::Int32 => $this.i32().unwrap().$method($($arg),*),
            DataType::Int64 => $this.i64().unwrap().$method($($arg),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $this.i128().unwrap().$method($($arg),*),
            // Unsigned integers.
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $this.u8().unwrap().$method($($arg),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $this.u16().unwrap().$method($($arg),*),
            DataType::UInt32 => $this.u32().unwrap().$method($($arg),*),
            DataType::UInt64 => $this.u64().unwrap().$method($($arg),*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $this.u128().unwrap().$method($($arg),*),
            other => panic!("not implemented for dtype {:?}", other),
        }
    }
}
779
/// Calls `$method` with `$args` on the typed numeric array behind the first argument.
///
/// Float dtypes are handled here; every other dtype is delegated to
/// `apply_method_physical_integer!`, which panics for non-integer dtypes. The comma after the
/// method name is mandatory, even without arguments.
#[macro_export]
macro_rules! apply_method_physical_numeric {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $self.f16().unwrap().$method($($args),*),
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            // Path-qualified so the sibling macro resolves even when the caller has only
            // imported this macro.
            _ => $crate::apply_method_physical_integer!($self, $method, $($args),*),
        }
    }
}
793
/// Builds a `DataFrame` from `name => values` pairs.
///
/// Every pair becomes a `Series` (via `NamedFrom::new(name.into(), values)`) that is converted
/// into a `Column`; the columns are handed to `DataFrame::new`, whose result is the value of
/// the macro. At least one pair is required and a trailing comma is accepted.
#[macro_export]
macro_rules! df {
    ($($name:expr => $values:expr), + $(,)?) => {
        $crate::prelude::DataFrame::new(vec![
            $(
                $crate::prelude::Column::from(
                    <$crate::prelude::Series as $crate::prelude::NamedFrom::<_, _>>::new(
                        $name.into(),
                        $values,
                    )
                ),
            )+
        ])
    }
}
802
803pub fn get_time_units(tu_l: &TimeUnit, tu_r: &TimeUnit) -> TimeUnit {
804 use crate::datatypes::time_unit::TimeUnit::*;
805 match (tu_l, tu_r) {
806 (Nanoseconds, Microseconds) => Microseconds,
807 (_, Milliseconds) => Milliseconds,
808 _ => *tu_l,
809 }
810}
811
812#[cold]
813#[inline(never)]
814fn width_mismatch(df1: &DataFrame, df2: &DataFrame) -> PolarsError {
815 let mut df1_extra = Vec::new();
816 let mut df2_extra = Vec::new();
817
818 let s1 = df1.schema();
819 let s2 = df2.schema();
820
821 s1.field_compare(s2, &mut df1_extra, &mut df2_extra);
822
823 let df1_extra = df1_extra
824 .into_iter()
825 .map(|(_, (n, _))| n.as_str())
826 .collect::<Vec<_>>()
827 .join(", ");
828 let df2_extra = df2_extra
829 .into_iter()
830 .map(|(_, (n, _))| n.as_str())
831 .collect::<Vec<_>>()
832 .join(", ");
833
834 polars_err!(
835 SchemaMismatch: r#"unable to vstack, dataframes have different widths ({} != {}).
836One dataframe has additional columns: [{df1_extra}].
837Other dataframe has additional columns: [{df2_extra}]."#,
838 df1.width(),
839 df2.width(),
840 )
841}
842
843pub fn accumulate_dataframes_vertical_unchecked_optional<I>(dfs: I) -> Option<DataFrame>
844where
845 I: IntoIterator<Item = DataFrame>,
846{
847 let mut iter = dfs.into_iter();
848 let additional = iter.size_hint().0;
849 let mut acc_df = iter.next()?;
850 acc_df.reserve_chunks(additional);
851
852 for df in iter {
853 if acc_df.width() != df.width() {
854 panic!("{}", width_mismatch(&acc_df, &df));
855 }
856
857 acc_df.vstack_mut_owned_unchecked(df);
858 }
859 Some(acc_df)
860}
861
862pub fn accumulate_dataframes_vertical_unchecked<I>(dfs: I) -> DataFrame
865where
866 I: IntoIterator<Item = DataFrame>,
867{
868 let mut iter = dfs.into_iter();
869 let additional = iter.size_hint().0;
870 let mut acc_df = iter.next().unwrap();
871 acc_df.reserve_chunks(additional);
872
873 for df in iter {
874 if acc_df.width() != df.width() {
875 panic!("{}", width_mismatch(&acc_df, &df));
876 }
877
878 acc_df.vstack_mut_owned_unchecked(df);
879 }
880 acc_df
881}
882
883pub fn accumulate_dataframes_vertical<I>(dfs: I) -> PolarsResult<DataFrame>
887where
888 I: IntoIterator<Item = DataFrame>,
889{
890 let mut iter = dfs.into_iter();
891 let additional = iter.size_hint().0;
892 let mut acc_df = iter.next().unwrap();
893 acc_df.reserve_chunks(additional);
894 for df in iter {
895 if acc_df.width() != df.width() {
896 return Err(width_mismatch(&acc_df, &df));
897 }
898
899 acc_df.vstack_mut_owned(df)?;
900 }
901
902 Ok(acc_df)
903}
904
905pub fn concat_df<'a, I>(dfs: I) -> PolarsResult<DataFrame>
907where
908 I: IntoIterator<Item = &'a DataFrame>,
909{
910 let mut iter = dfs.into_iter();
911 let additional = iter.size_hint().0;
912 let mut acc_df = iter.next().unwrap().clone();
913 acc_df.reserve_chunks(additional);
914 for df in iter {
915 acc_df.vstack_mut(df)?;
916 }
917 Ok(acc_df)
918}
919
920pub fn concat_df_unchecked<'a, I>(dfs: I) -> DataFrame
922where
923 I: IntoIterator<Item = &'a DataFrame>,
924{
925 let mut iter = dfs.into_iter();
926 let additional = iter.size_hint().0;
927 let mut acc_df = iter.next().unwrap().clone();
928 acc_df.reserve_chunks(additional);
929 for df in iter {
930 acc_df.vstack_mut_unchecked(df);
931 }
932 acc_df
933}
934
935pub fn accumulate_dataframes_horizontal(dfs: Vec<DataFrame>) -> PolarsResult<DataFrame> {
936 let mut iter = dfs.into_iter();
937 let mut acc_df = iter.next().unwrap();
938 for df in iter {
939 acc_df.hstack_mut(df.get_columns())?;
940 }
941 Ok(acc_df)
942}
943
944pub fn align_chunks_binary<'a, T, B>(
948 left: &'a ChunkedArray<T>,
949 right: &'a ChunkedArray<B>,
950) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<B>>)
951where
952 B: PolarsDataType,
953 T: PolarsDataType,
954{
955 let assert = || {
956 assert_eq!(
957 left.len(),
958 right.len(),
959 "expected arrays of the same length"
960 )
961 };
962 match (left.chunks.len(), right.chunks.len()) {
963 (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
965 (a, b)
967 if a == b
968 && left
969 .chunk_lengths()
970 .zip(right.chunk_lengths())
971 .all(|(l, r)| l == r) =>
972 {
973 (Cow::Borrowed(left), Cow::Borrowed(right))
974 },
975 (_, 1) => {
976 assert();
977 (
978 Cow::Borrowed(left),
979 Cow::Owned(right.match_chunks(left.chunk_lengths())),
980 )
981 },
982 (1, _) => {
983 assert();
984 (
985 Cow::Owned(left.match_chunks(right.chunk_lengths())),
986 Cow::Borrowed(right),
987 )
988 },
989 (_, _) => {
990 assert();
991 let left = left.rechunk();
993 (
994 Cow::Owned(left.match_chunks(right.chunk_lengths())),
995 Cow::Borrowed(right),
996 )
997 },
998 }
999}
1000
1001pub fn align_chunks_binary_ca_series<'a, T>(
1005 left: &'a ChunkedArray<T>,
1006 right: &'a Series,
1007) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, Series>)
1008where
1009 T: PolarsDataType,
1010{
1011 let assert = || {
1012 assert_eq!(
1013 left.len(),
1014 right.len(),
1015 "expected arrays of the same length"
1016 )
1017 };
1018 match (left.chunks.len(), right.chunks().len()) {
1019 (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
1021 (a, b)
1023 if a == b
1024 && left
1025 .chunk_lengths()
1026 .zip(right.chunk_lengths())
1027 .all(|(l, r)| l == r) =>
1028 {
1029 assert();
1030 (Cow::Borrowed(left), Cow::Borrowed(right))
1031 },
1032 (_, 1) => (left.rechunk(), Cow::Borrowed(right)),
1033 (1, _) => (Cow::Borrowed(left), Cow::Owned(right.rechunk())),
1034 (_, _) => {
1035 assert();
1036 (left.rechunk(), Cow::Owned(right.rechunk()))
1037 },
1038 }
1039}
1040
/// Brings two owned series to a compatible chunk layout.
///
/// Series that are both single-chunk, or that have pairwise equal chunk lengths, are returned
/// untouched. Otherwise every side that does not consist of exactly one chunk is rechunked.
#[cfg(feature = "performant")]
pub(crate) fn align_chunks_binary_owned_series(left: Series, right: Series) -> (Series, Series) {
    let n_left = left.chunks().len();
    let n_right = right.chunks().len();

    let already_aligned = (n_left == 1 && n_right == 1)
        || (n_left == n_right
            && left
                .chunk_lengths()
                .zip(right.chunk_lengths())
                .all(|(l, r)| l == r));
    if already_aligned {
        return (left, right);
    }

    // A side that is already a single chunk is kept as it is; the other one is collapsed.
    let left = if n_left == 1 { left } else { left.rechunk() };
    let right = if n_right == 1 { right } else { right.rechunk() };
    (left, right)
}
1060
1061pub(crate) fn align_chunks_binary_owned<T, B>(
1062 left: ChunkedArray<T>,
1063 right: ChunkedArray<B>,
1064) -> (ChunkedArray<T>, ChunkedArray<B>)
1065where
1066 B: PolarsDataType,
1067 T: PolarsDataType,
1068{
1069 match (left.chunks.len(), right.chunks.len()) {
1070 (1, 1) => (left, right),
1071 (a, b)
1073 if a == b
1074 && left
1075 .chunk_lengths()
1076 .zip(right.chunk_lengths())
1077 .all(|(l, r)| l == r) =>
1078 {
1079 (left, right)
1080 },
1081 (_, 1) => (left.rechunk().into_owned(), right),
1082 (1, _) => (left, right.rechunk().into_owned()),
1083 (_, _) => (left.rechunk().into_owned(), right.rechunk().into_owned()),
1084 }
1085}
1086
1087#[allow(clippy::type_complexity)]
1090pub fn align_chunks_ternary<'a, A, B, C>(
1091 a: &'a ChunkedArray<A>,
1092 b: &'a ChunkedArray<B>,
1093 c: &'a ChunkedArray<C>,
1094) -> (
1095 Cow<'a, ChunkedArray<A>>,
1096 Cow<'a, ChunkedArray<B>>,
1097 Cow<'a, ChunkedArray<C>>,
1098)
1099where
1100 A: PolarsDataType,
1101 B: PolarsDataType,
1102 C: PolarsDataType,
1103{
1104 if a.chunks.len() == 1 && b.chunks.len() == 1 && c.chunks.len() == 1 {
1105 return (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c));
1106 }
1107
1108 assert!(
1109 a.len() == b.len() && b.len() == c.len(),
1110 "expected arrays of the same length"
1111 );
1112
1113 match (a.chunks.len(), b.chunks.len(), c.chunks.len()) {
1114 (_, 1, 1) => (
1115 Cow::Borrowed(a),
1116 Cow::Owned(b.match_chunks(a.chunk_lengths())),
1117 Cow::Owned(c.match_chunks(a.chunk_lengths())),
1118 ),
1119 (1, 1, _) => (
1120 Cow::Owned(a.match_chunks(c.chunk_lengths())),
1121 Cow::Owned(b.match_chunks(c.chunk_lengths())),
1122 Cow::Borrowed(c),
1123 ),
1124 (1, _, 1) => (
1125 Cow::Owned(a.match_chunks(b.chunk_lengths())),
1126 Cow::Borrowed(b),
1127 Cow::Owned(c.match_chunks(b.chunk_lengths())),
1128 ),
1129 (1, _, _) => {
1130 let b = b.rechunk();
1131 (
1132 Cow::Owned(a.match_chunks(c.chunk_lengths())),
1133 Cow::Owned(b.match_chunks(c.chunk_lengths())),
1134 Cow::Borrowed(c),
1135 )
1136 },
1137 (_, 1, _) => {
1138 let a = a.rechunk();
1139 (
1140 Cow::Owned(a.match_chunks(c.chunk_lengths())),
1141 Cow::Owned(b.match_chunks(c.chunk_lengths())),
1142 Cow::Borrowed(c),
1143 )
1144 },
1145 (_, _, 1) => {
1146 let b = b.rechunk();
1147 (
1148 Cow::Borrowed(a),
1149 Cow::Owned(b.match_chunks(a.chunk_lengths())),
1150 Cow::Owned(c.match_chunks(a.chunk_lengths())),
1151 )
1152 },
1153 (len_a, len_b, len_c)
1154 if len_a == len_b
1155 && len_b == len_c
1156 && a.chunk_lengths()
1157 .zip(b.chunk_lengths())
1158 .zip(c.chunk_lengths())
1159 .all(|((a, b), c)| a == b && b == c) =>
1160 {
1161 (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c))
1162 },
1163 _ => {
1164 let a = a.rechunk();
1166 let b = b.rechunk();
1167 (
1168 Cow::Owned(a.match_chunks(c.chunk_lengths())),
1169 Cow::Owned(b.match_chunks(c.chunk_lengths())),
1170 Cow::Borrowed(c),
1171 )
1172 },
1173 }
1174}
1175
1176pub fn binary_concatenate_validities<'a, T, B>(
1177 left: &'a ChunkedArray<T>,
1178 right: &'a ChunkedArray<B>,
1179) -> Option<Bitmap>
1180where
1181 B: PolarsDataType,
1182 T: PolarsDataType,
1183{
1184 let (left, right) = align_chunks_binary(left, right);
1185 let left_validity = concatenate_validities(left.chunks());
1186 let right_validity = concatenate_validities(right.chunks());
1187 combine_validities_and(left_validity.as_ref(), right_validity.as_ref())
1188}
1189
1190pub trait IntoVec<T> {
1192 fn into_vec(self) -> Vec<T>;
1193}
1194
1195impl<I, S> IntoVec<PlSmallStr> for I
1196where
1197 I: IntoIterator<Item = S>,
1198 S: Into<PlSmallStr>,
1199{
1200 fn into_vec(self) -> Vec<PlSmallStr> {
1201 self.into_iter().map(|s| s.into()).collect()
1202 }
1203}
1204
1205#[inline]
1210pub(crate) fn index_to_chunked_index<
1211 I: Iterator<Item = Idx>,
1212 Idx: PartialOrd + std::ops::AddAssign + std::ops::SubAssign + Zero + One,
1213>(
1214 chunk_lens: I,
1215 index: Idx,
1216) -> (Idx, Idx) {
1217 let mut index_remainder = index;
1218 let mut current_chunk_idx = Zero::zero();
1219
1220 for chunk_len in chunk_lens {
1221 if chunk_len > index_remainder {
1222 break;
1223 } else {
1224 index_remainder -= chunk_len;
1225 current_chunk_idx += One::one();
1226 }
1227 }
1228 (current_chunk_idx, index_remainder)
1229}
1230
1231pub(crate) fn index_to_chunked_index_rev<
1232 I: Iterator<Item = Idx>,
1233 Idx: PartialOrd
1234 + std::ops::AddAssign
1235 + std::ops::SubAssign
1236 + std::ops::Sub<Output = Idx>
1237 + Zero
1238 + One
1239 + Copy
1240 + std::fmt::Debug,
1241>(
1242 chunk_lens_rev: I,
1243 index_from_back: Idx,
1244 total_chunks: Idx,
1245) -> (Idx, Idx) {
1246 debug_assert!(index_from_back > Zero::zero(), "at least -1");
1247 let mut index_remainder = index_from_back;
1248 let mut current_chunk_idx = One::one();
1249 let mut current_chunk_len = Zero::zero();
1250
1251 for chunk_len in chunk_lens_rev {
1252 current_chunk_len = chunk_len;
1253 if chunk_len >= index_remainder {
1254 break;
1255 } else {
1256 index_remainder -= chunk_len;
1257 current_chunk_idx += One::one();
1258 }
1259 }
1260 (
1261 total_chunks - current_chunk_idx,
1262 current_chunk_len - index_remainder,
1263 )
1264}
1265
1266pub fn first_null<'a, I>(iter: I) -> Option<usize>
1267where
1268 I: Iterator<Item = &'a dyn Array>,
1269{
1270 let mut offset = 0;
1271 for arr in iter {
1272 if let Some(mask) = arr.validity() {
1273 let len_mask = mask.len();
1274 let n = mask.leading_ones();
1275 if n < len_mask {
1276 return Some(offset + n);
1277 }
1278 offset += len_mask
1279 } else {
1280 offset += arr.len();
1281 }
1282 }
1283 None
1284}
1285
1286pub fn first_non_null<'a, I>(iter: I) -> Option<usize>
1287where
1288 I: Iterator<Item = &'a dyn Array>,
1289{
1290 let mut offset = 0;
1291 for arr in iter {
1292 if let Some(mask) = arr.validity() {
1293 let len_mask = mask.len();
1294 let n = mask.leading_zeros();
1295 if n < len_mask {
1296 return Some(offset + n);
1297 }
1298 offset += len_mask
1299 } else if !arr.is_empty() {
1300 return Some(offset);
1301 }
1302 }
1303 None
1304}
1305
1306pub fn last_non_null<'a, I>(iter: I, len: usize) -> Option<usize>
1307where
1308 I: DoubleEndedIterator<Item = &'a dyn Array>,
1309{
1310 if len == 0 {
1311 return None;
1312 }
1313 let mut offset = 0;
1314 for arr in iter.rev() {
1315 if let Some(mask) = arr.validity() {
1316 let len_mask = mask.len();
1317 let n = mask.trailing_zeros();
1318 if n < len_mask {
1319 return Some(len - offset - n - 1);
1320 }
1321 offset += len_mask;
1322 } else if !arr.is_empty() {
1323 return Some(len - offset - 1);
1324 }
1325 }
1326 None
1327}
1328
1329pub fn coalesce_nulls<'a, T: PolarsDataType>(
1331 a: &'a ChunkedArray<T>,
1332 b: &'a ChunkedArray<T>,
1333) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<T>>) {
1334 if a.null_count() > 0 || b.null_count() > 0 {
1335 let (a, b) = align_chunks_binary(a, b);
1336 let mut b = b.into_owned();
1337 let a = a.coalesce_nulls(b.chunks());
1338
1339 for arr in a.chunks().iter() {
1340 for arr_b in unsafe { b.chunks_mut() } {
1341 *arr_b = arr_b.with_validity(arr.validity().cloned())
1342 }
1343 }
1344 b.compute_len();
1345 (Cow::Owned(a), Cow::Owned(b))
1346 } else {
1347 (Cow::Borrowed(a), Cow::Borrowed(b))
1348 }
1349}
1350
1351pub fn coalesce_nulls_columns(a: &Column, b: &Column) -> (Column, Column) {
1352 if a.null_count() > 0 || b.null_count() > 0 {
1353 let mut a = a.as_materialized_series().rechunk();
1354 let mut b = b.as_materialized_series().rechunk();
1355 for (arr_a, arr_b) in unsafe { a.chunks_mut().iter_mut().zip(b.chunks_mut()) } {
1356 let validity = match (arr_a.validity(), arr_b.validity()) {
1357 (None, Some(b)) => Some(b.clone()),
1358 (Some(a), Some(b)) => Some(a & b),
1359 (Some(a), None) => Some(a.clone()),
1360 (None, None) => None,
1361 };
1362 *arr_a = arr_a.with_validity(validity.clone());
1363 *arr_b = arr_b.with_validity(validity);
1364 }
1365 a.compute_len();
1366 b.compute_len();
1367 (a.into(), b.into())
1368 } else {
1369 (a.clone(), b.clone())
1370 }
1371}
1372
#[cfg(test)]
mod test {
    use super::*;

    /// Aligns the two arrays and checks that the resulting chunk lengths are identical.
    fn assert_aligned(left: &Int32Chunked, right: &Int32Chunked) {
        let (left, right) = align_chunks_binary(left, right);
        assert_eq!(
            left.chunk_lengths().collect::<Vec<_>>(),
            right.chunk_lengths().collect::<Vec<_>>()
        );
    }

    /// Splitting ten values three ways is expected to give parts of length 3, 3 and 4.
    #[test]
    fn test_split() {
        let ca: Int32Chunked = (0..10).collect_ca("a".into());

        let parts = split(&ca, 3);
        for (idx, expected_len) in [3, 3, 4].into_iter().enumerate() {
            assert_eq!(parts[idx].len(), expected_len);
        }
    }

    #[test]
    fn test_align_chunks() -> PolarsResult<()> {
        // Right side built by appending `[2, 3, 4]` to `[1]`.
        let left = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut right = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        right.append(&Int32Chunked::new(PlSmallStr::EMPTY, &[2, 3, 4]))?;
        assert_aligned(&left, &right);

        // Right side built by appending `[1]` to itself three times.
        let left = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let unit = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let mut right = unit.clone();
        for _ in 0..3 {
            right.append(&unit)?;
        }
        assert_aligned(&left, &right);

        Ok(())
    }
}