1mod any_value;
2use arrow::compute::concatenate::concatenate_validities;
3use arrow::compute::utils::combine_validities_and;
4pub mod flatten;
5pub(crate) mod series;
6mod supertype;
7use std::borrow::Cow;
8use std::ops::{Deref, DerefMut};
9mod schema;
10
11pub use any_value::*;
12pub use arrow;
13use arrow::bitmap::Bitmap;
14pub use arrow::legacy::utils::*;
15pub use arrow::trusted_len::TrustMyLength;
16use flatten::*;
17use num_traits::{One, Zero};
18pub use rayon;
19use rayon::prelude::*;
20pub use schema::*;
21pub use series::*;
22pub use supertype::*;
23
24use crate::POOL;
25use crate::prelude::*;
26
/// Transparent newtype around a `T`.
///
/// The wrapped value is public (`.0`) and is also reachable through
/// [`Deref`], so methods of `T` can be called directly on a `Wrap<T>`.
#[repr(transparent)]
pub struct Wrap<T>(pub T);

impl<T> Deref for Wrap<T> {
    type Target = T;

    /// Borrows the wrapped value.
    fn deref(&self) -> &T {
        let Wrap(inner) = self;
        inner
    }
}
36
/// Returns the partition count to use for partitioned work.
///
/// This is whatever `POOL.current_num_threads()` reports, i.e. one partition
/// per thread of the crate-level `POOL`.
#[inline(always)]
pub fn _set_partition_size() -> usize {
    POOL.current_num_threads()
}
41
/// Thin wrapper that owns a single `T`.
///
/// The wrapper adds no behavior of its own: it dereferences (immutably and
/// mutably) to the wrapped value. NOTE(review): the name suggests it marks a
/// value as containing no nulls; nothing in this type enforces that.
pub struct NoNull<T> {
    inner: T,
}

impl<T> NoNull<T> {
    /// Wraps `inner`.
    pub fn new(inner: T) -> Self {
        Self { inner }
    }

    /// Consumes the wrapper and hands back the wrapped value.
    pub fn into_inner(self) -> T {
        let Self { inner } = self;
        inner
    }
}

impl<T> Deref for NoNull<T> {
    type Target = T;

    /// Shared access to the wrapped value.
    fn deref(&self) -> &T {
        &self.inner
    }
}

impl<T> DerefMut for NoNull<T> {
    /// Exclusive access to the wrapped value.
    fn deref_mut(&mut self) -> &mut T {
        &mut self.inner
    }
}
75
/// Picks an allocation capacity from an iterator's `size_hint`.
///
/// * If an upper bound is known, it is returned.
/// * Otherwise the lower bound is returned, except that a lower bound of `0`
///   is replaced by a default of `1024`.
pub(crate) fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
    let (lower, upper) = iter.size_hint();
    upper.unwrap_or(if lower == 0 { 1024 } else { lower })
}
83
84#[doc(hidden)]
87pub fn _split_offsets(len: usize, n: usize) -> Vec<(usize, usize)> {
88 if n == 1 {
89 vec![(0, len)]
90 } else {
91 let chunk_size = len / n;
92
93 (0..n)
94 .map(|partition| {
95 let offset = partition * chunk_size;
96 let len = if partition == (n - 1) {
97 len - offset
98 } else {
99 chunk_size
100 };
101 (partition * chunk_size, len)
102 })
103 .collect_trusted()
104 }
105}
106
/// Abstraction over chunked, sliceable containers used by the `split*`
/// helpers in this module.
#[allow(clippy::len_without_is_empty)]
pub trait Container: Clone {
    /// Returns the sub-range described by `offset` and `len`.
    fn slice(&self, offset: i64, len: usize) -> Self;

    /// Splits the container in two at `offset`.
    fn split_at(&self, offset: i64) -> (Self, Self);

    /// Number of elements (rows) in the container.
    fn len(&self) -> usize;

    /// Yields one container per underlying chunk.
    fn iter_chunks(&self) -> impl Iterator<Item = Self>;

    /// Whether the container must be rechunked before its chunks can be
    /// handed out individually (consulted by `split` / `split_and_flatten`).
    fn should_rechunk(&self) -> bool;

    /// Number of underlying chunks.
    fn n_chunks(&self) -> usize;

    /// Length of every underlying chunk, in order.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize>;
}
123
/// `Container` for `DataFrame`: every method forwards to the corresponding
/// `DataFrame` functionality.
impl Container for DataFrame {
    fn slice(&self, offset: i64, len: usize) -> Self {
        DataFrame::slice(self, offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        DataFrame::split_at(self, offset)
    }

    // The length of a frame is its height.
    fn len(&self) -> usize {
        self.height()
    }

    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        flatten_df_iter(self)
    }

    // NOTE(review): presumably resolves to the inherent
    // `DataFrame::should_rechunk` rather than recursing — confirm.
    fn should_rechunk(&self) -> bool {
        self.should_rechunk()
    }

    // Chunk count is taken from the first column only.
    fn n_chunks(&self) -> usize {
        DataFrame::first_col_n_chunks(self)
    }

    // Chunk lengths are taken from the first column only; indexing `[0]`
    // panics when the frame has no columns.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        self.columns()[0].as_materialized_series().chunk_lengths()
    }
}
154
/// `Container` for `ChunkedArray<T>`: forwards to the inherent
/// `ChunkedArray` methods.
impl<T: PolarsDataType> Container for ChunkedArray<T> {
    fn slice(&self, offset: i64, len: usize) -> Self {
        ChunkedArray::slice(self, offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        ChunkedArray::split_at(self, offset)
    }

    fn len(&self) -> usize {
        ChunkedArray::len(self)
    }

    // Each chunk becomes its own single-chunk array carrying the same name.
    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        self.downcast_iter()
            .map(|arr| Self::with_chunk(self.name().clone(), arr.clone()))
    }

    // A single array never reports that it needs rechunking.
    fn should_rechunk(&self) -> bool {
        false
    }

    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        ChunkedArray::chunk_lengths(self)
    }
}
185
/// `Container` for `Series`: most methods forward to the wrapped value in
/// `self.0`.
impl Container for Series {
    fn slice(&self, offset: i64, len: usize) -> Self {
        self.0.slice(offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        self.0.split_at(offset)
    }

    fn len(&self) -> usize {
        self.0.len()
    }

    // One `Series` per chunk index, obtained through `select_chunk`.
    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        (0..self.0.n_chunks()).map(|i| self.select_chunk(i))
    }

    // A single series never reports that it needs rechunking.
    fn should_rechunk(&self) -> bool {
        false
    }

    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        self.0.chunk_lengths()
    }
}
215
216fn split_impl<C: Container>(container: &C, target: usize, chunk_size: usize) -> Vec<C> {
217 if target == 1 {
218 return vec![container.clone()];
219 }
220 let mut out = Vec::with_capacity(target);
221 let chunk_size = chunk_size as i64;
222
223 let (chunk, mut remainder) = container.split_at(chunk_size);
225 out.push(chunk);
226
227 for _ in 1..target - 1 {
229 let (a, b) = remainder.split_at(chunk_size);
230 out.push(a);
231 remainder = b
232 }
233 out.push(remainder);
235 out
236}
237
238pub fn split<C: Container>(container: &C, target: usize) -> Vec<C> {
240 let total_len = container.len();
241 if total_len == 0 {
242 return vec![container.clone()];
243 }
244
245 let chunk_size = std::cmp::max(total_len / target, 1);
246
247 if container.n_chunks() == target
248 && container
249 .chunk_lengths()
250 .all(|len| len.abs_diff(chunk_size) < 100)
251 && !container.should_rechunk()
253 {
254 return container.iter_chunks().collect();
255 }
256 split_impl(container, target, chunk_size)
257}
258
259pub fn split_and_flatten<C: Container>(container: &C, target: usize) -> Vec<C> {
262 let total_len = container.len();
263 if total_len == 0 {
264 return vec![container.clone()];
265 }
266
267 let chunk_size = std::cmp::max(total_len / target, 1);
268
269 if container.n_chunks() == target
270 && container
271 .chunk_lengths()
272 .all(|len| len.abs_diff(chunk_size) < 100)
273 && !container.should_rechunk()
275 {
276 return container.iter_chunks().collect();
277 }
278
279 if container.n_chunks() == 1 {
280 split_impl(container, target, chunk_size)
281 } else {
282 let mut out = Vec::with_capacity(target);
283 let chunks = container.iter_chunks();
284
285 'new_chunk: for mut chunk in chunks {
286 loop {
287 let h = chunk.len();
288 if h < chunk_size {
289 out.push(chunk);
291 continue 'new_chunk;
292 }
293
294 if ((h - chunk_size) as f64 / chunk_size as f64) < 0.3 {
296 out.push(chunk);
297 continue 'new_chunk;
298 }
299
300 let (a, b) = chunk.split_at(chunk_size as i64);
301 out.push(a);
302 chunk = b;
303 }
304 }
305 out
306 }
307}
308
/// Splits a borrowed `DataFrame` into parts.
///
/// With `strict == true` this delegates to `split`; otherwise it delegates
/// to `split_and_flatten`, which may return a different number of parts than
/// `target`.
pub fn split_df_as_ref(df: &DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
    if strict {
        split(df, target)
    } else {
        split_and_flatten(df, target)
    }
}
321
322#[doc(hidden)]
323pub fn split_df(df: &mut DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
326 if target == 0 || df.height() == 0 {
327 return vec![df.clone()];
328 }
329 df.align_chunks_par();
331 split_df_as_ref(df, target, strict)
332}
333
334pub fn slice_slice<T>(vals: &[T], offset: i64, len: usize) -> &[T] {
335 let (raw_offset, slice_len) = slice_offsets(offset, len, vals.len());
336 &vals[raw_offset..raw_offset + slice_len]
337}
338
/// Converts a possibly negative `offset` plus a `length` into a clamped
/// `(start_index, slice_length)` pair for an array of `array_len` elements.
///
/// A negative `offset` is interpreted relative to the end of the array. Both
/// the start and the stop position are clamped to `0..=array_len`, so the
/// returned range always lies inside the array.
///
/// # Panics
///
/// Panics if `array_len` does not fit in an `i64`.
#[inline]
pub fn slice_offsets(offset: i64, length: usize, array_len: usize) -> (usize, usize) {
    let upper = i64::try_from(array_len).expect("array length larger than i64::MAX");

    // Negative offsets count from the end; saturate instead of overflowing.
    let raw_start = match offset {
        from_end if from_end < 0 => from_end.saturating_add_unsigned(array_len as u64),
        from_start => from_start,
    };
    let raw_stop = raw_start.saturating_add_unsigned(length as u64);

    let start = raw_start.clamp(0, upper);
    let stop = raw_stop.clamp(0, upper);

    (start as usize, (stop - start) as usize)
}
358
/// Dispatches on a `DataType` value and invokes a caller-supplied macro.
///
/// * `DataType::String` invokes `$macro_string!` and `DataType::Boolean`
///   invokes `$macro_bool!`, passing only the optional arguments.
/// * Numeric dtypes invoke `$macro!` with the matching primitive type
///   (`u8`, `i32`, `f64`, ...) as first argument, followed by the optional
///   arguments.
/// * Any other dtype panics.
///
/// `DataType` must be in scope at the call site.
#[macro_export]
macro_rules! match_dtype_to_physical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            // NOTE(review): in these two arms the optional arguments are
            // emitted without separating commas.
            DataType::String => $macro_string!($($opt_args)*),
            DataType::Boolean => $macro_bool!($($opt_args)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(u8 $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(u16 $(, $opt_args)*),
            DataType::UInt32 => $macro!(u32 $(, $opt_args)*),
            DataType::UInt64 => $macro!(u64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(i8 $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(i16 $(, $opt_args)*),
            DataType::Int32 => $macro!(i32 $(, $opt_args)*),
            DataType::Int64 => $macro!(i64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(i128 $(, $opt_args)*),
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $macro!(pf16 $(, $opt_args)*),
            DataType::Float32 => $macro!(f32 $(, $opt_args)*),
            DataType::Float64 => $macro!(f64 $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
388
/// Dispatches on a `DataType` value and invokes a caller-supplied macro with
/// the matching polars type marker.
///
/// * `DataType::String`, `DataType::Binary` and `DataType::Boolean` invoke
///   `$macro_string!`, `$macro_binary!` and `$macro_bool!` respectively,
///   passing only the optional arguments.
/// * Numeric dtypes invoke `$macro!` with the matching `*Type` identifier
///   (`UInt8Type`, `Int32Type`, `Float64Type`, ...) as first argument,
///   followed by the optional arguments.
/// * Any other dtype panics.
///
/// `DataType` and the `*Type` identifiers must be in scope at the call site.
#[macro_export]
macro_rules! match_dtype_to_logical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_binary:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            // NOTE(review): in these three arms the optional arguments are
            // emitted without separating commas.
            DataType::String => $macro_string!($($opt_args)*),
            DataType::Binary => $macro_binary!($($opt_args)*),
            DataType::Boolean => $macro_bool!($($opt_args)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(UInt8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(UInt16Type $(, $opt_args)*),
            DataType::UInt32 => $macro!(UInt32Type $(, $opt_args)*),
            DataType::UInt64 => $macro!(UInt64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $macro!(UInt128Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(Int8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(Int16Type $(, $opt_args)*),
            DataType::Int32 => $macro!(Int32Type $(, $opt_args)*),
            DataType::Int64 => $macro!(Int64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(Int128Type $(, $opt_args)*),
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $macro!(Float16Type $(, $opt_args)*),
            DataType::Float32 => $macro!(Float32Type $(, $opt_args)*),
            DataType::Float64 => $macro!(Float64Type $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
421
/// Dispatches on `$self.dtype()` and invokes a caller-supplied macro with the
/// value downcast to the matching typed accessor.
///
/// * `DataType::String` invokes `$macro_string!` with `$self.str().unwrap()`.
/// * `DataType::Boolean` invokes `$macro_bool!` with `$self.bool().unwrap()`.
/// * Numeric dtypes invoke `$macro!` with `$self.<accessor>().unwrap()`
///   (`u8()`, `i32()`, `f64()`, ...).
///
/// In every arm the optional arguments follow, comma separated. Any other
/// dtype panics. `$self` is evaluated more than once in the expansion.
#[macro_export]
macro_rules! match_arrow_dtype_apply_macro_ca {
    ($self:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            DataType::String => $macro_string!($self.str().unwrap() $(, $opt_args)*),
            DataType::Boolean => $macro_bool!($self.bool().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $macro!($self.u128().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($self.i128().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $macro!($self.f16().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
453
/// Runs `$body` with `$T` bound to the primitive Rust type that matches a
/// numeric `DataType`.
///
/// Invocation shape: `with_match_physical_numeric_type!(dtype, |$T| body)`.
/// Integer and float dtypes are covered (`i8` .. `u128`, `pf16`, `f32`,
/// `f64`); any other dtype panics.
#[macro_export]
macro_rules! with_match_physical_numeric_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Local helper: expands `$body` with the caller's `$T` replaced by the
    // type passed in.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    #[cfg(feature = "dtype-f16")]
    use polars_utils::float16::pf16;
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { u128 },
        #[cfg(feature = "dtype-f16")]
        Float16 => __with_ty__! { pf16 },
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
486
/// Runs `$body` with `$T` bound to the primitive Rust type that matches an
/// integer `DataType`.
///
/// Invocation shape: `with_match_physical_integer_type!(dtype, |$T| body)`.
/// Only signed and unsigned integer dtypes are covered; any other dtype
/// (including floats) panics.
#[macro_export]
macro_rules! with_match_physical_integer_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Local helper: expands `$body` with the caller's `$T` replaced by the
    // type passed in.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    // NOTE(review): `pf16` is imported here although no arm below uses it.
    #[cfg(feature = "dtype-f16")]
    use polars_utils::float16::pf16;
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { u128 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
515
/// Runs `$body` with `$T` bound to the primitive Rust type that matches a
/// floating-point `DataType`.
///
/// Invocation shape: `with_match_physical_float_type!(dtype, |$T| body)`.
/// Covers `Float16` (feature gated), `Float32` and `Float64`; any other
/// dtype panics.
#[macro_export]
macro_rules! with_match_physical_float_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Local helper: expands `$body` with the caller's `$T` replaced by the
    // type passed in.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    // NOTE(review): unlike the sibling macros this import is not gated on
    // the `dtype-f16` feature, while its only use below is.
    use polars_utils::float16::pf16;
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-f16")]
        Float16 => __with_ty__! { pf16 },
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
531
/// Runs `$body` with `$T` bound to the polars type marker that matches a
/// floating-point `DataType`.
///
/// Invocation shape:
/// `with_match_physical_float_polars_type!(dtype, |$T| body)`.
/// Covers `Float16Type` (feature gated), `Float32Type` and `Float64Type`;
/// any other dtype panics. The `*Type` identifiers must be in scope at the
/// call site.
#[macro_export]
macro_rules! with_match_physical_float_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Local helper: expands `$body` with the caller's `$T` replaced by the
    // type passed in.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        #[cfg(feature = "dtype-f16")]
        Float16 => __with_ty__! { Float16Type },
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
546
/// Runs `$body` with `$T` bound to the polars type marker that matches a
/// numeric `DataType`.
///
/// Invocation shape:
/// `with_match_physical_numeric_polars_type!(dtype, |$T| body)`.
/// Integer and float dtypes are covered (`Int8Type` .. `UInt128Type`,
/// `Float16Type`, `Float32Type`, `Float64Type`); any other dtype panics.
/// The `*Type` identifiers must be in scope at the call site.
#[macro_export]
macro_rules! with_match_physical_numeric_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Local helper: expands `$body` with the caller's `$T` replaced by the
    // type passed in.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { UInt128Type },
        #[cfg(feature = "dtype-f16")]
        Float16 => __with_ty__! { Float16Type },
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
577
/// Runs `$body` with `$T` bound to the polars type marker that matches an
/// integer `DataType`.
///
/// Invocation shape:
/// `with_match_physical_integer_polars_type!(dtype, |$T| body)`.
/// Only integer dtypes are covered; any other dtype (including floats)
/// panics. The expansion itself glob-imports `$crate::datatypes::*`, which
/// brings the `*Type` identifiers into scope.
#[macro_export]
macro_rules! with_match_physical_integer_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Local helper: expands `$body` with the caller's `$T` replaced by the
    // type passed in.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    use $crate::datatypes::*;
    match $key_type {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { UInt128Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
605
/// Runs `$body` with `$T` bound to the categorical type marker that matches
/// a `CategoricalPhysical` value.
///
/// Invocation shape:
/// `with_match_categorical_physical_type!(physical, |$T| body)`.
/// `U8`, `U16` and `U32` map to `Categorical8Type`, `Categorical16Type` and
/// `Categorical32Type`. The match has no fallback arm. `CategoricalPhysical`
/// and the `Categorical*Type` identifiers must be in scope at the call site.
#[macro_export]
macro_rules! with_match_categorical_physical_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Local helper: expands `$body` with the caller's `$T` replaced by the
    // type passed in.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    match $dtype {
        CategoricalPhysical::U8 => __with_ty__! { Categorical8Type },
        CategoricalPhysical::U16 => __with_ty__! { Categorical16Type },
        CategoricalPhysical::U32 => __with_ty__! { Categorical32Type },
    }
})}
617
/// Dispatches on `$self.dtype()` and invokes `$macro!` with the value
/// downcast through the matching numeric accessor.
///
/// Each numeric arm expands to `$macro!($self.<accessor>().unwrap(), ..)`
/// with the optional arguments appended. Non-numeric dtypes panic. `$self`
/// is evaluated more than once in the expansion.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $macro!($self.u128().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($self.i128().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $macro!($self.f16().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
648
/// Mutable counterpart of `downcast_as_macro_arg_physical`.
///
/// Dispatches on a clone of `$self.dtype()`, obtains a `&mut` typed chunked
/// array through `$self.as_mut()` and invokes
/// `$macro!(<TypeMarker>, ca, ..)` with the optional arguments appended.
/// Non-numeric dtypes panic.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical_mut {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        // The dtype is cloned so the scrutinee does not keep `$self`
        // borrowed while the arms borrow it mutably.
        match $self.dtype().clone() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => {
                let ca: &mut UInt8Chunked = $self.as_mut();
                $macro!(UInt8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => {
                let ca: &mut UInt16Chunked = $self.as_mut();
                $macro!(UInt16Type, ca $(, $opt_args)*)
            },
            DataType::UInt32 => {
                let ca: &mut UInt32Chunked = $self.as_mut();
                $macro!(UInt32Type, ca $(, $opt_args)*)
            },
            DataType::UInt64 => {
                let ca: &mut UInt64Chunked = $self.as_mut();
                $macro!(UInt64Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => {
                let ca: &mut UInt128Chunked = $self.as_mut();
                $macro!(UInt128Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => {
                let ca: &mut Int8Chunked = $self.as_mut();
                $macro!(Int8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => {
                let ca: &mut Int16Chunked = $self.as_mut();
                $macro!(Int16Type, ca $(, $opt_args)*)
            },
            DataType::Int32 => {
                let ca: &mut Int32Chunked = $self.as_mut();
                $macro!(Int32Type, ca $(, $opt_args)*)
            },
            DataType::Int64 => {
                let ca: &mut Int64Chunked = $self.as_mut();
                $macro!(Int64Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => {
                let ca: &mut Int128Chunked = $self.as_mut();
                $macro!(Int128Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => {
                let ca: &mut Float16Chunked = $self.as_mut();
                $macro!(Float16Type, ca $(, $opt_args)*)
            },
            DataType::Float32 => {
                let ca: &mut Float32Chunked = $self.as_mut();
                $macro!(Float32Type, ca $(, $opt_args)*)
            },
            DataType::Float64 => {
                let ca: &mut Float64Chunked = $self.as_mut();
                $macro!(Float64Type, ca $(, $opt_args)*)
            },
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
719
/// Dispatches on `$self.dtype()` and calls `$method` with `$args` on the
/// value downcast through the matching typed accessor.
///
/// Each arm expands to `$self.<accessor>().unwrap().$method(args..)`.
/// Covered dtypes: `Boolean`, `String`, the (feature gated) integer and float
/// dtypes, `Time`, `Date`, `Datetime`, `List` and `Struct`. Any other dtype
/// panics. `$self` is evaluated more than once in the expansion.
///
/// `DataType` must be in scope at the call site.
#[macro_export]
macro_rules! apply_method_all_arrow_series {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Boolean => $self.bool().unwrap().$method($($args),*),
            DataType::String => $self.str().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            // This arm previously used the undeclared metavariable
            // `$medthod`, which is transcribed literally and breaks parsing
            // of every expansion.
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $self.u128().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $self.f16().unwrap().$method($($args),*),
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            DataType::Time => $self.time().unwrap().$method($($args),*),
            DataType::Date => $self.date().unwrap().$method($($args),*),
            DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
            DataType::List(_) => $self.list().unwrap().$method($($args),*),
            DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
            dt => panic!("dtype {:?} not supported", dt)
        }
    }
}
755
/// Dispatches on `$self.dtype()` and calls `$method` with `$args` on the
/// value downcast through the matching integer accessor.
///
/// Each arm expands to `$self.<accessor>().unwrap().$method(args..)`. Only
/// integer dtypes are covered; any other dtype panics. `$self` is evaluated
/// more than once in the expansion.
#[macro_export]
macro_rules! apply_method_physical_integer {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $self.u128().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }
}
780
/// Dispatches on `$self.dtype()` and calls `$method` with `$args` on the
/// value downcast through the matching numeric accessor.
///
/// Float dtypes are handled here; every other dtype is forwarded to
/// `apply_method_physical_integer!`, which panics for non-integer dtypes.
/// That macro is referenced by its bare name, so it must be resolvable at
/// the call site.
#[macro_export]
macro_rules! apply_method_physical_numeric {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $self.f16().unwrap().$method($($args),*),
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            _ => apply_method_physical_integer!($self, $method, $($args),*),
        }
    }
}
794
/// Builds a `DataFrame` from `name => values` pairs.
///
/// Every pair is turned into a `Series` through `NamedFrom::new`
/// (`name.into()` supplies the column name), converted into a `Column`, and
/// the resulting vector is passed to `DataFrame::new_infer_height`. The macro
/// evaluates to whatever that constructor returns. At least one pair is
/// required; a trailing comma is accepted.
#[macro_export]
macro_rules! df {
    ($($col_name:expr => $slice:expr), + $(,)?) => {
        $crate::prelude::DataFrame::new_infer_height(vec![
            $($crate::prelude::Column::from(<$crate::prelude::Series as $crate::prelude::NamedFrom::<_, _>>::new($col_name.into(), $slice)),)+
        ])
    }
}
803
804pub fn get_time_units(tu_l: &TimeUnit, tu_r: &TimeUnit) -> TimeUnit {
805 use crate::datatypes::time_unit::TimeUnit::*;
806 match (tu_l, tu_r) {
807 (Nanoseconds, Microseconds) => Microseconds,
808 (_, Milliseconds) => Milliseconds,
809 _ => *tu_l,
810 }
811}
812
/// Builds the `SchemaMismatch` error reported when two frames that should be
/// stacked vertically have a different number of columns.
///
/// The message contains both widths and, for each frame, the names collected
/// by `field_compare` into that frame's "extra" list.
///
/// Marked `#[cold]` / `#[inline(never)]` so this error path stays out of the
/// callers' hot code.
#[cold]
#[inline(never)]
fn width_mismatch(df1: &DataFrame, df2: &DataFrame) -> PolarsError {
    let mut df1_extra = Vec::new();
    let mut df2_extra = Vec::new();

    let s1 = df1.schema();
    let s2 = df2.schema();

    // Fills the two vectors with the fields that differ between the schemas.
    s1.field_compare(s2, &mut df1_extra, &mut df2_extra);

    // Keep only the column names and join them for display. The locals are
    // shadowed because the format string below captures them by name.
    let df1_extra = df1_extra
        .into_iter()
        .map(|(_, (n, _))| n.as_str())
        .collect::<Vec<_>>()
        .join(", ");
    let df2_extra = df2_extra
        .into_iter()
        .map(|(_, (n, _))| n.as_str())
        .collect::<Vec<_>>()
        .join(", ");

    polars_err!(
        SchemaMismatch: r#"unable to vstack, dataframes have different widths ({} != {}).
One dataframe has additional columns: [{df1_extra}].
Other dataframe has additional columns: [{df2_extra}]."#,
        df1.width(),
        df2.width(),
    )
}
843
844pub fn accumulate_dataframes_vertical_unchecked_optional<I>(dfs: I) -> Option<DataFrame>
845where
846 I: IntoIterator<Item = DataFrame>,
847{
848 let mut iter = dfs.into_iter();
849 let additional = iter.size_hint().0;
850 let mut acc_df = iter.next()?;
851 acc_df.reserve_chunks(additional);
852
853 for df in iter {
854 if acc_df.width() != df.width() {
855 panic!("{}", width_mismatch(&acc_df, &df));
856 }
857
858 acc_df.vstack_mut_owned_unchecked(df);
859 }
860 Some(acc_df)
861}
862
863pub fn accumulate_dataframes_vertical_unchecked<I>(dfs: I) -> DataFrame
866where
867 I: IntoIterator<Item = DataFrame>,
868{
869 let mut iter = dfs.into_iter();
870 let additional = iter.size_hint().0;
871 let mut acc_df = iter.next().unwrap();
872 acc_df.reserve_chunks(additional);
873
874 for df in iter {
875 if acc_df.width() != df.width() {
876 panic!("{}", width_mismatch(&acc_df, &df));
877 }
878
879 acc_df.vstack_mut_owned_unchecked(df);
880 }
881 acc_df
882}
883
884pub fn accumulate_dataframes_vertical<I>(dfs: I) -> PolarsResult<DataFrame>
888where
889 I: IntoIterator<Item = DataFrame>,
890{
891 let mut iter = dfs.into_iter();
892 let additional = iter.size_hint().0;
893 let mut acc_df = iter.next().unwrap();
894 acc_df.reserve_chunks(additional);
895 for df in iter {
896 if acc_df.width() != df.width() {
897 return Err(width_mismatch(&acc_df, &df));
898 }
899
900 acc_df.vstack_mut_owned(df)?;
901 }
902
903 Ok(acc_df)
904}
905
906pub fn concat_df<'a, I>(dfs: I) -> PolarsResult<DataFrame>
908where
909 I: IntoIterator<Item = &'a DataFrame>,
910{
911 let mut iter = dfs.into_iter();
912 let additional = iter.size_hint().0;
913 let mut acc_df = iter.next().unwrap().clone();
914 acc_df.reserve_chunks(additional);
915 for df in iter {
916 acc_df.vstack_mut(df)?;
917 }
918 Ok(acc_df)
919}
920
921pub fn concat_df_unchecked<'a, I>(dfs: I) -> DataFrame
923where
924 I: IntoIterator<Item = &'a DataFrame>,
925{
926 let mut iter = dfs.into_iter();
927 let additional = iter.size_hint().0;
928 let mut acc_df = iter.next().unwrap().clone();
929 acc_df.reserve_chunks(additional);
930 for df in iter {
931 acc_df.vstack_mut_unchecked(df);
932 }
933 acc_df
934}
935
936pub fn accumulate_dataframes_horizontal(dfs: Vec<DataFrame>) -> PolarsResult<DataFrame> {
937 let mut iter = dfs.into_iter();
938 let mut acc_df = iter.next().unwrap();
939 for df in iter {
940 acc_df.hstack_mut(df.columns())?;
941 }
942 Ok(acc_df)
943}
944
/// Returns `left` and `right` with matching chunk layouts, borrowing the
/// inputs whenever they are already aligned.
///
/// The arms are tried in order:
/// 1. both have one chunk: borrowed as-is;
/// 2. same chunk count and pairwise equal chunk lengths: borrowed as-is;
/// 3. `right` has one chunk: `right` is re-chunked to `left`'s lengths;
/// 4. `left` has one chunk: `left` is re-chunked to `right`'s lengths;
/// 5. otherwise `left` is rechunked and then matched to `right`'s lengths.
///
/// # Panics
///
/// Arms 3-5 assert that both arrays have the same length. Arms 1 and 2 do
/// not perform that check.
pub fn align_chunks_binary<'a, T, B>(
    left: &'a ChunkedArray<T>,
    right: &'a ChunkedArray<B>,
) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<B>>)
where
    B: PolarsDataType,
    T: PolarsDataType,
{
    // Length check, only invoked by the arms that actually re-chunk.
    let assert = || {
        assert_eq!(
            left.len(),
            right.len(),
            "expected arrays of the same length"
        )
    };
    match (left.chunks.len(), right.chunks.len()) {
        // Single chunk on both sides: nothing to align.
        (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
        // Same number of chunks with identical lengths: already aligned.
        (a, b)
            if a == b
                && left
                    .chunk_lengths()
                    .zip(right.chunk_lengths())
                    .all(|(l, r)| l == r) =>
        {
            (Cow::Borrowed(left), Cow::Borrowed(right))
        },
        (_, 1) => {
            assert();
            (
                Cow::Borrowed(left),
                Cow::Owned(right.match_chunks(left.chunk_lengths())),
            )
        },
        (1, _) => {
            assert();
            (
                Cow::Owned(left.match_chunks(right.chunk_lengths())),
                Cow::Borrowed(right),
            )
        },
        (_, _) => {
            assert();
            // Collapse `left` first, then cut it along `right`'s chunks.
            let left = left.rechunk();
            (
                Cow::Owned(left.match_chunks(right.chunk_lengths())),
                Cow::Borrowed(right),
            )
        },
    }
}
1001
/// Aligns the chunk layout of a `ChunkedArray` and a `Series`, borrowing the
/// inputs whenever they are already aligned.
///
/// The arms are tried in order:
/// 1. both have one chunk: borrowed as-is;
/// 2. same chunk count and pairwise equal chunk lengths: borrowed as-is;
/// 3. `right` has one chunk: `left` is rechunked;
/// 4. `left` has one chunk: `right` is rechunked;
/// 5. otherwise both sides are rechunked.
///
/// # Panics
///
/// Arms 2 and 5 assert that both inputs have the same length. Arms 1, 3 and
/// 4 do not perform that check.
pub fn align_chunks_binary_ca_series<'a, T>(
    left: &'a ChunkedArray<T>,
    right: &'a Series,
) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, Series>)
where
    T: PolarsDataType,
{
    // Length check, only invoked by some of the arms below.
    let assert = || {
        assert_eq!(
            left.len(),
            right.len(),
            "expected arrays of the same length"
        )
    };
    match (left.chunks.len(), right.chunks().len()) {
        // Single chunk on both sides: nothing to align.
        (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
        // Same number of chunks with identical lengths: already aligned.
        (a, b)
            if a == b
                && left
                    .chunk_lengths()
                    .zip(right.chunk_lengths())
                    .all(|(l, r)| l == r) =>
        {
            assert();
            (Cow::Borrowed(left), Cow::Borrowed(right))
        },
        // `left.rechunk()` is used directly as the `Cow` for the left side.
        (_, 1) => (left.rechunk(), Cow::Borrowed(right)),
        (1, _) => (Cow::Borrowed(left), Cow::Owned(right.rechunk())),
        (_, _) => {
            assert();
            (left.rechunk(), Cow::Owned(right.rechunk()))
        },
    }
}
1041
/// Aligns the chunk layout of two owned `Series`.
///
/// The inputs are returned untouched when both have a single chunk, or when
/// they have the same number of chunks with pairwise equal lengths.
/// Otherwise every side that has more than one chunk is rechunked.
#[cfg(feature = "performant")]
pub(crate) fn align_chunks_binary_owned_series(left: Series, right: Series) -> (Series, Series) {
    let n_left = left.chunks().len();
    let n_right = right.chunks().len();

    let already_aligned = (n_left == 1 && n_right == 1)
        || (n_left == n_right
            && left
                .chunk_lengths()
                .zip(right.chunk_lengths())
                .all(|(l, r)| l == r));
    if already_aligned {
        return (left, right);
    }

    // From here on at least one side has more than one chunk.
    if n_right == 1 {
        (left.rechunk(), right)
    } else if n_left == 1 {
        (left, right.rechunk())
    } else {
        (left.rechunk(), right.rechunk())
    }
}
1061
1062pub(crate) fn align_chunks_binary_owned<T, B>(
1063 left: ChunkedArray<T>,
1064 right: ChunkedArray<B>,
1065) -> (ChunkedArray<T>, ChunkedArray<B>)
1066where
1067 B: PolarsDataType,
1068 T: PolarsDataType,
1069{
1070 match (left.chunks.len(), right.chunks.len()) {
1071 (1, 1) => (left, right),
1072 (a, b)
1074 if a == b
1075 && left
1076 .chunk_lengths()
1077 .zip(right.chunk_lengths())
1078 .all(|(l, r)| l == r) =>
1079 {
1080 (left, right)
1081 },
1082 (_, 1) => (left.rechunk().into_owned(), right),
1083 (1, _) => (left, right.rechunk().into_owned()),
1084 (_, _) => (left.rechunk().into_owned(), right.rechunk().into_owned()),
1085 }
1086}
1087
/// Returns `a`, `b` and `c` with matching chunk layouts, borrowing the
/// inputs whenever possible.
///
/// When all three have a single chunk they are borrowed without any length
/// check. Otherwise the arms of the `match` are tried in order; in each arm
/// one input is chosen as the layout reference (and borrowed), and the
/// others are re-chunked to its chunk lengths via `match_chunks`, after a
/// `rechunk()` when they themselves have several chunks.
///
/// # Panics
///
/// Panics if the three arrays do not have the same length, unless all three
/// consist of a single chunk (that case returns before the check).
#[allow(clippy::type_complexity)]
pub fn align_chunks_ternary<'a, A, B, C>(
    a: &'a ChunkedArray<A>,
    b: &'a ChunkedArray<B>,
    c: &'a ChunkedArray<C>,
) -> (
    Cow<'a, ChunkedArray<A>>,
    Cow<'a, ChunkedArray<B>>,
    Cow<'a, ChunkedArray<C>>,
)
where
    A: PolarsDataType,
    B: PolarsDataType,
    C: PolarsDataType,
{
    // Fast path: nothing to align.
    if a.chunks.len() == 1 && b.chunks.len() == 1 && c.chunks.len() == 1 {
        return (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c));
    }

    assert!(
        a.len() == b.len() && b.len() == c.len(),
        "expected arrays of the same length"
    );

    match (a.chunks.len(), b.chunks.len(), c.chunks.len()) {
        // Exactly one input is multi-chunk: it is the layout reference.
        (_, 1, 1) => (
            Cow::Borrowed(a),
            Cow::Owned(b.match_chunks(a.chunk_lengths())),
            Cow::Owned(c.match_chunks(a.chunk_lengths())),
        ),
        (1, 1, _) => (
            Cow::Owned(a.match_chunks(c.chunk_lengths())),
            Cow::Owned(b.match_chunks(c.chunk_lengths())),
            Cow::Borrowed(c),
        ),
        (1, _, 1) => (
            Cow::Owned(a.match_chunks(b.chunk_lengths())),
            Cow::Borrowed(b),
            Cow::Owned(c.match_chunks(b.chunk_lengths())),
        ),
        // Two inputs are multi-chunk: one of them is rechunked first and then
        // cut along the reference layout.
        (1, _, _) => {
            let b = b.rechunk();
            (
                Cow::Owned(a.match_chunks(c.chunk_lengths())),
                Cow::Owned(b.match_chunks(c.chunk_lengths())),
                Cow::Borrowed(c),
            )
        },
        (_, 1, _) => {
            let a = a.rechunk();
            (
                Cow::Owned(a.match_chunks(c.chunk_lengths())),
                Cow::Owned(b.match_chunks(c.chunk_lengths())),
                Cow::Borrowed(c),
            )
        },
        (_, _, 1) => {
            let b = b.rechunk();
            (
                Cow::Borrowed(a),
                Cow::Owned(b.match_chunks(a.chunk_lengths())),
                Cow::Owned(c.match_chunks(a.chunk_lengths())),
            )
        },
        // All three are multi-chunk but already share the same layout.
        (len_a, len_b, len_c)
            if len_a == len_b
                && len_b == len_c
                && a.chunk_lengths()
                    .zip(b.chunk_lengths())
                    .zip(c.chunk_lengths())
                    .all(|((a, b), c)| a == b && b == c) =>
        {
            (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c))
        },
        // All three are multi-chunk with differing layouts: `c` is the
        // reference.
        _ => {
            let a = a.rechunk();
            let b = b.rechunk();
            (
                Cow::Owned(a.match_chunks(c.chunk_lengths())),
                Cow::Owned(b.match_chunks(c.chunk_lengths())),
                Cow::Borrowed(c),
            )
        },
    }
}
1176
1177pub fn binary_concatenate_validities<'a, T, B>(
1178 left: &'a ChunkedArray<T>,
1179 right: &'a ChunkedArray<B>,
1180) -> Option<Bitmap>
1181where
1182 B: PolarsDataType,
1183 T: PolarsDataType,
1184{
1185 let (left, right) = align_chunks_binary(left, right);
1186 let left_validity = concatenate_validities(left.chunks());
1187 let right_validity = concatenate_validities(right.chunks());
1188 combine_validities_and(left_validity.as_ref(), right_validity.as_ref())
1189}
1190
1191pub trait IntoVec<T> {
1193 fn into_vec(self) -> Vec<T>;
1194}
1195
1196impl<I, S> IntoVec<PlSmallStr> for I
1197where
1198 I: IntoIterator<Item = S>,
1199 S: Into<PlSmallStr>,
1200{
1201 fn into_vec(self) -> Vec<PlSmallStr> {
1202 self.into_iter().map(|s| s.into()).collect()
1203 }
1204}
1205
1206#[inline]
1211pub(crate) fn index_to_chunked_index<
1212 I: Iterator<Item = Idx>,
1213 Idx: PartialOrd + std::ops::AddAssign + std::ops::SubAssign + Zero + One,
1214>(
1215 chunk_lens: I,
1216 index: Idx,
1217) -> (Idx, Idx) {
1218 let mut index_remainder = index;
1219 let mut current_chunk_idx = Zero::zero();
1220
1221 for chunk_len in chunk_lens {
1222 if chunk_len > index_remainder {
1223 break;
1224 } else {
1225 index_remainder -= chunk_len;
1226 current_chunk_idx += One::one();
1227 }
1228 }
1229 (current_chunk_idx, index_remainder)
1230}
1231
1232pub(crate) fn index_to_chunked_index_rev<
1233 I: Iterator<Item = Idx>,
1234 Idx: PartialOrd
1235 + std::ops::AddAssign
1236 + std::ops::SubAssign
1237 + std::ops::Sub<Output = Idx>
1238 + Zero
1239 + One
1240 + Copy
1241 + std::fmt::Debug,
1242>(
1243 chunk_lens_rev: I,
1244 index_from_back: Idx,
1245 total_chunks: Idx,
1246) -> (Idx, Idx) {
1247 debug_assert!(index_from_back > Zero::zero(), "at least -1");
1248 let mut index_remainder = index_from_back;
1249 let mut current_chunk_idx = One::one();
1250 let mut current_chunk_len = Zero::zero();
1251
1252 for chunk_len in chunk_lens_rev {
1253 current_chunk_len = chunk_len;
1254 if chunk_len >= index_remainder {
1255 break;
1256 } else {
1257 index_remainder -= chunk_len;
1258 current_chunk_idx += One::one();
1259 }
1260 }
1261 (
1262 total_chunks - current_chunk_idx,
1263 current_chunk_len - index_remainder,
1264 )
1265}
1266
1267pub fn first_null<'a, I>(iter: I) -> Option<usize>
1268where
1269 I: Iterator<Item = &'a dyn Array>,
1270{
1271 let mut offset = 0;
1272 for arr in iter {
1273 if let Some(mask) = arr.validity() {
1274 let len_mask = mask.len();
1275 let n = mask.leading_ones();
1276 if n < len_mask {
1277 return Some(offset + n);
1278 }
1279 offset += len_mask
1280 } else {
1281 offset += arr.len();
1282 }
1283 }
1284 None
1285}
1286
1287pub fn first_non_null<'a, I>(iter: I) -> Option<usize>
1288where
1289 I: Iterator<Item = &'a dyn Array>,
1290{
1291 let mut offset = 0;
1292 for arr in iter {
1293 if let Some(mask) = arr.validity() {
1294 let len_mask = mask.len();
1295 let n = mask.leading_zeros();
1296 if n < len_mask {
1297 return Some(offset + n);
1298 }
1299 offset += len_mask
1300 } else if !arr.is_empty() {
1301 return Some(offset);
1302 }
1303 }
1304 None
1305}
1306
1307pub fn last_non_null<'a, I>(iter: I, len: usize) -> Option<usize>
1308where
1309 I: DoubleEndedIterator<Item = &'a dyn Array>,
1310{
1311 if len == 0 {
1312 return None;
1313 }
1314 let mut offset = 0;
1315 for arr in iter.rev() {
1316 if let Some(mask) = arr.validity() {
1317 let len_mask = mask.len();
1318 let n = mask.trailing_zeros();
1319 if n < len_mask {
1320 return Some(len - offset - n - 1);
1321 }
1322 offset += len_mask;
1323 } else if !arr.is_empty() {
1324 return Some(len - offset - 1);
1325 }
1326 }
1327 None
1328}
1329
1330pub fn coalesce_nulls<'a, T: PolarsDataType>(
1332 a: &'a ChunkedArray<T>,
1333 b: &'a ChunkedArray<T>,
1334) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<T>>) {
1335 if a.null_count() > 0 || b.null_count() > 0 {
1336 let (a, b) = align_chunks_binary(a, b);
1337 let mut b = b.into_owned();
1338 let a = a.coalesce_nulls(b.chunks());
1339
1340 for arr in a.chunks().iter() {
1341 for arr_b in unsafe { b.chunks_mut() } {
1342 *arr_b = arr_b.with_validity(arr.validity().cloned())
1343 }
1344 }
1345 b.compute_len();
1346 (Cow::Owned(a), Cow::Owned(b))
1347 } else {
1348 (Cow::Borrowed(a), Cow::Borrowed(b))
1349 }
1350}
1351
1352pub fn coalesce_nulls_columns(a: &Column, b: &Column) -> (Column, Column) {
1353 if a.null_count() > 0 || b.null_count() > 0 {
1354 let mut a = a.as_materialized_series().rechunk();
1355 let mut b = b.as_materialized_series().rechunk();
1356 for (arr_a, arr_b) in unsafe { a.chunks_mut().iter_mut().zip(b.chunks_mut()) } {
1357 let validity = match (arr_a.validity(), arr_b.validity()) {
1358 (None, Some(b)) => Some(b.clone()),
1359 (Some(a), Some(b)) => Some(a & b),
1360 (Some(a), None) => Some(a.clone()),
1361 (None, None) => None,
1362 };
1363 *arr_a = arr_a.with_validity(validity.clone());
1364 *arr_b = arr_b.with_validity(validity);
1365 }
1366 a.compute_len();
1367 b.compute_len();
1368 (a.into(), b.into())
1369 } else {
1370 (a.clone(), b.clone())
1371 }
1372}
1373
#[cfg(test)]
mod test {
    use super::*;

    /// Splitting ten values three ways must give parts of length 3, 3 and 4.
    #[test]
    fn test_split() {
        let ca: Int32Chunked = (0..10).collect_ca("a".into());
        let parts = split(&ca, 3);

        for (idx, expected_len) in [3, 3, 4].iter().enumerate() {
            assert_eq!(parts[idx].len(), *expected_len);
        }
    }

    /// After `align_chunks_binary` both sides must report identical chunk lengths.
    #[test]
    fn test_align_chunks() -> PolarsResult<()> {
        // Four values in one array against one value with three more appended.
        let whole = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut pieces = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let tail = Int32Chunked::new(PlSmallStr::EMPTY, &[2, 3, 4]);
        pieces.append(&tail)?;

        let (lhs, rhs) = align_chunks_binary(&whole, &pieces);
        assert_eq!(
            lhs.chunk_lengths().collect::<Vec<_>>(),
            rhs.chunk_lengths().collect::<Vec<_>>()
        );

        // Four values in one array against a one-value array appended to itself three times.
        let whole = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut pieces = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let unit = pieces.clone();
        for _ in 0..3 {
            pieces.append(&unit)?;
        }

        let (lhs, rhs) = align_chunks_binary(&whole, &pieces);
        assert_eq!(
            lhs.chunk_lengths().collect::<Vec<_>>(),
            rhs.chunk_lengths().collect::<Vec<_>>()
        );

        Ok(())
    }
}