polars_core/utils/
mod.rs

1mod any_value;
2use arrow::compute::concatenate::concatenate_validities;
3use arrow::compute::utils::combine_validities_and;
4pub mod flatten;
5pub(crate) mod series;
6mod supertype;
7use std::borrow::Cow;
8use std::ops::{Deref, DerefMut};
9mod schema;
10
11pub use any_value::*;
12use arrow::bitmap::Bitmap;
13pub use arrow::legacy::utils::*;
14pub use arrow::trusted_len::TrustMyLength;
15use flatten::*;
16use num_traits::{One, Zero};
17use rayon::prelude::*;
18pub use schema::*;
19pub use series::*;
20pub use supertype::*;
21pub use {arrow, rayon};
22
23use crate::POOL;
24use crate::prelude::*;
25
/// Transparent newtype around a `T`.
///
/// The wrapped value is reachable through the public field `0` and through `Deref`.
#[repr(transparent)]
pub struct Wrap<T>(pub T);

impl<T> Deref for Wrap<T> {
    type Target = T;

    fn deref(&self) -> &T {
        let Wrap(inner) = self;
        inner
    }
}
35
/// Returns the current number of threads of the global `POOL`.
///
/// NOTE(review): judging by the name this is presumably used as the default number of
/// partitions for parallel work — confirm against the callers.
#[inline(always)]
pub fn _set_partition_size() -> usize {
    POOL.current_num_threads()
}
40
/// Thin wrapper type that exists to enable certain impl specializations.
///
/// For example, it is used to implement
/// `impl<T> FromIterator<T::Native> for NoNull<ChunkedArray<T>>`,
/// because the `Option<T::Native>` flavour was already taken by
/// `impl<T> FromIterator<Option<T::Native>> for ChunkedArray<T>`.
pub struct NoNull<T> {
    inner: T,
}

impl<T> NoNull<T> {
    /// Wraps `inner`.
    pub fn new(inner: T) -> Self {
        Self { inner }
    }

    /// Consumes the wrapper and hands back the wrapped value.
    pub fn into_inner(self) -> T {
        let Self { inner } = self;
        inner
    }
}

impl<T> Deref for NoNull<T> {
    type Target = T;

    fn deref(&self) -> &T {
        let Self { inner } = self;
        inner
    }
}

impl<T> DerefMut for NoNull<T> {
    fn deref_mut(&mut self) -> &mut T {
        let Self { inner } = self;
        inner
    }
}
74
/// Estimates how many items `iter` will yield, based on its `size_hint`.
///
/// The upper bound wins when there is one. Without an upper bound the lower bound is
/// used, except that a lower bound of `0` falls back to `1024`.
pub(crate) fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
    let (lower, upper) = iter.size_hint();
    upper.unwrap_or(if lower == 0 { 1024 } else { lower })
}
82
/// Computes `n` contiguous `(offset, length)` partitions that together cover `0..len`.
///
/// Prefer this one over `split_ca`, as this can push the null_count into the thread pool.
///
/// Every partition has `len / n` elements except the last one, which also takes the
/// remainder. `n == 0` is treated like `n == 1` (a single partition covering everything),
/// in line with `split_df`, which also does not split for a target of `0`.
#[doc(hidden)]
pub fn _split_offsets(len: usize, n: usize) -> Vec<(usize, usize)> {
    // `n == 0` would otherwise divide by zero below.
    if n <= 1 {
        return vec![(0, len)];
    }
    let chunk_size = len / n;
    let last = n - 1;

    (0..n)
        .map(|partition| {
            let offset = partition * chunk_size;
            // The last partition absorbs whatever the integer division left over.
            let part_len = if partition == last {
                len - offset
            } else {
                chunk_size
            };
            (offset, part_len)
        })
        .collect()
}
105
/// Common interface over chunked containers, implemented in this file for `DataFrame`,
/// `ChunkedArray` and `Series`, and consumed by the `split*` helpers.
#[allow(clippy::len_without_is_empty)]
pub trait Container: Clone {
    /// Returns the part of the container described by `offset` and `len`.
    /// NOTE(review): negative offsets presumably count from the end — confirm against
    /// the implementors.
    fn slice(&self, offset: i64, len: usize) -> Self;

    /// Splits the container in two parts at `offset`.
    fn split_at(&self, offset: i64) -> (Self, Self);

    /// Number of elements in the container (the height for a `DataFrame`).
    fn len(&self) -> usize;

    /// Iterates over the chunks, yielding every chunk as a container of its own.
    fn iter_chunks(&self) -> impl Iterator<Item = Self>;

    /// Whether the container reports that it should be rechunked. The `split*` helpers
    /// only hand out the existing chunks as-is when this returns `false`.
    fn should_rechunk(&self) -> bool;

    /// Number of chunks the container consists of.
    fn n_chunks(&self) -> usize;

    /// Iterates over the length of every chunk.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize>;
}
122
123impl Container for DataFrame {
124    fn slice(&self, offset: i64, len: usize) -> Self {
125        DataFrame::slice(self, offset, len)
126    }
127
128    fn split_at(&self, offset: i64) -> (Self, Self) {
129        DataFrame::split_at(self, offset)
130    }
131
132    fn len(&self) -> usize {
133        self.height()
134    }
135
136    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
137        flatten_df_iter(self)
138    }
139
140    fn should_rechunk(&self) -> bool {
141        self.should_rechunk()
142    }
143
144    fn n_chunks(&self) -> usize {
145        DataFrame::first_col_n_chunks(self)
146    }
147
148    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
149        // @scalar-correctness?
150        self.columns[0].as_materialized_series().chunk_lengths()
151    }
152}
153
/// [`Container`] implementation for `ChunkedArray`; the calls are written in path form
/// (`ChunkedArray::method(self, ..)`) and forward to the array's own methods.
impl<T: PolarsDataType> Container for ChunkedArray<T> {
    fn slice(&self, offset: i64, len: usize) -> Self {
        ChunkedArray::slice(self, offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        ChunkedArray::split_at(self, offset)
    }

    fn len(&self) -> usize {
        ChunkedArray::len(self)
    }

    /// Builds one new array per downcast chunk via `with_chunk`, reusing this array's name.
    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        self.downcast_iter()
            .map(|arr| Self::with_chunk(self.name().clone(), arr.clone()))
    }

    /// Always `false` for a `ChunkedArray`.
    fn should_rechunk(&self) -> bool {
        false
    }

    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        ChunkedArray::chunk_lengths(self)
    }
}
184
/// [`Container`] implementation for `Series`; most calls are forwarded to the inner
/// value `self.0`.
impl Container for Series {
    fn slice(&self, offset: i64, len: usize) -> Self {
        self.0.slice(offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        self.0.split_at(offset)
    }

    fn len(&self) -> usize {
        self.0.len()
    }

    /// Yields `select_chunk(i)` for every chunk index `i`.
    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        (0..self.0.n_chunks()).map(|i| self.select_chunk(i))
    }

    /// Always `false` for a `Series`.
    fn should_rechunk(&self) -> bool {
        false
    }

    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        self.0.chunk_lengths()
    }
}
214
215fn split_impl<C: Container>(container: &C, target: usize, chunk_size: usize) -> Vec<C> {
216    if target == 1 {
217        return vec![container.clone()];
218    }
219    let mut out = Vec::with_capacity(target);
220    let chunk_size = chunk_size as i64;
221
222    // First split
223    let (chunk, mut remainder) = container.split_at(chunk_size);
224    out.push(chunk);
225
226    // Take the rest of the splits of exactly chunk size, but skip the last remainder as we won't split that.
227    for _ in 1..target - 1 {
228        let (a, b) = remainder.split_at(chunk_size);
229        out.push(a);
230        remainder = b
231    }
232    // This can be slightly larger than `chunk_size`, but is smaller than `2 * chunk_size`.
233    out.push(remainder);
234    out
235}
236
237/// Splits, but doesn't flatten chunks. E.g. a container can still have multiple chunks.
238pub fn split<C: Container>(container: &C, target: usize) -> Vec<C> {
239    let total_len = container.len();
240    if total_len == 0 {
241        return vec![container.clone()];
242    }
243
244    let chunk_size = std::cmp::max(total_len / target, 1);
245
246    if container.n_chunks() == target
247        && container
248            .chunk_lengths()
249            .all(|len| len.abs_diff(chunk_size) < 100)
250        // We cannot get chunks if they are misaligned
251        && !container.should_rechunk()
252    {
253        return container.iter_chunks().collect();
254    }
255    split_impl(container, target, chunk_size)
256}
257
258/// Split a [`Container`] in `target` elements. The target doesn't have to be respected if not
259/// Deviation of the target might be done to create more equal size chunks.
260pub fn split_and_flatten<C: Container>(container: &C, target: usize) -> Vec<C> {
261    let total_len = container.len();
262    if total_len == 0 {
263        return vec![container.clone()];
264    }
265
266    let chunk_size = std::cmp::max(total_len / target, 1);
267
268    if container.n_chunks() == target
269        && container
270            .chunk_lengths()
271            .all(|len| len.abs_diff(chunk_size) < 100)
272        // We cannot get chunks if they are misaligned
273        && !container.should_rechunk()
274    {
275        return container.iter_chunks().collect();
276    }
277
278    if container.n_chunks() == 1 {
279        split_impl(container, target, chunk_size)
280    } else {
281        let mut out = Vec::with_capacity(target);
282        let chunks = container.iter_chunks();
283
284        'new_chunk: for mut chunk in chunks {
285            loop {
286                let h = chunk.len();
287                if h < chunk_size {
288                    // TODO if the chunk is much smaller than chunk size, we should try to merge it with the next one.
289                    out.push(chunk);
290                    continue 'new_chunk;
291                }
292
293                // If a split leads to the next chunk being smaller than 30% take the whole chunk
294                if ((h - chunk_size) as f64 / chunk_size as f64) < 0.3 {
295                    out.push(chunk);
296                    continue 'new_chunk;
297                }
298
299                let (a, b) = chunk.split_at(chunk_size as i64);
300                out.push(a);
301                chunk = b;
302            }
303        }
304        out
305    }
306}
307
308/// Split a [`DataFrame`] in `target` elements. The target doesn't have to be respected if not
309/// strict. Deviation of the target might be done to create more equal size chunks.
310///
311/// # Panics
312/// if chunks are not aligned
313pub fn split_df_as_ref(df: &DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
314    if strict {
315        split(df, target)
316    } else {
317        split_and_flatten(df, target)
318    }
319}
320
321#[doc(hidden)]
322/// Split a [`DataFrame`] into `n` parts. We take a `&mut` to be able to repartition/align chunks.
323/// `strict` in that it respects `n` even if the chunks are suboptimal.
324pub fn split_df(df: &mut DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
325    if target == 0 || df.is_empty() {
326        return vec![df.clone()];
327    }
328    // make sure that chunks are aligned.
329    df.align_chunks_par();
330    split_df_as_ref(df, target, strict)
331}
332
333pub fn slice_slice<T>(vals: &[T], offset: i64, len: usize) -> &[T] {
334    let (raw_offset, slice_len) = slice_offsets(offset, len, vals.len());
335    &vals[raw_offset..raw_offset + slice_len]
336}
337
/// Resolves a possibly negative `offset` and a `length` against an array of `array_len`
/// elements, returning the in-bounds `(start_index, slice_length)`.
///
/// A negative `offset` counts from the end of the array. Both ends of the requested range
/// are clamped to `0..=array_len`, so the result never reaches out of bounds.
///
/// # Panics
/// Panics if `array_len` does not fit in an `i64`.
#[inline]
pub fn slice_offsets(offset: i64, length: usize, array_len: usize) -> (usize, usize) {
    // Negative offsets are relative to the end of the array.
    let start = match offset {
        negative if negative < 0 => negative.saturating_add_unsigned(array_len as u64),
        non_negative => non_negative,
    };
    let stop = start.saturating_add_unsigned(length as u64);

    let upper: i64 = array_len
        .try_into()
        .expect("array length larger than i64::MAX");
    let start = start.clamp(0, upper);
    let stop = stop.clamp(0, upper);

    (start as usize, (stop - start) as usize)
}
357
/// Dispatches on a `DataType` value and invokes one of the given macros.
///
/// * `DataType::String` invokes `$macro_string!` and `DataType::Boolean` invokes
///   `$macro_bool!`, each with the optional arguments only.
/// * The numeric dtypes invoke `$macro!` with the matching Rust primitive (`u8` … `f64`)
///   as first argument, followed by the optional arguments.
///
/// The optional arguments are forwarded comma-separated to every macro. The arms for the
/// 8/16-bit and 128-bit integers only exist when the corresponding `dtype-*` feature is
/// enabled.
///
/// # Panics
/// The expansion panics at runtime for every other dtype.
#[macro_export]
macro_rules! match_dtype_to_physical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            // Comma-separate the forwarded arguments, exactly like the numeric arms do;
            // without separators two or more arguments would be glued together.
            DataType::String => $macro_string!($($opt_args),*),
            DataType::Boolean => $macro_bool!($($opt_args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(u8 $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(u16 $(, $opt_args)*),
            DataType::UInt32 => $macro!(u32 $(, $opt_args)*),
            DataType::UInt64 => $macro!(u64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(i8 $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(i16 $(, $opt_args)*),
            DataType::Int32 => $macro!(i32 $(, $opt_args)*),
            DataType::Int64 => $macro!(i64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(i128 $(, $opt_args)*),
            DataType::Float32 => $macro!(f32 $(, $opt_args)*),
            DataType::Float64 => $macro!(f64 $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
385
/// Dispatches on a `DataType` value and invokes one of the given macros.
///
/// * `DataType::String`, `DataType::Binary` and `DataType::Boolean` invoke
///   `$macro_string!`, `$macro_binary!` and `$macro_bool!` respectively, each with the
///   optional arguments only.
/// * The numeric dtypes invoke `$macro!` with the matching polars type marker
///   (`UInt8Type` … `Float64Type`) as first argument, followed by the optional arguments.
///
/// The optional arguments are forwarded comma-separated to every macro. The arms for the
/// 8/16-bit and 128-bit integers only exist when the corresponding `dtype-*` feature is
/// enabled.
///
/// # Panics
/// The expansion panics at runtime for every other dtype.
#[macro_export]
macro_rules! match_dtype_to_logical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_binary:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            // Comma-separate the forwarded arguments, exactly like the numeric arms do;
            // without separators two or more arguments would be glued together.
            DataType::String => $macro_string!($($opt_args),*),
            DataType::Binary => $macro_binary!($($opt_args),*),
            DataType::Boolean => $macro_bool!($($opt_args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(UInt8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(UInt16Type $(, $opt_args)*),
            DataType::UInt32 => $macro!(UInt32Type $(, $opt_args)*),
            DataType::UInt64 => $macro!(UInt64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(Int8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(Int16Type $(, $opt_args)*),
            DataType::Int32 => $macro!(Int32Type $(, $opt_args)*),
            DataType::Int64 => $macro!(Int64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(Int128Type $(, $opt_args)*),
            DataType::Float32 => $macro!(Float32Type $(, $opt_args)*),
            DataType::Float64 => $macro!(Float64Type $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
414
/// Apply a macro on the downcasted ChunkedArray.
///
/// Matches on `$self.dtype()`, downcasts `$self` with the accessor that belongs to that
/// dtype (`str()`, `bool()`, `u8()`, …) and invokes `$macro_string!`, `$macro_bool!` or
/// `$macro!` with the unwrapped downcast result followed by the optional arguments.
///
/// The arms for the 8/16-bit and 128-bit integers only exist when the corresponding
/// `dtype-*` feature is enabled.
///
/// # Panics
/// The expansion panics at runtime for every other dtype.
#[macro_export]
macro_rules! match_arrow_dtype_apply_macro_ca {
    ($self:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            DataType::String => $macro_string!($self.str().unwrap() $(, $opt_args)*),
            DataType::Boolean => $macro_bool!($self.bool().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($self.i128().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
442
/// Matches on a numeric `DataType` and expands the body once, with the identifier chosen
/// by the caller bound to the matching Rust primitive (`i8` … `u64`, `f32`, `f64`).
///
/// Invoked as `with_match_physical_numeric_type!(dtype, |$T| { ... })`. The arms for the
/// 8/16-bit and 128-bit integers only exist when the corresponding `dtype-*` feature is
/// enabled.
///
/// # Panics
/// The expansion panics at runtime for every other dtype.
#[macro_export]
macro_rules! with_match_physical_numeric_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Helper macro that substitutes the concrete type for the caller's `$T` in the body.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
469
/// Matches on an integer `DataType` and expands the body once, with the identifier chosen
/// by the caller bound to the matching Rust primitive (`i8` … `i128`, `u8` … `u64`).
///
/// Invoked as `with_match_physical_integer_type!(dtype, |$T| { ... })`. The arms for the
/// 8/16-bit and 128-bit integers only exist when the corresponding `dtype-*` feature is
/// enabled.
///
/// # Panics
/// The expansion panics at runtime for every other dtype, floats included.
#[macro_export]
macro_rules! with_match_physical_integer_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Helper macro that substitutes the concrete type for the caller's `$T` in the body.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
494
/// Matches on a float `DataType` and expands the body once, with the identifier chosen by
/// the caller bound to `f32` or `f64`.
///
/// Invoked as `with_match_physical_float_type!(dtype, |$T| { ... })`.
///
/// # Panics
/// The expansion panics at runtime for every dtype other than `Float32` and `Float64`.
#[macro_export]
macro_rules! with_match_physical_float_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Helper macro that substitutes the concrete type for the caller's `$T` in the body.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
507
/// Matches on a float `DataType` and expands the body once, with the identifier chosen by
/// the caller bound to the polars type marker `Float32Type` or `Float64Type`.
///
/// Invoked as `with_match_physical_float_polars_type!(dtype, |$T| { ... })`. The macro
/// only imports the `DataType` variants, so the type markers have to be in scope at the
/// call site.
///
/// # Panics
/// The expansion panics at runtime for every dtype other than `Float32` and `Float64`.
#[macro_export]
macro_rules! with_match_physical_float_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Helper macro that substitutes the concrete type for the caller's `$T` in the body.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
520
/// Matches on a numeric `DataType` and expands the body once, with the identifier chosen
/// by the caller bound to the matching polars type marker (`Int8Type` … `Float64Type`).
///
/// Invoked as `with_match_physical_numeric_polars_type!(dtype, |$T| { ... })`. The macro
/// only imports the `DataType` variants, so the type markers have to be in scope at the
/// call site. The arms for the 8/16-bit and 128-bit integers only exist when the
/// corresponding `dtype-*` feature is enabled.
///
/// # Panics
/// The expansion panics at runtime for every other dtype.
#[macro_export]
macro_rules! with_match_physical_numeric_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Helper macro that substitutes the concrete type for the caller's `$T` in the body.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
            #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
            #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
            #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
            #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
            #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
547
/// Matches on an integer `DataType` and expands the body once, with the identifier chosen
/// by the caller bound to the matching polars type marker (`Int8Type` … `UInt64Type`).
///
/// Invoked as `with_match_physical_integer_polars_type!(dtype, |$T| { ... })`. The
/// expansion imports `$crate::datatypes::*` itself, so the type markers do not have to be
/// in scope at the call site. The arms for the 8/16-bit and 128-bit integers only exist
/// when the corresponding `dtype-*` feature is enabled.
///
/// # Panics
/// The expansion panics at runtime for every other dtype, floats included.
#[macro_export]
macro_rules! with_match_physical_integer_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Helper macro that substitutes the concrete type for the caller's `$T` in the body.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    use $crate::datatypes::*;
    match $key_type {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
573
/// Matches on a `CategoricalPhysical` value and expands the body once, with the identifier
/// chosen by the caller bound to `Categorical8Type`, `Categorical16Type` or
/// `Categorical32Type`.
///
/// Invoked as `with_match_categorical_physical_type!(physical, |$T| { ... })`. The macro
/// imports nothing, so `CategoricalPhysical` and the type markers have to be in scope at
/// the call site. The match has no fallback arm.
#[macro_export]
macro_rules! with_match_categorical_physical_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Helper macro that substitutes the concrete type for the caller's `$T` in the body.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    match $dtype {
        CategoricalPhysical::U8 => __with_ty__! { Categorical8Type },
        CategoricalPhysical::U16 => __with_ty__! { Categorical16Type },
        CategoricalPhysical::U32 => __with_ty__! { Categorical32Type },
    }
})}
585
/// Apply a macro on the downcasted ChunkedArray of a physical numeric dtype.
/// Logical dtypes are not handled.
///
/// Matches on `$self.dtype()`, downcasts `$self` with the accessor that belongs to that
/// dtype (`u8()`, `i32()`, `f64()`, …) and invokes `$macro!` with the unwrapped downcast
/// result followed by the optional arguments. The arms for the 8/16-bit and 128-bit
/// integers only exist when the corresponding `dtype-*` feature is enabled.
///
/// # Panics
/// The expansion panics at runtime for every other dtype.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($self.i128().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
612
/// Apply a macro on the mutably downcasted ChunkedArray of a physical numeric dtype.
/// Logical dtypes are not handled.
///
/// Matches on a clone of `$self.dtype()`, obtains a `&mut` reference to the matching
/// `*Chunked` type through `$self.as_mut()` and invokes `$macro!` with the polars type
/// marker (e.g. `UInt8Type`), that reference (named `ca`) and the optional arguments. The
/// arms for the 8/16-bit and 128-bit integers only exist when the corresponding `dtype-*`
/// feature is enabled.
///
/// # Panics
/// The expansion panics at runtime for every other dtype.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical_mut {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        // clone so that we do not borrow
        match $self.dtype().clone() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => {
                let ca: &mut UInt8Chunked = $self.as_mut();
                $macro!(UInt8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => {
                let ca: &mut UInt16Chunked = $self.as_mut();
                $macro!(UInt16Type, ca $(, $opt_args)*)
            },
            DataType::UInt32 => {
                let ca: &mut UInt32Chunked = $self.as_mut();
                $macro!(UInt32Type, ca $(, $opt_args)*)
            },
            DataType::UInt64 => {
                let ca: &mut UInt64Chunked = $self.as_mut();
                $macro!(UInt64Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => {
                let ca: &mut Int8Chunked = $self.as_mut();
                $macro!(Int8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => {
                let ca: &mut Int16Chunked = $self.as_mut();
                $macro!(Int16Type, ca $(, $opt_args)*)
            },
            DataType::Int32 => {
                let ca: &mut Int32Chunked = $self.as_mut();
                $macro!(Int32Type, ca $(, $opt_args)*)
            },
            DataType::Int64 => {
                let ca: &mut Int64Chunked = $self.as_mut();
                $macro!(Int64Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => {
                let ca: &mut Int128Chunked = $self.as_mut();
                $macro!(Int128Type, ca $(, $opt_args)*)
            },
            DataType::Float32 => {
                let ca: &mut Float32Chunked = $self.as_mut();
                $macro!(Float32Type, ca $(, $opt_args)*)
            },
            DataType::Float64 => {
                let ca: &mut Float64Chunked = $self.as_mut();
                $macro!(Float64Type, ca $(, $opt_args)*)
            },
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
673
/// Calls `$method($args..)` on the downcasted ChunkedArray of `$self`.
///
/// Matches on `$self.dtype()` and downcasts with the accessor that belongs to that dtype.
/// Besides boolean, string and the physical numerics this also covers `Time`, `Date`,
/// `Datetime`, `List` and `Struct`. The arms for the 8/16-bit and 128-bit integers only
/// exist when the corresponding `dtype-*` feature is enabled.
///
/// Note that the comma after `$method` is required by the pattern, even without arguments.
///
/// # Panics
/// The expansion panics at runtime for every other dtype.
#[macro_export]
macro_rules! apply_method_all_arrow_series {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Boolean => $self.bool().unwrap().$method($($args),*),
            DataType::String => $self.str().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            DataType::Time => $self.time().unwrap().$method($($args),*),
            DataType::Date => $self.date().unwrap().$method($($args),*),
            DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
            DataType::List(_) => $self.list().unwrap().$method($($args),*),
            DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
            dt => panic!("dtype {:?} not supported", dt)
        }
    }
}
705
/// Calls `$method($args..)` on the downcasted ChunkedArray of `$self`, for physical
/// integer dtypes only.
///
/// Matches on `$self.dtype()` and downcasts with the accessor that belongs to that dtype.
/// The arms for the 8/16-bit and 128-bit integers only exist when the corresponding
/// `dtype-*` feature is enabled.
///
/// Note that the comma after `$method` is required by the pattern, even without arguments.
///
/// # Panics
/// The expansion panics at runtime for every other dtype, floats included.
#[macro_export]
macro_rules! apply_method_physical_integer {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }
}
728
/// Calls `$method($args..)` on the downcasted ChunkedArray of `$self`, for physical
/// numeric dtypes. Doesn't include Bool and String.
///
/// `Float32` and `Float64` are handled here; every other dtype is delegated to
/// `apply_method_physical_integer!`, which panics at runtime for dtypes it does not know.
///
/// Note that the comma after `$method` is required by the pattern, even without arguments.
#[macro_export]
macro_rules! apply_method_physical_numeric {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            // Refer to the sibling macro through `$crate` so that users of this exported
            // macro do not have to import `apply_method_physical_integer` themselves.
            _ => $crate::apply_method_physical_integer!($self, $method, $($args),*),
        }
    }
}
740
/// Builds a `DataFrame` from `name => values` pairs.
///
/// Every pair becomes a `Series` created through `NamedFrom::new(name.into(), values)` and
/// is converted into a `Column`; the columns are then passed to `DataFrame::new`, whose
/// return value is the value of the macro invocation. At least one pair is required and a
/// trailing comma is allowed.
///
/// NOTE(review): `DataFrame::new` presumably returns a `PolarsResult<DataFrame>` — confirm.
#[macro_export]
macro_rules! df {
    ($($col_name:expr => $slice:expr), + $(,)?) => {
        $crate::prelude::DataFrame::new(vec![
            $($crate::prelude::Column::from(<$crate::prelude::Series as $crate::prelude::NamedFrom::<_, _>>::new($col_name.into(), $slice)),)+
        ])
    }
}
749
750pub fn get_time_units(tu_l: &TimeUnit, tu_r: &TimeUnit) -> TimeUnit {
751    use crate::datatypes::time_unit::TimeUnit::*;
752    match (tu_l, tu_r) {
753        (Nanoseconds, Microseconds) => Microseconds,
754        (_, Milliseconds) => Milliseconds,
755        _ => *tu_l,
756    }
757}
758
/// Builds the `SchemaMismatch` error reported when two frames of different widths are
/// stacked vertically.
///
/// The message contains both widths and, for each frame, the names collected for it by
/// `field_compare` (reported as its "additional columns"). Marked `#[cold]` and
/// `#[inline(never)]` since it is only called on the error path.
#[cold]
#[inline(never)]
fn width_mismatch(df1: &DataFrame, df2: &DataFrame) -> PolarsError {
    let mut df1_extra = Vec::new();
    let mut df2_extra = Vec::new();

    let s1 = df1.schema();
    let s2 = df2.schema();

    // Fills the two vectors with the fields reported for each side.
    s1.field_compare(s2, &mut df1_extra, &mut df2_extra);

    // Only the field names are reported, as comma-separated lists.
    let df1_extra = df1_extra
        .into_iter()
        .map(|(_, (n, _))| n.as_str())
        .collect::<Vec<_>>()
        .join(", ");
    let df2_extra = df2_extra
        .into_iter()
        .map(|(_, (n, _))| n.as_str())
        .collect::<Vec<_>>()
        .join(", ");

    polars_err!(
        SchemaMismatch: r#"unable to vstack, dataframes have different widths ({} != {}).
One dataframe has additional columns: [{df1_extra}].
Other dataframe has additional columns: [{df2_extra}]."#,
        df1.width(),
        df2.width(),
    )
}
789
790pub fn accumulate_dataframes_vertical_unchecked_optional<I>(dfs: I) -> Option<DataFrame>
791where
792    I: IntoIterator<Item = DataFrame>,
793{
794    let mut iter = dfs.into_iter();
795    let additional = iter.size_hint().0;
796    let mut acc_df = iter.next()?;
797    acc_df.reserve_chunks(additional);
798
799    for df in iter {
800        if acc_df.width() != df.width() {
801            panic!("{}", width_mismatch(&acc_df, &df));
802        }
803
804        acc_df.vstack_mut_owned_unchecked(df);
805    }
806    Some(acc_df)
807}
808
809/// This takes ownership of the DataFrame so that drop is called earlier.
810/// Does not check if schema is correct
811pub fn accumulate_dataframes_vertical_unchecked<I>(dfs: I) -> DataFrame
812where
813    I: IntoIterator<Item = DataFrame>,
814{
815    let mut iter = dfs.into_iter();
816    let additional = iter.size_hint().0;
817    let mut acc_df = iter.next().unwrap();
818    acc_df.reserve_chunks(additional);
819
820    for df in iter {
821        if acc_df.width() != df.width() {
822            panic!("{}", width_mismatch(&acc_df, &df));
823        }
824
825        acc_df.vstack_mut_owned_unchecked(df);
826    }
827    acc_df
828}
829
830/// This takes ownership of the DataFrame so that drop is called earlier.
831/// # Panics
832/// Panics if `dfs` is empty.
833pub fn accumulate_dataframes_vertical<I>(dfs: I) -> PolarsResult<DataFrame>
834where
835    I: IntoIterator<Item = DataFrame>,
836{
837    let mut iter = dfs.into_iter();
838    let additional = iter.size_hint().0;
839    let mut acc_df = iter.next().unwrap();
840    acc_df.reserve_chunks(additional);
841    for df in iter {
842        if acc_df.width() != df.width() {
843            return Err(width_mismatch(&acc_df, &df));
844        }
845
846        acc_df.vstack_mut_owned(df)?;
847    }
848
849    Ok(acc_df)
850}
851
852/// Concat the DataFrames to a single DataFrame.
853pub fn concat_df<'a, I>(dfs: I) -> PolarsResult<DataFrame>
854where
855    I: IntoIterator<Item = &'a DataFrame>,
856{
857    let mut iter = dfs.into_iter();
858    let additional = iter.size_hint().0;
859    let mut acc_df = iter.next().unwrap().clone();
860    acc_df.reserve_chunks(additional);
861    for df in iter {
862        acc_df.vstack_mut(df)?;
863    }
864    Ok(acc_df)
865}
866
867/// Concat the DataFrames to a single DataFrame.
868pub fn concat_df_unchecked<'a, I>(dfs: I) -> DataFrame
869where
870    I: IntoIterator<Item = &'a DataFrame>,
871{
872    let mut iter = dfs.into_iter();
873    let additional = iter.size_hint().0;
874    let mut acc_df = iter.next().unwrap().clone();
875    acc_df.reserve_chunks(additional);
876    for df in iter {
877        acc_df.vstack_mut_unchecked(df);
878    }
879    acc_df
880}
881
882pub fn accumulate_dataframes_horizontal(dfs: Vec<DataFrame>) -> PolarsResult<DataFrame> {
883    let mut iter = dfs.into_iter();
884    let mut acc_df = iter.next().unwrap();
885    for df in iter {
886        acc_df.hstack_mut(df.get_columns())?;
887    }
888    Ok(acc_df)
889}
890
891/// Ensure the chunks in both ChunkedArrays have the same length.
892/// # Panics
893/// This will panic if `left.len() != right.len()` and array is chunked.
894pub fn align_chunks_binary<'a, T, B>(
895    left: &'a ChunkedArray<T>,
896    right: &'a ChunkedArray<B>,
897) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<B>>)
898where
899    B: PolarsDataType,
900    T: PolarsDataType,
901{
902    let assert = || {
903        assert_eq!(
904            left.len(),
905            right.len(),
906            "expected arrays of the same length"
907        )
908    };
909    match (left.chunks.len(), right.chunks.len()) {
910        // All chunks are equal length
911        (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
912        // All chunks are equal length
913        (a, b)
914            if a == b
915                && left
916                    .chunk_lengths()
917                    .zip(right.chunk_lengths())
918                    .all(|(l, r)| l == r) =>
919        {
920            (Cow::Borrowed(left), Cow::Borrowed(right))
921        },
922        (_, 1) => {
923            assert();
924            (
925                Cow::Borrowed(left),
926                Cow::Owned(right.match_chunks(left.chunk_lengths())),
927            )
928        },
929        (1, _) => {
930            assert();
931            (
932                Cow::Owned(left.match_chunks(right.chunk_lengths())),
933                Cow::Borrowed(right),
934            )
935        },
936        (_, _) => {
937            assert();
938            // could optimize to choose to rechunk a primitive and not a string or list type
939            let left = left.rechunk();
940            (
941                Cow::Owned(left.match_chunks(right.chunk_lengths())),
942                Cow::Borrowed(right),
943            )
944        },
945    }
946}
947
948/// Ensure the chunks in ChunkedArray and Series have the same length.
949/// # Panics
950/// This will panic if `left.len() != right.len()` and array is chunked.
951pub fn align_chunks_binary_ca_series<'a, T>(
952    left: &'a ChunkedArray<T>,
953    right: &'a Series,
954) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, Series>)
955where
956    T: PolarsDataType,
957{
958    let assert = || {
959        assert_eq!(
960            left.len(),
961            right.len(),
962            "expected arrays of the same length"
963        )
964    };
965    match (left.chunks.len(), right.chunks().len()) {
966        // All chunks are equal length
967        (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
968        // All chunks are equal length
969        (a, b)
970            if a == b
971                && left
972                    .chunk_lengths()
973                    .zip(right.chunk_lengths())
974                    .all(|(l, r)| l == r) =>
975        {
976            assert();
977            (Cow::Borrowed(left), Cow::Borrowed(right))
978        },
979        (_, 1) => (left.rechunk(), Cow::Borrowed(right)),
980        (1, _) => (Cow::Borrowed(left), Cow::Owned(right.rechunk())),
981        (_, _) => {
982            assert();
983            (left.rechunk(), Cow::Owned(right.rechunk()))
984        },
985    }
986}
987
/// Owned variant of chunk alignment for two `Series`.
///
/// When both inputs hold a single chunk, or their chunk lengths agree pairwise,
/// they are returned unchanged. Otherwise every input that does not consist of
/// exactly one chunk is re-chunked. No length check is performed here.
#[cfg(feature = "performant")]
pub(crate) fn align_chunks_binary_owned_series(left: Series, right: Series) -> (Series, Series) {
    let n_left = left.chunks().len();
    let n_right = right.chunks().len();

    let both_single = n_left == 1 && n_right == 1;
    // All chunks are equal length
    let already_aligned = both_single
        || (n_left == n_right
            && left
                .chunk_lengths()
                .zip(right.chunk_lengths())
                .all(|(l, r)| l == r));
    if already_aligned {
        return (left, right);
    }

    // A side that is already a single chunk is passed through as is.
    let left = if n_left == 1 { left } else { left.rechunk() };
    let right = if n_right == 1 { right } else { right.rechunk() };
    (left, right)
}
1007
1008pub(crate) fn align_chunks_binary_owned<T, B>(
1009    left: ChunkedArray<T>,
1010    right: ChunkedArray<B>,
1011) -> (ChunkedArray<T>, ChunkedArray<B>)
1012where
1013    B: PolarsDataType,
1014    T: PolarsDataType,
1015{
1016    match (left.chunks.len(), right.chunks.len()) {
1017        (1, 1) => (left, right),
1018        // All chunks are equal length
1019        (a, b)
1020            if a == b
1021                && left
1022                    .chunk_lengths()
1023                    .zip(right.chunk_lengths())
1024                    .all(|(l, r)| l == r) =>
1025        {
1026            (left, right)
1027        },
1028        (_, 1) => (left.rechunk().into_owned(), right),
1029        (1, _) => (left, right.rechunk().into_owned()),
1030        (_, _) => (left.rechunk().into_owned(), right.rechunk().into_owned()),
1031    }
1032}
1033
1034/// # Panics
1035/// This will panic if `a.len() != b.len() || b.len() != c.len()` and array is chunked.
1036#[allow(clippy::type_complexity)]
1037pub fn align_chunks_ternary<'a, A, B, C>(
1038    a: &'a ChunkedArray<A>,
1039    b: &'a ChunkedArray<B>,
1040    c: &'a ChunkedArray<C>,
1041) -> (
1042    Cow<'a, ChunkedArray<A>>,
1043    Cow<'a, ChunkedArray<B>>,
1044    Cow<'a, ChunkedArray<C>>,
1045)
1046where
1047    A: PolarsDataType,
1048    B: PolarsDataType,
1049    C: PolarsDataType,
1050{
1051    if a.chunks.len() == 1 && b.chunks.len() == 1 && c.chunks.len() == 1 {
1052        return (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c));
1053    }
1054
1055    assert!(
1056        a.len() == b.len() && b.len() == c.len(),
1057        "expected arrays of the same length"
1058    );
1059
1060    match (a.chunks.len(), b.chunks.len(), c.chunks.len()) {
1061        (_, 1, 1) => (
1062            Cow::Borrowed(a),
1063            Cow::Owned(b.match_chunks(a.chunk_lengths())),
1064            Cow::Owned(c.match_chunks(a.chunk_lengths())),
1065        ),
1066        (1, 1, _) => (
1067            Cow::Owned(a.match_chunks(c.chunk_lengths())),
1068            Cow::Owned(b.match_chunks(c.chunk_lengths())),
1069            Cow::Borrowed(c),
1070        ),
1071        (1, _, 1) => (
1072            Cow::Owned(a.match_chunks(b.chunk_lengths())),
1073            Cow::Borrowed(b),
1074            Cow::Owned(c.match_chunks(b.chunk_lengths())),
1075        ),
1076        (1, _, _) => {
1077            let b = b.rechunk();
1078            (
1079                Cow::Owned(a.match_chunks(c.chunk_lengths())),
1080                Cow::Owned(b.match_chunks(c.chunk_lengths())),
1081                Cow::Borrowed(c),
1082            )
1083        },
1084        (_, 1, _) => {
1085            let a = a.rechunk();
1086            (
1087                Cow::Owned(a.match_chunks(c.chunk_lengths())),
1088                Cow::Owned(b.match_chunks(c.chunk_lengths())),
1089                Cow::Borrowed(c),
1090            )
1091        },
1092        (_, _, 1) => {
1093            let b = b.rechunk();
1094            (
1095                Cow::Borrowed(a),
1096                Cow::Owned(b.match_chunks(a.chunk_lengths())),
1097                Cow::Owned(c.match_chunks(a.chunk_lengths())),
1098            )
1099        },
1100        (len_a, len_b, len_c)
1101            if len_a == len_b
1102                && len_b == len_c
1103                && a.chunk_lengths()
1104                    .zip(b.chunk_lengths())
1105                    .zip(c.chunk_lengths())
1106                    .all(|((a, b), c)| a == b && b == c) =>
1107        {
1108            (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c))
1109        },
1110        _ => {
1111            // could optimize to choose to rechunk a primitive and not a string or list type
1112            let a = a.rechunk();
1113            let b = b.rechunk();
1114            (
1115                Cow::Owned(a.match_chunks(c.chunk_lengths())),
1116                Cow::Owned(b.match_chunks(c.chunk_lengths())),
1117                Cow::Borrowed(c),
1118            )
1119        },
1120    }
1121}
1122
1123pub fn binary_concatenate_validities<'a, T, B>(
1124    left: &'a ChunkedArray<T>,
1125    right: &'a ChunkedArray<B>,
1126) -> Option<Bitmap>
1127where
1128    B: PolarsDataType,
1129    T: PolarsDataType,
1130{
1131    let (left, right) = align_chunks_binary(left, right);
1132    let left_validity = concatenate_validities(left.chunks());
1133    let right_validity = concatenate_validities(right.chunks());
1134    combine_validities_and(left_validity.as_ref(), right_validity.as_ref())
1135}
1136
/// Convenience for `x.into_iter().map(Into::into).collect()` using an `into_vec()` function.
pub trait IntoVec<T> {
    /// Consumes `self` and returns its contents as a `Vec<T>`.
    fn into_vec(self) -> Vec<T>;
}
1141
1142impl<I, S> IntoVec<PlSmallStr> for I
1143where
1144    I: IntoIterator<Item = S>,
1145    S: Into<PlSmallStr>,
1146{
1147    fn into_vec(self) -> Vec<PlSmallStr> {
1148        self.into_iter().map(|s| s.into()).collect()
1149    }
1150}
1151
1152/// This logic is same as the impl on ChunkedArray
1153/// The difference is that there is less indirection because the caller should preallocate
1154/// `chunk_lens` once. On the `ChunkedArray` we indirect through an `ArrayRef` which is an indirection
1155/// and a vtable.
1156#[inline]
1157pub(crate) fn index_to_chunked_index<
1158    I: Iterator<Item = Idx>,
1159    Idx: PartialOrd + std::ops::AddAssign + std::ops::SubAssign + Zero + One,
1160>(
1161    chunk_lens: I,
1162    index: Idx,
1163) -> (Idx, Idx) {
1164    let mut index_remainder = index;
1165    let mut current_chunk_idx = Zero::zero();
1166
1167    for chunk_len in chunk_lens {
1168        if chunk_len > index_remainder {
1169            break;
1170        } else {
1171            index_remainder -= chunk_len;
1172            current_chunk_idx += One::one();
1173        }
1174    }
1175    (current_chunk_idx, index_remainder)
1176}
1177
1178pub(crate) fn index_to_chunked_index_rev<
1179    I: Iterator<Item = Idx>,
1180    Idx: PartialOrd
1181        + std::ops::AddAssign
1182        + std::ops::SubAssign
1183        + std::ops::Sub<Output = Idx>
1184        + Zero
1185        + One
1186        + Copy
1187        + std::fmt::Debug,
1188>(
1189    chunk_lens_rev: I,
1190    index_from_back: Idx,
1191    total_chunks: Idx,
1192) -> (Idx, Idx) {
1193    debug_assert!(index_from_back > Zero::zero(), "at least -1");
1194    let mut index_remainder = index_from_back;
1195    let mut current_chunk_idx = One::one();
1196    let mut current_chunk_len = Zero::zero();
1197
1198    for chunk_len in chunk_lens_rev {
1199        current_chunk_len = chunk_len;
1200        if chunk_len >= index_remainder {
1201            break;
1202        } else {
1203            index_remainder -= chunk_len;
1204            current_chunk_idx += One::one();
1205        }
1206    }
1207    (
1208        total_chunks - current_chunk_idx,
1209        current_chunk_len - index_remainder,
1210    )
1211}
1212
1213pub fn first_non_null<'a, I>(iter: I) -> Option<usize>
1214where
1215    I: Iterator<Item = Option<&'a Bitmap>>,
1216{
1217    let mut offset = 0;
1218    for validity in iter {
1219        if let Some(mask) = validity {
1220            let len_mask = mask.len();
1221            let n = mask.leading_zeros();
1222            if n < len_mask {
1223                return Some(offset + n);
1224            }
1225            offset += len_mask
1226        } else {
1227            return Some(offset);
1228        }
1229    }
1230    None
1231}
1232
1233pub fn last_non_null<'a, I>(iter: I, len: usize) -> Option<usize>
1234where
1235    I: DoubleEndedIterator<Item = Option<&'a Bitmap>>,
1236{
1237    if len == 0 {
1238        return None;
1239    }
1240    let mut offset = 0;
1241    for validity in iter.rev() {
1242        if let Some(mask) = validity {
1243            let len_mask = mask.len();
1244            let n = mask.trailing_zeros();
1245            if n < len_mask {
1246                return Some(len - offset - n - 1);
1247            }
1248            offset += len_mask;
1249        } else {
1250            return Some(len - offset - 1);
1251        }
1252    }
1253    None
1254}
1255
1256/// ensure that nulls are propagated to both arrays
1257pub fn coalesce_nulls<'a, T: PolarsDataType>(
1258    a: &'a ChunkedArray<T>,
1259    b: &'a ChunkedArray<T>,
1260) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<T>>) {
1261    if a.null_count() > 0 || b.null_count() > 0 {
1262        let (a, b) = align_chunks_binary(a, b);
1263        let mut b = b.into_owned();
1264        let a = a.coalesce_nulls(b.chunks());
1265
1266        for arr in a.chunks().iter() {
1267            for arr_b in unsafe { b.chunks_mut() } {
1268                *arr_b = arr_b.with_validity(arr.validity().cloned())
1269            }
1270        }
1271        b.compute_len();
1272        (Cow::Owned(a), Cow::Owned(b))
1273    } else {
1274        (Cow::Borrowed(a), Cow::Borrowed(b))
1275    }
1276}
1277
1278pub fn coalesce_nulls_columns(a: &Column, b: &Column) -> (Column, Column) {
1279    if a.null_count() > 0 || b.null_count() > 0 {
1280        let mut a = a.as_materialized_series().rechunk();
1281        let mut b = b.as_materialized_series().rechunk();
1282        for (arr_a, arr_b) in unsafe { a.chunks_mut().iter_mut().zip(b.chunks_mut()) } {
1283            let validity = match (arr_a.validity(), arr_b.validity()) {
1284                (None, Some(b)) => Some(b.clone()),
1285                (Some(a), Some(b)) => Some(a & b),
1286                (Some(a), None) => Some(a.clone()),
1287                (None, None) => None,
1288            };
1289            *arr_a = arr_a.with_validity(validity.clone());
1290            *arr_b = arr_b.with_validity(validity);
1291        }
1292        a.compute_len();
1293        b.compute_len();
1294        (a.into(), b.into())
1295    } else {
1296        (a.clone(), b.clone())
1297    }
1298}
1299
1300pub fn operation_exceeded_idxsize_msg(operation: &str) -> String {
1301    if size_of::<IdxSize>() == size_of::<u32>() {
1302        format!(
1303            "{} exceeded the maximum supported limit of {} rows. Consider installing 'polars-u64-idx'.",
1304            operation,
1305            IdxSize::MAX,
1306        )
1307    } else {
1308        format!(
1309            "{} exceeded the maximum supported limit of {} rows.",
1310            operation,
1311            IdxSize::MAX,
1312        )
1313    }
1314}
1315
#[cfg(test)]
mod test {
    use super::*;

    /// Splitting ten values three ways yields parts of length 3, 3 and 4.
    #[test]
    fn test_split() {
        let ca: Int32Chunked = (0..10).collect_ca("a".into());

        let parts = split(&ca, 3);
        let expected_lens = [3, 3, 4];
        for (i, expected) in expected_lens.iter().enumerate() {
            assert_eq!(parts[i].len(), *expected);
        }
    }

    /// After alignment both arrays must report the same chunk lengths.
    #[test]
    fn test_align_chunks() -> PolarsResult<()> {
        fn assert_same_layout_after_align(a: &Int32Chunked, b: &Int32Chunked) {
            let (a, b) = align_chunks_binary(a, b);
            assert_eq!(
                a.chunk_lengths().collect::<Vec<_>>(),
                b.chunk_lengths().collect::<Vec<_>>()
            );
        }

        // One chunk on the left, two chunks on the right.
        let single = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut two_chunks = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let tail = Int32Chunked::new(PlSmallStr::EMPTY, &[2, 3, 4]);
        two_chunks.append(&tail)?;
        assert_same_layout_after_align(&single, &two_chunks);

        // One chunk on the left, four unit-length chunks on the right.
        let single = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut four_chunks = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let unit = four_chunks.clone();
        for _ in 0..3 {
            four_chunks.append(&unit)?;
        }
        assert_same_layout_after_align(&single, &four_chunks);

        Ok(())
    }
}