polars_core/utils/
mod.rs

1mod any_value;
2use arrow::compute::concatenate::concatenate_validities;
3use arrow::compute::utils::combine_validities_and;
4pub mod flatten;
5pub(crate) mod series;
6mod supertype;
7use std::borrow::Cow;
8use std::ops::{Deref, DerefMut};
9mod schema;
10
11pub use any_value::*;
12use arrow::bitmap::Bitmap;
13pub use arrow::legacy::utils::*;
14pub use arrow::trusted_len::TrustMyLength;
15use flatten::*;
16use num_traits::{One, Zero};
17use rayon::prelude::*;
18pub use schema::*;
19pub use series::*;
20pub use supertype::*;
21pub use {arrow, rayon};
22
23use crate::POOL;
24use crate::prelude::*;
25
/// Transparent newtype around a `T`.
///
/// The wrapped value is public (`.0`) and can also be reached through [`Deref`].
#[repr(transparent)]
pub struct Wrap<T>(pub T);

impl<T> Deref for Wrap<T> {
    type Target = T;

    /// Borrows the wrapped value.
    fn deref(&self) -> &T {
        let Wrap(inner) = self;
        inner
    }
}
35
/// Returns the partition count to use, which is the current number of threads
/// reported by the crate's `POOL`.
#[inline(always)]
pub fn _set_partition_size() -> usize {
    POOL.current_num_threads()
}
40
/// Thin wrapper that is useful for selecting certain impl specializations.
///
/// It is for instance used to implement
/// `impl<T> FromIterator<T::Native> for NoNull<ChunkedArray<T>>`,
/// because the `Option<T::Native>` flavour was already implemented as
/// `impl<T> FromIterator<Option<T::Native>> for ChunkedArray<T>`.
pub struct NoNull<T> {
    inner: T,
}

impl<T> NoNull<T> {
    /// Wraps `inner`.
    pub fn new(inner: T) -> Self {
        Self { inner }
    }

    /// Consumes the wrapper and returns the wrapped value.
    pub fn into_inner(self) -> T {
        let Self { inner } = self;
        inner
    }
}

impl<T> Deref for NoNull<T> {
    type Target = T;

    /// Borrows the wrapped value.
    fn deref(&self) -> &T {
        let Self { inner } = self;
        inner
    }
}

impl<T> DerefMut for NoNull<T> {
    /// Mutably borrows the wrapped value.
    fn deref_mut(&mut self) -> &mut T {
        let Self { inner } = self;
        inner
    }
}
74
/// Picks an initial capacity for collecting `iter`, based on its `size_hint`.
///
/// The upper bound is used when the iterator reports one. Otherwise the lower
/// bound is used, except that a lower bound of `0` falls back to `1024`.
pub(crate) fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
    let (lower, upper) = iter.size_hint();
    upper.unwrap_or(if lower == 0 { 1024 } else { lower })
}
82
/// Computes `(offset, length)` tuples that partition `len` items into `n` contiguous parts.
///
/// Prefer this one over `split_ca`, as this can push the null_count into the thread pool.
///
/// Every part has `len / n` items, except the last one which also takes the remainder.
/// For `n <= 1` a single `(0, len)` partition is returned.
#[doc(hidden)]
pub fn _split_offsets(len: usize, n: usize) -> Vec<(usize, usize)> {
    // `n == 0` would divide by zero below. Hand back one partition covering everything,
    // which mirrors how `split_df` treats a zero target.
    if n <= 1 {
        return vec![(0, len)];
    }

    let chunk_size = len / n;
    let last = n - 1;

    // A mapped range reports an exact length, so a plain `collect` allocates exactly once.
    (0..n)
        .map(|partition| {
            let offset = partition * chunk_size;
            // The last partition absorbs whatever `len / n` left over.
            let part_len = if partition == last {
                len - offset
            } else {
                chunk_size
            };
            (offset, part_len)
        })
        .collect()
}
105
/// Abstraction over chunked containers that can be sliced and split, so the
/// splitting helpers in this module can be written once.
///
/// Implemented in this module for `DataFrame`, `ChunkedArray<T>` and `Series`.
#[allow(clippy::len_without_is_empty)]
pub trait Container: Clone {
    /// Returns a container holding up to `len` elements starting at `offset`.
    fn slice(&self, offset: i64, len: usize) -> Self;

    /// Splits the container in two at `offset`.
    fn split_at(&self, offset: i64) -> (Self, Self);

    /// Number of elements in the container.
    fn len(&self) -> usize;

    /// Iterates over the chunks, each yielded as a container of its own.
    fn iter_chunks(&self) -> impl Iterator<Item = Self>;

    /// Whether the container reports that it should be rechunked.
    /// `split` and `split_and_flatten` only reuse the existing chunks when this is `false`.
    fn should_rechunk(&self) -> bool;

    /// Number of chunks.
    fn n_chunks(&self) -> usize;

    /// Lengths of the individual chunks.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize>;
}
122
/// [`Container`] for `DataFrame`: the length is the frame's height and chunk
/// information is read from the first column.
impl Container for DataFrame {
    fn slice(&self, offset: i64, len: usize) -> Self {
        DataFrame::slice(self, offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        DataFrame::split_at(self, offset)
    }

    fn len(&self) -> usize {
        self.height()
    }

    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        flatten_df_iter(self)
    }

    fn should_rechunk(&self) -> bool {
        // NOTE(review): presumably resolves to the inherent `DataFrame::should_rechunk`
        // (inherent methods win over trait methods); otherwise this would recurse.
        self.should_rechunk()
    }

    fn n_chunks(&self) -> usize {
        DataFrame::first_col_n_chunks(self)
    }

    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        // @scalar-correctness?
        // Indexing `columns[0]` panics for a frame without columns.
        self.columns[0].as_materialized_series().chunk_lengths()
    }
}
153
/// [`Container`] for `ChunkedArray<T>`, delegating to the inherent methods of the same name.
impl<T: PolarsDataType> Container for ChunkedArray<T> {
    fn slice(&self, offset: i64, len: usize) -> Self {
        ChunkedArray::slice(self, offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        ChunkedArray::split_at(self, offset)
    }

    fn len(&self) -> usize {
        ChunkedArray::len(self)
    }

    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        // Each downcast chunk becomes a single-chunk array carrying the same name.
        self.downcast_iter()
            .map(|arr| Self::with_chunk(self.name().clone(), arr.clone()))
    }

    fn should_rechunk(&self) -> bool {
        // Always `false` for this implementation.
        false
    }

    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        ChunkedArray::chunk_lengths(self)
    }
}
184
/// [`Container`] for `Series`, forwarding mostly to the wrapped value in `self.0`.
impl Container for Series {
    fn slice(&self, offset: i64, len: usize) -> Self {
        self.0.slice(offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        self.0.split_at(offset)
    }

    fn len(&self) -> usize {
        self.0.len()
    }

    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        // One `Series` per chunk index, obtained through `select_chunk`.
        (0..self.0.n_chunks()).map(|i| self.select_chunk(i))
    }

    fn should_rechunk(&self) -> bool {
        // Always `false` for this implementation.
        false
    }

    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        self.0.chunk_lengths()
    }
}
214
215fn split_impl<C: Container>(container: &C, target: usize, chunk_size: usize) -> Vec<C> {
216    if target == 1 {
217        return vec![container.clone()];
218    }
219    let mut out = Vec::with_capacity(target);
220    let chunk_size = chunk_size as i64;
221
222    // First split
223    let (chunk, mut remainder) = container.split_at(chunk_size);
224    out.push(chunk);
225
226    // Take the rest of the splits of exactly chunk size, but skip the last remainder as we won't split that.
227    for _ in 1..target - 1 {
228        let (a, b) = remainder.split_at(chunk_size);
229        out.push(a);
230        remainder = b
231    }
232    // This can be slightly larger than `chunk_size`, but is smaller than `2 * chunk_size`.
233    out.push(remainder);
234    out
235}
236
237/// Splits, but doesn't flatten chunks. E.g. a container can still have multiple chunks.
238pub fn split<C: Container>(container: &C, target: usize) -> Vec<C> {
239    let total_len = container.len();
240    if total_len == 0 {
241        return vec![container.clone()];
242    }
243
244    let chunk_size = std::cmp::max(total_len / target, 1);
245
246    if container.n_chunks() == target
247        && container
248            .chunk_lengths()
249            .all(|len| len.abs_diff(chunk_size) < 100)
250        // We cannot get chunks if they are misaligned
251        && !container.should_rechunk()
252    {
253        return container.iter_chunks().collect();
254    }
255    split_impl(container, target, chunk_size)
256}
257
258/// Split a [`Container`] in `target` elements. The target doesn't have to be respected if not
259/// Deviation of the target might be done to create more equal size chunks.
260pub fn split_and_flatten<C: Container>(container: &C, target: usize) -> Vec<C> {
261    let total_len = container.len();
262    if total_len == 0 {
263        return vec![container.clone()];
264    }
265
266    let chunk_size = std::cmp::max(total_len / target, 1);
267
268    if container.n_chunks() == target
269        && container
270            .chunk_lengths()
271            .all(|len| len.abs_diff(chunk_size) < 100)
272        // We cannot get chunks if they are misaligned
273        && !container.should_rechunk()
274    {
275        return container.iter_chunks().collect();
276    }
277
278    if container.n_chunks() == 1 {
279        split_impl(container, target, chunk_size)
280    } else {
281        let mut out = Vec::with_capacity(target);
282        let chunks = container.iter_chunks();
283
284        'new_chunk: for mut chunk in chunks {
285            loop {
286                let h = chunk.len();
287                if h < chunk_size {
288                    // TODO if the chunk is much smaller than chunk size, we should try to merge it with the next one.
289                    out.push(chunk);
290                    continue 'new_chunk;
291                }
292
293                // If a split leads to the next chunk being smaller than 30% take the whole chunk
294                if ((h - chunk_size) as f64 / chunk_size as f64) < 0.3 {
295                    out.push(chunk);
296                    continue 'new_chunk;
297                }
298
299                let (a, b) = chunk.split_at(chunk_size as i64);
300                out.push(a);
301                chunk = b;
302            }
303        }
304        out
305    }
306}
307
308/// Split a [`DataFrame`] in `target` elements. The target doesn't have to be respected if not
309/// strict. Deviation of the target might be done to create more equal size chunks.
310///
311/// # Panics
312/// if chunks are not aligned
313pub fn split_df_as_ref(df: &DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
314    if strict {
315        split(df, target)
316    } else {
317        split_and_flatten(df, target)
318    }
319}
320
321#[doc(hidden)]
322/// Split a [`DataFrame`] into `n` parts. We take a `&mut` to be able to repartition/align chunks.
323/// `strict` in that it respects `n` even if the chunks are suboptimal.
324pub fn split_df(df: &mut DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
325    if target == 0 || df.is_empty() {
326        return vec![df.clone()];
327    }
328    // make sure that chunks are aligned.
329    df.align_chunks_par();
330    split_df_as_ref(df, target, strict)
331}
332
333pub fn slice_slice<T>(vals: &[T], offset: i64, len: usize) -> &[T] {
334    let (raw_offset, slice_len) = slice_offsets(offset, len, vals.len());
335    &vals[raw_offset..raw_offset + slice_len]
336}
337
/// Resolves a possibly negative `offset` and a `length` against an array of `array_len`
/// elements, returning the in-bounds `(start_index, slice_len)`.
///
/// A negative `offset` counts back from the end of the array. Both the start and the end
/// of the requested range are clamped to `0..=array_len`, so the result never reaches
/// outside the array.
///
/// # Panics
/// Panics if `array_len` does not fit in an `i64`.
#[inline]
pub fn slice_offsets(offset: i64, length: usize, array_len: usize) -> (usize, usize) {
    let upper: i64 = array_len
        .try_into()
        .expect("array length larger than i64::MAX");

    // Negative offsets are relative to the end of the array.
    let start = match offset {
        o if o < 0 => o.saturating_add_unsigned(array_len as u64),
        o => o,
    };
    let stop = start.saturating_add_unsigned(length as u64);

    // Clamp both ends into the array; `stop >= start` still holds afterwards.
    let start = start.clamp(0, upper);
    let stop = stop.clamp(0, upper);

    (start as usize, (stop - start) as usize)
}
357
/// Dispatch on a physical `DataType` value and invoke the macro that belongs to it.
///
/// * `DataType::String` invokes `$macro_string!` and `DataType::Boolean` invokes
///   `$macro_bool!`, both with only the optional arguments.
/// * Every numeric dtype invokes `$macro!` with the native Rust type (`u8`, `i32`, `f64`, ...)
///   followed by the optional arguments.
///
/// `DataType` is not imported by the macro and must be in scope at the call site.
///
/// # Panics
/// The expansion panics for any dtype that has no arm.
#[macro_export]
macro_rules! match_dtype_to_physical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            // The optional arguments are forwarded comma-separated. Without the separator
            // two or more arguments would be emitted as juxtaposed expressions.
            DataType::String => $macro_string!($($opt_args),*),
            DataType::Boolean => $macro_bool!($($opt_args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(u8 $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(u16 $(, $opt_args)*),
            DataType::UInt32 => $macro!(u32 $(, $opt_args)*),
            DataType::UInt64 => $macro!(u64 $(, $opt_args)*),
            // NOTE(review): there is no `UInt128` arm here although `Int128` is handled
            // below; confirm whether that is intentional.
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(i8 $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(i16 $(, $opt_args)*),
            DataType::Int32 => $macro!(i32 $(, $opt_args)*),
            DataType::Int64 => $macro!(i64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(i128 $(, $opt_args)*),
            DataType::Float32 => $macro!(f32 $(, $opt_args)*),
            DataType::Float64 => $macro!(f64 $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
385
/// Dispatch on a `DataType` value and invoke the macro that belongs to it, passing the
/// polars type marker rather than the native Rust type.
///
/// * `DataType::String`, `DataType::Binary` and `DataType::Boolean` invoke `$macro_string!`,
///   `$macro_binary!` and `$macro_bool!` respectively, with only the optional arguments.
/// * Every numeric dtype invokes `$macro!` with its marker type (`UInt8Type`, `Int32Type`,
///   `Float64Type`, ...) followed by the optional arguments.
///
/// `DataType` and the `*Type` markers are not imported by the macro and must be in scope
/// at the call site.
///
/// # Panics
/// The expansion panics for any dtype that has no arm.
#[macro_export]
macro_rules! match_dtype_to_logical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_binary:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            // The optional arguments are forwarded comma-separated. Without the separator
            // two or more arguments would be emitted as juxtaposed expressions.
            DataType::String => $macro_string!($($opt_args),*),
            DataType::Binary => $macro_binary!($($opt_args),*),
            DataType::Boolean => $macro_bool!($($opt_args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(UInt8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(UInt16Type $(, $opt_args)*),
            DataType::UInt32 => $macro!(UInt32Type $(, $opt_args)*),
            DataType::UInt64 => $macro!(UInt64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $macro!(UInt128Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(Int8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(Int16Type $(, $opt_args)*),
            DataType::Int32 => $macro!(Int32Type $(, $opt_args)*),
            DataType::Int64 => $macro!(Int64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(Int128Type $(, $opt_args)*),
            DataType::Float32 => $macro!(Float32Type $(, $opt_args)*),
            DataType::Float64 => $macro!(Float64Type $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
416
/// Apply a macro on the downcasted ChunkedArray of `$self`.
///
/// Matches on `$self.dtype()`, downcasts `$self` with the accessor for that dtype
/// (`.str()`, `.bool()`, `.u8()`, ... followed by `.unwrap()`) and passes the result,
/// plus any optional arguments, to `$macro_string!` for strings, `$macro_bool!` for
/// booleans and `$macro!` for the numeric dtypes.
///
/// `DataType` must be in scope at the call site.
///
/// # Panics
/// The expansion panics for any dtype that has no arm.
#[macro_export]
macro_rules! match_arrow_dtype_apply_macro_ca {
    ($self:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            DataType::String => $macro_string!($self.str().unwrap() $(, $opt_args)*),
            DataType::Boolean => $macro_bool!($self.bool().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $macro!($self.u128().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($self.i128().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
446
/// Evaluates `$body` with a type placeholder bound to the native Rust type of a numeric dtype.
///
/// Invoked as `with_match_physical_numeric_type!(dtype, |$T| body)`: the `|$T|` part names
/// the placeholder, and inside `body` every `$T` expands to `i8`, `u32`, `f64`, ...
/// depending on which `DataType` variant `dtype` matches.
///
/// # Panics
/// The expansion panics for any dtype that has no arm.
#[macro_export]
macro_rules! with_match_physical_numeric_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` captures the caller's literal `$` so that this inner macro can declare `$T`.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { u128 },
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
475
/// Evaluates `$body` with a type placeholder bound to the native Rust type of an integer dtype.
///
/// Invoked as `with_match_physical_integer_type!(dtype, |$T| body)`: inside `body` every
/// `$T` expands to `i8`, `u32`, `i128`, ... depending on which `DataType` variant `dtype`
/// matches. Float dtypes have no arm here.
///
/// # Panics
/// The expansion panics for any dtype that has no arm.
#[macro_export]
macro_rules! with_match_physical_integer_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` captures the caller's literal `$` so that this inner macro can declare `$T`.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { u128 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
502
/// Evaluates `$body` with a type placeholder bound to the native Rust type of a float dtype.
///
/// Invoked as `with_match_physical_float_type!(dtype, |$T| body)`: inside `body` every `$T`
/// expands to `f32` for `Float32` and to `f64` for `Float64`.
///
/// # Panics
/// The expansion panics for any other dtype.
#[macro_export]
macro_rules! with_match_physical_float_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` captures the caller's literal `$` so that this inner macro can declare `$T`.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
515
/// Evaluates `$body` with a type placeholder bound to the polars type marker of a float dtype.
///
/// Invoked as `with_match_physical_float_polars_type!(dtype, |$T| body)`: inside `body`
/// every `$T` expands to `Float32Type` for `Float32` and to `Float64Type` for `Float64`.
///
/// Only the `DataType` variants are imported by the macro; the `Float32Type` /
/// `Float64Type` names must be resolvable at the call site.
///
/// # Panics
/// The expansion panics for any other dtype.
#[macro_export]
macro_rules! with_match_physical_float_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` captures the caller's literal `$` so that this inner macro can declare `$T`.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
528
/// Evaluates `$body` with a type placeholder bound to the polars type marker of a numeric dtype.
///
/// Invoked as `with_match_physical_numeric_polars_type!(dtype, |$T| body)`: inside `body`
/// every `$T` expands to `Int8Type`, `UInt32Type`, `Float64Type`, ... depending on which
/// `DataType` variant `dtype` matches.
///
/// Only the `DataType` variants are imported by the macro; the `*Type` marker names must
/// be resolvable at the call site.
///
/// # Panics
/// The expansion panics for any dtype that has no arm.
#[macro_export]
macro_rules! with_match_physical_numeric_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` captures the caller's literal `$` so that this inner macro can declare `$T`.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
            #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
            #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
            #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
            #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
            #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
            #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { UInt128Type },
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
557
/// Evaluates `$body` with a type placeholder bound to the polars type marker of an integer dtype.
///
/// Invoked as `with_match_physical_integer_polars_type!(dtype, |$T| body)`: inside `body`
/// every `$T` expands to `Int8Type`, `UInt32Type`, `Int128Type`, ... depending on which
/// `DataType` variant `dtype` matches. Float dtypes have no arm here.
///
/// The macro glob-imports `$crate::datatypes::*` into the expansion, in addition to the
/// `DataType` variants.
///
/// # Panics
/// The expansion panics for any dtype that has no arm.
#[macro_export]
macro_rules! with_match_physical_integer_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` captures the caller's literal `$` so that this inner macro can declare `$T`.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    use $crate::datatypes::*;
    match $key_type {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { UInt128Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
585
/// Evaluates `$body` with a type placeholder bound to the categorical marker type that
/// belongs to a `CategoricalPhysical` value.
///
/// Invoked as `with_match_categorical_physical_type!(physical, |$T| body)`: inside `body`
/// every `$T` expands to `Categorical8Type`, `Categorical16Type` or `Categorical32Type`
/// for `U8`, `U16` and `U32` respectively.
///
/// `CategoricalPhysical` and the marker types are not imported by the macro and must be
/// in scope at the call site. The match has no catch-all arm.
#[macro_export]
macro_rules! with_match_categorical_physical_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` captures the caller's literal `$` so that this inner macro can declare `$T`.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    match $dtype {
        CategoricalPhysical::U8 => __with_ty__! { Categorical8Type },
        CategoricalPhysical::U16 => __with_ty__! { Categorical16Type },
        CategoricalPhysical::U32 => __with_ty__! { Categorical32Type },
    }
})}
597
/// Apply a macro on the downcasted ChunkedArray of `$self`, for numeric dtypes only.
///
/// Matches on `$self.dtype()`, downcasts `$self` with the accessor for that dtype
/// (`.u8()`, `.i32()`, `.f64()`, ... followed by `.unwrap()`) and invokes `$macro!` with
/// the result followed by any optional arguments. String, boolean and other dtypes have
/// no arm.
///
/// `DataType` must be in scope at the call site.
///
/// # Panics
/// The expansion panics for any dtype that has no arm.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $macro!($self.u128().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($self.i128().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
626
/// Apply a macro on the mutably downcasted ChunkedArray of `$self`, for numeric dtypes only.
///
/// Matches on a clone of `$self.dtype()`, obtains `ca: &mut <X>Chunked` through
/// `$self.as_mut()` and invokes `$macro!(<X>Type, ca, opt_args...)`, where `<X>` is the
/// matched numeric dtype (`UInt8`, `Int32`, `Float64`, ...). String, boolean and other
/// dtypes have no arm.
///
/// `DataType`, the `*Chunked` aliases and the `*Type` markers must be in scope at the
/// call site.
///
/// # Panics
/// The expansion panics for any dtype that has no arm.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical_mut {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        // clone so that we do not borrow
        match $self.dtype().clone() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => {
                let ca: &mut UInt8Chunked = $self.as_mut();
                $macro!(UInt8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => {
                let ca: &mut UInt16Chunked = $self.as_mut();
                $macro!(UInt16Type, ca $(, $opt_args)*)
            },
            DataType::UInt32 => {
                let ca: &mut UInt32Chunked = $self.as_mut();
                $macro!(UInt32Type, ca $(, $opt_args)*)
            },
            DataType::UInt64 => {
                let ca: &mut UInt64Chunked = $self.as_mut();
                $macro!(UInt64Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => {
                let ca: &mut UInt128Chunked = $self.as_mut();
                $macro!(UInt128Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => {
                let ca: &mut Int8Chunked = $self.as_mut();
                $macro!(Int8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => {
                let ca: &mut Int16Chunked = $self.as_mut();
                $macro!(Int16Type, ca $(, $opt_args)*)
            },
            DataType::Int32 => {
                let ca: &mut Int32Chunked = $self.as_mut();
                $macro!(Int32Type, ca $(, $opt_args)*)
            },
            DataType::Int64 => {
                let ca: &mut Int64Chunked = $self.as_mut();
                $macro!(Int64Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => {
                let ca: &mut Int128Chunked = $self.as_mut();
                $macro!(Int128Type, ca $(, $opt_args)*)
            },
            DataType::Float32 => {
                let ca: &mut Float32Chunked = $self.as_mut();
                $macro!(Float32Type, ca $(, $opt_args)*)
            },
            DataType::Float64 => {
                let ca: &mut Float64Chunked = $self.as_mut();
                $macro!(Float64Type, ca $(, $opt_args)*)
            },
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
692
/// Calls `$method` with `$args` on the downcasted ChunkedArray of `$self`.
///
/// Matches on `$self.dtype()` and downcasts with the accessor for that dtype (`.bool()`,
/// `.str()`, `.u8()`, ..., `.time()`, `.date()`, `.datetime()`, `.list()`, `.struct_()`,
/// each followed by `.unwrap()`) before calling the method.
///
/// The comma after `$method` is part of the pattern and is required even when no
/// arguments are passed. `DataType` must be in scope at the call site.
///
/// # Panics
/// The expansion panics for any dtype that has no arm.
#[macro_export]
macro_rules! apply_method_all_arrow_series {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Boolean => $self.bool().unwrap().$method($($args),*),
            DataType::String => $self.str().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            // This arm previously referenced the undefined metavariable `$medthod`.
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $self.u128().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            DataType::Time => $self.time().unwrap().$method($($args),*),
            DataType::Date => $self.date().unwrap().$method($($args),*),
            DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
            DataType::List(_) => $self.list().unwrap().$method($($args),*),
            DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
            dt => panic!("dtype {:?} not supported", dt)
        }
    }
}
726
/// Calls `$method` with `$args` on the downcasted ChunkedArray of `$self`, for integer
/// dtypes only.
///
/// Matches on `$self.dtype()` and downcasts with the accessor for that dtype (`.u8()`,
/// `.i32()`, `.i128()`, ... followed by `.unwrap()`) before calling the method.
///
/// The comma after `$method` is part of the pattern and is required even when no
/// arguments are passed. `DataType` must be in scope at the call site.
///
/// # Panics
/// The expansion panics for any dtype that has no arm.
#[macro_export]
macro_rules! apply_method_physical_integer {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $self.u128().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }
}
751
/// Calls `$method` with `$args` on the downcasted ChunkedArray of `$self`, for numeric
/// dtypes. Doesn't include Bool and String.
///
/// `Float32` and `Float64` are handled here; every other dtype is forwarded to
/// `apply_method_physical_integer!`, which panics for dtypes it has no arm for.
///
/// The comma after `$method` is part of the pattern and is required even when no
/// arguments are passed. `DataType` must be in scope at the call site.
#[macro_export]
macro_rules! apply_method_physical_numeric {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            // Addressed through `$crate` so that callers in other crates do not have to
            // import the integer macro themselves.
            _ => $crate::apply_method_physical_integer!($self, $method, $($args),*),
        }
    }
}
763
/// Builds a `DataFrame` from `name => values` pairs.
///
/// Each pair is turned into a `Series` through `NamedFrom::new(name.into(), values)` and
/// wrapped in a `Column`; the resulting vector is passed to `DataFrame::new`, whose return
/// value is the value of the macro. At least one pair is required and a trailing comma
/// is accepted.
#[macro_export]
macro_rules! df {
    ($($col_name:expr => $slice:expr), + $(,)?) => {
        $crate::prelude::DataFrame::new(vec![
            $($crate::prelude::Column::from(<$crate::prelude::Series as $crate::prelude::NamedFrom::<_, _>>::new($col_name.into(), $slice)),)+
        ])
    }
}
772
773pub fn get_time_units(tu_l: &TimeUnit, tu_r: &TimeUnit) -> TimeUnit {
774    use crate::datatypes::time_unit::TimeUnit::*;
775    match (tu_l, tu_r) {
776        (Nanoseconds, Microseconds) => Microseconds,
777        (_, Milliseconds) => Milliseconds,
778        _ => *tu_l,
779    }
780}
781
782#[cold]
783#[inline(never)]
784fn width_mismatch(df1: &DataFrame, df2: &DataFrame) -> PolarsError {
785    let mut df1_extra = Vec::new();
786    let mut df2_extra = Vec::new();
787
788    let s1 = df1.schema();
789    let s2 = df2.schema();
790
791    s1.field_compare(s2, &mut df1_extra, &mut df2_extra);
792
793    let df1_extra = df1_extra
794        .into_iter()
795        .map(|(_, (n, _))| n.as_str())
796        .collect::<Vec<_>>()
797        .join(", ");
798    let df2_extra = df2_extra
799        .into_iter()
800        .map(|(_, (n, _))| n.as_str())
801        .collect::<Vec<_>>()
802        .join(", ");
803
804    polars_err!(
805        SchemaMismatch: r#"unable to vstack, dataframes have different widths ({} != {}).
806One dataframe has additional columns: [{df1_extra}].
807Other dataframe has additional columns: [{df2_extra}]."#,
808        df1.width(),
809        df2.width(),
810    )
811}
812
813pub fn accumulate_dataframes_vertical_unchecked_optional<I>(dfs: I) -> Option<DataFrame>
814where
815    I: IntoIterator<Item = DataFrame>,
816{
817    let mut iter = dfs.into_iter();
818    let additional = iter.size_hint().0;
819    let mut acc_df = iter.next()?;
820    acc_df.reserve_chunks(additional);
821
822    for df in iter {
823        if acc_df.width() != df.width() {
824            panic!("{}", width_mismatch(&acc_df, &df));
825        }
826
827        acc_df.vstack_mut_owned_unchecked(df);
828    }
829    Some(acc_df)
830}
831
832/// This takes ownership of the DataFrame so that drop is called earlier.
833/// Does not check if schema is correct
834pub fn accumulate_dataframes_vertical_unchecked<I>(dfs: I) -> DataFrame
835where
836    I: IntoIterator<Item = DataFrame>,
837{
838    let mut iter = dfs.into_iter();
839    let additional = iter.size_hint().0;
840    let mut acc_df = iter.next().unwrap();
841    acc_df.reserve_chunks(additional);
842
843    for df in iter {
844        if acc_df.width() != df.width() {
845            panic!("{}", width_mismatch(&acc_df, &df));
846        }
847
848        acc_df.vstack_mut_owned_unchecked(df);
849    }
850    acc_df
851}
852
853/// This takes ownership of the DataFrame so that drop is called earlier.
854/// # Panics
855/// Panics if `dfs` is empty.
856pub fn accumulate_dataframes_vertical<I>(dfs: I) -> PolarsResult<DataFrame>
857where
858    I: IntoIterator<Item = DataFrame>,
859{
860    let mut iter = dfs.into_iter();
861    let additional = iter.size_hint().0;
862    let mut acc_df = iter.next().unwrap();
863    acc_df.reserve_chunks(additional);
864    for df in iter {
865        if acc_df.width() != df.width() {
866            return Err(width_mismatch(&acc_df, &df));
867        }
868
869        acc_df.vstack_mut_owned(df)?;
870    }
871
872    Ok(acc_df)
873}
874
875/// Concat the DataFrames to a single DataFrame.
876pub fn concat_df<'a, I>(dfs: I) -> PolarsResult<DataFrame>
877where
878    I: IntoIterator<Item = &'a DataFrame>,
879{
880    let mut iter = dfs.into_iter();
881    let additional = iter.size_hint().0;
882    let mut acc_df = iter.next().unwrap().clone();
883    acc_df.reserve_chunks(additional);
884    for df in iter {
885        acc_df.vstack_mut(df)?;
886    }
887    Ok(acc_df)
888}
889
890/// Concat the DataFrames to a single DataFrame.
891pub fn concat_df_unchecked<'a, I>(dfs: I) -> DataFrame
892where
893    I: IntoIterator<Item = &'a DataFrame>,
894{
895    let mut iter = dfs.into_iter();
896    let additional = iter.size_hint().0;
897    let mut acc_df = iter.next().unwrap().clone();
898    acc_df.reserve_chunks(additional);
899    for df in iter {
900        acc_df.vstack_mut_unchecked(df);
901    }
902    acc_df
903}
904
905pub fn accumulate_dataframes_horizontal(dfs: Vec<DataFrame>) -> PolarsResult<DataFrame> {
906    let mut iter = dfs.into_iter();
907    let mut acc_df = iter.next().unwrap();
908    for df in iter {
909        acc_df.hstack_mut(df.get_columns())?;
910    }
911    Ok(acc_df)
912}
913
914/// Ensure the chunks in both ChunkedArrays have the same length.
915/// # Panics
916/// This will panic if `left.len() != right.len()` and array is chunked.
917pub fn align_chunks_binary<'a, T, B>(
918    left: &'a ChunkedArray<T>,
919    right: &'a ChunkedArray<B>,
920) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<B>>)
921where
922    B: PolarsDataType,
923    T: PolarsDataType,
924{
925    let assert = || {
926        assert_eq!(
927            left.len(),
928            right.len(),
929            "expected arrays of the same length"
930        )
931    };
932    match (left.chunks.len(), right.chunks.len()) {
933        // All chunks are equal length
934        (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
935        // All chunks are equal length
936        (a, b)
937            if a == b
938                && left
939                    .chunk_lengths()
940                    .zip(right.chunk_lengths())
941                    .all(|(l, r)| l == r) =>
942        {
943            (Cow::Borrowed(left), Cow::Borrowed(right))
944        },
945        (_, 1) => {
946            assert();
947            (
948                Cow::Borrowed(left),
949                Cow::Owned(right.match_chunks(left.chunk_lengths())),
950            )
951        },
952        (1, _) => {
953            assert();
954            (
955                Cow::Owned(left.match_chunks(right.chunk_lengths())),
956                Cow::Borrowed(right),
957            )
958        },
959        (_, _) => {
960            assert();
961            // could optimize to choose to rechunk a primitive and not a string or list type
962            let left = left.rechunk();
963            (
964                Cow::Owned(left.match_chunks(right.chunk_lengths())),
965                Cow::Borrowed(right),
966            )
967        },
968    }
969}
970
971/// Ensure the chunks in ChunkedArray and Series have the same length.
972/// # Panics
973/// This will panic if `left.len() != right.len()` and array is chunked.
974pub fn align_chunks_binary_ca_series<'a, T>(
975    left: &'a ChunkedArray<T>,
976    right: &'a Series,
977) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, Series>)
978where
979    T: PolarsDataType,
980{
981    let assert = || {
982        assert_eq!(
983            left.len(),
984            right.len(),
985            "expected arrays of the same length"
986        )
987    };
988    match (left.chunks.len(), right.chunks().len()) {
989        // All chunks are equal length
990        (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
991        // All chunks are equal length
992        (a, b)
993            if a == b
994                && left
995                    .chunk_lengths()
996                    .zip(right.chunk_lengths())
997                    .all(|(l, r)| l == r) =>
998        {
999            assert();
1000            (Cow::Borrowed(left), Cow::Borrowed(right))
1001        },
1002        (_, 1) => (left.rechunk(), Cow::Borrowed(right)),
1003        (1, _) => (Cow::Borrowed(left), Cow::Owned(right.rechunk())),
1004        (_, _) => {
1005            assert();
1006            (left.rechunk(), Cow::Owned(right.rechunk()))
1007        },
1008    }
1009}
1010
/// Owned variant of chunk alignment for two `Series`.
///
/// If both inputs are single-chunk or already share the same chunk layout they are
/// returned untouched. Otherwise every side that has more than one chunk is rechunked.
/// No length check is performed.
#[cfg(feature = "performant")]
pub(crate) fn align_chunks_binary_owned_series(left: Series, right: Series) -> (Series, Series) {
    let n_left = left.chunks().len();
    let n_right = right.chunks().len();

    let already_aligned = (n_left == 1 && n_right == 1)
        || (n_left == n_right
            && left
                .chunk_lengths()
                .zip(right.chunk_lengths())
                .all(|(l, r)| l == r));
    if already_aligned {
        return (left, right);
    }

    // A side that is already a single chunk is passed through without touching it.
    let left = if n_left == 1 { left } else { left.rechunk() };
    let right = if n_right == 1 { right } else { right.rechunk() };
    (left, right)
}
1030
1031pub(crate) fn align_chunks_binary_owned<T, B>(
1032    left: ChunkedArray<T>,
1033    right: ChunkedArray<B>,
1034) -> (ChunkedArray<T>, ChunkedArray<B>)
1035where
1036    B: PolarsDataType,
1037    T: PolarsDataType,
1038{
1039    match (left.chunks.len(), right.chunks.len()) {
1040        (1, 1) => (left, right),
1041        // All chunks are equal length
1042        (a, b)
1043            if a == b
1044                && left
1045                    .chunk_lengths()
1046                    .zip(right.chunk_lengths())
1047                    .all(|(l, r)| l == r) =>
1048        {
1049            (left, right)
1050        },
1051        (_, 1) => (left.rechunk().into_owned(), right),
1052        (1, _) => (left, right.rechunk().into_owned()),
1053        (_, _) => (left.rechunk().into_owned(), right.rechunk().into_owned()),
1054    }
1055}
1056
/// Ensure the chunks of three ChunkedArrays have the same lengths.
///
/// When all three inputs are single-chunk they are returned borrowed without any length
/// check. Otherwise one input is chosen as the layout reference and returned borrowed,
/// while the other two are re-chunked with `match_chunks` to follow its chunk lengths
/// (after a `rechunk()` when they consist of several chunks themselves).
///
/// # Panics
/// This will panic if `a.len() != b.len() || b.len() != c.len()` and array is chunked.
#[allow(clippy::type_complexity)]
pub fn align_chunks_ternary<'a, A, B, C>(
    a: &'a ChunkedArray<A>,
    b: &'a ChunkedArray<B>,
    c: &'a ChunkedArray<C>,
) -> (
    Cow<'a, ChunkedArray<A>>,
    Cow<'a, ChunkedArray<B>>,
    Cow<'a, ChunkedArray<C>>,
)
where
    A: PolarsDataType,
    B: PolarsDataType,
    C: PolarsDataType,
{
    // Fast path: three single chunks need no alignment (and skip the length check).
    if a.chunks.len() == 1 && b.chunks.len() == 1 && c.chunks.len() == 1 {
        return (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c));
    }

    assert!(
        a.len() == b.len() && b.len() == c.len(),
        "expected arrays of the same length"
    );

    // NOTE: arm order matters. The arms with a literal `1` are tried before the
    // "identical layout" arm, and the (1, 1, 1) case never reaches this match.
    match (a.chunks.len(), b.chunks.len(), c.chunks.len()) {
        // Exactly one input is multi-chunk: it is the reference, the single-chunk
        // inputs are re-chunked to follow it.
        (_, 1, 1) => (
            Cow::Borrowed(a),
            Cow::Owned(b.match_chunks(a.chunk_lengths())),
            Cow::Owned(c.match_chunks(a.chunk_lengths())),
        ),
        (1, 1, _) => (
            Cow::Owned(a.match_chunks(c.chunk_lengths())),
            Cow::Owned(b.match_chunks(c.chunk_lengths())),
            Cow::Borrowed(c),
        ),
        (1, _, 1) => (
            Cow::Owned(a.match_chunks(b.chunk_lengths())),
            Cow::Borrowed(b),
            Cow::Owned(c.match_chunks(b.chunk_lengths())),
        ),
        // Exactly one input is single-chunk: the other multi-chunk input is collapsed
        // with `rechunk()` first, then both follow the reference layout.
        (1, _, _) => {
            let b = b.rechunk();
            (
                Cow::Owned(a.match_chunks(c.chunk_lengths())),
                Cow::Owned(b.match_chunks(c.chunk_lengths())),
                Cow::Borrowed(c),
            )
        },
        (_, 1, _) => {
            let a = a.rechunk();
            (
                Cow::Owned(a.match_chunks(c.chunk_lengths())),
                Cow::Owned(b.match_chunks(c.chunk_lengths())),
                Cow::Borrowed(c),
            )
        },
        (_, _, 1) => {
            let b = b.rechunk();
            (
                Cow::Borrowed(a),
                Cow::Owned(b.match_chunks(a.chunk_lengths())),
                Cow::Owned(c.match_chunks(a.chunk_lengths())),
            )
        },
        // All three are multi-chunk and already share the same chunk layout.
        (len_a, len_b, len_c)
            if len_a == len_b
                && len_b == len_c
                && a.chunk_lengths()
                    .zip(b.chunk_lengths())
                    .zip(c.chunk_lengths())
                    .all(|((a, b), c)| a == b && b == c) =>
        {
            (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c))
        },
        // All three are multi-chunk with differing layouts: `c` is the reference.
        _ => {
            // could optimize to choose to rechunk a primitive and not a string or list type
            let a = a.rechunk();
            let b = b.rechunk();
            (
                Cow::Owned(a.match_chunks(c.chunk_lengths())),
                Cow::Owned(b.match_chunks(c.chunk_lengths())),
                Cow::Borrowed(c),
            )
        },
    }
}
1145
1146pub fn binary_concatenate_validities<'a, T, B>(
1147    left: &'a ChunkedArray<T>,
1148    right: &'a ChunkedArray<B>,
1149) -> Option<Bitmap>
1150where
1151    B: PolarsDataType,
1152    T: PolarsDataType,
1153{
1154    let (left, right) = align_chunks_binary(left, right);
1155    let left_validity = concatenate_validities(left.chunks());
1156    let right_validity = concatenate_validities(right.chunks());
1157    combine_validities_and(left_validity.as_ref(), right_validity.as_ref())
1158}
1159
/// Convenience for `x.into_iter().map(Into::into).collect()` using an `into_vec()` function.
pub trait IntoVec<T> {
    /// Consume `self` and collect its contents into a `Vec<T>`.
    fn into_vec(self) -> Vec<T>;
}
1164
1165impl<I, S> IntoVec<PlSmallStr> for I
1166where
1167    I: IntoIterator<Item = S>,
1168    S: Into<PlSmallStr>,
1169{
1170    fn into_vec(self) -> Vec<PlSmallStr> {
1171        self.into_iter().map(|s| s.into()).collect()
1172    }
1173}
1174
1175/// This logic is same as the impl on ChunkedArray
1176/// The difference is that there is less indirection because the caller should preallocate
1177/// `chunk_lens` once. On the `ChunkedArray` we indirect through an `ArrayRef` which is an indirection
1178/// and a vtable.
1179#[inline]
1180pub(crate) fn index_to_chunked_index<
1181    I: Iterator<Item = Idx>,
1182    Idx: PartialOrd + std::ops::AddAssign + std::ops::SubAssign + Zero + One,
1183>(
1184    chunk_lens: I,
1185    index: Idx,
1186) -> (Idx, Idx) {
1187    let mut index_remainder = index;
1188    let mut current_chunk_idx = Zero::zero();
1189
1190    for chunk_len in chunk_lens {
1191        if chunk_len > index_remainder {
1192            break;
1193        } else {
1194            index_remainder -= chunk_len;
1195            current_chunk_idx += One::one();
1196        }
1197    }
1198    (current_chunk_idx, index_remainder)
1199}
1200
1201pub(crate) fn index_to_chunked_index_rev<
1202    I: Iterator<Item = Idx>,
1203    Idx: PartialOrd
1204        + std::ops::AddAssign
1205        + std::ops::SubAssign
1206        + std::ops::Sub<Output = Idx>
1207        + Zero
1208        + One
1209        + Copy
1210        + std::fmt::Debug,
1211>(
1212    chunk_lens_rev: I,
1213    index_from_back: Idx,
1214    total_chunks: Idx,
1215) -> (Idx, Idx) {
1216    debug_assert!(index_from_back > Zero::zero(), "at least -1");
1217    let mut index_remainder = index_from_back;
1218    let mut current_chunk_idx = One::one();
1219    let mut current_chunk_len = Zero::zero();
1220
1221    for chunk_len in chunk_lens_rev {
1222        current_chunk_len = chunk_len;
1223        if chunk_len >= index_remainder {
1224            break;
1225        } else {
1226            index_remainder -= chunk_len;
1227            current_chunk_idx += One::one();
1228        }
1229    }
1230    (
1231        total_chunks - current_chunk_idx,
1232        current_chunk_len - index_remainder,
1233    )
1234}
1235
1236pub fn first_non_null<'a, I>(iter: I) -> Option<usize>
1237where
1238    I: Iterator<Item = Option<&'a Bitmap>>,
1239{
1240    let mut offset = 0;
1241    for validity in iter {
1242        if let Some(mask) = validity {
1243            let len_mask = mask.len();
1244            let n = mask.leading_zeros();
1245            if n < len_mask {
1246                return Some(offset + n);
1247            }
1248            offset += len_mask
1249        } else {
1250            return Some(offset);
1251        }
1252    }
1253    None
1254}
1255
1256pub fn last_non_null<'a, I>(iter: I, len: usize) -> Option<usize>
1257where
1258    I: DoubleEndedIterator<Item = Option<&'a Bitmap>>,
1259{
1260    if len == 0 {
1261        return None;
1262    }
1263    let mut offset = 0;
1264    for validity in iter.rev() {
1265        if let Some(mask) = validity {
1266            let len_mask = mask.len();
1267            let n = mask.trailing_zeros();
1268            if n < len_mask {
1269                return Some(len - offset - n - 1);
1270            }
1271            offset += len_mask;
1272        } else {
1273            return Some(len - offset - 1);
1274        }
1275    }
1276    None
1277}
1278
1279/// ensure that nulls are propagated to both arrays
1280pub fn coalesce_nulls<'a, T: PolarsDataType>(
1281    a: &'a ChunkedArray<T>,
1282    b: &'a ChunkedArray<T>,
1283) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<T>>) {
1284    if a.null_count() > 0 || b.null_count() > 0 {
1285        let (a, b) = align_chunks_binary(a, b);
1286        let mut b = b.into_owned();
1287        let a = a.coalesce_nulls(b.chunks());
1288
1289        for arr in a.chunks().iter() {
1290            for arr_b in unsafe { b.chunks_mut() } {
1291                *arr_b = arr_b.with_validity(arr.validity().cloned())
1292            }
1293        }
1294        b.compute_len();
1295        (Cow::Owned(a), Cow::Owned(b))
1296    } else {
1297        (Cow::Borrowed(a), Cow::Borrowed(b))
1298    }
1299}
1300
1301pub fn coalesce_nulls_columns(a: &Column, b: &Column) -> (Column, Column) {
1302    if a.null_count() > 0 || b.null_count() > 0 {
1303        let mut a = a.as_materialized_series().rechunk();
1304        let mut b = b.as_materialized_series().rechunk();
1305        for (arr_a, arr_b) in unsafe { a.chunks_mut().iter_mut().zip(b.chunks_mut()) } {
1306            let validity = match (arr_a.validity(), arr_b.validity()) {
1307                (None, Some(b)) => Some(b.clone()),
1308                (Some(a), Some(b)) => Some(a & b),
1309                (Some(a), None) => Some(a.clone()),
1310                (None, None) => None,
1311            };
1312            *arr_a = arr_a.with_validity(validity.clone());
1313            *arr_b = arr_b.with_validity(validity);
1314        }
1315        a.compute_len();
1316        b.compute_len();
1317        (a.into(), b.into())
1318    } else {
1319        (a.clone(), b.clone())
1320    }
1321}
1322
1323pub fn operation_exceeded_idxsize_msg(operation: &str) -> String {
1324    if size_of::<IdxSize>() == size_of::<u32>() {
1325        format!(
1326            "{} exceeded the maximum supported limit of {} rows. Consider installing 'polars-u64-idx'.",
1327            operation,
1328            IdxSize::MAX,
1329        )
1330    } else {
1331        format!(
1332            "{} exceeded the maximum supported limit of {} rows.",
1333            operation,
1334            IdxSize::MAX,
1335        )
1336    }
1337}
1338
#[cfg(test)]
mod test {
    use super::*;

    /// Splitting 10 values into 3 parts yields parts of length 3, 3 and 4.
    #[test]
    fn test_split() {
        let ca: Int32Chunked = (0..10).collect_ca("a".into());

        let parts = split(&ca, 3);
        for (idx, expected_len) in [3, 3, 4].into_iter().enumerate() {
            assert_eq!(parts[idx].len(), expected_len);
        }
    }

    /// After `align_chunks_binary`, both sides must report identical chunk lengths.
    #[test]
    fn test_align_chunks() -> PolarsResult<()> {
        // Single chunk of 4 values versus chunks of 1 and 3 values.
        let single = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut chunked = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let tail = Int32Chunked::new(PlSmallStr::EMPTY, &[2, 3, 4]);
        chunked.append(&tail)?;

        let (lhs, rhs) = align_chunks_binary(&single, &chunked);
        assert_eq!(
            lhs.chunk_lengths().collect::<Vec<_>>(),
            rhs.chunk_lengths().collect::<Vec<_>>()
        );

        // Single chunk of 4 values versus four chunks of 1 value each.
        let single = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut chunked = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let unit = chunked.clone();
        for _ in 0..3 {
            chunked.append(&unit)?;
        }

        let (lhs, rhs) = align_chunks_binary(&single, &chunked);
        assert_eq!(
            lhs.chunk_lengths().collect::<Vec<_>>(),
            rhs.chunk_lengths().collect::<Vec<_>>()
        );

        Ok(())
    }
}