polars_core/utils/
mod.rs

1mod any_value;
2use arrow::compute::concatenate::concatenate_validities;
3use arrow::compute::utils::combine_validities_and;
4pub mod flatten;
5pub(crate) mod series;
6mod supertype;
7use std::borrow::Cow;
8use std::ops::{Deref, DerefMut};
9mod schema;
10
11pub use any_value::*;
12use arrow::bitmap::Bitmap;
13pub use arrow::legacy::utils::*;
14pub use arrow::trusted_len::TrustMyLength;
15use flatten::*;
16use num_traits::{One, Zero};
17use rayon::prelude::*;
18pub use schema::*;
19pub use series::*;
20pub use supertype::*;
21pub use {arrow, rayon};
22
23use crate::POOL;
24use crate::prelude::*;
25
/// Transparent newtype around a `T`.
///
/// `#[repr(transparent)]` gives `Wrap<T>` the same layout as `T`. The wrapped value is
/// reachable through the public field or by dereferencing.
#[repr(transparent)]
pub struct Wrap<T>(pub T);

impl<T> Deref for Wrap<T> {
    type Target = T;

    /// Borrows the wrapped value.
    fn deref(&self) -> &T {
        let Wrap(inner) = self;
        inner
    }
}
35
/// Returns `POOL.current_num_threads()`.
///
/// NOTE(review): going by the name, callers presumably use this as the number of partitions
/// for partitioned work — confirm against the call sites.
#[inline(always)]
pub fn _set_partition_size() -> usize {
    POOL.current_num_threads()
}
40
/// A thin wrapper type that exists so that certain impls can be specialised.
///
/// For instance, it is used to implement
/// `impl<T> FromIterator<T::Native> for NoNull<ChunkedArray<T>>`,
/// because the `Option<T::Native>` flavour was already implemented as
/// `impl<T> FromIterator<Option<T::Native>> for ChunkedArray<T>`.
pub struct NoNull<T> {
    inner: T,
}

impl<T> NoNull<T> {
    /// Wraps `inner`.
    pub fn new(inner: T) -> Self {
        Self { inner }
    }

    /// Consumes the wrapper and hands back the wrapped value.
    pub fn into_inner(self) -> T {
        let Self { inner } = self;
        inner
    }
}

impl<T> Deref for NoNull<T> {
    type Target = T;

    /// Shared access to the wrapped value.
    fn deref(&self) -> &T {
        let Self { inner } = self;
        inner
    }
}

impl<T> DerefMut for NoNull<T> {
    /// Exclusive access to the wrapped value.
    fn deref_mut(&mut self) -> &mut T {
        let Self { inner } = self;
        inner
    }
}
74
/// Picks an initial capacity for collecting `iter`, based on its `size_hint`.
///
/// A known upper bound is used as-is. Without one, the lower bound is used, unless that is
/// zero, in which case the capacity falls back to 1024.
pub(crate) fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
    let (lower, upper) = iter.size_hint();
    upper.unwrap_or(if lower == 0 { 1024 } else { lower })
}
82
83// prefer this one over split_ca, as this can push the null_count into the thread pool
84// returns an `(offset, length)` tuple
85#[doc(hidden)]
86pub fn _split_offsets(len: usize, n: usize) -> Vec<(usize, usize)> {
87    if n == 1 {
88        vec![(0, len)]
89    } else {
90        let chunk_size = len / n;
91
92        (0..n)
93            .map(|partition| {
94                let offset = partition * chunk_size;
95                let len = if partition == (n - 1) {
96                    len - offset
97                } else {
98                    chunk_size
99                };
100                (partition * chunk_size, len)
101            })
102            .collect_trusted()
103    }
104}
105
/// Common interface of the containers that the splitting helpers in this module
/// (`split`, `split_and_flatten`) operate on.
#[allow(clippy::len_without_is_empty)]
pub trait Container: Clone {
    /// Total number of elements in the container.
    fn len(&self) -> usize;

    /// Number of chunks the container currently consists of.
    fn n_chunks(&self) -> usize;

    /// Length of every chunk.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize>;

    /// Yields every chunk as a container of its own.
    fn iter_chunks(&self) -> impl Iterator<Item = Self>;

    /// Whether the container should be rechunked. The splitting helpers do not hand out the
    /// existing chunks directly when this returns `true`.
    fn should_rechunk(&self) -> bool;

    /// Slice of the container described by `offset` and `len`.
    fn slice(&self, offset: i64, len: usize) -> Self;

    /// Splits the container in two parts at `offset`.
    fn split_at(&self, offset: i64) -> (Self, Self);
}
122
123impl Container for DataFrame {
124    fn slice(&self, offset: i64, len: usize) -> Self {
125        DataFrame::slice(self, offset, len)
126    }
127
128    fn split_at(&self, offset: i64) -> (Self, Self) {
129        DataFrame::split_at(self, offset)
130    }
131
132    fn len(&self) -> usize {
133        self.height()
134    }
135
136    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
137        flatten_df_iter(self)
138    }
139
140    fn should_rechunk(&self) -> bool {
141        self.should_rechunk()
142    }
143
144    fn n_chunks(&self) -> usize {
145        DataFrame::first_col_n_chunks(self)
146    }
147
148    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
149        // @scalar-correctness?
150        self.columns[0].as_materialized_series().chunk_lengths()
151    }
152}
153
154impl<T: PolarsDataType> Container for ChunkedArray<T> {
155    fn slice(&self, offset: i64, len: usize) -> Self {
156        ChunkedArray::slice(self, offset, len)
157    }
158
159    fn split_at(&self, offset: i64) -> (Self, Self) {
160        ChunkedArray::split_at(self, offset)
161    }
162
163    fn len(&self) -> usize {
164        ChunkedArray::len(self)
165    }
166
167    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
168        self.downcast_iter()
169            .map(|arr| Self::with_chunk(self.name().clone(), arr.clone()))
170    }
171
172    fn should_rechunk(&self) -> bool {
173        false
174    }
175
176    fn n_chunks(&self) -> usize {
177        self.chunks().len()
178    }
179
180    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
181        ChunkedArray::chunk_lengths(self)
182    }
183}
184
185impl Container for Series {
186    fn slice(&self, offset: i64, len: usize) -> Self {
187        self.0.slice(offset, len)
188    }
189
190    fn split_at(&self, offset: i64) -> (Self, Self) {
191        self.0.split_at(offset)
192    }
193
194    fn len(&self) -> usize {
195        self.0.len()
196    }
197
198    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
199        (0..self.0.n_chunks()).map(|i| self.select_chunk(i))
200    }
201
202    fn should_rechunk(&self) -> bool {
203        false
204    }
205
206    fn n_chunks(&self) -> usize {
207        self.chunks().len()
208    }
209
210    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
211        self.0.chunk_lengths()
212    }
213}
214
215fn split_impl<C: Container>(container: &C, target: usize, chunk_size: usize) -> Vec<C> {
216    if target == 1 {
217        return vec![container.clone()];
218    }
219    let mut out = Vec::with_capacity(target);
220    let chunk_size = chunk_size as i64;
221
222    // First split
223    let (chunk, mut remainder) = container.split_at(chunk_size);
224    out.push(chunk);
225
226    // Take the rest of the splits of exactly chunk size, but skip the last remainder as we won't split that.
227    for _ in 1..target - 1 {
228        let (a, b) = remainder.split_at(chunk_size);
229        out.push(a);
230        remainder = b
231    }
232    // This can be slightly larger than `chunk_size`, but is smaller than `2 * chunk_size`.
233    out.push(remainder);
234    out
235}
236
237/// Splits, but doesn't flatten chunks. E.g. a container can still have multiple chunks.
238pub fn split<C: Container>(container: &C, target: usize) -> Vec<C> {
239    let total_len = container.len();
240    if total_len == 0 {
241        return vec![container.clone()];
242    }
243
244    let chunk_size = std::cmp::max(total_len / target, 1);
245
246    if container.n_chunks() == target
247        && container
248            .chunk_lengths()
249            .all(|len| len.abs_diff(chunk_size) < 100)
250        // We cannot get chunks if they are misaligned
251        && !container.should_rechunk()
252    {
253        return container.iter_chunks().collect();
254    }
255    split_impl(container, target, chunk_size)
256}
257
258/// Split a [`Container`] in `target` elements. The target doesn't have to be respected if not
259/// Deviation of the target might be done to create more equal size chunks.
260pub fn split_and_flatten<C: Container>(container: &C, target: usize) -> Vec<C> {
261    let total_len = container.len();
262    if total_len == 0 {
263        return vec![container.clone()];
264    }
265
266    let chunk_size = std::cmp::max(total_len / target, 1);
267
268    if container.n_chunks() == target
269        && container
270            .chunk_lengths()
271            .all(|len| len.abs_diff(chunk_size) < 100)
272        // We cannot get chunks if they are misaligned
273        && !container.should_rechunk()
274    {
275        return container.iter_chunks().collect();
276    }
277
278    if container.n_chunks() == 1 {
279        split_impl(container, target, chunk_size)
280    } else {
281        let mut out = Vec::with_capacity(target);
282        let chunks = container.iter_chunks();
283
284        'new_chunk: for mut chunk in chunks {
285            loop {
286                let h = chunk.len();
287                if h < chunk_size {
288                    // TODO if the chunk is much smaller than chunk size, we should try to merge it with the next one.
289                    out.push(chunk);
290                    continue 'new_chunk;
291                }
292
293                // If a split leads to the next chunk being smaller than 30% take the whole chunk
294                if ((h - chunk_size) as f64 / chunk_size as f64) < 0.3 {
295                    out.push(chunk);
296                    continue 'new_chunk;
297                }
298
299                let (a, b) = chunk.split_at(chunk_size as i64);
300                out.push(a);
301                chunk = b;
302            }
303        }
304        out
305    }
306}
307
308/// Split a [`DataFrame`] in `target` elements. The target doesn't have to be respected if not
309/// strict. Deviation of the target might be done to create more equal size chunks.
310///
311/// # Panics
312/// if chunks are not aligned
313pub fn split_df_as_ref(df: &DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
314    if strict {
315        split(df, target)
316    } else {
317        split_and_flatten(df, target)
318    }
319}
320
321#[doc(hidden)]
322/// Split a [`DataFrame`] into `n` parts. We take a `&mut` to be able to repartition/align chunks.
323/// `strict` in that it respects `n` even if the chunks are suboptimal.
324pub fn split_df(df: &mut DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
325    if target == 0 || df.is_empty() {
326        return vec![df.clone()];
327    }
328    // make sure that chunks are aligned.
329    df.align_chunks_par();
330    split_df_as_ref(df, target, strict)
331}
332
333pub fn slice_slice<T>(vals: &[T], offset: i64, len: usize) -> &[T] {
334    let (raw_offset, slice_len) = slice_offsets(offset, len, vals.len());
335    &vals[raw_offset..raw_offset + slice_len]
336}
337
/// Resolves a possibly negative `offset` and a `length` against an array of `array_len`
/// elements, returning the clamped `(start_index, slice_length)`.
///
/// A negative `offset` counts from the end of the array. Start and stop are both clamped to
/// `0..=array_len`, so the result is always in bounds.
///
/// # Panics
/// If `array_len` does not fit in an `i64`.
#[inline]
pub fn slice_offsets(offset: i64, length: usize, array_len: usize) -> (usize, usize) {
    let upper_bound: i64 = array_len
        .try_into()
        .expect("array length larger than i64::MAX");

    // Saturating arithmetic keeps extreme offsets/lengths from overflowing.
    let raw_start = match offset {
        negative if negative < 0 => negative.saturating_add_unsigned(array_len as u64),
        non_negative => non_negative,
    };
    let raw_stop = raw_start.saturating_add_unsigned(length as u64);

    let start = raw_start.clamp(0, upper_bound);
    let stop = raw_stop.clamp(0, upper_bound);

    (start as usize, (stop - start) as usize)
}
357
/// Apply a macro selected by a physical dtype.
///
/// * `DataType::String` invokes `$macro_string!` and `DataType::Boolean` invokes
///   `$macro_bool!`, both with the optional arguments only.
/// * Every numeric dtype invokes `$macro!` with the matching native type (`u8`, `i32`,
///   `f64`, ...) as first argument, followed by the optional arguments.
///
/// Optional arguments are forwarded comma-separated in every arm. `DataType` must be in
/// scope at the call site.
///
/// # Panics
/// The expansion panics at runtime for a dtype that has no arm.
// NOTE(review): unlike the other dispatch macros in this file there is no `UInt128` arm
// here — confirm whether that is intentional.
#[macro_export]
macro_rules! match_dtype_to_physical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            DataType::String => $macro_string!($($opt_args),*),
            DataType::Boolean => $macro_bool!($($opt_args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(u8 $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(u16 $(, $opt_args)*),
            DataType::UInt32 => $macro!(u32 $(, $opt_args)*),
            DataType::UInt64 => $macro!(u64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(i8 $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(i16 $(, $opt_args)*),
            DataType::Int32 => $macro!(i32 $(, $opt_args)*),
            DataType::Int64 => $macro!(i64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(i128 $(, $opt_args)*),
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $macro!(pf16 $(, $opt_args)*),
            DataType::Float32 => $macro!(f32 $(, $opt_args)*),
            DataType::Float64 => $macro!(f64 $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
387
/// Apply a macro selected by a dtype, passing the polars type marker.
///
/// * `DataType::String`, `DataType::Binary` and `DataType::Boolean` invoke `$macro_string!`,
///   `$macro_binary!` and `$macro_bool!` respectively, with the optional arguments only.
/// * Every numeric dtype invokes `$macro!` with the matching type marker (`UInt8Type`,
///   `Int32Type`, `Float64Type`, ...) as first argument, followed by the optional arguments.
///
/// Optional arguments are forwarded comma-separated in every arm. `DataType` must be in
/// scope at the call site.
///
/// # Panics
/// The expansion panics at runtime for a dtype that has no arm.
#[macro_export]
macro_rules! match_dtype_to_logical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_binary:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            DataType::String => $macro_string!($($opt_args),*),
            DataType::Binary => $macro_binary!($($opt_args),*),
            DataType::Boolean => $macro_bool!($($opt_args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(UInt8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(UInt16Type $(, $opt_args)*),
            DataType::UInt32 => $macro!(UInt32Type $(, $opt_args)*),
            DataType::UInt64 => $macro!(UInt64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $macro!(UInt128Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(Int8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(Int16Type $(, $opt_args)*),
            DataType::Int32 => $macro!(Int32Type $(, $opt_args)*),
            DataType::Int64 => $macro!(Int64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(Int128Type $(, $opt_args)*),
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $macro!(Float16Type $(, $opt_args)*),
            DataType::Float32 => $macro!(Float32Type $(, $opt_args)*),
            DataType::Float64 => $macro!(Float64Type $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
420
/// Apply a macro on the downcasted `ChunkedArray` of `$self`.
///
/// The dtype reported by `$self.dtype()` selects the accessor (`$self.i32().unwrap()`,
/// `$self.str().unwrap()`, ...). Its result is handed to `$macro!` for numeric dtypes,
/// `$macro_string!` for `String` and `$macro_bool!` for `Boolean`, followed by the optional
/// arguments. `DataType` must be in scope at the call site.
///
/// # Panics
/// The expansion panics at runtime for a dtype that has no arm.
#[macro_export]
macro_rules! match_arrow_dtype_apply_macro_ca {
    ($self:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            // Signed integers.
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($self.i128().unwrap() $(, $opt_args)*),
            // Unsigned integers.
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $macro!($self.u128().unwrap() $(, $opt_args)*),
            // Floats.
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $macro!($self.f16().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            // Non-numeric dtypes get their own macros.
            DataType::String => $macro_string!($self.str().unwrap() $(, $opt_args)*),
            DataType::Boolean => $macro_bool!($self.bool().unwrap() $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
452
/// Runs `$body` with `$T` bound to the native type of a physical numeric dtype.
///
/// Usage: `with_match_physical_numeric_type!(dtype, |$T| { ... })`, where the body may use
/// `$T` as a type (`i8` ..= `i128`, `u8` ..= `u128`, `pf16`, `f32`, `f64`).
///
/// # Panics
/// The expansion panics at runtime for a dtype that has no arm.
#[macro_export]
macro_rules! with_match_physical_numeric_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` carries the literal `$` token so that the helper macro can declare `$T` itself.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    #[cfg(feature = "dtype-f16")]
    use polars_utils::float16::pf16;
    match $dtype {
        // Unsigned integers.
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { u128 },
        // Signed integers.
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        // Floats.
        #[cfg(feature = "dtype-f16")]
        Float16 => __with_ty__! { pf16 },
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
485
/// Runs `$body` with `$T` bound to the native type of a physical integer dtype.
///
/// Usage: `with_match_physical_integer_type!(dtype, |$T| { ... })`, where the body may use
/// `$T` as a type (`i8` ..= `i128`, `u8` ..= `u128`).
///
/// # Panics
/// The expansion panics at runtime for a dtype that has no arm.
#[macro_export]
macro_rules! with_match_physical_integer_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` carries the literal `$` token so that the helper macro can declare `$T` itself.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    // NOTE(review): no arm below uses `pf16`; the import is kept as it was.
    #[cfg(feature = "dtype-f16")]
    use polars_utils::float16::pf16;
    match $dtype {
        // Unsigned integers.
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { u128 },
        // Signed integers.
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
514
/// Runs `$body` with `$T` bound to the native type of a physical float dtype.
///
/// Usage: `with_match_physical_float_type!(dtype, |$T| { ... })`, where the body may use
/// `$T` as a type (`pf16`, `f32`, `f64`).
///
/// The `pf16` import is only emitted together with the `Float16` arm (feature `dtype-f16`),
/// as in the other `with_match_physical_*_type` macros, so the expansion does not reference
/// `polars_utils` when that feature is off.
///
/// # Panics
/// The expansion panics at runtime for a dtype that has no arm.
#[macro_export]
macro_rules! with_match_physical_float_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` carries the literal `$` token so that the helper macro can declare `$T` itself.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    #[cfg(feature = "dtype-f16")]
    use polars_utils::float16::pf16;
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-f16")]
        Float16 => __with_ty__! { pf16 },
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
530
/// Runs `$body` with `$T` bound to the polars type marker of a physical float dtype.
///
/// Usage: `with_match_physical_float_polars_type!(dtype, |$T| { ... })`, where the body may
/// use `$T` as a type (`Float16Type`, `Float32Type`, `Float64Type`). The type markers must
/// be in scope at the call site.
///
/// # Panics
/// The expansion panics at runtime for a dtype that has no arm.
#[macro_export]
macro_rules! with_match_physical_float_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` carries the literal `$` token so that the helper macro can declare `$T` itself.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        Float64 => __with_ty__! { Float64Type },
        Float32 => __with_ty__! { Float32Type },
        #[cfg(feature = "dtype-f16")]
        Float16 => __with_ty__! { Float16Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
545
/// Runs `$body` with `$T` bound to the polars type marker of a physical numeric dtype.
///
/// Usage: `with_match_physical_numeric_polars_type!(dtype, |$T| { ... })`, where the body
/// may use `$T` as a type (`Int8Type`, `UInt64Type`, `Float32Type`, ...). The type markers
/// must be in scope at the call site.
///
/// # Panics
/// The expansion panics at runtime for a dtype that has no arm.
#[macro_export]
macro_rules! with_match_physical_numeric_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` carries the literal `$` token so that the helper macro can declare `$T` itself.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        // Unsigned integers.
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { UInt128Type },
        // Signed integers.
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        // Floats.
        #[cfg(feature = "dtype-f16")]
        Float16 => __with_ty__! { Float16Type },
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
576
/// Runs `$body` with `$T` bound to the polars type marker of a physical integer dtype.
///
/// Usage: `with_match_physical_integer_polars_type!(dtype, |$T| { ... })`, where the body
/// may use `$T` as a type (`Int8Type` ..= `Int128Type`, `UInt8Type` ..= `UInt128Type`).
/// The expansion glob-imports `$crate::datatypes`.
///
/// # Panics
/// The expansion panics at runtime for a dtype that has no arm.
#[macro_export]
macro_rules! with_match_physical_integer_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` carries the literal `$` token so that the helper macro can declare `$T` itself.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::*;
    use $crate::datatypes::DataType::*;
    match $key_type {
        // Unsigned integers.
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { UInt128Type },
        // Signed integers.
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
604
/// Runs `$body` with `$T` bound to the categorical type marker that belongs to a
/// `CategoricalPhysical` value.
///
/// Usage: `with_match_categorical_physical_type!(physical, |$T| { ... })`, where the body
/// may use `$T` as a type (`Categorical8Type`, `Categorical16Type`, `Categorical32Type`).
/// `CategoricalPhysical` and the type markers must be in scope at the call site.
#[macro_export]
macro_rules! with_match_categorical_physical_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // `$_` carries the literal `$` token so that the helper macro can declare `$T` itself.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    match $dtype {
        CategoricalPhysical::U32 => __with_ty__! { Categorical32Type },
        CategoricalPhysical::U16 => __with_ty__! { Categorical16Type },
        CategoricalPhysical::U8 => __with_ty__! { Categorical8Type },
    }
})}
616
/// Apply a macro on the downcasted `ChunkedArray` of a physical numeric dtype.
///
/// The dtype reported by `$self.dtype()` selects the accessor (`$self.u8().unwrap()`,
/// `$self.f64().unwrap()`, ...), whose result is passed to `$macro!` followed by the optional
/// arguments. Only integer and float dtypes have an arm; logical dtypes are not handled.
/// `DataType` must be in scope at the call site.
///
/// # Panics
/// The expansion panics at runtime for a dtype that has no arm.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            // Signed integers.
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($self.i128().unwrap() $(, $opt_args)*),
            // Unsigned integers.
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $macro!($self.u128().unwrap() $(, $opt_args)*),
            // Floats.
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $macro!($self.f16().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
647
/// Apply a macro on the mutably downcasted `ChunkedArray` of a physical numeric dtype.
///
/// For every integer and float dtype, `$self.as_mut()` is bound to a `&mut` of the matching
/// chunked type (`&mut UInt8Chunked`, `&mut Float64Chunked`, ...). `$macro!` is then invoked
/// with the polars type marker, that binding, and the optional arguments. Logical dtypes are
/// not handled. `DataType`, the chunked types and the type markers must be in scope at the
/// call site.
///
/// # Panics
/// The expansion panics at runtime for a dtype that has no arm.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical_mut {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        // The dtype is cloned so that `$self` is not borrowed while `as_mut` runs.
        match $self.dtype().clone() {
            // Signed integers.
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => {
                let ca: &mut Int8Chunked = $self.as_mut();
                $macro!(Int8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => {
                let ca: &mut Int16Chunked = $self.as_mut();
                $macro!(Int16Type, ca $(, $opt_args)*)
            },
            DataType::Int32 => {
                let ca: &mut Int32Chunked = $self.as_mut();
                $macro!(Int32Type, ca $(, $opt_args)*)
            },
            DataType::Int64 => {
                let ca: &mut Int64Chunked = $self.as_mut();
                $macro!(Int64Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => {
                let ca: &mut Int128Chunked = $self.as_mut();
                $macro!(Int128Type, ca $(, $opt_args)*)
            },
            // Unsigned integers.
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => {
                let ca: &mut UInt8Chunked = $self.as_mut();
                $macro!(UInt8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => {
                let ca: &mut UInt16Chunked = $self.as_mut();
                $macro!(UInt16Type, ca $(, $opt_args)*)
            },
            DataType::UInt32 => {
                let ca: &mut UInt32Chunked = $self.as_mut();
                $macro!(UInt32Type, ca $(, $opt_args)*)
            },
            DataType::UInt64 => {
                let ca: &mut UInt64Chunked = $self.as_mut();
                $macro!(UInt64Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => {
                let ca: &mut UInt128Chunked = $self.as_mut();
                $macro!(UInt128Type, ca $(, $opt_args)*)
            },
            // Floats.
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => {
                let ca: &mut Float16Chunked = $self.as_mut();
                $macro!(Float16Type, ca $(, $opt_args)*)
            },
            DataType::Float32 => {
                let ca: &mut Float32Chunked = $self.as_mut();
                $macro!(Float32Type, ca $(, $opt_args)*)
            },
            DataType::Float64 => {
                let ca: &mut Float64Chunked = $self.as_mut();
                $macro!(Float64Type, ca $(, $opt_args)*)
            },
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
718
/// Calls `$method` with `$args` on the typed array that matches the dtype of `$self`.
///
/// The dtype reported by `$self.dtype()` selects the accessor (`$self.bool().unwrap()`,
/// `$self.i64().unwrap()`, `$self.list().unwrap()`, ...), and `$method($args...)` is called
/// on its result. `DataType` must be in scope at the call site. A comma is required after
/// the method name, even when no arguments follow.
///
/// # Panics
/// The expansion panics at runtime for a dtype that has no arm.
#[macro_export]
macro_rules! apply_method_all_arrow_series {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Boolean => $self.bool().unwrap().$method($($args),*),
            DataType::String => $self.str().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $self.u128().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $self.f16().unwrap().$method($($args),*),
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            DataType::Time => $self.time().unwrap().$method($($args),*),
            DataType::Date => $self.date().unwrap().$method($($args),*),
            DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
            DataType::List(_) => $self.list().unwrap().$method($($args),*),
            DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
            dt => panic!("dtype {:?} not supported", dt)
        }
    }
}
754
/// Calls `$method` with `$args` on the typed array of a physical integer dtype.
///
/// The dtype reported by `$self.dtype()` selects the accessor (`$self.u8().unwrap()`,
/// `$self.i64().unwrap()`, ...), and `$method($args...)` is called on its result.
/// `DataType` must be in scope at the call site. A comma is required after the method name,
/// even when no arguments follow.
///
/// # Panics
/// The expansion panics at runtime for a dtype that has no arm.
#[macro_export]
macro_rules! apply_method_physical_integer {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            // Signed integers.
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            // Unsigned integers.
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $self.u128().unwrap().$method($($args),*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }
}
779
/// Calls `$method` with `$args` on the typed array of a physical numeric dtype.
///
/// Float dtypes are handled here; every other dtype is delegated to
/// `apply_method_physical_integer!`. Bool and String are not included.
#[macro_export]
macro_rules! apply_method_physical_numeric {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $self.f16().unwrap().$method($($args),*),
            // Everything that is not a float is treated as an integer dtype.
            _ => apply_method_physical_integer!($self, $method, $($args),*),
        }
    }
}
793
/// Builds a `DataFrame` from `name => values` pairs.
///
/// Every pair becomes a `Series` (through `NamedFrom::new`) wrapped in a `Column`. The
/// columns are handed to `DataFrame::new`, so the macro evaluates to whatever
/// `DataFrame::new` returns. At least one pair is required; a trailing comma is accepted.
#[macro_export]
macro_rules! df {
    ($($col_name:expr => $slice:expr),+ $(,)?) => {
        $crate::prelude::DataFrame::new(vec![
            $(
                $crate::prelude::Column::from(
                    <$crate::prelude::Series as $crate::prelude::NamedFrom::<_, _>>::new(
                        $col_name.into(),
                        $slice,
                    )
                ),
            )+
        ])
    }
}
802
803pub fn get_time_units(tu_l: &TimeUnit, tu_r: &TimeUnit) -> TimeUnit {
804    use crate::datatypes::time_unit::TimeUnit::*;
805    match (tu_l, tu_r) {
806        (Nanoseconds, Microseconds) => Microseconds,
807        (_, Milliseconds) => Milliseconds,
808        _ => *tu_l,
809    }
810}
811
812#[cold]
813#[inline(never)]
814fn width_mismatch(df1: &DataFrame, df2: &DataFrame) -> PolarsError {
815    let mut df1_extra = Vec::new();
816    let mut df2_extra = Vec::new();
817
818    let s1 = df1.schema();
819    let s2 = df2.schema();
820
821    s1.field_compare(s2, &mut df1_extra, &mut df2_extra);
822
823    let df1_extra = df1_extra
824        .into_iter()
825        .map(|(_, (n, _))| n.as_str())
826        .collect::<Vec<_>>()
827        .join(", ");
828    let df2_extra = df2_extra
829        .into_iter()
830        .map(|(_, (n, _))| n.as_str())
831        .collect::<Vec<_>>()
832        .join(", ");
833
834    polars_err!(
835        SchemaMismatch: r#"unable to vstack, dataframes have different widths ({} != {}).
836One dataframe has additional columns: [{df1_extra}].
837Other dataframe has additional columns: [{df2_extra}]."#,
838        df1.width(),
839        df2.width(),
840    )
841}
842
843pub fn accumulate_dataframes_vertical_unchecked_optional<I>(dfs: I) -> Option<DataFrame>
844where
845    I: IntoIterator<Item = DataFrame>,
846{
847    let mut iter = dfs.into_iter();
848    let additional = iter.size_hint().0;
849    let mut acc_df = iter.next()?;
850    acc_df.reserve_chunks(additional);
851
852    for df in iter {
853        if acc_df.width() != df.width() {
854            panic!("{}", width_mismatch(&acc_df, &df));
855        }
856
857        acc_df.vstack_mut_owned_unchecked(df);
858    }
859    Some(acc_df)
860}
861
862/// This takes ownership of the DataFrame so that drop is called earlier.
863/// Does not check if schema is correct
864pub fn accumulate_dataframes_vertical_unchecked<I>(dfs: I) -> DataFrame
865where
866    I: IntoIterator<Item = DataFrame>,
867{
868    let mut iter = dfs.into_iter();
869    let additional = iter.size_hint().0;
870    let mut acc_df = iter.next().unwrap();
871    acc_df.reserve_chunks(additional);
872
873    for df in iter {
874        if acc_df.width() != df.width() {
875            panic!("{}", width_mismatch(&acc_df, &df));
876        }
877
878        acc_df.vstack_mut_owned_unchecked(df);
879    }
880    acc_df
881}
882
883/// This takes ownership of the DataFrame so that drop is called earlier.
884/// # Panics
885/// Panics if `dfs` is empty.
886pub fn accumulate_dataframes_vertical<I>(dfs: I) -> PolarsResult<DataFrame>
887where
888    I: IntoIterator<Item = DataFrame>,
889{
890    let mut iter = dfs.into_iter();
891    let additional = iter.size_hint().0;
892    let mut acc_df = iter.next().unwrap();
893    acc_df.reserve_chunks(additional);
894    for df in iter {
895        if acc_df.width() != df.width() {
896            return Err(width_mismatch(&acc_df, &df));
897        }
898
899        acc_df.vstack_mut_owned(df)?;
900    }
901
902    Ok(acc_df)
903}
904
905/// Concat the DataFrames to a single DataFrame.
906pub fn concat_df<'a, I>(dfs: I) -> PolarsResult<DataFrame>
907where
908    I: IntoIterator<Item = &'a DataFrame>,
909{
910    let mut iter = dfs.into_iter();
911    let additional = iter.size_hint().0;
912    let mut acc_df = iter.next().unwrap().clone();
913    acc_df.reserve_chunks(additional);
914    for df in iter {
915        acc_df.vstack_mut(df)?;
916    }
917    Ok(acc_df)
918}
919
920/// Concat the DataFrames to a single DataFrame.
921pub fn concat_df_unchecked<'a, I>(dfs: I) -> DataFrame
922where
923    I: IntoIterator<Item = &'a DataFrame>,
924{
925    let mut iter = dfs.into_iter();
926    let additional = iter.size_hint().0;
927    let mut acc_df = iter.next().unwrap().clone();
928    acc_df.reserve_chunks(additional);
929    for df in iter {
930        acc_df.vstack_mut_unchecked(df);
931    }
932    acc_df
933}
934
935pub fn accumulate_dataframes_horizontal(dfs: Vec<DataFrame>) -> PolarsResult<DataFrame> {
936    let mut iter = dfs.into_iter();
937    let mut acc_df = iter.next().unwrap();
938    for df in iter {
939        acc_df.hstack_mut(df.get_columns())?;
940    }
941    Ok(acc_df)
942}
943
944/// Ensure the chunks in both ChunkedArrays have the same length.
945/// # Panics
946/// This will panic if `left.len() != right.len()` and array is chunked.
947pub fn align_chunks_binary<'a, T, B>(
948    left: &'a ChunkedArray<T>,
949    right: &'a ChunkedArray<B>,
950) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<B>>)
951where
952    B: PolarsDataType,
953    T: PolarsDataType,
954{
955    let assert = || {
956        assert_eq!(
957            left.len(),
958            right.len(),
959            "expected arrays of the same length"
960        )
961    };
962    match (left.chunks.len(), right.chunks.len()) {
963        // All chunks are equal length
964        (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
965        // All chunks are equal length
966        (a, b)
967            if a == b
968                && left
969                    .chunk_lengths()
970                    .zip(right.chunk_lengths())
971                    .all(|(l, r)| l == r) =>
972        {
973            (Cow::Borrowed(left), Cow::Borrowed(right))
974        },
975        (_, 1) => {
976            assert();
977            (
978                Cow::Borrowed(left),
979                Cow::Owned(right.match_chunks(left.chunk_lengths())),
980            )
981        },
982        (1, _) => {
983            assert();
984            (
985                Cow::Owned(left.match_chunks(right.chunk_lengths())),
986                Cow::Borrowed(right),
987            )
988        },
989        (_, _) => {
990            assert();
991            // could optimize to choose to rechunk a primitive and not a string or list type
992            let left = left.rechunk();
993            (
994                Cow::Owned(left.match_chunks(right.chunk_lengths())),
995                Cow::Borrowed(right),
996            )
997        },
998    }
999}
1000
1001/// Ensure the chunks in ChunkedArray and Series have the same length.
1002/// # Panics
1003/// This will panic if `left.len() != right.len()` and array is chunked.
1004pub fn align_chunks_binary_ca_series<'a, T>(
1005    left: &'a ChunkedArray<T>,
1006    right: &'a Series,
1007) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, Series>)
1008where
1009    T: PolarsDataType,
1010{
1011    let assert = || {
1012        assert_eq!(
1013            left.len(),
1014            right.len(),
1015            "expected arrays of the same length"
1016        )
1017    };
1018    match (left.chunks.len(), right.chunks().len()) {
1019        // All chunks are equal length
1020        (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
1021        // All chunks are equal length
1022        (a, b)
1023            if a == b
1024                && left
1025                    .chunk_lengths()
1026                    .zip(right.chunk_lengths())
1027                    .all(|(l, r)| l == r) =>
1028        {
1029            assert();
1030            (Cow::Borrowed(left), Cow::Borrowed(right))
1031        },
1032        (_, 1) => (left.rechunk(), Cow::Borrowed(right)),
1033        (1, _) => (Cow::Borrowed(left), Cow::Owned(right.rechunk())),
1034        (_, _) => {
1035            assert();
1036            (left.rechunk(), Cow::Owned(right.rechunk()))
1037        },
1038    }
1039}
1040
/// Owned variant of chunk alignment for two `Series`.
///
/// Both inputs are returned untouched when each has a single chunk or when their
/// chunk lengths agree pairwise. Otherwise every side that does not have exactly
/// one chunk is rechunked. No length check is performed.
#[cfg(feature = "performant")]
pub(crate) fn align_chunks_binary_owned_series(left: Series, right: Series) -> (Series, Series) {
    let n_left = left.chunks().len();
    let n_right = right.chunks().len();

    let already_aligned = (n_left == 1 && n_right == 1)
        || (n_left == n_right
            && left
                .chunk_lengths()
                .zip(right.chunk_lengths())
                .all(|(l, r)| l == r));
    if already_aligned {
        return (left, right);
    }

    // A side with exactly one chunk is kept as is; any other side is rechunked.
    let left = if n_left == 1 { left } else { left.rechunk() };
    let right = if n_right == 1 { right } else { right.rechunk() };
    (left, right)
}
1060
1061pub(crate) fn align_chunks_binary_owned<T, B>(
1062    left: ChunkedArray<T>,
1063    right: ChunkedArray<B>,
1064) -> (ChunkedArray<T>, ChunkedArray<B>)
1065where
1066    B: PolarsDataType,
1067    T: PolarsDataType,
1068{
1069    match (left.chunks.len(), right.chunks.len()) {
1070        (1, 1) => (left, right),
1071        // All chunks are equal length
1072        (a, b)
1073            if a == b
1074                && left
1075                    .chunk_lengths()
1076                    .zip(right.chunk_lengths())
1077                    .all(|(l, r)| l == r) =>
1078        {
1079            (left, right)
1080        },
1081        (_, 1) => (left.rechunk().into_owned(), right),
1082        (1, _) => (left, right.rechunk().into_owned()),
1083        (_, _) => (left.rechunk().into_owned(), right.rechunk().into_owned()),
1084    }
1085}
1086
1087/// # Panics
1088/// This will panic if `a.len() != b.len() || b.len() != c.len()` and array is chunked.
1089#[allow(clippy::type_complexity)]
1090pub fn align_chunks_ternary<'a, A, B, C>(
1091    a: &'a ChunkedArray<A>,
1092    b: &'a ChunkedArray<B>,
1093    c: &'a ChunkedArray<C>,
1094) -> (
1095    Cow<'a, ChunkedArray<A>>,
1096    Cow<'a, ChunkedArray<B>>,
1097    Cow<'a, ChunkedArray<C>>,
1098)
1099where
1100    A: PolarsDataType,
1101    B: PolarsDataType,
1102    C: PolarsDataType,
1103{
1104    if a.chunks.len() == 1 && b.chunks.len() == 1 && c.chunks.len() == 1 {
1105        return (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c));
1106    }
1107
1108    assert!(
1109        a.len() == b.len() && b.len() == c.len(),
1110        "expected arrays of the same length"
1111    );
1112
1113    match (a.chunks.len(), b.chunks.len(), c.chunks.len()) {
1114        (_, 1, 1) => (
1115            Cow::Borrowed(a),
1116            Cow::Owned(b.match_chunks(a.chunk_lengths())),
1117            Cow::Owned(c.match_chunks(a.chunk_lengths())),
1118        ),
1119        (1, 1, _) => (
1120            Cow::Owned(a.match_chunks(c.chunk_lengths())),
1121            Cow::Owned(b.match_chunks(c.chunk_lengths())),
1122            Cow::Borrowed(c),
1123        ),
1124        (1, _, 1) => (
1125            Cow::Owned(a.match_chunks(b.chunk_lengths())),
1126            Cow::Borrowed(b),
1127            Cow::Owned(c.match_chunks(b.chunk_lengths())),
1128        ),
1129        (1, _, _) => {
1130            let b = b.rechunk();
1131            (
1132                Cow::Owned(a.match_chunks(c.chunk_lengths())),
1133                Cow::Owned(b.match_chunks(c.chunk_lengths())),
1134                Cow::Borrowed(c),
1135            )
1136        },
1137        (_, 1, _) => {
1138            let a = a.rechunk();
1139            (
1140                Cow::Owned(a.match_chunks(c.chunk_lengths())),
1141                Cow::Owned(b.match_chunks(c.chunk_lengths())),
1142                Cow::Borrowed(c),
1143            )
1144        },
1145        (_, _, 1) => {
1146            let b = b.rechunk();
1147            (
1148                Cow::Borrowed(a),
1149                Cow::Owned(b.match_chunks(a.chunk_lengths())),
1150                Cow::Owned(c.match_chunks(a.chunk_lengths())),
1151            )
1152        },
1153        (len_a, len_b, len_c)
1154            if len_a == len_b
1155                && len_b == len_c
1156                && a.chunk_lengths()
1157                    .zip(b.chunk_lengths())
1158                    .zip(c.chunk_lengths())
1159                    .all(|((a, b), c)| a == b && b == c) =>
1160        {
1161            (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c))
1162        },
1163        _ => {
1164            // could optimize to choose to rechunk a primitive and not a string or list type
1165            let a = a.rechunk();
1166            let b = b.rechunk();
1167            (
1168                Cow::Owned(a.match_chunks(c.chunk_lengths())),
1169                Cow::Owned(b.match_chunks(c.chunk_lengths())),
1170                Cow::Borrowed(c),
1171            )
1172        },
1173    }
1174}
1175
1176pub fn binary_concatenate_validities<'a, T, B>(
1177    left: &'a ChunkedArray<T>,
1178    right: &'a ChunkedArray<B>,
1179) -> Option<Bitmap>
1180where
1181    B: PolarsDataType,
1182    T: PolarsDataType,
1183{
1184    let (left, right) = align_chunks_binary(left, right);
1185    let left_validity = concatenate_validities(left.chunks());
1186    let right_validity = concatenate_validities(right.chunks());
1187    combine_validities_and(left_validity.as_ref(), right_validity.as_ref())
1188}
1189
1190/// Convenience for `x.into_iter().map(Into::into).collect()` using an `into_vec()` function.
1191pub trait IntoVec<T> {
1192    fn into_vec(self) -> Vec<T>;
1193}
1194
1195impl<I, S> IntoVec<PlSmallStr> for I
1196where
1197    I: IntoIterator<Item = S>,
1198    S: Into<PlSmallStr>,
1199{
1200    fn into_vec(self) -> Vec<PlSmallStr> {
1201        self.into_iter().map(|s| s.into()).collect()
1202    }
1203}
1204
1205/// This logic is same as the impl on ChunkedArray
1206/// The difference is that there is less indirection because the caller should preallocate
1207/// `chunk_lens` once. On the `ChunkedArray` we indirect through an `ArrayRef` which is an indirection
1208/// and a vtable.
1209#[inline]
1210pub(crate) fn index_to_chunked_index<
1211    I: Iterator<Item = Idx>,
1212    Idx: PartialOrd + std::ops::AddAssign + std::ops::SubAssign + Zero + One,
1213>(
1214    chunk_lens: I,
1215    index: Idx,
1216) -> (Idx, Idx) {
1217    let mut index_remainder = index;
1218    let mut current_chunk_idx = Zero::zero();
1219
1220    for chunk_len in chunk_lens {
1221        if chunk_len > index_remainder {
1222            break;
1223        } else {
1224            index_remainder -= chunk_len;
1225            current_chunk_idx += One::one();
1226        }
1227    }
1228    (current_chunk_idx, index_remainder)
1229}
1230
1231pub(crate) fn index_to_chunked_index_rev<
1232    I: Iterator<Item = Idx>,
1233    Idx: PartialOrd
1234        + std::ops::AddAssign
1235        + std::ops::SubAssign
1236        + std::ops::Sub<Output = Idx>
1237        + Zero
1238        + One
1239        + Copy
1240        + std::fmt::Debug,
1241>(
1242    chunk_lens_rev: I,
1243    index_from_back: Idx,
1244    total_chunks: Idx,
1245) -> (Idx, Idx) {
1246    debug_assert!(index_from_back > Zero::zero(), "at least -1");
1247    let mut index_remainder = index_from_back;
1248    let mut current_chunk_idx = One::one();
1249    let mut current_chunk_len = Zero::zero();
1250
1251    for chunk_len in chunk_lens_rev {
1252        current_chunk_len = chunk_len;
1253        if chunk_len >= index_remainder {
1254            break;
1255        } else {
1256            index_remainder -= chunk_len;
1257            current_chunk_idx += One::one();
1258        }
1259    }
1260    (
1261        total_chunks - current_chunk_idx,
1262        current_chunk_len - index_remainder,
1263    )
1264}
1265
1266pub fn first_null<'a, I>(iter: I) -> Option<usize>
1267where
1268    I: Iterator<Item = &'a dyn Array>,
1269{
1270    let mut offset = 0;
1271    for arr in iter {
1272        if let Some(mask) = arr.validity() {
1273            let len_mask = mask.len();
1274            let n = mask.leading_ones();
1275            if n < len_mask {
1276                return Some(offset + n);
1277            }
1278            offset += len_mask
1279        } else {
1280            offset += arr.len();
1281        }
1282    }
1283    None
1284}
1285
1286pub fn first_non_null<'a, I>(iter: I) -> Option<usize>
1287where
1288    I: Iterator<Item = &'a dyn Array>,
1289{
1290    let mut offset = 0;
1291    for arr in iter {
1292        if let Some(mask) = arr.validity() {
1293            let len_mask = mask.len();
1294            let n = mask.leading_zeros();
1295            if n < len_mask {
1296                return Some(offset + n);
1297            }
1298            offset += len_mask
1299        } else if !arr.is_empty() {
1300            return Some(offset);
1301        }
1302    }
1303    None
1304}
1305
1306pub fn last_non_null<'a, I>(iter: I, len: usize) -> Option<usize>
1307where
1308    I: DoubleEndedIterator<Item = &'a dyn Array>,
1309{
1310    if len == 0 {
1311        return None;
1312    }
1313    let mut offset = 0;
1314    for arr in iter.rev() {
1315        if let Some(mask) = arr.validity() {
1316            let len_mask = mask.len();
1317            let n = mask.trailing_zeros();
1318            if n < len_mask {
1319                return Some(len - offset - n - 1);
1320            }
1321            offset += len_mask;
1322        } else if !arr.is_empty() {
1323            return Some(len - offset - 1);
1324        }
1325    }
1326    None
1327}
1328
1329/// ensure that nulls are propagated to both arrays
1330pub fn coalesce_nulls<'a, T: PolarsDataType>(
1331    a: &'a ChunkedArray<T>,
1332    b: &'a ChunkedArray<T>,
1333) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<T>>) {
1334    if a.null_count() > 0 || b.null_count() > 0 {
1335        let (a, b) = align_chunks_binary(a, b);
1336        let mut b = b.into_owned();
1337        let a = a.coalesce_nulls(b.chunks());
1338
1339        for arr in a.chunks().iter() {
1340            for arr_b in unsafe { b.chunks_mut() } {
1341                *arr_b = arr_b.with_validity(arr.validity().cloned())
1342            }
1343        }
1344        b.compute_len();
1345        (Cow::Owned(a), Cow::Owned(b))
1346    } else {
1347        (Cow::Borrowed(a), Cow::Borrowed(b))
1348    }
1349}
1350
1351pub fn coalesce_nulls_columns(a: &Column, b: &Column) -> (Column, Column) {
1352    if a.null_count() > 0 || b.null_count() > 0 {
1353        let mut a = a.as_materialized_series().rechunk();
1354        let mut b = b.as_materialized_series().rechunk();
1355        for (arr_a, arr_b) in unsafe { a.chunks_mut().iter_mut().zip(b.chunks_mut()) } {
1356            let validity = match (arr_a.validity(), arr_b.validity()) {
1357                (None, Some(b)) => Some(b.clone()),
1358                (Some(a), Some(b)) => Some(a & b),
1359                (Some(a), None) => Some(a.clone()),
1360                (None, None) => None,
1361            };
1362            *arr_a = arr_a.with_validity(validity.clone());
1363            *arr_b = arr_b.with_validity(validity);
1364        }
1365        a.compute_len();
1366        b.compute_len();
1367        (a.into(), b.into())
1368    } else {
1369        (a.clone(), b.clone())
1370    }
1371}
1372
#[cfg(test)]
mod test {
    use super::*;

    /// Splitting ten values into three parts gives parts of length 3, 3 and 4.
    #[test]
    fn test_split() {
        let ca: Int32Chunked = (0..10).collect_ca("a".into());

        let parts = split(&ca, 3);
        let lens = [parts[0].len(), parts[1].len(), parts[2].len()];
        assert_eq!(lens, [3, 3, 4]);
    }

    /// After alignment both sides must report the same chunk lengths.
    #[test]
    fn test_align_chunks() -> PolarsResult<()> {
        // One chunk on the left, two chunks (1 + 3) on the right.
        let a = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut b = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let b_tail = Int32Chunked::new(PlSmallStr::EMPTY, &[2, 3, 4]);
        b.append(&b_tail)?;

        let (a, b) = align_chunks_binary(&a, &b);
        assert!(a.chunk_lengths().eq(b.chunk_lengths()));

        // One chunk on the left, four chunks of length one on the right.
        let a = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut b = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let single = b.clone();
        for _ in 0..3 {
            b.append(&single)?;
        }

        let (a, b) = align_chunks_binary(&a, &b);
        assert!(a.chunk_lengths().eq(b.chunk_lengths()));

        Ok(())
    }
}