Skip to main content

polars_core/utils/
mod.rs

1mod any_value;
2use arrow::compute::concatenate::concatenate_validities;
3use arrow::compute::utils::combine_validities_and;
4pub mod flatten;
5pub(crate) mod series;
6mod supertype;
7use std::borrow::Cow;
8use std::ops::{Deref, DerefMut};
9mod schema;
10
11pub use any_value::*;
12pub use arrow;
13use arrow::bitmap::Bitmap;
14pub use arrow::legacy::utils::*;
15pub use arrow::trusted_len::TrustMyLength;
16use flatten::*;
17use num_traits::{One, Zero};
18pub use rayon;
19use rayon::prelude::*;
20pub use schema::*;
21pub use series::*;
22pub use supertype::*;
23
24use crate::POOL;
25use crate::prelude::*;
26
/// Transparent newtype around a `T`.
///
/// The wrapped value is reachable through the public field `0` and through [`Deref`].
#[repr(transparent)]
pub struct Wrap<T>(pub T);

impl<T> Deref for Wrap<T> {
    type Target = T;

    /// Borrows the wrapped value.
    fn deref(&self) -> &T {
        let Wrap(inner) = self;
        inner
    }
}
36
37#[inline(always)]
38pub fn _set_partition_size() -> usize {
39    POOL.current_num_threads()
40}
41
/// Thin wrapper around a `T`, useful for certain impl specializations.
///
/// It is for instance used to implement
/// `impl<T> FromIterator<T::Native> for NoNull<ChunkedArray<T>>`,
/// because the `Option<T::Native>` flavour was already implemented as
/// `impl<T> FromIterator<Option<T::Native>> for ChunkedArray<T>`.
pub struct NoNull<T> {
    inner: T,
}

impl<T> NoNull<T> {
    /// Wraps `inner`.
    pub fn new(inner: T) -> Self {
        Self { inner }
    }

    /// Consumes the wrapper and hands back the wrapped value.
    pub fn into_inner(self) -> T {
        let Self { inner } = self;
        inner
    }
}

impl<T> Deref for NoNull<T> {
    type Target = T;

    /// Shared access to the wrapped value.
    fn deref(&self) -> &T {
        &self.inner
    }
}

impl<T> DerefMut for NoNull<T> {
    /// Exclusive access to the wrapped value.
    fn deref_mut(&mut self) -> &mut T {
        &mut self.inner
    }
}
75
/// Picks an allocation capacity from the iterator's `size_hint`.
///
/// The upper bound wins when there is one. Without an upper bound the lower bound is used,
/// unless it is `0`, in which case the capacity falls back to `1024`.
pub(crate) fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
    let (lower, upper) = iter.size_hint();
    match upper {
        Some(upper) => upper,
        None if lower == 0 => 1024,
        None => lower,
    }
}
83
// Prefer this one over `split_ca`, as this can push the null_count into the thread pool.
/// Computes `n` contiguous `(offset, length)` tuples that together cover `0..len`.
///
/// Every partition gets `len / n` elements, except the last one, which also takes the
/// remainder. `n == 0` is treated like `n == 1` (a single partition spanning everything),
/// mirroring how `split_df` treats a zero target, instead of panicking on a division by zero.
#[doc(hidden)]
pub fn _split_offsets(len: usize, n: usize) -> Vec<(usize, usize)> {
    if n <= 1 {
        return vec![(0, len)];
    }

    let chunk_size = len / n;
    let last = n - 1;

    (0..n)
        .map(|partition| {
            let offset = partition * chunk_size;
            // The last partition absorbs whatever the integer division left over.
            let partition_len = if partition == last {
                len - offset
            } else {
                chunk_size
            };
            (offset, partition_len)
        })
        .collect()
}
106
/// Chunked data that can be measured, sliced and split.
///
/// This is the interface the generic `split` and `split_and_flatten` functions are written
/// against.
#[allow(clippy::len_without_is_empty)]
pub trait Container: Clone {
    /// Total length of the container.
    fn len(&self) -> usize;

    /// Number of chunks the container consists of.
    fn n_chunks(&self) -> usize;

    /// Length of every chunk.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize>;

    /// Whether the container reports that it should be rechunked. The split functions only hand
    /// out the existing chunks as-is when this is `false`.
    fn should_rechunk(&self) -> bool;

    /// Yields one container per chunk.
    fn iter_chunks(&self) -> impl Iterator<Item = Self>;

    /// Returns the part described by `offset` and `len` as a new container.
    fn slice(&self, offset: i64, len: usize) -> Self;

    /// Cuts the container in two at `offset` and returns both parts.
    fn split_at(&self, offset: i64) -> (Self, Self);
}
123
124impl Container for DataFrame {
125    fn slice(&self, offset: i64, len: usize) -> Self {
126        DataFrame::slice(self, offset, len)
127    }
128
129    fn split_at(&self, offset: i64) -> (Self, Self) {
130        DataFrame::split_at(self, offset)
131    }
132
133    fn len(&self) -> usize {
134        self.height()
135    }
136
137    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
138        flatten_df_iter(self)
139    }
140
141    fn should_rechunk(&self) -> bool {
142        self.should_rechunk()
143    }
144
145    fn n_chunks(&self) -> usize {
146        DataFrame::first_col_n_chunks(self)
147    }
148
149    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
150        // @scalar-correctness?
151        self.columns()[0].as_materialized_series().chunk_lengths()
152    }
153}
154
155impl<T: PolarsDataType> Container for ChunkedArray<T> {
156    fn slice(&self, offset: i64, len: usize) -> Self {
157        ChunkedArray::slice(self, offset, len)
158    }
159
160    fn split_at(&self, offset: i64) -> (Self, Self) {
161        ChunkedArray::split_at(self, offset)
162    }
163
164    fn len(&self) -> usize {
165        ChunkedArray::len(self)
166    }
167
168    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
169        self.downcast_iter()
170            .map(|arr| Self::with_chunk(self.name().clone(), arr.clone()))
171    }
172
173    fn should_rechunk(&self) -> bool {
174        false
175    }
176
177    fn n_chunks(&self) -> usize {
178        self.chunks().len()
179    }
180
181    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
182        ChunkedArray::chunk_lengths(self)
183    }
184}
185
186impl Container for Series {
187    fn slice(&self, offset: i64, len: usize) -> Self {
188        self.0.slice(offset, len)
189    }
190
191    fn split_at(&self, offset: i64) -> (Self, Self) {
192        self.0.split_at(offset)
193    }
194
195    fn len(&self) -> usize {
196        self.0.len()
197    }
198
199    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
200        (0..self.0.n_chunks()).map(|i| self.select_chunk(i))
201    }
202
203    fn should_rechunk(&self) -> bool {
204        false
205    }
206
207    fn n_chunks(&self) -> usize {
208        self.chunks().len()
209    }
210
211    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
212        self.0.chunk_lengths()
213    }
214}
215
216fn split_impl<C: Container>(container: &C, target: usize, chunk_size: usize) -> Vec<C> {
217    if target == 1 {
218        return vec![container.clone()];
219    }
220    let mut out = Vec::with_capacity(target);
221    let chunk_size = chunk_size as i64;
222
223    // First split
224    let (chunk, mut remainder) = container.split_at(chunk_size);
225    out.push(chunk);
226
227    // Take the rest of the splits of exactly chunk size, but skip the last remainder as we won't split that.
228    for _ in 1..target - 1 {
229        let (a, b) = remainder.split_at(chunk_size);
230        out.push(a);
231        remainder = b
232    }
233    // This can be slightly larger than `chunk_size`, but is smaller than `2 * chunk_size`.
234    out.push(remainder);
235    out
236}
237
238/// Splits, but doesn't flatten chunks. E.g. a container can still have multiple chunks.
239pub fn split<C: Container>(container: &C, target: usize) -> Vec<C> {
240    let total_len = container.len();
241    if total_len == 0 {
242        return vec![container.clone()];
243    }
244
245    let chunk_size = std::cmp::max(total_len / target, 1);
246
247    if container.n_chunks() == target
248        && container
249            .chunk_lengths()
250            .all(|len| len.abs_diff(chunk_size) < 100)
251        // We cannot get chunks if they are misaligned
252        && !container.should_rechunk()
253    {
254        return container.iter_chunks().collect();
255    }
256    split_impl(container, target, chunk_size)
257}
258
259/// Split a [`Container`] in `target` elements. The target doesn't have to be respected if not
260/// Deviation of the target might be done to create more equal size chunks.
261pub fn split_and_flatten<C: Container>(container: &C, target: usize) -> Vec<C> {
262    let total_len = container.len();
263    if total_len == 0 {
264        return vec![container.clone()];
265    }
266
267    let chunk_size = std::cmp::max(total_len / target, 1);
268
269    if container.n_chunks() == target
270        && container
271            .chunk_lengths()
272            .all(|len| len.abs_diff(chunk_size) < 100)
273        // We cannot get chunks if they are misaligned
274        && !container.should_rechunk()
275    {
276        return container.iter_chunks().collect();
277    }
278
279    if container.n_chunks() == 1 {
280        split_impl(container, target, chunk_size)
281    } else {
282        let mut out = Vec::with_capacity(target);
283        let chunks = container.iter_chunks();
284
285        'new_chunk: for mut chunk in chunks {
286            loop {
287                let h = chunk.len();
288                if h < chunk_size {
289                    // TODO if the chunk is much smaller than chunk size, we should try to merge it with the next one.
290                    out.push(chunk);
291                    continue 'new_chunk;
292                }
293
294                // If a split leads to the next chunk being smaller than 30% take the whole chunk
295                if ((h - chunk_size) as f64 / chunk_size as f64) < 0.3 {
296                    out.push(chunk);
297                    continue 'new_chunk;
298                }
299
300                let (a, b) = chunk.split_at(chunk_size as i64);
301                out.push(a);
302                chunk = b;
303            }
304        }
305        out
306    }
307}
308
309/// Split a [`DataFrame`] in `target` elements. The target doesn't have to be respected if not
310/// strict. Deviation of the target might be done to create more equal size chunks.
311///
312/// # Panics
313/// if chunks are not aligned
314pub fn split_df_as_ref(df: &DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
315    if strict {
316        split(df, target)
317    } else {
318        split_and_flatten(df, target)
319    }
320}
321
322#[doc(hidden)]
323/// Split a [`DataFrame`] into `n` parts. We take a `&mut` to be able to repartition/align chunks.
324/// `strict` in that it respects `n` even if the chunks are suboptimal.
325pub fn split_df(df: &mut DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
326    if target == 0 || df.height() == 0 {
327        return vec![df.clone()];
328    }
329    // make sure that chunks are aligned.
330    df.align_chunks_par();
331    split_df_as_ref(df, target, strict)
332}
333
334pub fn slice_slice<T>(vals: &[T], offset: i64, len: usize) -> &[T] {
335    let (raw_offset, slice_len) = slice_offsets(offset, len, vals.len());
336    &vals[raw_offset..raw_offset + slice_len]
337}
338
/// Turns a signed `offset` and a `length` into an in-bounds `(start, len)` pair for an array of
/// `array_len` elements.
///
/// A negative `offset` counts from the end of the array. Start and stop are both clamped to
/// `0..=array_len`, so the result never reaches outside the array. The additions saturate
/// instead of overflowing.
///
/// # Panics
/// If `array_len` does not fit in an `i64`.
#[inline]
pub fn slice_offsets(offset: i64, length: usize, array_len: usize) -> (usize, usize) {
    let start = match offset {
        negative if negative < 0 => negative.saturating_add_unsigned(array_len as u64),
        non_negative => non_negative,
    };
    let stop = start.saturating_add_unsigned(length as u64);

    let upper = i64::try_from(array_len).expect("array length larger than i64::MAX");
    let start = start.clamp(0, upper);
    let stop = stop.clamp(0, upper);

    (start as usize, (stop - start) as usize)
}
358
/// Dispatches on a `DataType` and expands the macro that belongs to it.
///
/// * `String` expands `$on_string!` and `Boolean` expands `$on_bool!`; both receive only the
///   optional trailing arguments.
/// * Every numeric dtype expands `$on_numeric!` with the matching native Rust type as first
///   argument, followed by the optional arguments.
///
/// Arms behind a `dtype-*` feature only exist when that feature is enabled.
///
/// # Panics
/// The expansion panics for every other dtype.
#[macro_export]
macro_rules! match_dtype_to_physical_apply_macro {
    ($dtype:expr, $on_numeric:ident, $on_string:ident, $on_bool:ident $(, $extra:expr)*) => {{
        match $dtype {
            DataType::Boolean => $on_bool!($($extra)*),
            DataType::String => $on_string!($($extra)*),
            // Signed integers.
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $on_numeric!(i8 $(, $extra)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $on_numeric!(i16 $(, $extra)*),
            DataType::Int32 => $on_numeric!(i32 $(, $extra)*),
            DataType::Int64 => $on_numeric!(i64 $(, $extra)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $on_numeric!(i128 $(, $extra)*),
            // Unsigned integers.
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $on_numeric!(u8 $(, $extra)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $on_numeric!(u16 $(, $extra)*),
            DataType::UInt32 => $on_numeric!(u32 $(, $extra)*),
            DataType::UInt64 => $on_numeric!(u64 $(, $extra)*),
            // Floats.
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $on_numeric!(pf16 $(, $extra)*),
            DataType::Float32 => $on_numeric!(f32 $(, $extra)*),
            DataType::Float64 => $on_numeric!(f64 $(, $extra)*),
            other => panic!("not implemented for dtype {:?}", other),
        }
    }};
}
388
/// Dispatches on a `DataType` and expands the macro that belongs to it, passing polars type
/// markers instead of native types.
///
/// * `String`, `Binary` and `Boolean` expand `$on_string!`, `$on_binary!` and `$on_bool!`
///   respectively; these receive only the optional trailing arguments.
/// * Every numeric dtype expands `$on_numeric!` with the matching `*Type` identifier (for
///   example `Int32Type`) as first argument, followed by the optional arguments.
///
/// Arms behind a `dtype-*` feature only exist when that feature is enabled.
///
/// # Panics
/// The expansion panics for every other dtype.
#[macro_export]
macro_rules! match_dtype_to_logical_apply_macro {
    ($dtype:expr, $on_numeric:ident, $on_string:ident, $on_binary:ident, $on_bool:ident $(, $extra:expr)*) => {{
        match $dtype {
            DataType::Boolean => $on_bool!($($extra)*),
            DataType::String => $on_string!($($extra)*),
            DataType::Binary => $on_binary!($($extra)*),
            // Signed integers.
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $on_numeric!(Int8Type $(, $extra)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $on_numeric!(Int16Type $(, $extra)*),
            DataType::Int32 => $on_numeric!(Int32Type $(, $extra)*),
            DataType::Int64 => $on_numeric!(Int64Type $(, $extra)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $on_numeric!(Int128Type $(, $extra)*),
            // Unsigned integers.
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $on_numeric!(UInt8Type $(, $extra)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $on_numeric!(UInt16Type $(, $extra)*),
            DataType::UInt32 => $on_numeric!(UInt32Type $(, $extra)*),
            DataType::UInt64 => $on_numeric!(UInt64Type $(, $extra)*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $on_numeric!(UInt128Type $(, $extra)*),
            // Floats.
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $on_numeric!(Float16Type $(, $extra)*),
            DataType::Float32 => $on_numeric!(Float32Type $(, $extra)*),
            DataType::Float64 => $on_numeric!(Float64Type $(, $extra)*),
            other => panic!("not implemented for dtype {:?}", other),
        }
    }};
}
421
/// Downcasts `$ca` according to its dtype and expands a macro on the result.
///
/// The value returned by the typed accessor (`.str()`, `.bool()`, `.i32()`, ...) is unwrapped
/// and passed as first argument, followed by the optional trailing arguments. `String` goes to
/// `$on_string!`, `Boolean` to `$on_bool!` and every numeric dtype to `$on_numeric!`.
///
/// Note that `$ca` appears twice in the expansion: once for `.dtype()` and once for the accessor.
///
/// # Panics
/// The expansion panics for every other dtype.
#[macro_export]
macro_rules! match_arrow_dtype_apply_macro_ca {
    ($ca:expr, $on_numeric:ident, $on_string:ident, $on_bool:ident $(, $extra:expr)*) => {{
        match $ca.dtype() {
            DataType::Boolean => $on_bool!($ca.bool().unwrap() $(, $extra)*),
            DataType::String => $on_string!($ca.str().unwrap() $(, $extra)*),
            // Signed integers.
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $on_numeric!($ca.i8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $on_numeric!($ca.i16().unwrap() $(, $extra)*),
            DataType::Int32 => $on_numeric!($ca.i32().unwrap() $(, $extra)*),
            DataType::Int64 => $on_numeric!($ca.i64().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $on_numeric!($ca.i128().unwrap() $(, $extra)*),
            // Unsigned integers.
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $on_numeric!($ca.u8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $on_numeric!($ca.u16().unwrap() $(, $extra)*),
            DataType::UInt32 => $on_numeric!($ca.u32().unwrap() $(, $extra)*),
            DataType::UInt64 => $on_numeric!($ca.u64().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $on_numeric!($ca.u128().unwrap() $(, $extra)*),
            // Floats.
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $on_numeric!($ca.f16().unwrap() $(, $extra)*),
            DataType::Float32 => $on_numeric!($ca.f32().unwrap() $(, $extra)*),
            DataType::Float64 => $on_numeric!($ca.f64().unwrap() $(, $extra)*),
            other => panic!("not implemented for dtype {:?}", other),
        }
    }};
}
453
/// Runs a block of code with a type parameter bound to the native type of a numeric dtype.
///
/// Invoked as `with_match_physical_numeric_type!(dtype, |$T| { ... })`: inside the block, `$T`
/// stands for `i32`, `u64`, `f64`, ... depending on which variant `dtype` matches.
///
/// Arms behind a `dtype-*` feature only exist when that feature is enabled.
///
/// # Panics
/// The expansion panics for every non-numeric dtype.
#[macro_export]
macro_rules! with_match_physical_numeric_type {(
    $dtype:expr, | $dollar:tt $Ty:ident | $($code:tt)*
) => ({
    // Helper that re-expands the caller's block with the chosen type substituted.
    macro_rules! __with_ty__ {( $dollar $Ty:ident ) => ( $($code)* )}
    #[cfg(feature = "dtype-f16")]
    use polars_utils::float16::pf16;
    use $crate::datatypes::DataType::*;
    match $dtype {
        // Unsigned integers.
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { u128 },
        // Signed integers.
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        // Floats.
        #[cfg(feature = "dtype-f16")]
        Float16 => __with_ty__! { pf16 },
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        other => panic!("not implemented for dtype {:?}", other),
    }
})}
486
/// Runs a block of code with a type parameter bound to the native type of an integer dtype.
///
/// Invoked as `with_match_physical_integer_type!(dtype, |$T| { ... })`: inside the block, `$T`
/// stands for `i32`, `u64`, ... depending on which variant `dtype` matches.
///
/// Arms behind a `dtype-*` feature only exist when that feature is enabled.
///
/// # Panics
/// The expansion panics for every non-integer dtype.
#[macro_export]
macro_rules! with_match_physical_integer_type {(
    $dtype:expr, | $dollar:tt $Ty:ident | $($code:tt)*
) => ({
    // Helper that re-expands the caller's block with the chosen type substituted.
    macro_rules! __with_ty__ {( $dollar $Ty:ident ) => ( $($code)* )}
    // NOTE(review): no arm below names `pf16`; the import is kept as it was.
    #[cfg(feature = "dtype-f16")]
    use polars_utils::float16::pf16;
    use $crate::datatypes::DataType::*;
    match $dtype {
        // Unsigned integers.
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { u128 },
        // Signed integers.
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        other => panic!("not implemented for dtype {:?}", other),
    }
})}
515
/// Runs a block of code with a type parameter bound to the native type of a float dtype.
///
/// Invoked as `with_match_physical_float_type!(dtype, |$T| { ... })`: inside the block, `$T`
/// stands for `f32` or `f64` (or `pf16` with the `dtype-f16` feature), depending on which
/// variant `dtype` matches.
///
/// # Panics
/// The expansion panics for every non-float dtype.
#[macro_export]
macro_rules! with_match_physical_float_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    // Helper that re-expands the caller's block with the chosen type substituted.
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    // Only the `Float16` arm names `pf16`, so the import is gated on the same feature, as the
    // other `with_match_physical_*_type` macros do.
    #[cfg(feature = "dtype-f16")]
    use polars_utils::float16::pf16;
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-f16")]
        Float16 => __with_ty__! { pf16 },
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
531
/// Runs a block of code with a type parameter bound to the polars type marker of a float dtype.
///
/// Invoked as `with_match_physical_float_polars_type!(dtype, |$T| { ... })`: inside the block,
/// `$T` stands for `Float32Type` or `Float64Type` (or `Float16Type` with the `dtype-f16`
/// feature). The marker types are named unqualified, so they must be in scope at the call site.
///
/// # Panics
/// The expansion panics for every non-float dtype.
#[macro_export]
macro_rules! with_match_physical_float_polars_type {(
    $dtype:expr, | $dollar:tt $Ty:ident | $($code:tt)*
) => ({
    // Helper that re-expands the caller's block with the chosen type substituted.
    macro_rules! __with_ty__ {( $dollar $Ty:ident ) => ( $($code)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        Float64 => __with_ty__! { Float64Type },
        Float32 => __with_ty__! { Float32Type },
        #[cfg(feature = "dtype-f16")]
        Float16 => __with_ty__! { Float16Type },
        other => panic!("not implemented for dtype {:?}", other),
    }
})}
546
/// Runs a block of code with a type parameter bound to the polars type marker of a numeric
/// dtype.
///
/// Invoked as `with_match_physical_numeric_polars_type!(dtype, |$T| { ... })`: inside the
/// block, `$T` stands for `Int32Type`, `UInt64Type`, `Float64Type`, ... depending on which
/// variant `dtype` matches. The marker types are named unqualified, so they must be in scope at
/// the call site.
///
/// Arms behind a `dtype-*` feature only exist when that feature is enabled.
///
/// # Panics
/// The expansion panics for every non-numeric dtype.
#[macro_export]
macro_rules! with_match_physical_numeric_polars_type {(
    $dtype:expr, | $dollar:tt $Ty:ident | $($code:tt)*
) => ({
    // Helper that re-expands the caller's block with the chosen type substituted.
    macro_rules! __with_ty__ {( $dollar $Ty:ident ) => ( $($code)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        // Unsigned integers.
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { UInt128Type },
        // Signed integers.
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        // Floats.
        #[cfg(feature = "dtype-f16")]
        Float16 => __with_ty__! { Float16Type },
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        other => panic!("not implemented for dtype {:?}", other),
    }
})}
577
/// Runs a block of code with a type parameter bound to the polars type marker of an integer
/// dtype.
///
/// Invoked as `with_match_physical_integer_polars_type!(dtype, |$T| { ... })`: inside the
/// block, `$T` stands for `Int32Type`, `UInt64Type`, ... depending on which variant `dtype`
/// matches. The expansion glob-imports `$crate::datatypes` itself.
///
/// Arms behind a `dtype-*` feature only exist when that feature is enabled.
///
/// # Panics
/// The expansion panics for every non-integer dtype.
#[macro_export]
macro_rules! with_match_physical_integer_polars_type {(
    $dtype:expr, | $dollar:tt $Ty:ident | $($code:tt)*
) => ({
    // Helper that re-expands the caller's block with the chosen type substituted.
    macro_rules! __with_ty__ {( $dollar $Ty:ident ) => ( $($code)* )}
    use $crate::datatypes::DataType::*;
    use $crate::datatypes::*;
    match $dtype {
        // Unsigned integers.
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        #[cfg(feature = "dtype-u128")]
        UInt128 => __with_ty__! { UInt128Type },
        // Signed integers.
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        other => panic!("not implemented for dtype {:?}", other),
    }
})}
605
/// Runs a block of code with a type parameter bound to the categorical type marker that
/// belongs to a `CategoricalPhysical` value.
///
/// Invoked as `with_match_categorical_physical_type!(physical, |$C| { ... })`: inside the
/// block, `$C` stands for `Categorical8Type`, `Categorical16Type` or `Categorical32Type`.
/// `CategoricalPhysical` and the marker types must be in scope at the call site.
#[macro_export]
macro_rules! with_match_categorical_physical_type {(
    $physical:expr, | $dollar:tt $Ty:ident | $($code:tt)*
) => ({
    // Helper that re-expands the caller's block with the chosen type substituted.
    macro_rules! __with_ty__ {( $dollar $Ty:ident ) => ( $($code)* )}
    match $physical {
        CategoricalPhysical::U32 => __with_ty__! { Categorical32Type },
        CategoricalPhysical::U16 => __with_ty__! { Categorical16Type },
        CategoricalPhysical::U8 => __with_ty__! { Categorical8Type },
    }
})}
617
/// Downcasts `$series` according to its physical numeric dtype and expands `$apply!` on the
/// result. Only plain numeric dtypes are handled, no logical ones.
///
/// The value returned by the typed accessor (`.i32()`, `.f64()`, ...) is unwrapped and passed
/// as first argument, followed by the optional trailing arguments.
///
/// # Panics
/// The expansion panics for every other dtype.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical {
    ($series:expr, $apply:ident $(, $extra:expr)*) => {{
        match $series.dtype() {
            // Signed integers.
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $apply!($series.i8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $apply!($series.i16().unwrap() $(, $extra)*),
            DataType::Int32 => $apply!($series.i32().unwrap() $(, $extra)*),
            DataType::Int64 => $apply!($series.i64().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $apply!($series.i128().unwrap() $(, $extra)*),
            // Unsigned integers.
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $apply!($series.u8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $apply!($series.u16().unwrap() $(, $extra)*),
            DataType::UInt32 => $apply!($series.u32().unwrap() $(, $extra)*),
            DataType::UInt64 => $apply!($series.u64().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $apply!($series.u128().unwrap() $(, $extra)*),
            // Floats.
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $apply!($series.f16().unwrap() $(, $extra)*),
            DataType::Float32 => $apply!($series.f32().unwrap() $(, $extra)*),
            DataType::Float64 => $apply!($series.f64().unwrap() $(, $extra)*),
            other => panic!("not implemented for {:?}", other),
        }
    }};
}
648
/// Mutable counterpart of `downcast_as_macro_arg_physical!`: obtains a `&mut` typed chunked
/// array from `$series` via `as_mut()` and expands `$apply!` on it. Only plain numeric dtypes
/// are handled, no logical ones.
///
/// `$apply!` receives the polars type marker (for example `Int32Type`), the mutable chunked
/// array, and then the optional trailing arguments.
///
/// # Panics
/// The expansion panics for every other dtype.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical_mut {
    ($series:expr, $apply:ident $(, $extra:expr)*) => {{
        // The dtype is cloned so that `$series` is not borrowed while it is mutated below.
        match $series.dtype().clone() {
            // Signed integers.
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => {
                let ca: &mut Int8Chunked = $series.as_mut();
                $apply!(Int8Type, ca $(, $extra)*)
            },
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => {
                let ca: &mut Int16Chunked = $series.as_mut();
                $apply!(Int16Type, ca $(, $extra)*)
            },
            DataType::Int32 => {
                let ca: &mut Int32Chunked = $series.as_mut();
                $apply!(Int32Type, ca $(, $extra)*)
            },
            DataType::Int64 => {
                let ca: &mut Int64Chunked = $series.as_mut();
                $apply!(Int64Type, ca $(, $extra)*)
            },
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => {
                let ca: &mut Int128Chunked = $series.as_mut();
                $apply!(Int128Type, ca $(, $extra)*)
            },
            // Unsigned integers.
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => {
                let ca: &mut UInt8Chunked = $series.as_mut();
                $apply!(UInt8Type, ca $(, $extra)*)
            },
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => {
                let ca: &mut UInt16Chunked = $series.as_mut();
                $apply!(UInt16Type, ca $(, $extra)*)
            },
            DataType::UInt32 => {
                let ca: &mut UInt32Chunked = $series.as_mut();
                $apply!(UInt32Type, ca $(, $extra)*)
            },
            DataType::UInt64 => {
                let ca: &mut UInt64Chunked = $series.as_mut();
                $apply!(UInt64Type, ca $(, $extra)*)
            },
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => {
                let ca: &mut UInt128Chunked = $series.as_mut();
                $apply!(UInt128Type, ca $(, $extra)*)
            },
            // Floats.
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => {
                let ca: &mut Float16Chunked = $series.as_mut();
                $apply!(Float16Type, ca $(, $extra)*)
            },
            DataType::Float32 => {
                let ca: &mut Float32Chunked = $series.as_mut();
                $apply!(Float32Type, ca $(, $extra)*)
            },
            DataType::Float64 => {
                let ca: &mut Float64Chunked = $series.as_mut();
                $apply!(Float64Type, ca $(, $extra)*)
            },
            other => panic!("not implemented for {:?}", other),
        }
    }};
}
719
/// Downcasts `$self` according to its dtype and calls `$method` with `$args` on the result.
///
/// The typed accessor that belongs to the dtype (`.bool()`, `.str()`, `.i32()`, `.date()`,
/// `.list()`, ...) is unwrapped and the method is invoked on it. The comma after the method
/// name is required even without arguments.
///
/// Arms behind a `dtype-*` feature only exist when that feature is enabled.
///
/// # Panics
/// The expansion panics for dtypes that have no arm below.
#[macro_export]
macro_rules! apply_method_all_arrow_series {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Boolean => $self.bool().unwrap().$method($($args),*),
            DataType::String => $self.str().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            // This arm used to name the undeclared metavariable `$medthod`.
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $self.u128().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $self.f16().unwrap().$method($($args),*),
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            DataType::Time => $self.time().unwrap().$method($($args),*),
            DataType::Date => $self.date().unwrap().$method($($args),*),
            DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
            DataType::List(_) => $self.list().unwrap().$method($($args),*),
            DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
            dt => panic!("dtype {:?} not supported", dt)
        }
    }
}
755
/// Downcasts `$series` according to its integer dtype and calls `$call` with `$params` on the
/// result.
///
/// The typed accessor that belongs to the dtype (`.i32()`, `.u64()`, ...) is unwrapped and the
/// method is invoked on it. The comma after the method name is required even without arguments.
///
/// Arms behind a `dtype-*` feature only exist when that feature is enabled.
///
/// # Panics
/// The expansion panics for every non-integer dtype.
#[macro_export]
macro_rules! apply_method_physical_integer {
    ($series:expr, $call:ident, $($params:expr),*) => {
        match $series.dtype() {
            // Signed integers.
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $series.i8().unwrap().$call($($params),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $series.i16().unwrap().$call($($params),*),
            DataType::Int32 => $series.i32().unwrap().$call($($params),*),
            DataType::Int64 => $series.i64().unwrap().$call($($params),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $series.i128().unwrap().$call($($params),*),
            // Unsigned integers.
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $series.u8().unwrap().$call($($params),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $series.u16().unwrap().$call($($params),*),
            DataType::UInt32 => $series.u32().unwrap().$call($($params),*),
            DataType::UInt64 => $series.u64().unwrap().$call($($params),*),
            #[cfg(feature = "dtype-u128")]
            DataType::UInt128 => $series.u128().unwrap().$call($($params),*),
            other => panic!("not implemented for dtype {:?}", other),
        }
    }
}
780
// Doesn't include Bool and String.
/// Downcasts `$series` according to its numeric dtype and calls `$call` with `$params` on the
/// result.
///
/// Float dtypes are handled here; every other dtype is handed to
/// `apply_method_physical_integer!`, which panics for non-integer dtypes.
#[macro_export]
macro_rules! apply_method_physical_numeric {
    ($series:expr, $call:ident, $($params:expr),*) => {
        match $series.dtype() {
            DataType::Float64 => $series.f64().unwrap().$call($($params),*),
            DataType::Float32 => $series.f32().unwrap().$call($($params),*),
            #[cfg(feature = "dtype-f16")]
            DataType::Float16 => $series.f16().unwrap().$call($($params),*),
            _ => apply_method_physical_integer!($series, $call, $($params),*),
        }
    }
}
794
/// Builds a `DataFrame` from `name => values` pairs, e.g. `df!("a" => [1, 2, 3], "b" => ...)`.
///
/// Each pair becomes a `Series` through `NamedFrom::new(name.into(), values)`, is converted
/// into a `Column`, and the resulting vector is passed to `DataFrame::new_infer_height`, whose
/// result is the value of the macro. At least one pair is required; a trailing comma is
/// accepted.
#[macro_export]
macro_rules! df {
    ($($name:expr => $values:expr), + $(,)?) => {
        $crate::prelude::DataFrame::new_infer_height(vec![
            $(
                $crate::prelude::Column::from(
                    <$crate::prelude::Series as $crate::prelude::NamedFrom::<_, _>>::new(
                        $name.into(),
                        $values,
                    )
                ),
            )+
        ])
    }
}
803
804pub fn get_time_units(tu_l: &TimeUnit, tu_r: &TimeUnit) -> TimeUnit {
805    use crate::datatypes::time_unit::TimeUnit::*;
806    match (tu_l, tu_r) {
807        (Nanoseconds, Microseconds) => Microseconds,
808        (_, Milliseconds) => Milliseconds,
809        _ => *tu_l,
810    }
811}
812
813#[cold]
814#[inline(never)]
815fn width_mismatch(df1: &DataFrame, df2: &DataFrame) -> PolarsError {
816    let mut df1_extra = Vec::new();
817    let mut df2_extra = Vec::new();
818
819    let s1 = df1.schema();
820    let s2 = df2.schema();
821
822    s1.field_compare(s2, &mut df1_extra, &mut df2_extra);
823
824    let df1_extra = df1_extra
825        .into_iter()
826        .map(|(_, (n, _))| n.as_str())
827        .collect::<Vec<_>>()
828        .join(", ");
829    let df2_extra = df2_extra
830        .into_iter()
831        .map(|(_, (n, _))| n.as_str())
832        .collect::<Vec<_>>()
833        .join(", ");
834
835    polars_err!(
836        SchemaMismatch: r#"unable to vstack, dataframes have different widths ({} != {}).
837One dataframe has additional columns: [{df1_extra}].
838Other dataframe has additional columns: [{df2_extra}]."#,
839        df1.width(),
840        df2.width(),
841    )
842}
843
844pub fn accumulate_dataframes_vertical_unchecked_optional<I>(dfs: I) -> Option<DataFrame>
845where
846    I: IntoIterator<Item = DataFrame>,
847{
848    let mut iter = dfs.into_iter();
849    let additional = iter.size_hint().0;
850    let mut acc_df = iter.next()?;
851    acc_df.reserve_chunks(additional);
852
853    for df in iter {
854        if acc_df.width() != df.width() {
855            panic!("{}", width_mismatch(&acc_df, &df));
856        }
857
858        acc_df.vstack_mut_owned_unchecked(df);
859    }
860    Some(acc_df)
861}
862
863/// This takes ownership of the DataFrame so that drop is called earlier.
864/// Does not check if schema is correct
865pub fn accumulate_dataframes_vertical_unchecked<I>(dfs: I) -> DataFrame
866where
867    I: IntoIterator<Item = DataFrame>,
868{
869    let mut iter = dfs.into_iter();
870    let additional = iter.size_hint().0;
871    let mut acc_df = iter.next().unwrap();
872    acc_df.reserve_chunks(additional);
873
874    for df in iter {
875        if acc_df.width() != df.width() {
876            panic!("{}", width_mismatch(&acc_df, &df));
877        }
878
879        acc_df.vstack_mut_owned_unchecked(df);
880    }
881    acc_df
882}
883
884/// This takes ownership of the DataFrame so that drop is called earlier.
885/// # Panics
886/// Panics if `dfs` is empty.
887pub fn accumulate_dataframes_vertical<I>(dfs: I) -> PolarsResult<DataFrame>
888where
889    I: IntoIterator<Item = DataFrame>,
890{
891    let mut iter = dfs.into_iter();
892    let additional = iter.size_hint().0;
893    let mut acc_df = iter.next().unwrap();
894    acc_df.reserve_chunks(additional);
895    for df in iter {
896        if acc_df.width() != df.width() {
897            return Err(width_mismatch(&acc_df, &df));
898        }
899
900        acc_df.vstack_mut_owned(df)?;
901    }
902
903    Ok(acc_df)
904}
905
906/// Concat the DataFrames to a single DataFrame.
907pub fn concat_df<'a, I>(dfs: I) -> PolarsResult<DataFrame>
908where
909    I: IntoIterator<Item = &'a DataFrame>,
910{
911    let mut iter = dfs.into_iter();
912    let additional = iter.size_hint().0;
913    let mut acc_df = iter.next().unwrap().clone();
914    acc_df.reserve_chunks(additional);
915    for df in iter {
916        acc_df.vstack_mut(df)?;
917    }
918    Ok(acc_df)
919}
920
921/// Concat the DataFrames to a single DataFrame.
922pub fn concat_df_unchecked<'a, I>(dfs: I) -> DataFrame
923where
924    I: IntoIterator<Item = &'a DataFrame>,
925{
926    let mut iter = dfs.into_iter();
927    let additional = iter.size_hint().0;
928    let mut acc_df = iter.next().unwrap().clone();
929    acc_df.reserve_chunks(additional);
930    for df in iter {
931        acc_df.vstack_mut_unchecked(df);
932    }
933    acc_df
934}
935
936pub fn accumulate_dataframes_horizontal(dfs: Vec<DataFrame>) -> PolarsResult<DataFrame> {
937    let mut iter = dfs.into_iter();
938    let mut acc_df = iter.next().unwrap();
939    for df in iter {
940        acc_df.hstack_mut(df.columns())?;
941    }
942    Ok(acc_df)
943}
944
945/// Ensure the chunks in both ChunkedArrays have the same length.
946/// # Panics
947/// This will panic if `left.len() != right.len()` and array is chunked.
948pub fn align_chunks_binary<'a, T, B>(
949    left: &'a ChunkedArray<T>,
950    right: &'a ChunkedArray<B>,
951) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<B>>)
952where
953    B: PolarsDataType,
954    T: PolarsDataType,
955{
956    let assert = || {
957        assert_eq!(
958            left.len(),
959            right.len(),
960            "expected arrays of the same length"
961        )
962    };
963    match (left.chunks.len(), right.chunks.len()) {
964        // All chunks are equal length
965        (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
966        // All chunks are equal length
967        (a, b)
968            if a == b
969                && left
970                    .chunk_lengths()
971                    .zip(right.chunk_lengths())
972                    .all(|(l, r)| l == r) =>
973        {
974            (Cow::Borrowed(left), Cow::Borrowed(right))
975        },
976        (_, 1) => {
977            assert();
978            (
979                Cow::Borrowed(left),
980                Cow::Owned(right.match_chunks(left.chunk_lengths())),
981            )
982        },
983        (1, _) => {
984            assert();
985            (
986                Cow::Owned(left.match_chunks(right.chunk_lengths())),
987                Cow::Borrowed(right),
988            )
989        },
990        (_, _) => {
991            assert();
992            // could optimize to choose to rechunk a primitive and not a string or list type
993            let left = left.rechunk();
994            (
995                Cow::Owned(left.match_chunks(right.chunk_lengths())),
996                Cow::Borrowed(right),
997            )
998        },
999    }
1000}
1001
1002/// Ensure the chunks in ChunkedArray and Series have the same length.
1003/// # Panics
1004/// This will panic if `left.len() != right.len()` and array is chunked.
1005pub fn align_chunks_binary_ca_series<'a, T>(
1006    left: &'a ChunkedArray<T>,
1007    right: &'a Series,
1008) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, Series>)
1009where
1010    T: PolarsDataType,
1011{
1012    let assert = || {
1013        assert_eq!(
1014            left.len(),
1015            right.len(),
1016            "expected arrays of the same length"
1017        )
1018    };
1019    match (left.chunks.len(), right.chunks().len()) {
1020        // All chunks are equal length
1021        (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
1022        // All chunks are equal length
1023        (a, b)
1024            if a == b
1025                && left
1026                    .chunk_lengths()
1027                    .zip(right.chunk_lengths())
1028                    .all(|(l, r)| l == r) =>
1029        {
1030            assert();
1031            (Cow::Borrowed(left), Cow::Borrowed(right))
1032        },
1033        (_, 1) => (left.rechunk(), Cow::Borrowed(right)),
1034        (1, _) => (Cow::Borrowed(left), Cow::Owned(right.rechunk())),
1035        (_, _) => {
1036            assert();
1037            (left.rechunk(), Cow::Owned(right.rechunk()))
1038        },
1039    }
1040}
1041
/// Owned variant of chunk alignment for two Series.
///
/// The inputs are returned untouched when both have a single chunk or when
/// their chunk lengths already agree pairwise. Otherwise every side that has
/// more than one chunk is rechunked. No length check is performed.
#[cfg(feature = "performant")]
pub(crate) fn align_chunks_binary_owned_series(left: Series, right: Series) -> (Series, Series) {
    let n_left = left.chunks().len();
    let n_right = right.chunks().len();

    let already_aligned = (n_left == 1 && n_right == 1)
        || (n_left == n_right
            && left
                .chunk_lengths()
                .zip(right.chunk_lengths())
                .all(|(l, r)| l == r));
    if already_aligned {
        return (left, right);
    }

    // A side that already is a single chunk is passed through as is.
    let left = if n_left == 1 { left } else { left.rechunk() };
    let right = if n_right == 1 {
        right
    } else {
        right.rechunk()
    };
    (left, right)
}
1061
1062pub(crate) fn align_chunks_binary_owned<T, B>(
1063    left: ChunkedArray<T>,
1064    right: ChunkedArray<B>,
1065) -> (ChunkedArray<T>, ChunkedArray<B>)
1066where
1067    B: PolarsDataType,
1068    T: PolarsDataType,
1069{
1070    match (left.chunks.len(), right.chunks.len()) {
1071        (1, 1) => (left, right),
1072        // All chunks are equal length
1073        (a, b)
1074            if a == b
1075                && left
1076                    .chunk_lengths()
1077                    .zip(right.chunk_lengths())
1078                    .all(|(l, r)| l == r) =>
1079        {
1080            (left, right)
1081        },
1082        (_, 1) => (left.rechunk().into_owned(), right),
1083        (1, _) => (left, right.rechunk().into_owned()),
1084        (_, _) => (left.rechunk().into_owned(), right.rechunk().into_owned()),
1085    }
1086}
1087
1088/// # Panics
1089/// This will panic if `a.len() != b.len() || b.len() != c.len()` and array is chunked.
1090#[allow(clippy::type_complexity)]
1091pub fn align_chunks_ternary<'a, A, B, C>(
1092    a: &'a ChunkedArray<A>,
1093    b: &'a ChunkedArray<B>,
1094    c: &'a ChunkedArray<C>,
1095) -> (
1096    Cow<'a, ChunkedArray<A>>,
1097    Cow<'a, ChunkedArray<B>>,
1098    Cow<'a, ChunkedArray<C>>,
1099)
1100where
1101    A: PolarsDataType,
1102    B: PolarsDataType,
1103    C: PolarsDataType,
1104{
1105    if a.chunks.len() == 1 && b.chunks.len() == 1 && c.chunks.len() == 1 {
1106        return (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c));
1107    }
1108
1109    assert!(
1110        a.len() == b.len() && b.len() == c.len(),
1111        "expected arrays of the same length"
1112    );
1113
1114    match (a.chunks.len(), b.chunks.len(), c.chunks.len()) {
1115        (_, 1, 1) => (
1116            Cow::Borrowed(a),
1117            Cow::Owned(b.match_chunks(a.chunk_lengths())),
1118            Cow::Owned(c.match_chunks(a.chunk_lengths())),
1119        ),
1120        (1, 1, _) => (
1121            Cow::Owned(a.match_chunks(c.chunk_lengths())),
1122            Cow::Owned(b.match_chunks(c.chunk_lengths())),
1123            Cow::Borrowed(c),
1124        ),
1125        (1, _, 1) => (
1126            Cow::Owned(a.match_chunks(b.chunk_lengths())),
1127            Cow::Borrowed(b),
1128            Cow::Owned(c.match_chunks(b.chunk_lengths())),
1129        ),
1130        (1, _, _) => {
1131            let b = b.rechunk();
1132            (
1133                Cow::Owned(a.match_chunks(c.chunk_lengths())),
1134                Cow::Owned(b.match_chunks(c.chunk_lengths())),
1135                Cow::Borrowed(c),
1136            )
1137        },
1138        (_, 1, _) => {
1139            let a = a.rechunk();
1140            (
1141                Cow::Owned(a.match_chunks(c.chunk_lengths())),
1142                Cow::Owned(b.match_chunks(c.chunk_lengths())),
1143                Cow::Borrowed(c),
1144            )
1145        },
1146        (_, _, 1) => {
1147            let b = b.rechunk();
1148            (
1149                Cow::Borrowed(a),
1150                Cow::Owned(b.match_chunks(a.chunk_lengths())),
1151                Cow::Owned(c.match_chunks(a.chunk_lengths())),
1152            )
1153        },
1154        (len_a, len_b, len_c)
1155            if len_a == len_b
1156                && len_b == len_c
1157                && a.chunk_lengths()
1158                    .zip(b.chunk_lengths())
1159                    .zip(c.chunk_lengths())
1160                    .all(|((a, b), c)| a == b && b == c) =>
1161        {
1162            (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c))
1163        },
1164        _ => {
1165            // could optimize to choose to rechunk a primitive and not a string or list type
1166            let a = a.rechunk();
1167            let b = b.rechunk();
1168            (
1169                Cow::Owned(a.match_chunks(c.chunk_lengths())),
1170                Cow::Owned(b.match_chunks(c.chunk_lengths())),
1171                Cow::Borrowed(c),
1172            )
1173        },
1174    }
1175}
1176
1177pub fn binary_concatenate_validities<'a, T, B>(
1178    left: &'a ChunkedArray<T>,
1179    right: &'a ChunkedArray<B>,
1180) -> Option<Bitmap>
1181where
1182    B: PolarsDataType,
1183    T: PolarsDataType,
1184{
1185    let (left, right) = align_chunks_binary(left, right);
1186    let left_validity = concatenate_validities(left.chunks());
1187    let right_validity = concatenate_validities(right.chunks());
1188    combine_validities_and(left_validity.as_ref(), right_validity.as_ref())
1189}
1190
1191/// Convenience for `x.into_iter().map(Into::into).collect()` using an `into_vec()` function.
1192pub trait IntoVec<T> {
1193    fn into_vec(self) -> Vec<T>;
1194}
1195
1196impl<I, S> IntoVec<PlSmallStr> for I
1197where
1198    I: IntoIterator<Item = S>,
1199    S: Into<PlSmallStr>,
1200{
1201    fn into_vec(self) -> Vec<PlSmallStr> {
1202        self.into_iter().map(|s| s.into()).collect()
1203    }
1204}
1205
1206/// This logic is same as the impl on ChunkedArray
1207/// The difference is that there is less indirection because the caller should preallocate
1208/// `chunk_lens` once. On the `ChunkedArray` we indirect through an `ArrayRef` which is an indirection
1209/// and a vtable.
1210#[inline]
1211pub(crate) fn index_to_chunked_index<
1212    I: Iterator<Item = Idx>,
1213    Idx: PartialOrd + std::ops::AddAssign + std::ops::SubAssign + Zero + One,
1214>(
1215    chunk_lens: I,
1216    index: Idx,
1217) -> (Idx, Idx) {
1218    let mut index_remainder = index;
1219    let mut current_chunk_idx = Zero::zero();
1220
1221    for chunk_len in chunk_lens {
1222        if chunk_len > index_remainder {
1223            break;
1224        } else {
1225            index_remainder -= chunk_len;
1226            current_chunk_idx += One::one();
1227        }
1228    }
1229    (current_chunk_idx, index_remainder)
1230}
1231
1232pub(crate) fn index_to_chunked_index_rev<
1233    I: Iterator<Item = Idx>,
1234    Idx: PartialOrd
1235        + std::ops::AddAssign
1236        + std::ops::SubAssign
1237        + std::ops::Sub<Output = Idx>
1238        + Zero
1239        + One
1240        + Copy
1241        + std::fmt::Debug,
1242>(
1243    chunk_lens_rev: I,
1244    index_from_back: Idx,
1245    total_chunks: Idx,
1246) -> (Idx, Idx) {
1247    debug_assert!(index_from_back > Zero::zero(), "at least -1");
1248    let mut index_remainder = index_from_back;
1249    let mut current_chunk_idx = One::one();
1250    let mut current_chunk_len = Zero::zero();
1251
1252    for chunk_len in chunk_lens_rev {
1253        current_chunk_len = chunk_len;
1254        if chunk_len >= index_remainder {
1255            break;
1256        } else {
1257            index_remainder -= chunk_len;
1258            current_chunk_idx += One::one();
1259        }
1260    }
1261    (
1262        total_chunks - current_chunk_idx,
1263        current_chunk_len - index_remainder,
1264    )
1265}
1266
1267pub fn first_null<'a, I>(iter: I) -> Option<usize>
1268where
1269    I: Iterator<Item = &'a dyn Array>,
1270{
1271    let mut offset = 0;
1272    for arr in iter {
1273        if let Some(mask) = arr.validity() {
1274            let len_mask = mask.len();
1275            let n = mask.leading_ones();
1276            if n < len_mask {
1277                return Some(offset + n);
1278            }
1279            offset += len_mask
1280        } else {
1281            offset += arr.len();
1282        }
1283    }
1284    None
1285}
1286
1287pub fn first_non_null<'a, I>(iter: I) -> Option<usize>
1288where
1289    I: Iterator<Item = &'a dyn Array>,
1290{
1291    let mut offset = 0;
1292    for arr in iter {
1293        if let Some(mask) = arr.validity() {
1294            let len_mask = mask.len();
1295            let n = mask.leading_zeros();
1296            if n < len_mask {
1297                return Some(offset + n);
1298            }
1299            offset += len_mask
1300        } else if !arr.is_empty() {
1301            return Some(offset);
1302        }
1303    }
1304    None
1305}
1306
1307pub fn last_non_null<'a, I>(iter: I, len: usize) -> Option<usize>
1308where
1309    I: DoubleEndedIterator<Item = &'a dyn Array>,
1310{
1311    if len == 0 {
1312        return None;
1313    }
1314    let mut offset = 0;
1315    for arr in iter.rev() {
1316        if let Some(mask) = arr.validity() {
1317            let len_mask = mask.len();
1318            let n = mask.trailing_zeros();
1319            if n < len_mask {
1320                return Some(len - offset - n - 1);
1321            }
1322            offset += len_mask;
1323        } else if !arr.is_empty() {
1324            return Some(len - offset - 1);
1325        }
1326    }
1327    None
1328}
1329
1330/// ensure that nulls are propagated to both arrays
1331pub fn coalesce_nulls<'a, T: PolarsDataType>(
1332    a: &'a ChunkedArray<T>,
1333    b: &'a ChunkedArray<T>,
1334) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<T>>) {
1335    if a.null_count() > 0 || b.null_count() > 0 {
1336        let (a, b) = align_chunks_binary(a, b);
1337        let mut b = b.into_owned();
1338        let a = a.coalesce_nulls(b.chunks());
1339
1340        for arr in a.chunks().iter() {
1341            for arr_b in unsafe { b.chunks_mut() } {
1342                *arr_b = arr_b.with_validity(arr.validity().cloned())
1343            }
1344        }
1345        b.compute_len();
1346        (Cow::Owned(a), Cow::Owned(b))
1347    } else {
1348        (Cow::Borrowed(a), Cow::Borrowed(b))
1349    }
1350}
1351
1352pub fn coalesce_nulls_columns(a: &Column, b: &Column) -> (Column, Column) {
1353    if a.null_count() > 0 || b.null_count() > 0 {
1354        let mut a = a.as_materialized_series().rechunk();
1355        let mut b = b.as_materialized_series().rechunk();
1356        for (arr_a, arr_b) in unsafe { a.chunks_mut().iter_mut().zip(b.chunks_mut()) } {
1357            let validity = match (arr_a.validity(), arr_b.validity()) {
1358                (None, Some(b)) => Some(b.clone()),
1359                (Some(a), Some(b)) => Some(a & b),
1360                (Some(a), None) => Some(a.clone()),
1361                (None, None) => None,
1362            };
1363            *arr_a = arr_a.with_validity(validity.clone());
1364            *arr_b = arr_b.with_validity(validity);
1365        }
1366        a.compute_len();
1367        b.compute_len();
1368        (a.into(), b.into())
1369    } else {
1370        (a.clone(), b.clone())
1371    }
1372}
1373
#[cfg(test)]
mod test {
    use super::*;

    /// Splitting ten values three ways yields parts of length 3, 3 and 4.
    #[test]
    fn test_split() {
        let ca: Int32Chunked = (0..10).collect_ca("a".into());

        let parts = split(&ca, 3);
        for (i, expected_len) in [3, 3, 4].into_iter().enumerate() {
            assert_eq!(parts[i].len(), expected_len);
        }
    }

    /// After `align_chunks_binary` both sides report the same chunk lengths.
    #[test]
    fn test_align_chunks() -> PolarsResult<()> {
        // One chunk on the left, two chunks (1 + 3) on the right.
        let lhs = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut rhs = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let rhs_tail = Int32Chunked::new(PlSmallStr::EMPTY, &[2, 3, 4]);
        rhs.append(&rhs_tail)?;

        let (lhs, rhs) = align_chunks_binary(&lhs, &rhs);
        assert_eq!(
            lhs.chunk_lengths().collect::<Vec<_>>(),
            rhs.chunk_lengths().collect::<Vec<_>>()
        );

        // One chunk on the left, four single-element chunks on the right.
        let lhs = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let single = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let mut rhs = single.clone();
        for _ in 0..3 {
            rhs.append(&single)?;
        }

        let (lhs, rhs) = align_chunks_binary(&lhs, &rhs);
        assert_eq!(
            lhs.chunk_lengths().collect::<Vec<_>>(),
            rhs.chunk_lengths().collect::<Vec<_>>()
        );

        Ok(())
    }
}