polars_core/utils/
mod.rs

1mod any_value;
2use arrow::compute::concatenate::concatenate_validities;
3use arrow::compute::utils::combine_validities_and;
4pub mod flatten;
5pub(crate) mod series;
6mod supertype;
7use std::borrow::Cow;
8use std::ops::{Deref, DerefMut};
9mod schema;
10
11pub use any_value::*;
12use arrow::bitmap::Bitmap;
13use arrow::bitmap::bitmask::BitMask;
14pub use arrow::legacy::utils::*;
15pub use arrow::trusted_len::TrustMyLength;
16use flatten::*;
17use num_traits::{One, Zero};
18use rayon::prelude::*;
19pub use schema::*;
20pub use series::*;
21pub use supertype::*;
22pub use {arrow, rayon};
23
24use crate::POOL;
25use crate::prelude::*;
26
/// Transparent newtype around a `T` that dereferences to the wrapped value.
#[repr(transparent)]
pub struct Wrap<T>(pub T);

impl<T> Deref for Wrap<T> {
    type Target = T;

    /// Borrows the wrapped value.
    fn deref(&self) -> &T {
        let Wrap(inner) = self;
        inner
    }
}
36
37#[inline(always)]
38pub fn _set_partition_size() -> usize {
39    POOL.current_num_threads()
40}
41
/// Thin wrapper type that exists to enable certain impl specializations.
///
/// It is, for instance, used to implement
/// `impl<T> FromIterator<T::Native> for NoNull<ChunkedArray<T>>`,
/// because the `Option<T::Native>` flavour was already taken by
/// `impl<T> FromIterator<Option<T::Native>> for ChunkedArray<T>`.
pub struct NoNull<T> {
    inner: T,
}

impl<T> NoNull<T> {
    /// Wraps `inner`.
    pub fn new(inner: T) -> Self {
        Self { inner }
    }

    /// Consumes the wrapper and hands back the wrapped value.
    pub fn into_inner(self) -> T {
        let Self { inner } = self;
        inner
    }
}

impl<T> Deref for NoNull<T> {
    type Target = T;

    /// Shared access to the wrapped value.
    fn deref(&self) -> &T {
        &self.inner
    }
}

impl<T> DerefMut for NoNull<T> {
    /// Exclusive access to the wrapped value.
    fn deref_mut(&mut self) -> &mut T {
        &mut self.inner
    }
}
75
/// Picks an initial capacity from the iterator's `size_hint`.
///
/// The upper bound is used when it is known. Otherwise the lower bound is used,
/// except that a lower bound of `0` falls back to `1024`.
pub(crate) fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
    let (lower, upper) = iter.size_hint();
    match upper {
        Some(bound) => bound,
        None if lower == 0 => 1024,
        None => lower,
    }
}
83
// Prefer this one over `split_ca`, as this can push the null_count into the thread pool.
/// Computes `n` contiguous `(offset, length)` partitions that together cover `0..len`.
///
/// Every partition has length `len / n`, except the last one, which also takes the
/// remainder. `n == 0` is treated like `n == 1` (a single partition spanning
/// everything) instead of panicking on a division by zero.
#[doc(hidden)]
pub fn _split_offsets(len: usize, n: usize) -> Vec<(usize, usize)> {
    if n <= 1 {
        return vec![(0, len)];
    }

    let chunk_size = len / n;
    let last = n - 1;

    (0..n)
        .map(|partition| {
            let offset = partition * chunk_size;
            // The final partition absorbs whatever the integer division left over.
            let partition_len = if partition == last {
                len - offset
            } else {
                chunk_size
            };
            (offset, partition_len)
        })
        .collect()
}
106
/// Common interface of the chunked containers that the split helpers operate on.
#[allow(clippy::len_without_is_empty)]
pub trait Container: Clone {
    /// Number of elements held by the container.
    fn len(&self) -> usize;

    /// Number of chunks the container currently consists of.
    fn n_chunks(&self) -> usize;

    /// Iterates over the length of every chunk.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize>;

    /// Iterates over the chunks, each yielded as a container of its own.
    fn iter_chunks(&self) -> impl Iterator<Item = Self>;

    /// Returns the part of the container described by `offset` and `len`.
    fn slice(&self, offset: i64, len: usize) -> Self;

    /// Splits the container at `offset` into two parts.
    fn split_at(&self, offset: i64) -> (Self, Self);
}
121
122impl Container for DataFrame {
123    fn slice(&self, offset: i64, len: usize) -> Self {
124        DataFrame::slice(self, offset, len)
125    }
126
127    fn split_at(&self, offset: i64) -> (Self, Self) {
128        DataFrame::split_at(self, offset)
129    }
130
131    fn len(&self) -> usize {
132        self.height()
133    }
134
135    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
136        flatten_df_iter(self)
137    }
138
139    fn n_chunks(&self) -> usize {
140        DataFrame::first_col_n_chunks(self)
141    }
142
143    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
144        // @scalar-correctness?
145        self.columns[0].as_materialized_series().chunk_lengths()
146    }
147}
148
149impl<T: PolarsDataType> Container for ChunkedArray<T> {
150    fn slice(&self, offset: i64, len: usize) -> Self {
151        ChunkedArray::slice(self, offset, len)
152    }
153
154    fn split_at(&self, offset: i64) -> (Self, Self) {
155        ChunkedArray::split_at(self, offset)
156    }
157
158    fn len(&self) -> usize {
159        ChunkedArray::len(self)
160    }
161
162    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
163        self.downcast_iter()
164            .map(|arr| Self::with_chunk(self.name().clone(), arr.clone()))
165    }
166
167    fn n_chunks(&self) -> usize {
168        self.chunks().len()
169    }
170
171    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
172        ChunkedArray::chunk_lengths(self)
173    }
174}
175
176impl Container for Series {
177    fn slice(&self, offset: i64, len: usize) -> Self {
178        self.0.slice(offset, len)
179    }
180
181    fn split_at(&self, offset: i64) -> (Self, Self) {
182        self.0.split_at(offset)
183    }
184
185    fn len(&self) -> usize {
186        self.0.len()
187    }
188
189    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
190        (0..self.0.n_chunks()).map(|i| self.select_chunk(i))
191    }
192
193    fn n_chunks(&self) -> usize {
194        self.chunks().len()
195    }
196
197    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
198        self.0.chunk_lengths()
199    }
200}
201
202fn split_impl<C: Container>(container: &C, target: usize, chunk_size: usize) -> Vec<C> {
203    if target == 1 {
204        return vec![container.clone()];
205    }
206    let mut out = Vec::with_capacity(target);
207    let chunk_size = chunk_size as i64;
208
209    // First split
210    let (chunk, mut remainder) = container.split_at(chunk_size);
211    out.push(chunk);
212
213    // Take the rest of the splits of exactly chunk size, but skip the last remainder as we won't split that.
214    for _ in 1..target - 1 {
215        let (a, b) = remainder.split_at(chunk_size);
216        out.push(a);
217        remainder = b
218    }
219    // This can be slightly larger than `chunk_size`, but is smaller than `2 * chunk_size`.
220    out.push(remainder);
221    out
222}
223
224/// Splits, but doesn't flatten chunks. E.g. a container can still have multiple chunks.
225pub fn split<C: Container>(container: &C, target: usize) -> Vec<C> {
226    let total_len = container.len();
227    if total_len == 0 {
228        return vec![container.clone()];
229    }
230
231    let chunk_size = std::cmp::max(total_len / target, 1);
232
233    if container.n_chunks() == target
234        && container
235            .chunk_lengths()
236            .all(|len| len.abs_diff(chunk_size) < 100)
237    {
238        return container.iter_chunks().collect();
239    }
240    split_impl(container, target, chunk_size)
241}
242
243/// Split a [`Container`] in `target` elements. The target doesn't have to be respected if not
244/// Deviation of the target might be done to create more equal size chunks.
245pub fn split_and_flatten<C: Container>(container: &C, target: usize) -> Vec<C> {
246    let total_len = container.len();
247    if total_len == 0 {
248        return vec![container.clone()];
249    }
250
251    let chunk_size = std::cmp::max(total_len / target, 1);
252
253    if container.n_chunks() == target
254        && container
255            .chunk_lengths()
256            .all(|len| len.abs_diff(chunk_size) < 100)
257    {
258        return container.iter_chunks().collect();
259    }
260
261    if container.n_chunks() == 1 {
262        split_impl(container, target, chunk_size)
263    } else {
264        let mut out = Vec::with_capacity(target);
265        let chunks = container.iter_chunks();
266
267        'new_chunk: for mut chunk in chunks {
268            loop {
269                let h = chunk.len();
270                if h < chunk_size {
271                    // TODO if the chunk is much smaller than chunk size, we should try to merge it with the next one.
272                    out.push(chunk);
273                    continue 'new_chunk;
274                }
275
276                // If a split leads to the next chunk being smaller than 30% take the whole chunk
277                if ((h - chunk_size) as f64 / chunk_size as f64) < 0.3 {
278                    out.push(chunk);
279                    continue 'new_chunk;
280                }
281
282                let (a, b) = chunk.split_at(chunk_size as i64);
283                out.push(a);
284                chunk = b;
285            }
286        }
287        out
288    }
289}
290
291/// Split a [`DataFrame`] in `target` elements. The target doesn't have to be respected if not
292/// strict. Deviation of the target might be done to create more equal size chunks.
293///
294/// # Panics
295/// if chunks are not aligned
296pub fn split_df_as_ref(df: &DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
297    if strict {
298        split(df, target)
299    } else {
300        split_and_flatten(df, target)
301    }
302}
303
304#[doc(hidden)]
305/// Split a [`DataFrame`] into `n` parts. We take a `&mut` to be able to repartition/align chunks.
306/// `strict` in that it respects `n` even if the chunks are suboptimal.
307pub fn split_df(df: &mut DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
308    if target == 0 || df.is_empty() {
309        return vec![df.clone()];
310    }
311    // make sure that chunks are aligned.
312    df.align_chunks_par();
313    split_df_as_ref(df, target, strict)
314}
315
316pub fn slice_slice<T>(vals: &[T], offset: i64, len: usize) -> &[T] {
317    let (raw_offset, slice_len) = slice_offsets(offset, len, vals.len());
318    &vals[raw_offset..raw_offset + slice_len]
319}
320
/// Turns a possibly negative `offset` plus a `length` into an in-bounds
/// `(start_index, slice_length)` pair for an array of `array_len` elements.
///
/// A negative `offset` counts from the end of the array. Both the start and the stop
/// are clamped to `0..=array_len`, so the result is always valid for slicing.
///
/// # Panics
/// Panics if `array_len` does not fit in an `i64`.
#[inline]
#[doc(hidden)]
pub fn slice_offsets(offset: i64, length: usize, array_len: usize) -> (usize, usize) {
    let upper_bound = i64::try_from(array_len).expect("array length larger than i64::MAX");

    // Resolve negative offsets relative to the end; saturate instead of overflowing.
    let start = match offset {
        from_end if from_end < 0 => from_end.saturating_add_unsigned(array_len as u64),
        from_start => from_start,
    };
    let stop = start.saturating_add_unsigned(length as u64);

    let start = start.clamp(0, upper_bound);
    let stop = stop.clamp(0, upper_bound);

    (start as usize, (stop - start) as usize)
}
341
/// Dispatches on a `DataType` value and invokes one of the given macros.
///
/// * `String` invokes `$macro_string!` and `Boolean` invokes `$macro_bool!`, both with
///   just the optional arguments.
/// * The numeric dtypes invoke `$macro!` with the matching Rust primitive type as first
///   argument, followed by the optional arguments.
///
/// Optional arguments are always forwarded comma separated.
///
/// # Panics
/// Panics for every dtype that is not listed.
#[macro_export]
macro_rules! match_dtype_to_physical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            // Comma separated like in the numeric arms, so that more than one
            // optional argument can be forwarded.
            DataType::String => $macro_string!($($opt_args),*),
            DataType::Boolean => $macro_bool!($($opt_args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(u8 $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(u16 $(, $opt_args)*),
            DataType::UInt32 => $macro!(u32 $(, $opt_args)*),
            DataType::UInt64 => $macro!(u64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(i8 $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(i16 $(, $opt_args)*),
            DataType::Int32 => $macro!(i32 $(, $opt_args)*),
            DataType::Int64 => $macro!(i64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(i128 $(, $opt_args)*),
            DataType::Float32 => $macro!(f32 $(, $opt_args)*),
            DataType::Float64 => $macro!(f64 $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
369
/// Dispatches on a `DataType` value and invokes one of the given macros.
///
/// * `String`, `Binary` and `Boolean` invoke `$macro_string!`, `$macro_binary!` and
///   `$macro_bool!` respectively, with just the optional arguments.
/// * The numeric dtypes invoke `$macro!` with the matching `*Type` marker identifier
///   (e.g. `Int32Type`) as first argument, followed by the optional arguments.
///
/// Optional arguments are always forwarded comma separated.
///
/// # Panics
/// Panics for every dtype that is not listed.
#[macro_export]
macro_rules! match_dtype_to_logical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_binary:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            // Comma separated like in the numeric arms, so that more than one
            // optional argument can be forwarded.
            DataType::String => $macro_string!($($opt_args),*),
            DataType::Binary => $macro_binary!($($opt_args),*),
            DataType::Boolean => $macro_bool!($($opt_args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(UInt8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(UInt16Type $(, $opt_args)*),
            DataType::UInt32 => $macro!(UInt32Type $(, $opt_args)*),
            DataType::UInt64 => $macro!(UInt64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(Int8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(Int16Type $(, $opt_args)*),
            DataType::Int32 => $macro!(Int32Type $(, $opt_args)*),
            DataType::Int64 => $macro!(Int64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(Int128Type $(, $opt_args)*),
            DataType::Float32 => $macro!(Float32Type $(, $opt_args)*),
            DataType::Float64 => $macro!(Float64Type $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
398
/// Downcasts `$series` according to its dtype and hands the downcast result to a macro.
///
/// `String` goes to `$on_string!`, `Boolean` to `$on_bool!` and every listed numeric
/// dtype to `$on_numeric!`. The downcast value is the first macro argument, followed
/// by the optional arguments.
///
/// # Panics
/// Panics for every dtype that is not listed.
#[macro_export]
macro_rules! match_arrow_dtype_apply_macro_ca {
    ($series:expr, $on_numeric:ident, $on_string:ident, $on_bool:ident $(, $extra:expr)*) => {{
        match $series.dtype() {
            DataType::Boolean => $on_bool!($series.bool().unwrap() $(, $extra)*),
            DataType::String => $on_string!($series.str().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $on_numeric!($series.i8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $on_numeric!($series.i16().unwrap() $(, $extra)*),
            DataType::Int32 => $on_numeric!($series.i32().unwrap() $(, $extra)*),
            DataType::Int64 => $on_numeric!($series.i64().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $on_numeric!($series.i128().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $on_numeric!($series.u8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $on_numeric!($series.u16().unwrap() $(, $extra)*),
            DataType::UInt32 => $on_numeric!($series.u32().unwrap() $(, $extra)*),
            DataType::UInt64 => $on_numeric!($series.u64().unwrap() $(, $extra)*),
            DataType::Float32 => $on_numeric!($series.f32().unwrap() $(, $extra)*),
            DataType::Float64 => $on_numeric!($series.f64().unwrap() $(, $extra)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
426
/// Expands `$body` once per numeric dtype, with the alias written between the pipes
/// (`|$T|`) replaced by the matching Rust primitive type, and selects the expansion
/// that belongs to `$dtype`.
///
/// # Panics
/// Panics if `$dtype` is not one of the listed numeric dtypes.
#[macro_export]
macro_rules! with_match_physical_numeric_type {(
    $dtype:expr, | $dollar:tt $alias:ident | $($body:tt)*
) => ({
    // Helper macro that substitutes the concrete type for the caller's alias in the body.
    macro_rules! __with_ty__ {( $dollar $alias:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
453
/// Expands `$body` once per integer dtype, with the alias written between the pipes
/// (`|$T|`) replaced by the matching Rust primitive integer type, and selects the
/// expansion that belongs to `$dtype`.
///
/// # Panics
/// Panics if `$dtype` is not one of the listed integer dtypes.
#[macro_export]
macro_rules! with_match_physical_integer_type {(
    $dtype:expr, | $dollar:tt $alias:ident | $($body:tt)*
) => ({
    // Helper macro that substitutes the concrete type for the caller's alias in the body.
    macro_rules! __with_ty__ {( $dollar $alias:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
478
/// Expands `$body` for `f32` and `f64`, with the alias written between the pipes
/// (`|$T|`) replaced by that primitive type, and selects the expansion that belongs
/// to `$dtype`.
///
/// # Panics
/// Panics if `$dtype` is neither `Float32` nor `Float64`.
#[macro_export]
macro_rules! with_match_physical_float_type {(
    $dtype:expr, | $dollar:tt $alias:ident | $($body:tt)*
) => ({
    // Helper macro that substitutes the concrete type for the caller's alias in the body.
    macro_rules! __with_ty__ {( $dollar $alias:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        Float64 => __with_ty__! { f64 },
        Float32 => __with_ty__! { f32 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
491
/// Expands `$body` for `Float32Type` and `Float64Type`, with the alias written between
/// the pipes (`|$T|`) replaced by that type name, and selects the expansion that
/// belongs to `$key_type`.
///
/// # Panics
/// Panics if `$key_type` is neither `Float32` nor `Float64`.
#[macro_export]
macro_rules! with_match_physical_float_polars_type {(
    $key_type:expr, | $dollar:tt $alias:ident | $($body:tt)*
) => ({
    // Helper macro that substitutes the concrete type for the caller's alias in the body.
    macro_rules! __with_ty__ {( $dollar $alias:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        Float64 => __with_ty__! { Float64Type },
        Float32 => __with_ty__! { Float32Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
504
/// Expands `$body` once per numeric dtype, with the alias written between the pipes
/// (`|$T|`) replaced by the matching `*Type` name (e.g. `Int32Type`), and selects the
/// expansion that belongs to `$key_type`.
///
/// # Panics
/// Panics if `$key_type` is not one of the listed numeric dtypes.
#[macro_export]
macro_rules! with_match_physical_numeric_polars_type {(
    $key_type:expr, | $dollar:tt $alias:ident | $($body:tt)*
) => ({
    // Helper macro that substitutes the concrete type for the caller's alias in the body.
    macro_rules! __with_ty__ {( $dollar $alias:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
531
/// Expands `$body` once per integer dtype, with the alias written between the pipes
/// (`|$T|`) replaced by the matching `*Type` name (e.g. `Int32Type`), and selects the
/// expansion that belongs to `$key_type`.
///
/// Unlike its sibling macros this one also glob-imports `$crate::datatypes`.
///
/// # Panics
/// Panics if `$key_type` is not one of the listed integer dtypes.
#[macro_export]
macro_rules! with_match_physical_integer_polars_type {(
    $key_type:expr, | $dollar:tt $alias:ident | $($body:tt)*
) => ({
    // Helper macro that substitutes the concrete type for the caller's alias in the body.
    macro_rules! __with_ty__ {( $dollar $alias:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    use $crate::datatypes::*;
    match $key_type {
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
557
/// Downcasts `$series` according to its (physical numeric) dtype and passes the
/// downcast result, followed by the optional arguments, to `$callback!`.
/// Only the numeric dtypes listed below are handled.
///
/// # Panics
/// Panics for every dtype that is not listed.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical {
    ($series:expr, $callback:ident $(, $extra:expr)*) => {{
        match $series.dtype() {
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $callback!($series.i8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $callback!($series.i16().unwrap() $(, $extra)*),
            DataType::Int32 => $callback!($series.i32().unwrap() $(, $extra)*),
            DataType::Int64 => $callback!($series.i64().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $callback!($series.i128().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $callback!($series.u8().unwrap() $(, $extra)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $callback!($series.u16().unwrap() $(, $extra)*),
            DataType::UInt32 => $callback!($series.u32().unwrap() $(, $extra)*),
            DataType::UInt64 => $callback!($series.u64().unwrap() $(, $extra)*),
            DataType::Float32 => $callback!($series.f32().unwrap() $(, $extra)*),
            DataType::Float64 => $callback!($series.f64().unwrap() $(, $extra)*),
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
584
/// Mutable counterpart of `downcast_as_macro_arg_physical`: obtains a `&mut` chunked
/// array of the concrete numeric type via `as_mut()` and invokes `$callback!` with the
/// `*Type` name, that mutable reference and the optional arguments.
/// Only the numeric dtypes listed below are handled.
///
/// # Panics
/// Panics for every dtype that is not listed.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical_mut {
    ($series:expr, $callback:ident $(, $extra:expr)*) => {{
        // The dtype is cloned so that `$series` is not borrowed while it is mutated.
        match $series.dtype().clone() {
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => {
                let chunked: &mut Int8Chunked = $series.as_mut();
                $callback!(Int8Type, chunked $(, $extra)*)
            },
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => {
                let chunked: &mut Int16Chunked = $series.as_mut();
                $callback!(Int16Type, chunked $(, $extra)*)
            },
            DataType::Int32 => {
                let chunked: &mut Int32Chunked = $series.as_mut();
                $callback!(Int32Type, chunked $(, $extra)*)
            },
            DataType::Int64 => {
                let chunked: &mut Int64Chunked = $series.as_mut();
                $callback!(Int64Type, chunked $(, $extra)*)
            },
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => {
                let chunked: &mut Int128Chunked = $series.as_mut();
                $callback!(Int128Type, chunked $(, $extra)*)
            },
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => {
                let chunked: &mut UInt8Chunked = $series.as_mut();
                $callback!(UInt8Type, chunked $(, $extra)*)
            },
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => {
                let chunked: &mut UInt16Chunked = $series.as_mut();
                $callback!(UInt16Type, chunked $(, $extra)*)
            },
            DataType::UInt32 => {
                let chunked: &mut UInt32Chunked = $series.as_mut();
                $callback!(UInt32Type, chunked $(, $extra)*)
            },
            DataType::UInt64 => {
                let chunked: &mut UInt64Chunked = $series.as_mut();
                $callback!(UInt64Type, chunked $(, $extra)*)
            },
            DataType::Float32 => {
                let chunked: &mut Float32Chunked = $series.as_mut();
                $callback!(Float32Type, chunked $(, $extra)*)
            },
            DataType::Float64 => {
                let chunked: &mut Float64Chunked = $series.as_mut();
                $callback!(Float64Type, chunked $(, $extra)*)
            },
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
645
/// Downcasts `$series` according to its dtype and calls `$method` with `$args` on the
/// downcast result.
///
/// Handles booleans, strings, the numeric dtypes, `Time`, `Date`, `Datetime`, `List`
/// and `Struct`.
///
/// # Panics
/// Panics for every dtype that is not listed.
#[macro_export]
macro_rules! apply_method_all_arrow_series {
    ($series:expr, $method:ident, $($arg:expr),*) => {
        match $series.dtype() {
            DataType::Boolean => $series.bool().unwrap().$method($($arg),*),
            DataType::String => $series.str().unwrap().$method($($arg),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $series.i8().unwrap().$method($($arg),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $series.i16().unwrap().$method($($arg),*),
            DataType::Int32 => $series.i32().unwrap().$method($($arg),*),
            DataType::Int64 => $series.i64().unwrap().$method($($arg),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $series.i128().unwrap().$method($($arg),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $series.u8().unwrap().$method($($arg),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $series.u16().unwrap().$method($($arg),*),
            DataType::UInt32 => $series.u32().unwrap().$method($($arg),*),
            DataType::UInt64 => $series.u64().unwrap().$method($($arg),*),
            DataType::Float32 => $series.f32().unwrap().$method($($arg),*),
            DataType::Float64 => $series.f64().unwrap().$method($($arg),*),
            DataType::Date => $series.date().unwrap().$method($($arg),*),
            DataType::Time => $series.time().unwrap().$method($($arg),*),
            DataType::Datetime(_, _) => $series.datetime().unwrap().$method($($arg),*),
            DataType::List(_) => $series.list().unwrap().$method($($arg),*),
            DataType::Struct(_) => $series.struct_().unwrap().$method($($arg),*),
            dt => panic!("dtype {:?} not supported", dt)
        }
    }
}
677
/// Downcasts `$series` according to its integer dtype and calls `$method` with `$args`
/// on the downcast result.
///
/// # Panics
/// Panics for every dtype that is not one of the listed integer dtypes.
#[macro_export]
macro_rules! apply_method_physical_integer {
    ($series:expr, $method:ident, $($arg:expr),*) => {
        match $series.dtype() {
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $series.i8().unwrap().$method($($arg),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $series.i16().unwrap().$method($($arg),*),
            DataType::Int32 => $series.i32().unwrap().$method($($arg),*),
            DataType::Int64 => $series.i64().unwrap().$method($($arg),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $series.i128().unwrap().$method($($arg),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $series.u8().unwrap().$method($($arg),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $series.u16().unwrap().$method($($arg),*),
            DataType::UInt32 => $series.u32().unwrap().$method($($arg),*),
            DataType::UInt64 => $series.u64().unwrap().$method($($arg),*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }
}
700
/// Downcasts `$self` according to its numeric dtype and calls `$method` with `$args`
/// on the downcast result.
///
/// Floats are handled here; every other dtype is delegated to
/// `apply_method_physical_integer!`, which panics for non-integer dtypes.
/// Doesn't include Bool and String.
#[macro_export]
macro_rules! apply_method_physical_numeric {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            // Path-qualified with `$crate` so that users of this exported macro do not
            // have to bring `apply_method_physical_integer!` into scope themselves.
            _ => $crate::apply_method_physical_integer!($self, $method, $($args),*),
        }
    }
}
712
/// Builds a `DataFrame` from `name => values` pairs (a trailing comma is allowed).
///
/// Every pair becomes a `Series` via `NamedFrom::new`, which is then converted into a
/// `Column`. The macro evaluates to whatever `DataFrame::new` returns.
#[macro_export]
macro_rules! df {
    ($($name:expr => $values:expr), + $(,)?) => {
        $crate::prelude::DataFrame::new(vec![
            $(
                $crate::prelude::Column::from(
                    <$crate::prelude::Series as $crate::prelude::NamedFrom::<_, _>>::new(
                        $name.into(),
                        $values,
                    ),
                ),
            )+
        ])
    }
}
721
722pub fn get_time_units(tu_l: &TimeUnit, tu_r: &TimeUnit) -> TimeUnit {
723    use TimeUnit::*;
724    match (tu_l, tu_r) {
725        (Nanoseconds, Microseconds) => Microseconds,
726        (_, Milliseconds) => Milliseconds,
727        _ => *tu_l,
728    }
729}
730
731#[cold]
732#[inline(never)]
733fn width_mismatch(df1: &DataFrame, df2: &DataFrame) -> PolarsError {
734    let mut df1_extra = Vec::new();
735    let mut df2_extra = Vec::new();
736
737    let s1 = df1.schema();
738    let s2 = df2.schema();
739
740    s1.field_compare(s2, &mut df1_extra, &mut df2_extra);
741
742    let df1_extra = df1_extra
743        .into_iter()
744        .map(|(_, (n, _))| n.as_str())
745        .collect::<Vec<_>>()
746        .join(", ");
747    let df2_extra = df2_extra
748        .into_iter()
749        .map(|(_, (n, _))| n.as_str())
750        .collect::<Vec<_>>()
751        .join(", ");
752
753    polars_err!(
754        SchemaMismatch: r#"unable to vstack, dataframes have different widths ({} != {}).
755One dataframe has additional columns: [{df1_extra}].
756Other dataframe has additional columns: [{df2_extra}]."#,
757        df1.width(),
758        df2.width(),
759    )
760}
761
762pub fn accumulate_dataframes_vertical_unchecked_optional<I>(dfs: I) -> Option<DataFrame>
763where
764    I: IntoIterator<Item = DataFrame>,
765{
766    let mut iter = dfs.into_iter();
767    let additional = iter.size_hint().0;
768    let mut acc_df = iter.next()?;
769    acc_df.reserve_chunks(additional);
770
771    for df in iter {
772        if acc_df.width() != df.width() {
773            panic!("{}", width_mismatch(&acc_df, &df));
774        }
775
776        acc_df.vstack_mut_owned_unchecked(df);
777    }
778    Some(acc_df)
779}
780
781/// This takes ownership of the DataFrame so that drop is called earlier.
782/// Does not check if schema is correct
783pub fn accumulate_dataframes_vertical_unchecked<I>(dfs: I) -> DataFrame
784where
785    I: IntoIterator<Item = DataFrame>,
786{
787    let mut iter = dfs.into_iter();
788    let additional = iter.size_hint().0;
789    let mut acc_df = iter.next().unwrap();
790    acc_df.reserve_chunks(additional);
791
792    for df in iter {
793        if acc_df.width() != df.width() {
794            panic!("{}", width_mismatch(&acc_df, &df));
795        }
796
797        acc_df.vstack_mut_owned_unchecked(df);
798    }
799    acc_df
800}
801
802/// This takes ownership of the DataFrame so that drop is called earlier.
803/// # Panics
804/// Panics if `dfs` is empty.
805pub fn accumulate_dataframes_vertical<I>(dfs: I) -> PolarsResult<DataFrame>
806where
807    I: IntoIterator<Item = DataFrame>,
808{
809    let mut iter = dfs.into_iter();
810    let additional = iter.size_hint().0;
811    let mut acc_df = iter.next().unwrap();
812    acc_df.reserve_chunks(additional);
813    for df in iter {
814        if acc_df.width() != df.width() {
815            return Err(width_mismatch(&acc_df, &df));
816        }
817
818        acc_df.vstack_mut(&df)?;
819    }
820
821    Ok(acc_df)
822}
823
824/// Concat the DataFrames to a single DataFrame.
825pub fn concat_df<'a, I>(dfs: I) -> PolarsResult<DataFrame>
826where
827    I: IntoIterator<Item = &'a DataFrame>,
828{
829    let mut iter = dfs.into_iter();
830    let additional = iter.size_hint().0;
831    let mut acc_df = iter.next().unwrap().clone();
832    acc_df.reserve_chunks(additional);
833    for df in iter {
834        acc_df.vstack_mut(df)?;
835    }
836    Ok(acc_df)
837}
838
839/// Concat the DataFrames to a single DataFrame.
840pub fn concat_df_unchecked<'a, I>(dfs: I) -> DataFrame
841where
842    I: IntoIterator<Item = &'a DataFrame>,
843{
844    let mut iter = dfs.into_iter();
845    let additional = iter.size_hint().0;
846    let mut acc_df = iter.next().unwrap().clone();
847    acc_df.reserve_chunks(additional);
848    for df in iter {
849        acc_df.vstack_mut_unchecked(df);
850    }
851    acc_df
852}
853
854pub fn accumulate_dataframes_horizontal(dfs: Vec<DataFrame>) -> PolarsResult<DataFrame> {
855    let mut iter = dfs.into_iter();
856    let mut acc_df = iter.next().unwrap();
857    for df in iter {
858        acc_df.hstack_mut(df.get_columns())?;
859    }
860    Ok(acc_df)
861}
862
863/// Ensure the chunks in both ChunkedArrays have the same length.
864/// # Panics
865/// This will panic if `left.len() != right.len()` and array is chunked.
866pub fn align_chunks_binary<'a, T, B>(
867    left: &'a ChunkedArray<T>,
868    right: &'a ChunkedArray<B>,
869) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<B>>)
870where
871    B: PolarsDataType,
872    T: PolarsDataType,
873{
874    let assert = || {
875        assert_eq!(
876            left.len(),
877            right.len(),
878            "expected arrays of the same length"
879        )
880    };
881    match (left.chunks.len(), right.chunks.len()) {
882        // All chunks are equal length
883        (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
884        // All chunks are equal length
885        (a, b)
886            if a == b
887                && left
888                    .chunk_lengths()
889                    .zip(right.chunk_lengths())
890                    .all(|(l, r)| l == r) =>
891        {
892            (Cow::Borrowed(left), Cow::Borrowed(right))
893        },
894        (_, 1) => {
895            assert();
896            (
897                Cow::Borrowed(left),
898                Cow::Owned(right.match_chunks(left.chunk_lengths())),
899            )
900        },
901        (1, _) => {
902            assert();
903            (
904                Cow::Owned(left.match_chunks(right.chunk_lengths())),
905                Cow::Borrowed(right),
906            )
907        },
908        (_, _) => {
909            assert();
910            // could optimize to choose to rechunk a primitive and not a string or list type
911            let left = left.rechunk();
912            (
913                Cow::Owned(left.match_chunks(right.chunk_lengths())),
914                Cow::Borrowed(right),
915            )
916        },
917    }
918}
919
/// Owned variant of chunk alignment for two `Series`.
///
/// The inputs are handed back untouched when both have exactly one chunk, or when they have the
/// same number of chunks with pairwise equal chunk lengths. Otherwise every side that does not
/// consist of exactly one chunk is rechunked.
#[cfg(feature = "performant")]
pub(crate) fn align_chunks_binary_owned_series(left: Series, right: Series) -> (Series, Series) {
    let n_left = left.chunks().len();
    let n_right = right.chunks().len();

    if n_left == 1 && n_right == 1 {
        return (left, right);
    }

    // Same number of chunks and every pair of chunks has the same length.
    let same_layout = n_left == n_right
        && left
            .chunk_lengths()
            .zip(right.chunk_lengths())
            .all(|(l, r)| l == r);
    if same_layout {
        return (left, right);
    }

    // A side that already is a single chunk is kept as is; the other one is rechunked.
    let left = if n_left == 1 { left } else { left.rechunk() };
    let right = if n_right == 1 { right } else { right.rechunk() };
    (left, right)
}
939
940pub(crate) fn align_chunks_binary_owned<T, B>(
941    left: ChunkedArray<T>,
942    right: ChunkedArray<B>,
943) -> (ChunkedArray<T>, ChunkedArray<B>)
944where
945    B: PolarsDataType,
946    T: PolarsDataType,
947{
948    match (left.chunks.len(), right.chunks.len()) {
949        (1, 1) => (left, right),
950        // All chunks are equal length
951        (a, b)
952            if a == b
953                && left
954                    .chunk_lengths()
955                    .zip(right.chunk_lengths())
956                    .all(|(l, r)| l == r) =>
957        {
958            (left, right)
959        },
960        (_, 1) => (left.rechunk().into_owned(), right),
961        (1, _) => (left, right.rechunk().into_owned()),
962        (_, _) => (left.rechunk().into_owned(), right.rechunk().into_owned()),
963    }
964}
965
966/// # Panics
967/// This will panic if `a.len() != b.len() || b.len() != c.len()` and array is chunked.
968#[allow(clippy::type_complexity)]
969pub fn align_chunks_ternary<'a, A, B, C>(
970    a: &'a ChunkedArray<A>,
971    b: &'a ChunkedArray<B>,
972    c: &'a ChunkedArray<C>,
973) -> (
974    Cow<'a, ChunkedArray<A>>,
975    Cow<'a, ChunkedArray<B>>,
976    Cow<'a, ChunkedArray<C>>,
977)
978where
979    A: PolarsDataType,
980    B: PolarsDataType,
981    C: PolarsDataType,
982{
983    if a.chunks.len() == 1 && b.chunks.len() == 1 && c.chunks.len() == 1 {
984        return (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c));
985    }
986
987    assert!(
988        a.len() == b.len() && b.len() == c.len(),
989        "expected arrays of the same length"
990    );
991
992    match (a.chunks.len(), b.chunks.len(), c.chunks.len()) {
993        (_, 1, 1) => (
994            Cow::Borrowed(a),
995            Cow::Owned(b.match_chunks(a.chunk_lengths())),
996            Cow::Owned(c.match_chunks(a.chunk_lengths())),
997        ),
998        (1, 1, _) => (
999            Cow::Owned(a.match_chunks(c.chunk_lengths())),
1000            Cow::Owned(b.match_chunks(c.chunk_lengths())),
1001            Cow::Borrowed(c),
1002        ),
1003        (1, _, 1) => (
1004            Cow::Owned(a.match_chunks(b.chunk_lengths())),
1005            Cow::Borrowed(b),
1006            Cow::Owned(c.match_chunks(b.chunk_lengths())),
1007        ),
1008        (1, _, _) => {
1009            let b = b.rechunk();
1010            (
1011                Cow::Owned(a.match_chunks(c.chunk_lengths())),
1012                Cow::Owned(b.match_chunks(c.chunk_lengths())),
1013                Cow::Borrowed(c),
1014            )
1015        },
1016        (_, 1, _) => {
1017            let a = a.rechunk();
1018            (
1019                Cow::Owned(a.match_chunks(c.chunk_lengths())),
1020                Cow::Owned(b.match_chunks(c.chunk_lengths())),
1021                Cow::Borrowed(c),
1022            )
1023        },
1024        (_, _, 1) => {
1025            let b = b.rechunk();
1026            (
1027                Cow::Borrowed(a),
1028                Cow::Owned(b.match_chunks(a.chunk_lengths())),
1029                Cow::Owned(c.match_chunks(a.chunk_lengths())),
1030            )
1031        },
1032        (len_a, len_b, len_c)
1033            if len_a == len_b
1034                && len_b == len_c
1035                && a.chunk_lengths()
1036                    .zip(b.chunk_lengths())
1037                    .zip(c.chunk_lengths())
1038                    .all(|((a, b), c)| a == b && b == c) =>
1039        {
1040            (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c))
1041        },
1042        _ => {
1043            // could optimize to choose to rechunk a primitive and not a string or list type
1044            let a = a.rechunk();
1045            let b = b.rechunk();
1046            (
1047                Cow::Owned(a.match_chunks(c.chunk_lengths())),
1048                Cow::Owned(b.match_chunks(c.chunk_lengths())),
1049                Cow::Borrowed(c),
1050            )
1051        },
1052    }
1053}
1054
1055pub fn binary_concatenate_validities<'a, T, B>(
1056    left: &'a ChunkedArray<T>,
1057    right: &'a ChunkedArray<B>,
1058) -> Option<Bitmap>
1059where
1060    B: PolarsDataType,
1061    T: PolarsDataType,
1062{
1063    let (left, right) = align_chunks_binary(left, right);
1064    let left_validity = concatenate_validities(left.chunks());
1065    let right_validity = concatenate_validities(right.chunks());
1066    combine_validities_and(left_validity.as_ref(), right_validity.as_ref())
1067}
1068
1069/// Convenience for `x.into_iter().map(Into::into).collect()` using an `into_vec()` function.
1070pub trait IntoVec<T> {
1071    fn into_vec(self) -> Vec<T>;
1072}
1073
1074impl<I, S> IntoVec<PlSmallStr> for I
1075where
1076    I: IntoIterator<Item = S>,
1077    S: Into<PlSmallStr>,
1078{
1079    fn into_vec(self) -> Vec<PlSmallStr> {
1080        self.into_iter().map(|s| s.into()).collect()
1081    }
1082}
1083
1084/// This logic is same as the impl on ChunkedArray
1085/// The difference is that there is less indirection because the caller should preallocate
1086/// `chunk_lens` once. On the `ChunkedArray` we indirect through an `ArrayRef` which is an indirection
1087/// and a vtable.
1088#[inline]
1089pub(crate) fn index_to_chunked_index<
1090    I: Iterator<Item = Idx>,
1091    Idx: PartialOrd + std::ops::AddAssign + std::ops::SubAssign + Zero + One,
1092>(
1093    chunk_lens: I,
1094    index: Idx,
1095) -> (Idx, Idx) {
1096    let mut index_remainder = index;
1097    let mut current_chunk_idx = Zero::zero();
1098
1099    for chunk_len in chunk_lens {
1100        if chunk_len > index_remainder {
1101            break;
1102        } else {
1103            index_remainder -= chunk_len;
1104            current_chunk_idx += One::one();
1105        }
1106    }
1107    (current_chunk_idx, index_remainder)
1108}
1109
1110pub(crate) fn index_to_chunked_index_rev<
1111    I: Iterator<Item = Idx>,
1112    Idx: PartialOrd
1113        + std::ops::AddAssign
1114        + std::ops::SubAssign
1115        + std::ops::Sub<Output = Idx>
1116        + Zero
1117        + One
1118        + Copy
1119        + std::fmt::Debug,
1120>(
1121    chunk_lens_rev: I,
1122    index_from_back: Idx,
1123    total_chunks: Idx,
1124) -> (Idx, Idx) {
1125    debug_assert!(index_from_back > Zero::zero(), "at least -1");
1126    let mut index_remainder = index_from_back;
1127    let mut current_chunk_idx = One::one();
1128    let mut current_chunk_len = Zero::zero();
1129
1130    for chunk_len in chunk_lens_rev {
1131        current_chunk_len = chunk_len;
1132        if chunk_len >= index_remainder {
1133            break;
1134        } else {
1135            index_remainder -= chunk_len;
1136            current_chunk_idx += One::one();
1137        }
1138    }
1139    (
1140        total_chunks - current_chunk_idx,
1141        current_chunk_len - index_remainder,
1142    )
1143}
1144
1145pub(crate) fn first_non_null<'a, I>(iter: I) -> Option<usize>
1146where
1147    I: Iterator<Item = Option<&'a Bitmap>>,
1148{
1149    let mut offset = 0;
1150    for validity in iter {
1151        if let Some(validity) = validity {
1152            let mask = BitMask::from_bitmap(validity);
1153            if let Some(n) = mask.nth_set_bit_idx(0, 0) {
1154                return Some(offset + n);
1155            }
1156            offset += validity.len()
1157        } else {
1158            return Some(offset);
1159        }
1160    }
1161    None
1162}
1163
1164pub(crate) fn last_non_null<'a, I>(iter: I, len: usize) -> Option<usize>
1165where
1166    I: DoubleEndedIterator<Item = Option<&'a Bitmap>>,
1167{
1168    if len == 0 {
1169        return None;
1170    }
1171    let mut offset = 0;
1172    for validity in iter.rev() {
1173        if let Some(validity) = validity {
1174            let mask = BitMask::from_bitmap(validity);
1175            if let Some(n) = mask.nth_set_bit_idx_rev(0, mask.len()) {
1176                let mask_start = len - offset - mask.len();
1177                return Some(mask_start + n);
1178            }
1179            offset += validity.len()
1180        } else {
1181            return Some(len - 1 - offset);
1182        }
1183    }
1184    None
1185}
1186
1187/// ensure that nulls are propagated to both arrays
1188pub fn coalesce_nulls<'a, T: PolarsDataType>(
1189    a: &'a ChunkedArray<T>,
1190    b: &'a ChunkedArray<T>,
1191) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<T>>) {
1192    if a.null_count() > 0 || b.null_count() > 0 {
1193        let (a, b) = align_chunks_binary(a, b);
1194        let mut b = b.into_owned();
1195        let a = a.coalesce_nulls(b.chunks());
1196
1197        for arr in a.chunks().iter() {
1198            for arr_b in unsafe { b.chunks_mut() } {
1199                *arr_b = arr_b.with_validity(arr.validity().cloned())
1200            }
1201        }
1202        b.compute_len();
1203        (Cow::Owned(a), Cow::Owned(b))
1204    } else {
1205        (Cow::Borrowed(a), Cow::Borrowed(b))
1206    }
1207}
1208
1209pub fn coalesce_nulls_columns(a: &Column, b: &Column) -> (Column, Column) {
1210    if a.null_count() > 0 || b.null_count() > 0 {
1211        let mut a = a.as_materialized_series().rechunk();
1212        let mut b = b.as_materialized_series().rechunk();
1213        for (arr_a, arr_b) in unsafe { a.chunks_mut().iter_mut().zip(b.chunks_mut()) } {
1214            let validity = match (arr_a.validity(), arr_b.validity()) {
1215                (None, Some(b)) => Some(b.clone()),
1216                (Some(a), Some(b)) => Some(a & b),
1217                (Some(a), None) => Some(a.clone()),
1218                (None, None) => None,
1219            };
1220            *arr_a = arr_a.with_validity(validity.clone());
1221            *arr_b = arr_b.with_validity(validity);
1222        }
1223        a.compute_len();
1224        b.compute_len();
1225        (a.into(), b.into())
1226    } else {
1227        (a.clone(), b.clone())
1228    }
1229}
1230
1231pub fn operation_exceeded_idxsize_msg(operation: &str) -> String {
1232    if size_of::<IdxSize>() == size_of::<u32>() {
1233        format!(
1234            "{} exceeded the maximum supported limit of {} rows. Consider installing 'polars-u64-idx'.",
1235            operation,
1236            IdxSize::MAX,
1237        )
1238    } else {
1239        format!(
1240            "{} exceeded the maximum supported limit of {} rows.",
1241            operation,
1242            IdxSize::MAX,
1243        )
1244    }
1245}
1246
#[cfg(test)]
mod test {
    use super::*;

    /// Splitting ten values into three parts yields parts of length 3, 3 and 4.
    #[test]
    fn test_split() {
        let ca: Int32Chunked = (0..10).collect_ca("a".into());

        let parts = split(&ca, 3);
        for (i, expected_len) in [3, 3, 4].into_iter().enumerate() {
            assert_eq!(parts[i].len(), expected_len);
        }
    }

    /// After `align_chunks_binary` both sides must report identical chunk lengths.
    #[test]
    fn test_align_chunks() -> PolarsResult<()> {
        // Compares the chunk layouts of two arrays.
        fn assert_same_layout(lhs: &Int32Chunked, rhs: &Int32Chunked) {
            assert_eq!(
                lhs.chunk_lengths().collect::<Vec<_>>(),
                rhs.chunk_lengths().collect::<Vec<_>>()
            );
        }

        // One chunk of four values against two chunks of lengths 1 and 3.
        let single = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut chunked = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let tail = Int32Chunked::new(PlSmallStr::EMPTY, &[2, 3, 4]);
        chunked.append(&tail)?;

        let (lhs, rhs) = align_chunks_binary(&single, &chunked);
        assert_same_layout(&lhs, &rhs);

        // One chunk of four values against four chunks of length 1.
        let single = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut chunked = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let one_value = chunked.clone();
        for _ in 0..3 {
            chunked.append(&one_value)?;
        }

        let (lhs, rhs) = align_chunks_binary(&single, &chunked);
        assert_same_layout(&lhs, &rhs);

        Ok(())
    }
}