1mod any_value;
2use arrow::compute::concatenate::concatenate_validities;
3use arrow::compute::utils::combine_validities_and;
4pub mod flatten;
5pub(crate) mod series;
6mod supertype;
7use std::borrow::Cow;
8use std::ops::{Deref, DerefMut};
9mod schema;
10
11pub use any_value::*;
12use arrow::bitmap::Bitmap;
13pub use arrow::legacy::utils::*;
14pub use arrow::trusted_len::TrustMyLength;
15use flatten::*;
16use num_traits::{One, Zero};
17use rayon::prelude::*;
18pub use schema::*;
19pub use series::*;
20pub use supertype::*;
21pub use {arrow, rayon};
22
23use crate::POOL;
24use crate::prelude::*;
25
/// Transparent single-field tuple wrapper around a `T`.
///
/// The wrapped value is public (`.0`) and is also reachable through [`Deref`].
#[repr(transparent)]
pub struct Wrap<T>(pub T);

impl<T> Deref for Wrap<T> {
    type Target = T;

    /// Borrows the wrapped value.
    fn deref(&self) -> &T {
        let Wrap(inner) = self;
        inner
    }
}
35
/// Returns the partition size, which is the current number of threads reported by `POOL`.
#[inline(always)]
pub fn _set_partition_size() -> usize {
    POOL.current_num_threads()
}
40
/// Thin wrapper that owns a single value of type `T`.
///
/// The wrapper adds no behaviour of its own: it can be built with [`NoNull::new`], unwrapped
/// with [`NoNull::into_inner`], and dereferences (mutably and immutably) to the inner value.
// NOTE(review): the name suggests the inner value is known to contain no nulls; nothing in
// this type enforces that — confirm against the call sites.
pub struct NoNull<T> {
    inner: T,
}

impl<T> NoNull<T> {
    /// Wraps `inner`.
    pub fn new(inner: T) -> Self {
        Self { inner }
    }

    /// Consumes the wrapper and returns the wrapped value.
    pub fn into_inner(self) -> T {
        let Self { inner } = self;
        inner
    }
}

impl<T> Deref for NoNull<T> {
    type Target = T;

    /// Borrows the wrapped value.
    fn deref(&self) -> &T {
        &self.inner
    }
}

impl<T> DerefMut for NoNull<T> {
    /// Mutably borrows the wrapped value.
    fn deref_mut(&mut self) -> &mut T {
        &mut self.inner
    }
}
74
/// Picks an initial capacity for collecting `iter`, based on its `size_hint`.
///
/// * If the iterator reports an upper bound, that bound is returned.
/// * Otherwise the lower bound is returned, except that a lower bound of `0`
///   falls back to a default of `1024`.
pub(crate) fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
    let (lower, upper) = iter.size_hint();
    match upper {
        Some(bound) => bound,
        None if lower == 0 => 1024,
        None => lower,
    }
}
82
83#[doc(hidden)]
86pub fn _split_offsets(len: usize, n: usize) -> Vec<(usize, usize)> {
87 if n == 1 {
88 vec![(0, len)]
89 } else {
90 let chunk_size = len / n;
91
92 (0..n)
93 .map(|partition| {
94 let offset = partition * chunk_size;
95 let len = if partition == (n - 1) {
96 len - offset
97 } else {
98 chunk_size
99 };
100 (partition * chunk_size, len)
101 })
102 .collect_trusted()
103 }
104}
105
/// Common interface over the chunked containers that the splitting helpers in this
/// module (`split`, `split_and_flatten`) operate on.
///
/// Implemented in this file for `DataFrame`, `ChunkedArray<T>` and `Series`.
#[allow(clippy::len_without_is_empty)]
pub trait Container: Clone {
    /// Returns the part of the container described by `offset` and `len`.
    fn slice(&self, offset: i64, len: usize) -> Self;

    /// Splits the container at `offset` into two containers.
    fn split_at(&self, offset: i64) -> (Self, Self);

    /// Length of the container (for `DataFrame` this is its height).
    fn len(&self) -> usize;

    /// Iterates over the container's chunks, each yielded as its own container.
    fn iter_chunks(&self) -> impl Iterator<Item = Self>;

    /// Whether the container reports that it should be rechunked; when `true`,
    /// `split` / `split_and_flatten` do not reuse the existing chunks as-is.
    fn should_rechunk(&self) -> bool;

    /// Number of chunks in the container.
    fn n_chunks(&self) -> usize;

    /// Iterates over the length of every chunk.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize>;
}
122
/// [`Container`] implementation for `DataFrame`; each method forwards to a `DataFrame` method
/// or helper.
impl Container for DataFrame {
    fn slice(&self, offset: i64, len: usize) -> Self {
        DataFrame::slice(self, offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        DataFrame::split_at(self, offset)
    }

    /// The length of a frame is its height.
    fn len(&self) -> usize {
        self.height()
    }

    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        flatten_df_iter(self)
    }

    fn should_rechunk(&self) -> bool {
        // NOTE(review): presumably resolves to the inherent `DataFrame::should_rechunk`
        // rather than recursing into this trait method — confirm.
        self.should_rechunk()
    }

    fn n_chunks(&self) -> usize {
        DataFrame::first_col_n_chunks(self)
    }

    /// Chunk lengths are taken from the first column only; indexing `columns[0]`
    /// will panic if the frame has no columns.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        self.columns[0].as_materialized_series().chunk_lengths()
    }
}
153
/// [`Container`] implementation for `ChunkedArray<T>`; mostly forwards to the
/// corresponding `ChunkedArray` methods.
impl<T: PolarsDataType> Container for ChunkedArray<T> {
    fn slice(&self, offset: i64, len: usize) -> Self {
        ChunkedArray::slice(self, offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        ChunkedArray::split_at(self, offset)
    }

    fn len(&self) -> usize {
        ChunkedArray::len(self)
    }

    /// Wraps every underlying array in its own `ChunkedArray` that carries this array's name.
    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        self.downcast_iter()
            .map(|arr| Self::with_chunk(self.name().clone(), arr.clone()))
    }

    /// Always `false` for a `ChunkedArray`.
    fn should_rechunk(&self) -> bool {
        false
    }

    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        ChunkedArray::chunk_lengths(self)
    }
}
184
/// [`Container`] implementation for `Series`; most methods forward to the wrapped
/// value in `self.0`.
impl Container for Series {
    fn slice(&self, offset: i64, len: usize) -> Self {
        self.0.slice(offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        self.0.split_at(offset)
    }

    fn len(&self) -> usize {
        self.0.len()
    }

    /// Yields one `Series` per chunk, obtained through `select_chunk`.
    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        (0..self.0.n_chunks()).map(|i| self.select_chunk(i))
    }

    /// Always `false` for a `Series`.
    fn should_rechunk(&self) -> bool {
        false
    }

    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        self.0.chunk_lengths()
    }
}
214
215fn split_impl<C: Container>(container: &C, target: usize, chunk_size: usize) -> Vec<C> {
216 if target == 1 {
217 return vec![container.clone()];
218 }
219 let mut out = Vec::with_capacity(target);
220 let chunk_size = chunk_size as i64;
221
222 let (chunk, mut remainder) = container.split_at(chunk_size);
224 out.push(chunk);
225
226 for _ in 1..target - 1 {
228 let (a, b) = remainder.split_at(chunk_size);
229 out.push(a);
230 remainder = b
231 }
232 out.push(remainder);
234 out
235}
236
237pub fn split<C: Container>(container: &C, target: usize) -> Vec<C> {
239 let total_len = container.len();
240 if total_len == 0 {
241 return vec![container.clone()];
242 }
243
244 let chunk_size = std::cmp::max(total_len / target, 1);
245
246 if container.n_chunks() == target
247 && container
248 .chunk_lengths()
249 .all(|len| len.abs_diff(chunk_size) < 100)
250 && !container.should_rechunk()
252 {
253 return container.iter_chunks().collect();
254 }
255 split_impl(container, target, chunk_size)
256}
257
258pub fn split_and_flatten<C: Container>(container: &C, target: usize) -> Vec<C> {
261 let total_len = container.len();
262 if total_len == 0 {
263 return vec![container.clone()];
264 }
265
266 let chunk_size = std::cmp::max(total_len / target, 1);
267
268 if container.n_chunks() == target
269 && container
270 .chunk_lengths()
271 .all(|len| len.abs_diff(chunk_size) < 100)
272 && !container.should_rechunk()
274 {
275 return container.iter_chunks().collect();
276 }
277
278 if container.n_chunks() == 1 {
279 split_impl(container, target, chunk_size)
280 } else {
281 let mut out = Vec::with_capacity(target);
282 let chunks = container.iter_chunks();
283
284 'new_chunk: for mut chunk in chunks {
285 loop {
286 let h = chunk.len();
287 if h < chunk_size {
288 out.push(chunk);
290 continue 'new_chunk;
291 }
292
293 if ((h - chunk_size) as f64 / chunk_size as f64) < 0.3 {
295 out.push(chunk);
296 continue 'new_chunk;
297 }
298
299 let (a, b) = chunk.split_at(chunk_size as i64);
300 out.push(a);
301 chunk = b;
302 }
303 }
304 out
305 }
306}
307
308pub fn split_df_as_ref(df: &DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
314 if strict {
315 split(df, target)
316 } else {
317 split_and_flatten(df, target)
318 }
319}
320
321#[doc(hidden)]
322pub fn split_df(df: &mut DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
325 if target == 0 || df.is_empty() {
326 return vec![df.clone()];
327 }
328 df.align_chunks_par();
330 split_df_as_ref(df, target, strict)
331}
332
333pub fn slice_slice<T>(vals: &[T], offset: i64, len: usize) -> &[T] {
334 let (raw_offset, slice_len) = slice_offsets(offset, len, vals.len());
335 &vals[raw_offset..raw_offset + slice_len]
336}
337
/// Converts a possibly negative `offset` plus a `length` into an in-bounds
/// `(start_index, slice_length)` pair for an array of `array_len` elements.
///
/// A negative `offset` counts from the end of the array. Both the start and the stop
/// position are clamped to `0..=array_len`, so the result never exceeds the array bounds.
///
/// # Panics
///
/// Panics if `array_len` does not fit in an `i64`.
#[inline]
pub fn slice_offsets(offset: i64, length: usize, array_len: usize) -> (usize, usize) {
    let upper: i64 = array_len
        .try_into()
        .expect("array length larger than i64::MAX");

    // Negative offsets are relative to the end of the array.
    let raw_start = match offset {
        from_end if from_end < 0 => from_end.saturating_add_unsigned(array_len as u64),
        from_start => from_start,
    };
    let raw_stop = raw_start.saturating_add_unsigned(length as u64);

    let start = raw_start.clamp(0, upper);
    let stop = raw_stop.clamp(0, upper);

    (start as usize, (stop - start) as usize)
}
357
/// Dispatches on a `DataType` value and invokes one of the given macros.
///
/// * `DataType::String` invokes `$macro_string!` with the optional arguments.
/// * `DataType::Boolean` invokes `$macro_bool!` with the optional arguments.
/// * The numeric dtypes invoke `$macro!` with the matching primitive type
///   (`u8`, `i32`, `f64`, ...) as first argument, followed by the optional arguments.
/// * Any other dtype panics.
///
/// Optional arguments are forwarded comma-separated in every arm. The string and boolean
/// arms used to forward them without a separator, which only worked for zero or one argument.
#[macro_export]
macro_rules! match_dtype_to_physical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            DataType::String => $macro_string!($($opt_args),*),
            DataType::Boolean => $macro_bool!($($opt_args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(u8 $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(u16 $(, $opt_args)*),
            DataType::UInt32 => $macro!(u32 $(, $opt_args)*),
            DataType::UInt64 => $macro!(u64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(i8 $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(i16 $(, $opt_args)*),
            DataType::Int32 => $macro!(i32 $(, $opt_args)*),
            DataType::Int64 => $macro!(i64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(i128 $(, $opt_args)*),
            DataType::Float32 => $macro!(f32 $(, $opt_args)*),
            DataType::Float64 => $macro!(f64 $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
385
/// Dispatches on a `DataType` value and invokes one of the given macros.
///
/// * `DataType::String` invokes `$macro_string!` with the optional arguments.
/// * `DataType::Binary` invokes `$macro_binary!` with the optional arguments.
/// * `DataType::Boolean` invokes `$macro_bool!` with the optional arguments.
/// * The numeric dtypes invoke `$macro!` with the matching polars type name
///   (`UInt8Type`, `Int32Type`, `Float64Type`, ...) as first argument, followed by the
///   optional arguments.
/// * Any other dtype panics.
///
/// Optional arguments are forwarded comma-separated in every arm. The string, binary and
/// boolean arms used to forward them without a separator, which only worked for zero or one
/// argument.
#[macro_export]
macro_rules! match_dtype_to_logical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_binary:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            DataType::String => $macro_string!($($opt_args),*),
            DataType::Binary => $macro_binary!($($opt_args),*),
            DataType::Boolean => $macro_bool!($($opt_args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(UInt8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(UInt16Type $(, $opt_args)*),
            DataType::UInt32 => $macro!(UInt32Type $(, $opt_args)*),
            DataType::UInt64 => $macro!(UInt64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(Int8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(Int16Type $(, $opt_args)*),
            DataType::Int32 => $macro!(Int32Type $(, $opt_args)*),
            DataType::Int64 => $macro!(Int64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(Int128Type $(, $opt_args)*),
            DataType::Float32 => $macro!(Float32Type $(, $opt_args)*),
            DataType::Float64 => $macro!(Float64Type $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
414
/// Dispatches on `$self.dtype()` and invokes a macro with the matching typed accessor.
///
/// * `DataType::String` invokes `$macro_string!` with `$self.str().unwrap()`.
/// * `DataType::Boolean` invokes `$macro_bool!` with `$self.bool().unwrap()`.
/// * The numeric dtypes invoke `$macro!` with the matching accessor
///   (`$self.u8().unwrap()`, `$self.i32().unwrap()`, ...).
///
/// In every arm the optional arguments follow the accessor, comma-separated.
/// Any other dtype panics.
#[macro_export]
macro_rules! match_arrow_dtype_apply_macro_ca {
    ($self:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            DataType::String => $macro_string!($self.str().unwrap() $(, $opt_args)*),
            DataType::Boolean => $macro_bool!($self.bool().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($self.i128().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
442
/// Expands `$body` once per numeric dtype arm, with the identifier `$T` standing for the
/// matching Rust primitive type (`i8` ... `u64`, `f32`, `f64`).
///
/// Invoked as `with_match_physical_numeric_type!(dtype, |$T| { ... })`. The body is wrapped
/// in a local helper macro `__with_ty__`, and the `DataType` variants are glob-imported
/// for the match. Any dtype without an arm panics.
#[macro_export]
macro_rules! with_match_physical_numeric_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
469
/// Expands `$body` once per integer dtype arm, with the identifier `$T` standing for the
/// matching Rust primitive integer type (`i8` ... `i128`, `u8` ... `u64`).
///
/// Invoked as `with_match_physical_integer_type!(dtype, |$T| { ... })`. The body is wrapped
/// in a local helper macro `__with_ty__`, and the `DataType` variants are glob-imported
/// for the match. Any dtype without an arm (including the float dtypes) panics.
#[macro_export]
macro_rules! with_match_physical_integer_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
494
/// Expands `$body` for the float dtypes, with the identifier `$T` standing for `f32`
/// (`Float32`) or `f64` (`Float64`).
///
/// Invoked as `with_match_physical_float_type!(dtype, |$T| { ... })`. Any other dtype panics.
#[macro_export]
macro_rules! with_match_physical_float_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
507
/// Expands `$body` for the float dtypes, with the identifier `$T` standing for the polars
/// type name `Float32Type` or `Float64Type`.
///
/// Invoked as `with_match_physical_float_polars_type!(dtype, |$T| { ... })`. This macro only
/// glob-imports the `DataType` variants, so `Float32Type` / `Float64Type` must be nameable
/// where the macro is invoked. Any other dtype panics.
#[macro_export]
macro_rules! with_match_physical_float_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
520
/// Expands `$body` once per numeric dtype arm, with the identifier `$T` standing for the
/// matching polars type name (`Int8Type` ... `UInt64Type`, `Float32Type`, `Float64Type`).
///
/// Invoked as `with_match_physical_numeric_polars_type!(dtype, |$T| { ... })`. This macro only
/// glob-imports the `DataType` variants, so the `*Type` names must be nameable where the
/// macro is invoked. Any dtype without an arm panics.
#[macro_export]
macro_rules! with_match_physical_numeric_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
547
/// Expands `$body` once per integer dtype arm, with the identifier `$T` standing for the
/// matching polars type name (`Int8Type` ... `Int128Type`, `UInt8Type` ... `UInt64Type`).
///
/// Invoked as `with_match_physical_integer_polars_type!(dtype, |$T| { ... })`. Both the
/// `DataType` variants and everything in `$crate::datatypes` are glob-imported inside the
/// expansion. Any dtype without an arm (including the float dtypes) panics.
#[macro_export]
macro_rules! with_match_physical_integer_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    use $crate::datatypes::*;
    match $key_type {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
573
/// Expands `$body` once per `CategoricalPhysical` variant, with the identifier `$T` standing
/// for `Categorical8Type` (`U8`), `Categorical16Type` (`U16`) or `Categorical32Type` (`U32`).
///
/// Invoked as `with_match_categorical_physical_type!(physical, |$T| { ... })`. The match has
/// no catch-all arm. Nothing is imported by the expansion, so `CategoricalPhysical` and the
/// `Categorical*Type` names must be in scope where the macro is invoked.
#[macro_export]
macro_rules! with_match_categorical_physical_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    match $dtype {
        CategoricalPhysical::U8 => __with_ty__! { Categorical8Type },
        CategoricalPhysical::U16 => __with_ty__! { Categorical16Type },
        CategoricalPhysical::U32 => __with_ty__! { Categorical32Type },
    }
})}
585
/// Dispatches on `$self.dtype()` for the numeric dtypes and invokes `$macro!` with the
/// matching typed accessor (`$self.u8().unwrap()`, `$self.f64().unwrap()`, ...) as first
/// argument, followed by the optional arguments.
///
/// Any dtype without an arm panics.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($self.i128().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
612
/// Mutable counterpart of the numeric dtype dispatch: for each numeric dtype, obtains a
/// `&mut` typed chunked array from `$self.as_mut()` and invokes
/// `$macro!(<PolarsType>, ca, <optional arguments>...)`.
///
/// Any dtype without an arm panics.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical_mut {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        // The dtype is cloned before matching.
        // NOTE(review): presumably so that no borrow of `$self` is held while `as_mut()`
        // is called in the arms — confirm.
        match $self.dtype().clone() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => {
                let ca: &mut UInt8Chunked = $self.as_mut();
                $macro!(UInt8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => {
                let ca: &mut UInt16Chunked = $self.as_mut();
                $macro!(UInt16Type, ca $(, $opt_args)*)
            },
            DataType::UInt32 => {
                let ca: &mut UInt32Chunked = $self.as_mut();
                $macro!(UInt32Type, ca $(, $opt_args)*)
            },
            DataType::UInt64 => {
                let ca: &mut UInt64Chunked = $self.as_mut();
                $macro!(UInt64Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => {
                let ca: &mut Int8Chunked = $self.as_mut();
                $macro!(Int8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => {
                let ca: &mut Int16Chunked = $self.as_mut();
                $macro!(Int16Type, ca $(, $opt_args)*)
            },
            DataType::Int32 => {
                let ca: &mut Int32Chunked = $self.as_mut();
                $macro!(Int32Type, ca $(, $opt_args)*)
            },
            DataType::Int64 => {
                let ca: &mut Int64Chunked = $self.as_mut();
                $macro!(Int64Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => {
                let ca: &mut Int128Chunked = $self.as_mut();
                $macro!(Int128Type, ca $(, $opt_args)*)
            },
            DataType::Float32 => {
                let ca: &mut Float32Chunked = $self.as_mut();
                $macro!(Float32Type, ca $(, $opt_args)*)
            },
            DataType::Float64 => {
                let ca: &mut Float64Chunked = $self.as_mut();
                $macro!(Float64Type, ca $(, $opt_args)*)
            },
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
673
/// Dispatches on `$self.dtype()` and calls `$method($args...)` on the matching typed
/// accessor (`$self.bool().unwrap()`, `$self.str().unwrap()`, `$self.i32().unwrap()`,
/// `$self.datetime().unwrap()`, `$self.list().unwrap()`, ...).
///
/// Covers boolean, string, the numeric dtypes, `Time`, `Date`, `Datetime`, `List` and
/// `Struct`; any other dtype panics. The macro pattern requires a comma after `$method`
/// even when no arguments are passed.
#[macro_export]
macro_rules! apply_method_all_arrow_series {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Boolean => $self.bool().unwrap().$method($($args),*),
            DataType::String => $self.str().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            DataType::Time => $self.time().unwrap().$method($($args),*),
            DataType::Date => $self.date().unwrap().$method($($args),*),
            DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
            DataType::List(_) => $self.list().unwrap().$method($($args),*),
            DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
            dt => panic!("dtype {:?} not supported", dt)
        }
    }
}
705
/// Dispatches on `$self.dtype()` for the integer dtypes and calls `$method($args...)` on
/// the matching typed accessor (`$self.u8().unwrap()`, `$self.i64().unwrap()`, ...).
///
/// Any dtype without an arm panics. The macro pattern requires a comma after `$method`
/// even when no arguments are passed.
#[macro_export]
macro_rules! apply_method_physical_integer {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }
}
728
/// Dispatches on `$self.dtype()` for all numeric dtypes and calls `$method($args...)` on the
/// matching typed accessor.
///
/// The float dtypes are handled here (`$self.f32()` / `$self.f64()`); every other dtype is
/// delegated to `apply_method_physical_integer!`, which panics for dtypes it has no arm for.
///
/// The delegation goes through `$crate::` so the macro keeps working when it is invoked by
/// path from another crate that has not imported `apply_method_physical_integer` itself.
#[macro_export]
macro_rules! apply_method_physical_numeric {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            _ => $crate::apply_method_physical_integer!($self, $method, $($args),*),
        }
    }
}
740
/// Builds a `DataFrame` from `name => values` pairs.
///
/// Each pair becomes a `Series` created through `NamedFrom::new(name.into(), values)` and
/// converted into a `Column`; the columns are passed as a `Vec` to `DataFrame::new`, whose
/// return value is the value of the macro. At least one pair is required and a trailing
/// comma is accepted.
#[macro_export]
macro_rules! df {
    ($($col_name:expr => $slice:expr), + $(,)?) => {
        $crate::prelude::DataFrame::new(vec![
            $($crate::prelude::Column::from(<$crate::prelude::Series as $crate::prelude::NamedFrom::<_, _>>::new($col_name.into(), $slice)),)+
        ])
    }
}
749
750pub fn get_time_units(tu_l: &TimeUnit, tu_r: &TimeUnit) -> TimeUnit {
751 use crate::datatypes::time_unit::TimeUnit::*;
752 match (tu_l, tu_r) {
753 (Nanoseconds, Microseconds) => Microseconds,
754 (_, Milliseconds) => Milliseconds,
755 _ => *tu_l,
756 }
757}
758
/// Builds the `SchemaMismatch` error reported when two dataframes of different width are
/// stacked vertically.
///
/// The message contains both widths and, for each frame, the comma-separated names of the
/// entries that `field_compare` collected for it. Marked `#[cold]` / `#[inline(never)]`
/// because it is only used on the error path.
#[cold]
#[inline(never)]
fn width_mismatch(df1: &DataFrame, df2: &DataFrame) -> PolarsError {
    let mut df1_extra = Vec::new();
    let mut df2_extra = Vec::new();

    let s1 = df1.schema();
    let s2 = df2.schema();

    // Fills the two vectors with the schema entries that differ between the frames.
    s1.field_compare(s2, &mut df1_extra, &mut df2_extra);

    // Keep only the column names and join them for display. The local names
    // `df1_extra` / `df2_extra` are captured by the format string below.
    let df1_extra = df1_extra
        .into_iter()
        .map(|(_, (n, _))| n.as_str())
        .collect::<Vec<_>>()
        .join(", ");
    let df2_extra = df2_extra
        .into_iter()
        .map(|(_, (n, _))| n.as_str())
        .collect::<Vec<_>>()
        .join(", ");

    polars_err!(
        SchemaMismatch: r#"unable to vstack, dataframes have different widths ({} != {}).
One dataframe has additional columns: [{df1_extra}].
Other dataframe has additional columns: [{df2_extra}]."#,
        df1.width(),
        df2.width(),
    )
}
789
790pub fn accumulate_dataframes_vertical_unchecked_optional<I>(dfs: I) -> Option<DataFrame>
791where
792 I: IntoIterator<Item = DataFrame>,
793{
794 let mut iter = dfs.into_iter();
795 let additional = iter.size_hint().0;
796 let mut acc_df = iter.next()?;
797 acc_df.reserve_chunks(additional);
798
799 for df in iter {
800 if acc_df.width() != df.width() {
801 panic!("{}", width_mismatch(&acc_df, &df));
802 }
803
804 acc_df.vstack_mut_owned_unchecked(df);
805 }
806 Some(acc_df)
807}
808
809pub fn accumulate_dataframes_vertical_unchecked<I>(dfs: I) -> DataFrame
812where
813 I: IntoIterator<Item = DataFrame>,
814{
815 let mut iter = dfs.into_iter();
816 let additional = iter.size_hint().0;
817 let mut acc_df = iter.next().unwrap();
818 acc_df.reserve_chunks(additional);
819
820 for df in iter {
821 if acc_df.width() != df.width() {
822 panic!("{}", width_mismatch(&acc_df, &df));
823 }
824
825 acc_df.vstack_mut_owned_unchecked(df);
826 }
827 acc_df
828}
829
830pub fn accumulate_dataframes_vertical<I>(dfs: I) -> PolarsResult<DataFrame>
834where
835 I: IntoIterator<Item = DataFrame>,
836{
837 let mut iter = dfs.into_iter();
838 let additional = iter.size_hint().0;
839 let mut acc_df = iter.next().unwrap();
840 acc_df.reserve_chunks(additional);
841 for df in iter {
842 if acc_df.width() != df.width() {
843 return Err(width_mismatch(&acc_df, &df));
844 }
845
846 acc_df.vstack_mut_owned(df)?;
847 }
848
849 Ok(acc_df)
850}
851
852pub fn concat_df<'a, I>(dfs: I) -> PolarsResult<DataFrame>
854where
855 I: IntoIterator<Item = &'a DataFrame>,
856{
857 let mut iter = dfs.into_iter();
858 let additional = iter.size_hint().0;
859 let mut acc_df = iter.next().unwrap().clone();
860 acc_df.reserve_chunks(additional);
861 for df in iter {
862 acc_df.vstack_mut(df)?;
863 }
864 Ok(acc_df)
865}
866
867pub fn concat_df_unchecked<'a, I>(dfs: I) -> DataFrame
869where
870 I: IntoIterator<Item = &'a DataFrame>,
871{
872 let mut iter = dfs.into_iter();
873 let additional = iter.size_hint().0;
874 let mut acc_df = iter.next().unwrap().clone();
875 acc_df.reserve_chunks(additional);
876 for df in iter {
877 acc_df.vstack_mut_unchecked(df);
878 }
879 acc_df
880}
881
882pub fn accumulate_dataframes_horizontal(dfs: Vec<DataFrame>) -> PolarsResult<DataFrame> {
883 let mut iter = dfs.into_iter();
884 let mut acc_df = iter.next().unwrap();
885 for df in iter {
886 acc_df.hstack_mut(df.get_columns())?;
887 }
888 Ok(acc_df)
889}
890
891pub fn align_chunks_binary<'a, T, B>(
895 left: &'a ChunkedArray<T>,
896 right: &'a ChunkedArray<B>,
897) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<B>>)
898where
899 B: PolarsDataType,
900 T: PolarsDataType,
901{
902 let assert = || {
903 assert_eq!(
904 left.len(),
905 right.len(),
906 "expected arrays of the same length"
907 )
908 };
909 match (left.chunks.len(), right.chunks.len()) {
910 (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
912 (a, b)
914 if a == b
915 && left
916 .chunk_lengths()
917 .zip(right.chunk_lengths())
918 .all(|(l, r)| l == r) =>
919 {
920 (Cow::Borrowed(left), Cow::Borrowed(right))
921 },
922 (_, 1) => {
923 assert();
924 (
925 Cow::Borrowed(left),
926 Cow::Owned(right.match_chunks(left.chunk_lengths())),
927 )
928 },
929 (1, _) => {
930 assert();
931 (
932 Cow::Owned(left.match_chunks(right.chunk_lengths())),
933 Cow::Borrowed(right),
934 )
935 },
936 (_, _) => {
937 assert();
938 let left = left.rechunk();
940 (
941 Cow::Owned(left.match_chunks(right.chunk_lengths())),
942 Cow::Borrowed(right),
943 )
944 },
945 }
946}
947
948pub fn align_chunks_binary_ca_series<'a, T>(
952 left: &'a ChunkedArray<T>,
953 right: &'a Series,
954) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, Series>)
955where
956 T: PolarsDataType,
957{
958 let assert = || {
959 assert_eq!(
960 left.len(),
961 right.len(),
962 "expected arrays of the same length"
963 )
964 };
965 match (left.chunks.len(), right.chunks().len()) {
966 (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
968 (a, b)
970 if a == b
971 && left
972 .chunk_lengths()
973 .zip(right.chunk_lengths())
974 .all(|(l, r)| l == r) =>
975 {
976 assert();
977 (Cow::Borrowed(left), Cow::Borrowed(right))
978 },
979 (_, 1) => (left.rechunk(), Cow::Borrowed(right)),
980 (1, _) => (Cow::Borrowed(left), Cow::Owned(right.rechunk())),
981 (_, _) => {
982 assert();
983 (left.rechunk(), Cow::Owned(right.rechunk()))
984 },
985 }
986}
987
/// Owned variant of chunk alignment for two `Series`.
///
/// If both sides have a single chunk, or the same number of chunks with pairwise equal
/// lengths, they are returned unchanged. Otherwise every side that does not consist of
/// exactly one chunk is rechunked.
#[cfg(feature = "performant")]
pub(crate) fn align_chunks_binary_owned_series(left: Series, right: Series) -> (Series, Series) {
    let n_left = left.chunks().len();
    let n_right = right.chunks().len();

    let already_aligned = (n_left == 1 && n_right == 1)
        || (n_left == n_right
            && left
                .chunk_lengths()
                .zip(right.chunk_lengths())
                .all(|(l, r)| l == r));
    if already_aligned {
        return (left, right);
    }

    let left = if n_left == 1 { left } else { left.rechunk() };
    let right = if n_right == 1 { right } else { right.rechunk() };
    (left, right)
}
1007
1008pub(crate) fn align_chunks_binary_owned<T, B>(
1009 left: ChunkedArray<T>,
1010 right: ChunkedArray<B>,
1011) -> (ChunkedArray<T>, ChunkedArray<B>)
1012where
1013 B: PolarsDataType,
1014 T: PolarsDataType,
1015{
1016 match (left.chunks.len(), right.chunks.len()) {
1017 (1, 1) => (left, right),
1018 (a, b)
1020 if a == b
1021 && left
1022 .chunk_lengths()
1023 .zip(right.chunk_lengths())
1024 .all(|(l, r)| l == r) =>
1025 {
1026 (left, right)
1027 },
1028 (_, 1) => (left.rechunk().into_owned(), right),
1029 (1, _) => (left, right.rechunk().into_owned()),
1030 (_, _) => (left.rechunk().into_owned(), right.rechunk().into_owned()),
1031 }
1032}
1033
/// Returns `a`, `b` and `c` with matching chunk layouts.
///
/// If all three already consist of a single chunk they are borrowed unchanged without any
/// length check. Otherwise the arms below pick one array whose chunk lengths become the
/// target layout and re-split the others to match it (rechunking multi-chunk arrays first).
///
/// # Panics
///
/// Unless all three inputs have exactly one chunk, panics if the three arrays do not all
/// have the same length.
#[allow(clippy::type_complexity)]
pub fn align_chunks_ternary<'a, A, B, C>(
    a: &'a ChunkedArray<A>,
    b: &'a ChunkedArray<B>,
    c: &'a ChunkedArray<C>,
) -> (
    Cow<'a, ChunkedArray<A>>,
    Cow<'a, ChunkedArray<B>>,
    Cow<'a, ChunkedArray<C>>,
)
where
    A: PolarsDataType,
    B: PolarsDataType,
    C: PolarsDataType,
{
    // Fast path: nothing to align.
    if a.chunks.len() == 1 && b.chunks.len() == 1 && c.chunks.len() == 1 {
        return (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c));
    }

    assert!(
        a.len() == b.len() && b.len() == c.len(),
        "expected arrays of the same length"
    );

    // The order of the arms matters: the patterns overlap and the first match wins.
    match (a.chunks.len(), b.chunks.len(), c.chunks.len()) {
        // Only `a` is multi-chunk: `b` and `c` follow the layout of `a`.
        (_, 1, 1) => (
            Cow::Borrowed(a),
            Cow::Owned(b.match_chunks(a.chunk_lengths())),
            Cow::Owned(c.match_chunks(a.chunk_lengths())),
        ),
        // Only `c` is multi-chunk: `a` and `b` follow the layout of `c`.
        (1, 1, _) => (
            Cow::Owned(a.match_chunks(c.chunk_lengths())),
            Cow::Owned(b.match_chunks(c.chunk_lengths())),
            Cow::Borrowed(c),
        ),
        // Only `b` is multi-chunk: `a` and `c` follow the layout of `b`.
        (1, _, 1) => (
            Cow::Owned(a.match_chunks(b.chunk_lengths())),
            Cow::Borrowed(b),
            Cow::Owned(c.match_chunks(b.chunk_lengths())),
        ),
        // `b` and `c` are multi-chunk: `b` is rechunked first, then `a` and `b` follow `c`.
        (1, _, _) => {
            let b = b.rechunk();
            (
                Cow::Owned(a.match_chunks(c.chunk_lengths())),
                Cow::Owned(b.match_chunks(c.chunk_lengths())),
                Cow::Borrowed(c),
            )
        },
        // `a` and `c` are multi-chunk: `a` is rechunked first, then `a` and `b` follow `c`.
        (_, 1, _) => {
            let a = a.rechunk();
            (
                Cow::Owned(a.match_chunks(c.chunk_lengths())),
                Cow::Owned(b.match_chunks(c.chunk_lengths())),
                Cow::Borrowed(c),
            )
        },
        // `a` and `b` are multi-chunk: `b` is rechunked first, then `b` and `c` follow `a`.
        (_, _, 1) => {
            let b = b.rechunk();
            (
                Cow::Borrowed(a),
                Cow::Owned(b.match_chunks(a.chunk_lengths())),
                Cow::Owned(c.match_chunks(a.chunk_lengths())),
            )
        },
        // All three have the same chunk count and identical chunk lengths: already aligned.
        (len_a, len_b, len_c)
            if len_a == len_b
                && len_b == len_c
                && a.chunk_lengths()
                    .zip(b.chunk_lengths())
                    .zip(c.chunk_lengths())
                    .all(|((a, b), c)| a == b && b == c) =>
        {
            (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c))
        },
        // Fallback: rechunk `a` and `b`, then let both follow the layout of `c`.
        _ => {
            let a = a.rechunk();
            let b = b.rechunk();
            (
                Cow::Owned(a.match_chunks(c.chunk_lengths())),
                Cow::Owned(b.match_chunks(c.chunk_lengths())),
                Cow::Borrowed(c),
            )
        },
    }
}
1122
1123pub fn binary_concatenate_validities<'a, T, B>(
1124 left: &'a ChunkedArray<T>,
1125 right: &'a ChunkedArray<B>,
1126) -> Option<Bitmap>
1127where
1128 B: PolarsDataType,
1129 T: PolarsDataType,
1130{
1131 let (left, right) = align_chunks_binary(left, right);
1132 let left_validity = concatenate_validities(left.chunks());
1133 let right_validity = concatenate_validities(right.chunks());
1134 combine_validities_and(left_validity.as_ref(), right_validity.as_ref())
1135}
1136
/// Conversion of a value into a `Vec<T>` through a single `into_vec()` call.
pub trait IntoVec<T> {
    /// Consume `self` and return its contents as a `Vec<T>`.
    fn into_vec(self) -> Vec<T>;
}
1141
1142impl<I, S> IntoVec<PlSmallStr> for I
1143where
1144 I: IntoIterator<Item = S>,
1145 S: Into<PlSmallStr>,
1146{
1147 fn into_vec(self) -> Vec<PlSmallStr> {
1148 self.into_iter().map(|s| s.into()).collect()
1149 }
1150}
1151
1152#[inline]
1157pub(crate) fn index_to_chunked_index<
1158 I: Iterator<Item = Idx>,
1159 Idx: PartialOrd + std::ops::AddAssign + std::ops::SubAssign + Zero + One,
1160>(
1161 chunk_lens: I,
1162 index: Idx,
1163) -> (Idx, Idx) {
1164 let mut index_remainder = index;
1165 let mut current_chunk_idx = Zero::zero();
1166
1167 for chunk_len in chunk_lens {
1168 if chunk_len > index_remainder {
1169 break;
1170 } else {
1171 index_remainder -= chunk_len;
1172 current_chunk_idx += One::one();
1173 }
1174 }
1175 (current_chunk_idx, index_remainder)
1176}
1177
1178pub(crate) fn index_to_chunked_index_rev<
1179 I: Iterator<Item = Idx>,
1180 Idx: PartialOrd
1181 + std::ops::AddAssign
1182 + std::ops::SubAssign
1183 + std::ops::Sub<Output = Idx>
1184 + Zero
1185 + One
1186 + Copy
1187 + std::fmt::Debug,
1188>(
1189 chunk_lens_rev: I,
1190 index_from_back: Idx,
1191 total_chunks: Idx,
1192) -> (Idx, Idx) {
1193 debug_assert!(index_from_back > Zero::zero(), "at least -1");
1194 let mut index_remainder = index_from_back;
1195 let mut current_chunk_idx = One::one();
1196 let mut current_chunk_len = Zero::zero();
1197
1198 for chunk_len in chunk_lens_rev {
1199 current_chunk_len = chunk_len;
1200 if chunk_len >= index_remainder {
1201 break;
1202 } else {
1203 index_remainder -= chunk_len;
1204 current_chunk_idx += One::one();
1205 }
1206 }
1207 (
1208 total_chunks - current_chunk_idx,
1209 current_chunk_len - index_remainder,
1210 )
1211}
1212
1213pub fn first_non_null<'a, I>(iter: I) -> Option<usize>
1214where
1215 I: Iterator<Item = Option<&'a Bitmap>>,
1216{
1217 let mut offset = 0;
1218 for validity in iter {
1219 if let Some(mask) = validity {
1220 let len_mask = mask.len();
1221 let n = mask.leading_zeros();
1222 if n < len_mask {
1223 return Some(offset + n);
1224 }
1225 offset += len_mask
1226 } else {
1227 return Some(offset);
1228 }
1229 }
1230 None
1231}
1232
1233pub fn last_non_null<'a, I>(iter: I, len: usize) -> Option<usize>
1234where
1235 I: DoubleEndedIterator<Item = Option<&'a Bitmap>>,
1236{
1237 if len == 0 {
1238 return None;
1239 }
1240 let mut offset = 0;
1241 for validity in iter.rev() {
1242 if let Some(mask) = validity {
1243 let len_mask = mask.len();
1244 let n = mask.trailing_zeros();
1245 if n < len_mask {
1246 return Some(len - offset - n - 1);
1247 }
1248 offset += len_mask;
1249 } else {
1250 return Some(len - offset - 1);
1251 }
1252 }
1253 None
1254}
1255
1256pub fn coalesce_nulls<'a, T: PolarsDataType>(
1258 a: &'a ChunkedArray<T>,
1259 b: &'a ChunkedArray<T>,
1260) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<T>>) {
1261 if a.null_count() > 0 || b.null_count() > 0 {
1262 let (a, b) = align_chunks_binary(a, b);
1263 let mut b = b.into_owned();
1264 let a = a.coalesce_nulls(b.chunks());
1265
1266 for arr in a.chunks().iter() {
1267 for arr_b in unsafe { b.chunks_mut() } {
1268 *arr_b = arr_b.with_validity(arr.validity().cloned())
1269 }
1270 }
1271 b.compute_len();
1272 (Cow::Owned(a), Cow::Owned(b))
1273 } else {
1274 (Cow::Borrowed(a), Cow::Borrowed(b))
1275 }
1276}
1277
1278pub fn coalesce_nulls_columns(a: &Column, b: &Column) -> (Column, Column) {
1279 if a.null_count() > 0 || b.null_count() > 0 {
1280 let mut a = a.as_materialized_series().rechunk();
1281 let mut b = b.as_materialized_series().rechunk();
1282 for (arr_a, arr_b) in unsafe { a.chunks_mut().iter_mut().zip(b.chunks_mut()) } {
1283 let validity = match (arr_a.validity(), arr_b.validity()) {
1284 (None, Some(b)) => Some(b.clone()),
1285 (Some(a), Some(b)) => Some(a & b),
1286 (Some(a), None) => Some(a.clone()),
1287 (None, None) => None,
1288 };
1289 *arr_a = arr_a.with_validity(validity.clone());
1290 *arr_b = arr_b.with_validity(validity);
1291 }
1292 a.compute_len();
1293 b.compute_len();
1294 (a.into(), b.into())
1295 } else {
1296 (a.clone(), b.clone())
1297 }
1298}
1299
1300pub fn operation_exceeded_idxsize_msg(operation: &str) -> String {
1301 if size_of::<IdxSize>() == size_of::<u32>() {
1302 format!(
1303 "{} exceeded the maximum supported limit of {} rows. Consider installing 'polars-u64-idx'.",
1304 operation,
1305 IdxSize::MAX,
1306 )
1307 } else {
1308 format!(
1309 "{} exceeded the maximum supported limit of {} rows.",
1310 operation,
1311 IdxSize::MAX,
1312 )
1313 }
1314}
1315
#[cfg(test)]
mod test {
    use super::*;

    /// Splitting ten values three ways yields parts of length 3, 3 and 4.
    #[test]
    fn test_split() {
        let ca: Int32Chunked = (0..10).collect_ca("a".into());

        let parts = split(&ca, 3);
        let part_lens = [parts[0].len(), parts[1].len(), parts[2].len()];
        assert_eq!(part_lens, [3, 3, 4]);
    }

    /// After `align_chunks_binary` both sides report identical chunk lengths.
    #[test]
    fn test_align_chunks() -> PolarsResult<()> {
        // One chunk of four values against chunks of length 1 and 3.
        let a = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut b = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        b.append(&Int32Chunked::new(PlSmallStr::EMPTY, &[2, 3, 4]))?;

        let (lhs, rhs) = align_chunks_binary(&a, &b);
        let lhs_lens: Vec<_> = lhs.chunk_lengths().collect();
        let rhs_lens: Vec<_> = rhs.chunk_lengths().collect();
        assert_eq!(lhs_lens, rhs_lens);

        // One chunk of four values against four chunks of length 1.
        let a = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut b = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let single = b.clone();
        for _ in 0..3 {
            b.append(&single)?;
        }

        let (lhs, rhs) = align_chunks_binary(&a, &b);
        let lhs_lens: Vec<_> = lhs.chunk_lengths().collect();
        let rhs_lens: Vec<_> = rhs.chunk_lengths().collect();
        assert_eq!(lhs_lens, rhs_lens);

        Ok(())
    }
}