polars_core/frame/column/
mod.rs

1use std::borrow::Cow;
2
3use arrow::bitmap::BitmapBuilder;
4use arrow::trusted_len::TrustMyLength;
5use num_traits::{Num, NumCast};
6use polars_compute::rolling::QuantileMethod;
7use polars_error::PolarsResult;
8use polars_utils::aliases::PlSeedableRandomStateQuality;
9use polars_utils::index::check_bounds;
10use polars_utils::pl_str::PlSmallStr;
11pub use scalar::ScalarColumn;
12
13use self::compare_inner::{TotalEqInner, TotalOrdInner};
14use self::gather::check_bounds_ca;
15use self::partitioned::PartitionedColumn;
16use self::series::SeriesColumn;
17use crate::chunked_array::cast::CastOptions;
18use crate::chunked_array::flags::StatisticsFlags;
19use crate::datatypes::ReshapeDimension;
20use crate::prelude::*;
21use crate::series::{BitRepr, IsSorted, SeriesPhysIter};
22use crate::utils::{Container, slice_offsets};
23use crate::{HEAD_DEFAULT_LENGTH, TAIL_DEFAULT_LENGTH};
24
25mod arithmetic;
26mod compare;
27mod partitioned;
28mod scalar;
29mod series;
30
/// A column within a [`DataFrame`].
///
/// This is lazily initialized to a [`Series`] with methods like
/// [`as_materialized_series`][Column::as_materialized_series] and
/// [`take_materialized_series`][Column::take_materialized_series].
///
/// Currently, there are three ways to represent a [`Column`].
/// 1. A [`Series`] of values
/// 2. A [`PartitionedColumn`]
/// 3. A [`ScalarColumn`] that repeats a single [`Scalar`]
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub enum Column {
    /// Backed by a [`SeriesColumn`].
    Series(SeriesColumn),
    /// Backed by a [`PartitionedColumn`].
    Partitioned(PartitionedColumn),
    /// Backed by a [`ScalarColumn`].
    Scalar(ScalarColumn),
}
47
/// Convert `Self` into a [`Column`]
pub trait IntoColumn: Sized {
    /// Consumes `self` and returns it as a [`Column`].
    fn into_column(self) -> Column;
}
52
53impl Column {
54    #[inline]
55    #[track_caller]
56    pub fn new<T, Phantom>(name: PlSmallStr, values: T) -> Self
57    where
58        Phantom: ?Sized,
59        Series: NamedFrom<T, Phantom>,
60    {
61        Self::Series(SeriesColumn::new(NamedFrom::new(name, values)))
62    }
63
64    #[inline]
65    pub fn new_empty(name: PlSmallStr, dtype: &DataType) -> Self {
66        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), 0)
67    }
68
69    #[inline]
70    pub fn new_scalar(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
71        Self::Scalar(ScalarColumn::new(name, scalar, length))
72    }
73
74    #[inline]
75    pub fn new_partitioned(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
76        Self::Scalar(ScalarColumn::new(name, scalar, length))
77    }
78
79    pub fn new_row_index(name: PlSmallStr, offset: IdxSize, length: usize) -> PolarsResult<Column> {
80        let length = IdxSize::try_from(length).unwrap_or(IdxSize::MAX);
81
82        if offset.checked_add(length).is_none() {
83            polars_bail!(
84                ComputeError:
85                "row index with offset {} overflows on dataframe with height {}",
86                offset, length
87            )
88        }
89
90        let range = offset..offset + length;
91
92        let mut ca = IdxCa::from_vec(name, range.collect());
93        ca.set_sorted_flag(IsSorted::Ascending);
94        let col = ca.into_series().into();
95
96        Ok(col)
97    }
98
99    // # Materialize
100    /// Get a reference to a [`Series`] for this [`Column`]
101    ///
102    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
103    #[inline]
104    pub fn as_materialized_series(&self) -> &Series {
105        match self {
106            Column::Series(s) => s,
107            Column::Partitioned(s) => s.as_materialized_series(),
108            Column::Scalar(s) => s.as_materialized_series(),
109        }
110    }
111    /// Turn [`Column`] into a [`Column::Series`].
112    ///
113    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
114    #[inline]
115    pub fn into_materialized_series(&mut self) -> &mut Series {
116        match self {
117            Column::Series(s) => s,
118            Column::Partitioned(s) => {
119                let series = std::mem::replace(
120                    s,
121                    PartitionedColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),
122                )
123                .take_materialized_series();
124                *self = Column::Series(series.into());
125                let Column::Series(s) = self else {
126                    unreachable!();
127                };
128                s
129            },
130            Column::Scalar(s) => {
131                let series = std::mem::replace(
132                    s,
133                    ScalarColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),
134                )
135                .take_materialized_series();
136                *self = Column::Series(series.into());
137                let Column::Series(s) = self else {
138                    unreachable!();
139                };
140                s
141            },
142        }
143    }
144    /// Take [`Series`] from a [`Column`]
145    ///
146    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
147    #[inline]
148    pub fn take_materialized_series(self) -> Series {
149        match self {
150            Column::Series(s) => s.take(),
151            Column::Partitioned(s) => s.take_materialized_series(),
152            Column::Scalar(s) => s.take_materialized_series(),
153        }
154    }
155
156    #[inline]
157    pub fn dtype(&self) -> &DataType {
158        match self {
159            Column::Series(s) => s.dtype(),
160            Column::Partitioned(s) => s.dtype(),
161            Column::Scalar(s) => s.dtype(),
162        }
163    }
164
165    #[inline]
166    pub fn field(&self) -> Cow<Field> {
167        match self {
168            Column::Series(s) => s.field(),
169            Column::Partitioned(s) => s.field(),
170            Column::Scalar(s) => match s.lazy_as_materialized_series() {
171                None => Cow::Owned(Field::new(s.name().clone(), s.dtype().clone())),
172                Some(s) => s.field(),
173            },
174        }
175    }
176
177    #[inline]
178    pub fn name(&self) -> &PlSmallStr {
179        match self {
180            Column::Series(s) => s.name(),
181            Column::Partitioned(s) => s.name(),
182            Column::Scalar(s) => s.name(),
183        }
184    }
185
186    #[inline]
187    pub fn len(&self) -> usize {
188        match self {
189            Column::Series(s) => s.len(),
190            Column::Partitioned(s) => s.len(),
191            Column::Scalar(s) => s.len(),
192        }
193    }
194
195    #[inline]
196    pub fn with_name(mut self, name: PlSmallStr) -> Column {
197        self.rename(name);
198        self
199    }
200
201    #[inline]
202    pub fn rename(&mut self, name: PlSmallStr) {
203        match self {
204            Column::Series(s) => _ = s.rename(name),
205            Column::Partitioned(s) => _ = s.rename(name),
206            Column::Scalar(s) => _ = s.rename(name),
207        }
208    }
209
210    // # Downcasting
211    #[inline]
212    pub fn as_series(&self) -> Option<&Series> {
213        match self {
214            Column::Series(s) => Some(s),
215            _ => None,
216        }
217    }
218    #[inline]
219    pub fn as_partitioned_column(&self) -> Option<&PartitionedColumn> {
220        match self {
221            Column::Partitioned(s) => Some(s),
222            _ => None,
223        }
224    }
225    #[inline]
226    pub fn as_scalar_column(&self) -> Option<&ScalarColumn> {
227        match self {
228            Column::Scalar(s) => Some(s),
229            _ => None,
230        }
231    }
232    #[inline]
233    pub fn as_scalar_column_mut(&mut self) -> Option<&mut ScalarColumn> {
234        match self {
235            Column::Scalar(s) => Some(s),
236            _ => None,
237        }
238    }
239
240    // # Try to Chunked Arrays
241    pub fn try_bool(&self) -> Option<&BooleanChunked> {
242        self.as_materialized_series().try_bool()
243    }
244    pub fn try_i8(&self) -> Option<&Int8Chunked> {
245        self.as_materialized_series().try_i8()
246    }
247    pub fn try_i16(&self) -> Option<&Int16Chunked> {
248        self.as_materialized_series().try_i16()
249    }
250    pub fn try_i32(&self) -> Option<&Int32Chunked> {
251        self.as_materialized_series().try_i32()
252    }
253    pub fn try_i64(&self) -> Option<&Int64Chunked> {
254        self.as_materialized_series().try_i64()
255    }
256    pub fn try_u8(&self) -> Option<&UInt8Chunked> {
257        self.as_materialized_series().try_u8()
258    }
259    pub fn try_u16(&self) -> Option<&UInt16Chunked> {
260        self.as_materialized_series().try_u16()
261    }
262    pub fn try_u32(&self) -> Option<&UInt32Chunked> {
263        self.as_materialized_series().try_u32()
264    }
265    pub fn try_u64(&self) -> Option<&UInt64Chunked> {
266        self.as_materialized_series().try_u64()
267    }
268    pub fn try_f32(&self) -> Option<&Float32Chunked> {
269        self.as_materialized_series().try_f32()
270    }
271    pub fn try_f64(&self) -> Option<&Float64Chunked> {
272        self.as_materialized_series().try_f64()
273    }
274    pub fn try_str(&self) -> Option<&StringChunked> {
275        self.as_materialized_series().try_str()
276    }
277    pub fn try_list(&self) -> Option<&ListChunked> {
278        self.as_materialized_series().try_list()
279    }
280    pub fn try_binary(&self) -> Option<&BinaryChunked> {
281        self.as_materialized_series().try_binary()
282    }
283    pub fn try_idx(&self) -> Option<&IdxCa> {
284        self.as_materialized_series().try_idx()
285    }
286    pub fn try_binary_offset(&self) -> Option<&BinaryOffsetChunked> {
287        self.as_materialized_series().try_binary_offset()
288    }
289    #[cfg(feature = "dtype-datetime")]
290    pub fn try_datetime(&self) -> Option<&DatetimeChunked> {
291        self.as_materialized_series().try_datetime()
292    }
293    #[cfg(feature = "dtype-struct")]
294    pub fn try_struct(&self) -> Option<&StructChunked> {
295        self.as_materialized_series().try_struct()
296    }
297    #[cfg(feature = "dtype-decimal")]
298    pub fn try_decimal(&self) -> Option<&DecimalChunked> {
299        self.as_materialized_series().try_decimal()
300    }
301    #[cfg(feature = "dtype-array")]
302    pub fn try_array(&self) -> Option<&ArrayChunked> {
303        self.as_materialized_series().try_array()
304    }
305    #[cfg(feature = "dtype-categorical")]
306    pub fn try_categorical(&self) -> Option<&CategoricalChunked> {
307        self.as_materialized_series().try_categorical()
308    }
309    #[cfg(feature = "dtype-date")]
310    pub fn try_date(&self) -> Option<&DateChunked> {
311        self.as_materialized_series().try_date()
312    }
313    #[cfg(feature = "dtype-duration")]
314    pub fn try_duration(&self) -> Option<&DurationChunked> {
315        self.as_materialized_series().try_duration()
316    }
317
318    // # To Chunked Arrays
319    pub fn bool(&self) -> PolarsResult<&BooleanChunked> {
320        self.as_materialized_series().bool()
321    }
322    pub fn i8(&self) -> PolarsResult<&Int8Chunked> {
323        self.as_materialized_series().i8()
324    }
325    pub fn i16(&self) -> PolarsResult<&Int16Chunked> {
326        self.as_materialized_series().i16()
327    }
328    pub fn i32(&self) -> PolarsResult<&Int32Chunked> {
329        self.as_materialized_series().i32()
330    }
331    pub fn i64(&self) -> PolarsResult<&Int64Chunked> {
332        self.as_materialized_series().i64()
333    }
334    #[cfg(feature = "dtype-i128")]
335    pub fn i128(&self) -> PolarsResult<&Int128Chunked> {
336        self.as_materialized_series().i128()
337    }
338    pub fn u8(&self) -> PolarsResult<&UInt8Chunked> {
339        self.as_materialized_series().u8()
340    }
341    pub fn u16(&self) -> PolarsResult<&UInt16Chunked> {
342        self.as_materialized_series().u16()
343    }
344    pub fn u32(&self) -> PolarsResult<&UInt32Chunked> {
345        self.as_materialized_series().u32()
346    }
347    pub fn u64(&self) -> PolarsResult<&UInt64Chunked> {
348        self.as_materialized_series().u64()
349    }
350    pub fn f32(&self) -> PolarsResult<&Float32Chunked> {
351        self.as_materialized_series().f32()
352    }
353    pub fn f64(&self) -> PolarsResult<&Float64Chunked> {
354        self.as_materialized_series().f64()
355    }
356    pub fn str(&self) -> PolarsResult<&StringChunked> {
357        self.as_materialized_series().str()
358    }
359    pub fn list(&self) -> PolarsResult<&ListChunked> {
360        self.as_materialized_series().list()
361    }
362    pub fn binary(&self) -> PolarsResult<&BinaryChunked> {
363        self.as_materialized_series().binary()
364    }
365    pub fn idx(&self) -> PolarsResult<&IdxCa> {
366        self.as_materialized_series().idx()
367    }
368    pub fn binary_offset(&self) -> PolarsResult<&BinaryOffsetChunked> {
369        self.as_materialized_series().binary_offset()
370    }
371    #[cfg(feature = "dtype-datetime")]
372    pub fn datetime(&self) -> PolarsResult<&DatetimeChunked> {
373        self.as_materialized_series().datetime()
374    }
375    #[cfg(feature = "dtype-struct")]
376    pub fn struct_(&self) -> PolarsResult<&StructChunked> {
377        self.as_materialized_series().struct_()
378    }
379    #[cfg(feature = "dtype-decimal")]
380    pub fn decimal(&self) -> PolarsResult<&DecimalChunked> {
381        self.as_materialized_series().decimal()
382    }
383    #[cfg(feature = "dtype-array")]
384    pub fn array(&self) -> PolarsResult<&ArrayChunked> {
385        self.as_materialized_series().array()
386    }
387    #[cfg(feature = "dtype-categorical")]
388    pub fn categorical(&self) -> PolarsResult<&CategoricalChunked> {
389        self.as_materialized_series().categorical()
390    }
391    #[cfg(feature = "dtype-date")]
392    pub fn date(&self) -> PolarsResult<&DateChunked> {
393        self.as_materialized_series().date()
394    }
395    #[cfg(feature = "dtype-duration")]
396    pub fn duration(&self) -> PolarsResult<&DurationChunked> {
397        self.as_materialized_series().duration()
398    }
399
400    // # Casting
401    pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {
402        match self {
403            Column::Series(s) => s.cast_with_options(dtype, options).map(Column::from),
404            Column::Partitioned(s) => s.cast_with_options(dtype, options).map(Column::from),
405            Column::Scalar(s) => s.cast_with_options(dtype, options).map(Column::from),
406        }
407    }
408    pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Self> {
409        match self {
410            Column::Series(s) => s.strict_cast(dtype).map(Column::from),
411            Column::Partitioned(s) => s.strict_cast(dtype).map(Column::from),
412            Column::Scalar(s) => s.strict_cast(dtype).map(Column::from),
413        }
414    }
415    pub fn cast(&self, dtype: &DataType) -> PolarsResult<Column> {
416        match self {
417            Column::Series(s) => s.cast(dtype).map(Column::from),
418            Column::Partitioned(s) => s.cast(dtype).map(Column::from),
419            Column::Scalar(s) => s.cast(dtype).map(Column::from),
420        }
421    }
422    /// # Safety
423    ///
424    /// This can lead to invalid memory access in downstream code.
425    pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
426        match self {
427            Column::Series(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
428            Column::Partitioned(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
429            Column::Scalar(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
430        }
431    }
432
433    pub fn clear(&self) -> Self {
434        match self {
435            Column::Series(s) => s.clear().into(),
436            Column::Partitioned(s) => s.clear().into(),
437            Column::Scalar(s) => s.resize(0).into(),
438        }
439    }
440
441    #[inline]
442    pub fn shrink_to_fit(&mut self) {
443        match self {
444            Column::Series(s) => s.shrink_to_fit(),
445            // @partition-opt
446            Column::Partitioned(_) => {},
447            Column::Scalar(_) => {},
448        }
449    }
450
451    #[inline]
452    pub fn new_from_index(&self, index: usize, length: usize) -> Self {
453        if index >= self.len() {
454            return Self::full_null(self.name().clone(), length, self.dtype());
455        }
456
457        match self {
458            Column::Series(s) => {
459                // SAFETY: Bounds check done before.
460                let av = unsafe { s.get_unchecked(index) };
461                let scalar = Scalar::new(self.dtype().clone(), av.into_static());
462                Self::new_scalar(self.name().clone(), scalar, length)
463            },
464            Column::Partitioned(s) => {
465                // SAFETY: Bounds check done before.
466                let av = unsafe { s.get_unchecked(index) };
467                let scalar = Scalar::new(self.dtype().clone(), av.into_static());
468                Self::new_scalar(self.name().clone(), scalar, length)
469            },
470            Column::Scalar(s) => s.resize(length).into(),
471        }
472    }
473
474    #[inline]
475    pub fn has_nulls(&self) -> bool {
476        match self {
477            Self::Series(s) => s.has_nulls(),
478            // @partition-opt
479            Self::Partitioned(s) => s.as_materialized_series().has_nulls(),
480            Self::Scalar(s) => s.has_nulls(),
481        }
482    }
483
484    #[inline]
485    pub fn is_null(&self) -> BooleanChunked {
486        match self {
487            Self::Series(s) => s.is_null(),
488            // @partition-opt
489            Self::Partitioned(s) => s.as_materialized_series().is_null(),
490            Self::Scalar(s) => {
491                BooleanChunked::full(s.name().clone(), s.scalar().is_null(), s.len())
492            },
493        }
494    }
495    #[inline]
496    pub fn is_not_null(&self) -> BooleanChunked {
497        match self {
498            Self::Series(s) => s.is_not_null(),
499            // @partition-opt
500            Self::Partitioned(s) => s.as_materialized_series().is_not_null(),
501            Self::Scalar(s) => {
502                BooleanChunked::full(s.name().clone(), !s.scalar().is_null(), s.len())
503            },
504        }
505    }
506
507    pub fn to_physical_repr(&self) -> Column {
508        // @scalar-opt
509        self.as_materialized_series()
510            .to_physical_repr()
511            .into_owned()
512            .into()
513    }
514    /// # Safety
515    ///
516    /// This can lead to invalid memory access in downstream code.
517    pub unsafe fn from_physical_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
518        // @scalar-opt
519        self.as_materialized_series()
520            .from_physical_unchecked(dtype)
521            .map(Column::from)
522    }
523
524    pub fn head(&self, length: Option<usize>) -> Column {
525        let len = length.unwrap_or(HEAD_DEFAULT_LENGTH);
526        let len = usize::min(len, self.len());
527        self.slice(0, len)
528    }
529    pub fn tail(&self, length: Option<usize>) -> Column {
530        let len = length.unwrap_or(TAIL_DEFAULT_LENGTH);
531        let len = usize::min(len, self.len());
532        debug_assert!(len <= i64::MAX as usize);
533        self.slice(-(len as i64), len)
534    }
535    pub fn slice(&self, offset: i64, length: usize) -> Column {
536        match self {
537            Column::Series(s) => s.slice(offset, length).into(),
538            // @partition-opt
539            Column::Partitioned(s) => s.as_materialized_series().slice(offset, length).into(),
540            Column::Scalar(s) => {
541                let (_, length) = slice_offsets(offset, length, s.len());
542                s.resize(length).into()
543            },
544        }
545    }
546
547    pub fn split_at(&self, offset: i64) -> (Column, Column) {
548        // @scalar-opt
549        let (l, r) = self.as_materialized_series().split_at(offset);
550        (l.into(), r.into())
551    }
552
553    #[inline]
554    pub fn null_count(&self) -> usize {
555        match self {
556            Self::Series(s) => s.null_count(),
557            Self::Partitioned(s) => s.null_count(),
558            Self::Scalar(s) if s.scalar().is_null() => s.len(),
559            Self::Scalar(_) => 0,
560        }
561    }
562
563    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Column> {
564        check_bounds_ca(indices, self.len() as IdxSize)?;
565        Ok(unsafe { self.take_unchecked(indices) })
566    }
567    pub fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Column> {
568        check_bounds(indices, self.len() as IdxSize)?;
569        Ok(unsafe { self.take_slice_unchecked(indices) })
570    }
571    /// # Safety
572    ///
573    /// No bounds on the indexes are performed.
574    pub unsafe fn take_unchecked(&self, indices: &IdxCa) -> Column {
575        debug_assert!(check_bounds_ca(indices, self.len() as IdxSize).is_ok());
576
577        match self {
578            Self::Series(s) => unsafe { s.take_unchecked(indices) }.into(),
579            Self::Partitioned(s) => {
580                let s = s.as_materialized_series();
581                unsafe { s.take_unchecked(indices) }.into()
582            },
583            Self::Scalar(s) => {
584                let idxs_length = indices.len();
585                let idxs_null_count = indices.null_count();
586
587                let scalar = ScalarColumn::from_single_value_series(
588                    s.as_single_value_series().take_unchecked(&IdxCa::new(
589                        indices.name().clone(),
590                        &[0][..s.len().min(1)],
591                    )),
592                    idxs_length,
593                );
594
595                // We need to make sure that null values in `idx` become null values in the result
596                if idxs_null_count == 0 || scalar.has_nulls() {
597                    scalar.into_column()
598                } else if idxs_null_count == idxs_length {
599                    scalar.into_nulls().into_column()
600                } else {
601                    let validity = indices.rechunk_validity();
602                    let series = scalar.take_materialized_series();
603                    let name = series.name().clone();
604                    let dtype = series.dtype().clone();
605                    let mut chunks = series.into_chunks();
606                    assert_eq!(chunks.len(), 1);
607                    chunks[0] = chunks[0].with_validity(validity);
608                    unsafe { Series::from_chunks_and_dtype_unchecked(name, chunks, &dtype) }
609                        .into_column()
610                }
611            },
612        }
613    }
614    /// # Safety
615    ///
616    /// No bounds on the indexes are performed.
617    pub unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Column {
618        debug_assert!(check_bounds(indices, self.len() as IdxSize).is_ok());
619
620        match self {
621            Self::Series(s) => unsafe { s.take_slice_unchecked(indices) }.into(),
622            Self::Partitioned(s) => {
623                let s = s.as_materialized_series();
624                unsafe { s.take_slice_unchecked(indices) }.into()
625            },
626            Self::Scalar(s) => ScalarColumn::from_single_value_series(
627                s.as_single_value_series()
628                    .take_slice_unchecked(&[0][..s.len().min(1)]),
629                indices.len(),
630            )
631            .into(),
632        }
633    }
634
    /// General implementation for aggregation where a non-missing scalar would map to itself.
    ///
    /// `series_agg` is the aggregation on [`Series`]. It is applied directly for the series and
    /// partitioned variants (and for an empty scalar column). For a non-empty scalar column it
    /// is only run once on the single-value series to learn the output dtype and whether the
    /// aggregation is defined; the result is then the scalar repeated `groups.len()` times, with
    /// empty groups set to null.
    #[inline(always)]
    #[cfg(any(feature = "algorithm_group_by", feature = "bitwise"))]
    fn agg_with_unit_scalar(
        &self,
        groups: &GroupsType,
        series_agg: impl Fn(&Series, &GroupsType) -> Series,
    ) -> Column {
        match self {
            Column::Series(s) => series_agg(s, groups).into_column(),
            // @partition-opt
            Column::Partitioned(s) => series_agg(s.as_materialized_series(), groups).into_column(),
            Column::Scalar(s) => {
                // An empty scalar column has no value to reuse: fall back to the series path.
                if s.is_empty() {
                    return series_agg(s.as_materialized_series(), groups).into_column();
                }

                // We utilize the aggregation on Series to see:
                // 1. the output datatype of the aggregation
                // 2. whether this aggregation is even defined
                let series_aggregation = series_agg(
                    &s.as_single_value_series(),
                    &GroupsType::Slice {
                        // @NOTE: this group is always valid since s is non-empty.
                        groups: vec![[0, 1]],
                        rolling: false,
                    },
                );

                // If the aggregation is not defined, just return all nulls.
                if series_aggregation.has_nulls() {
                    return Self::new_scalar(
                        series_aggregation.name().clone(),
                        Scalar::new(series_aggregation.dtype().clone(), AnyValue::Null),
                        groups.len(),
                    );
                }

                // One output row per group, all holding the scalar value.
                let mut scalar_col = s.resize(groups.len());
                // The aggregation might change the type (e.g. mean changes int -> float), so we do
                // a cast here to the output type.
                if series_aggregation.dtype() != s.dtype() {
                    scalar_col = scalar_col.cast(series_aggregation.dtype()).unwrap();
                }

                let Some(first_empty_idx) = groups.iter().position(|g| g.is_empty()) else {
                    // Fast path: no empty groups. keep the scalar intact.
                    return scalar_col.into_column();
                };

                // All empty groups produce a *missing* or `null` value.
                let mut validity = BitmapBuilder::with_capacity(groups.len());
                // Every group before the first empty one is known to be non-empty.
                validity.extend_constant(first_empty_idx, true);
                // SAFETY: We trust the length of this iterator.
                let iter = unsafe {
                    TrustMyLength::new(
                        groups.iter().skip(first_empty_idx).map(|g| !g.is_empty()),
                        groups.len() - first_empty_idx,
                    )
                };
                validity.extend_trusted_len_iter(iter);

                // Materialize into a single chunk so the validity can be attached to it.
                let mut s = scalar_col.take_materialized_series().rechunk();
                // SAFETY: We perform a compute_len afterwards.
                let chunks = unsafe { s.chunks_mut() };
                let arr = &mut chunks[0];
                *arr = arr.with_validity(validity.into_opt_validity());
                s.compute_len();

                s.into_column()
            },
        }
    }
708
709    /// # Safety
710    ///
711    /// Does no bounds checks, groups must be correct.
712    #[cfg(feature = "algorithm_group_by")]
713    pub unsafe fn agg_min(&self, groups: &GroupsType) -> Self {
714        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_min(g) })
715    }
716
717    /// # Safety
718    ///
719    /// Does no bounds checks, groups must be correct.
720    #[cfg(feature = "algorithm_group_by")]
721    pub unsafe fn agg_max(&self, groups: &GroupsType) -> Self {
722        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_max(g) })
723    }
724
725    /// # Safety
726    ///
727    /// Does no bounds checks, groups must be correct.
728    #[cfg(feature = "algorithm_group_by")]
729    pub unsafe fn agg_mean(&self, groups: &GroupsType) -> Self {
730        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_mean(g) })
731    }
732
733    /// # Safety
734    ///
735    /// Does no bounds checks, groups must be correct.
736    #[cfg(feature = "algorithm_group_by")]
737    pub unsafe fn agg_sum(&self, groups: &GroupsType) -> Self {
738        // @scalar-opt
739        unsafe { self.as_materialized_series().agg_sum(groups) }.into()
740    }
741
742    /// # Safety
743    ///
744    /// Does no bounds checks, groups must be correct.
745    #[cfg(feature = "algorithm_group_by")]
746    pub unsafe fn agg_first(&self, groups: &GroupsType) -> Self {
747        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_first(g) })
748    }
749
750    /// # Safety
751    ///
752    /// Does no bounds checks, groups must be correct.
753    #[cfg(feature = "algorithm_group_by")]
754    pub unsafe fn agg_last(&self, groups: &GroupsType) -> Self {
755        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_last(g) })
756    }
757
758    /// # Safety
759    ///
760    /// Does no bounds checks, groups must be correct.
761    #[cfg(feature = "algorithm_group_by")]
762    pub unsafe fn agg_n_unique(&self, groups: &GroupsType) -> Self {
763        // @scalar-opt
764        unsafe { self.as_materialized_series().agg_n_unique(groups) }.into()
765    }
766
767    /// # Safety
768    ///
769    /// Does no bounds checks, groups must be correct.
770    #[cfg(feature = "algorithm_group_by")]
771    pub unsafe fn agg_quantile(
772        &self,
773        groups: &GroupsType,
774        quantile: f64,
775        method: QuantileMethod,
776    ) -> Self {
777        // @scalar-opt
778
779        unsafe {
780            self.as_materialized_series()
781                .agg_quantile(groups, quantile, method)
782        }
783        .into()
784    }
785
786    /// # Safety
787    ///
788    /// Does no bounds checks, groups must be correct.
789    #[cfg(feature = "algorithm_group_by")]
790    pub unsafe fn agg_median(&self, groups: &GroupsType) -> Self {
791        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_median(g) })
792    }
793
794    /// # Safety
795    ///
796    /// Does no bounds checks, groups must be correct.
797    #[cfg(feature = "algorithm_group_by")]
798    pub unsafe fn agg_var(&self, groups: &GroupsType, ddof: u8) -> Self {
799        // @scalar-opt
800        unsafe { self.as_materialized_series().agg_var(groups, ddof) }.into()
801    }
802
803    /// # Safety
804    ///
805    /// Does no bounds checks, groups must be correct.
806    #[cfg(feature = "algorithm_group_by")]
807    pub unsafe fn agg_std(&self, groups: &GroupsType, ddof: u8) -> Self {
808        // @scalar-opt
809        unsafe { self.as_materialized_series().agg_std(groups, ddof) }.into()
810    }
811
812    /// # Safety
813    ///
814    /// Does no bounds checks, groups must be correct.
815    #[cfg(feature = "algorithm_group_by")]
816    pub unsafe fn agg_list(&self, groups: &GroupsType) -> Self {
817        // @scalar-opt
818        unsafe { self.as_materialized_series().agg_list(groups) }.into()
819    }
820
821    /// # Safety
822    ///
823    /// Does no bounds checks, groups must be correct.
824    #[cfg(feature = "algorithm_group_by")]
825    pub fn agg_valid_count(&self, groups: &GroupsType) -> Self {
826        // @partition-opt
827        // @scalar-opt
828        unsafe { self.as_materialized_series().agg_valid_count(groups) }.into()
829    }
830
831    /// # Safety
832    ///
833    /// Does no bounds checks, groups must be correct.
834    #[cfg(feature = "bitwise")]
835    pub fn agg_and(&self, groups: &GroupsType) -> Self {
836        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_and(g) })
837    }
838    /// # Safety
839    ///
840    /// Does no bounds checks, groups must be correct.
841    #[cfg(feature = "bitwise")]
842    pub fn agg_or(&self, groups: &GroupsType) -> Self {
843        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_or(g) })
844    }
845    /// # Safety
846    ///
847    /// Does no bounds checks, groups must be correct.
848    #[cfg(feature = "bitwise")]
849    pub fn agg_xor(&self, groups: &GroupsType) -> Self {
850        // @partition-opt
851        // @scalar-opt
852        unsafe { self.as_materialized_series().agg_xor(groups) }.into()
853    }
854
855    pub fn full_null(name: PlSmallStr, size: usize, dtype: &DataType) -> Self {
856        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), size)
857    }
858
859    pub fn is_empty(&self) -> bool {
860        self.len() == 0
861    }
862
863    pub fn reverse(&self) -> Column {
864        match self {
865            Column::Series(s) => s.reverse().into(),
866            Column::Partitioned(s) => s.reverse().into(),
867            Column::Scalar(_) => self.clone(),
868        }
869    }
870
871    pub fn equals(&self, other: &Column) -> bool {
872        // @scalar-opt
873        self.as_materialized_series()
874            .equals(other.as_materialized_series())
875    }
876
877    pub fn equals_missing(&self, other: &Column) -> bool {
878        // @scalar-opt
879        self.as_materialized_series()
880            .equals_missing(other.as_materialized_series())
881    }
882
883    pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
884        // @scalar-opt
885        match self {
886            Column::Series(s) => s.set_sorted_flag(sorted),
887            Column::Partitioned(s) => s.set_sorted_flag(sorted),
888            Column::Scalar(_) => {},
889        }
890    }
891
892    pub fn get_flags(&self) -> StatisticsFlags {
893        match self {
894            Column::Series(s) => s.get_flags(),
895            // @partition-opt
896            Column::Partitioned(_) => StatisticsFlags::empty(),
897            Column::Scalar(_) => {
898                StatisticsFlags::IS_SORTED_ASC | StatisticsFlags::CAN_FAST_EXPLODE_LIST
899            },
900        }
901    }
902
903    /// Returns whether the flags were set
904    pub fn set_flags(&mut self, flags: StatisticsFlags) -> bool {
905        match self {
906            Column::Series(s) => {
907                s.set_flags(flags);
908                true
909            },
910            // @partition-opt
911            Column::Partitioned(_) => false,
912            Column::Scalar(_) => false,
913        }
914    }
915
916    pub fn vec_hash(
917        &self,
918        build_hasher: PlSeedableRandomStateQuality,
919        buf: &mut Vec<u64>,
920    ) -> PolarsResult<()> {
921        // @scalar-opt?
922        self.as_materialized_series().vec_hash(build_hasher, buf)
923    }
924
925    pub fn vec_hash_combine(
926        &self,
927        build_hasher: PlSeedableRandomStateQuality,
928        hashes: &mut [u64],
929    ) -> PolarsResult<()> {
930        // @scalar-opt?
931        self.as_materialized_series()
932            .vec_hash_combine(build_hasher, hashes)
933    }
934
935    pub fn append(&mut self, other: &Column) -> PolarsResult<&mut Self> {
936        // @scalar-opt
937        self.into_materialized_series()
938            .append(other.as_materialized_series())?;
939        Ok(self)
940    }
941    pub fn append_owned(&mut self, other: Column) -> PolarsResult<&mut Self> {
942        self.into_materialized_series()
943            .append_owned(other.take_materialized_series())?;
944        Ok(self)
945    }
946
    /// Returns the indices that order this column according to `options`.
    ///
    /// Empty and all-null columns are answered directly. When the column carries a sorted
    /// flag, the indices are generated from index ranges without sorting; otherwise the
    /// work is delegated to `Series::arg_sort` on the materialized series.
    pub fn arg_sort(&self, options: SortOptions) -> IdxCa {
        if self.is_empty() {
            return IdxCa::from_vec(self.name().clone(), Vec::new());
        }

        if self.null_count() == self.len() {
            // We might need to maintain order so just respect the descending parameter.
            let values = if options.descending {
                (0..self.len() as IdxSize).rev().collect()
            } else {
                (0..self.len() as IdxSize).collect()
            };

            return IdxCa::from_vec(self.name().clone(), values);
        }

        // Without a sorted flag there is no shortcut: fall back to a real arg-sort.
        let is_sorted = Some(self.is_sorted_flag());
        let Some(is_sorted) = is_sorted.filter(|v| !matches!(v, IsSorted::Not)) else {
            return self.as_materialized_series().arg_sort(options);
        };

        // Fast path: the data is sorted.
        let is_sorted_dsc = matches!(is_sorted, IsSorted::Descending);
        // The existing order has to be flipped when it is opposite to the requested one.
        let invert = options.descending != is_sorted_dsc;

        let mut values = Vec::with_capacity(self.len());

        // Pushes the indices of the range `start..end` of `slf` into `values`, in the
        // order requested through `invert` and `maintain_order`.
        #[inline(never)]
        fn extend(
            start: IdxSize,
            end: IdxSize,
            slf: &Column,
            values: &mut Vec<IdxSize>,
            is_only_nulls: bool,
            invert: bool,
            maintain_order: bool,
        ) {
            debug_assert!(start <= end);
            debug_assert!(start as usize <= slf.len());
            debug_assert!(end as usize <= slf.len());

            // Nothing to flip, or a run of nulls: keep the indices in their current order.
            if !invert || is_only_nulls {
                values.extend(start..end);
                return;
            }

            // If we don't have to maintain order but we have to invert, just flip it around.
            if !maintain_order {
                values.extend((start..end).rev());
                return;
            }

            // If we want to maintain order but we also need to invert, we need to invert
            // per group of items.
            //
            // @NOTE: Since the column is sorted, arg_unique can also take a fast path and
            // just do a single traversal.
            let arg_unique = slf
                .slice(start as i64, (end - start) as usize)
                .arg_unique()
                .unwrap();

            assert!(!arg_unique.has_nulls());

            let num_unique = arg_unique.len();

            // Fast path: all items are unique.
            if num_unique == (end - start) as usize {
                values.extend((start..end).rev());
                return;
            }

            // Fast path: a single distinct value, so the original order is already correct.
            if num_unique == 1 {
                values.extend(start..end);
                return;
            }

            // Walk the group start offsets from back to front, emitting every group of
            // equal values in its original (ascending index) order.
            //
            // NOTE(review): chunks are visited front-to-back while the offsets within a
            // chunk are visited back-to-front; this presumably assumes `arg_unique`
            // yields a single chunk - confirm.
            let mut prev_idx = end - start;
            for chunk in arg_unique.downcast_iter() {
                for &idx in chunk.values().as_slice().iter().rev() {
                    values.extend(start + idx..start + prev_idx);
                    prev_idx = idx;
                }
            }
        }
        // Shorthand for calling `extend` with the shared state of this function.
        macro_rules! extend {
            ($start:expr, $end:expr) => {
                extend!($start, $end, is_only_nulls = false);
            };
            ($start:expr, $end:expr, is_only_nulls = $is_only_nulls:expr) => {
                extend(
                    $start,
                    $end,
                    self,
                    &mut values,
                    $is_only_nulls,
                    invert,
                    options.maintain_order,
                );
            };
        }

        let length = self.len() as IdxSize;
        let null_count = self.null_count() as IdxSize;

        if null_count == 0 {
            extend!(0, length);
        } else {
            // The position of the nulls is derived from whether the last value is null.
            let has_nulls_last = self.get(self.len() - 1).unwrap().is_null();
            match (options.nulls_last, has_nulls_last) {
                (true, true) => {
                    // Current: Nulls last, Wanted: Nulls last
                    extend!(0, length - null_count);
                    extend!(length - null_count, length, is_only_nulls = true);
                },
                (true, false) => {
                    // Current: Nulls first, Wanted: Nulls last
                    extend!(null_count, length);
                    extend!(0, null_count, is_only_nulls = true);
                },
                (false, true) => {
                    // Current: Nulls last, Wanted: Nulls first
                    extend!(length - null_count, length, is_only_nulls = true);
                    extend!(0, length - null_count);
                },
                (false, false) => {
                    // Current: Nulls first, Wanted: Nulls first
                    extend!(0, null_count, is_only_nulls = true);
                    extend!(null_count, length);
                },
            }
        }

        // @NOTE: This can theoretically be pushed into the previous operation, but is it
        // really worth it... probably not...
        if let Some(limit) = options.limit {
            let limit = limit.min(length);
            values.truncate(limit as usize);
        }

        IdxCa::from_vec(self.name().clone(), values)
    }
1089
1090    pub fn arg_sort_multiple(
1091        &self,
1092        by: &[Column],
1093        options: &SortMultipleOptions,
1094    ) -> PolarsResult<IdxCa> {
1095        // @scalar-opt
1096        self.as_materialized_series().arg_sort_multiple(by, options)
1097    }
1098
1099    pub fn arg_unique(&self) -> PolarsResult<IdxCa> {
1100        match self {
1101            Column::Scalar(s) => Ok(IdxCa::new_vec(s.name().clone(), vec![0])),
1102            _ => self.as_materialized_series().arg_unique(),
1103        }
1104    }
1105
1106    pub fn bit_repr(&self) -> Option<BitRepr> {
1107        // @scalar-opt
1108        self.as_materialized_series().bit_repr()
1109    }
1110
1111    pub fn into_frame(self) -> DataFrame {
1112        // SAFETY: A single-column dataframe cannot have length mismatches or duplicate names
1113        unsafe { DataFrame::new_no_checks(self.len(), vec![self]) }
1114    }
1115
1116    pub fn extend(&mut self, other: &Column) -> PolarsResult<&mut Self> {
1117        // @scalar-opt
1118        self.into_materialized_series()
1119            .extend(other.as_materialized_series())?;
1120        Ok(self)
1121    }
1122
1123    pub fn rechunk(&self) -> Column {
1124        match self {
1125            Column::Series(s) => s.rechunk().into(),
1126            Column::Partitioned(s) => {
1127                if let Some(s) = s.lazy_as_materialized_series() {
1128                    // This should always hold for partitioned.
1129                    debug_assert_eq!(s.n_chunks(), 1)
1130                }
1131                self.clone()
1132            },
1133            Column::Scalar(s) => {
1134                if s.lazy_as_materialized_series()
1135                    .filter(|x| x.n_chunks() > 1)
1136                    .is_some()
1137                {
1138                    Column::Scalar(ScalarColumn::new(
1139                        s.name().clone(),
1140                        s.scalar().clone(),
1141                        s.len(),
1142                    ))
1143                } else {
1144                    self.clone()
1145                }
1146            },
1147        }
1148    }
1149
1150    pub fn explode(&self) -> PolarsResult<Column> {
1151        self.as_materialized_series().explode().map(Column::from)
1152    }
1153    pub fn implode(&self) -> PolarsResult<ListChunked> {
1154        self.as_materialized_series().implode()
1155    }
1156
1157    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
1158        // @scalar-opt
1159        self.as_materialized_series()
1160            .fill_null(strategy)
1161            .map(Column::from)
1162    }
1163
1164    pub fn divide(&self, rhs: &Column) -> PolarsResult<Self> {
1165        // @scalar-opt
1166        self.as_materialized_series()
1167            .divide(rhs.as_materialized_series())
1168            .map(Column::from)
1169    }
1170
1171    pub fn shift(&self, periods: i64) -> Column {
1172        // @scalar-opt
1173        self.as_materialized_series().shift(periods).into()
1174    }
1175
1176    #[cfg(feature = "zip_with")]
1177    pub fn zip_with(&self, mask: &BooleanChunked, other: &Self) -> PolarsResult<Self> {
1178        // @scalar-opt
1179        self.as_materialized_series()
1180            .zip_with(mask, other.as_materialized_series())
1181            .map(Self::from)
1182    }
1183
1184    #[cfg(feature = "zip_with")]
1185    pub fn zip_with_same_type(
1186        &self,
1187        mask: &ChunkedArray<BooleanType>,
1188        other: &Column,
1189    ) -> PolarsResult<Column> {
1190        // @scalar-opt
1191        self.as_materialized_series()
1192            .zip_with_same_type(mask, other.as_materialized_series())
1193            .map(Column::from)
1194    }
1195
1196    pub fn drop_nulls(&self) -> Column {
1197        match self {
1198            Column::Series(s) => s.drop_nulls().into_column(),
1199            // @partition-opt
1200            Column::Partitioned(s) => s.as_materialized_series().drop_nulls().into_column(),
1201            Column::Scalar(s) => s.drop_nulls().into_column(),
1202        }
1203    }
1204
1205    /// Packs every element into a list.
1206    pub fn as_list(&self) -> ListChunked {
1207        // @scalar-opt
1208        // @partition-opt
1209        self.as_materialized_series().as_list()
1210    }
1211
1212    pub fn is_sorted_flag(&self) -> IsSorted {
1213        match self {
1214            Column::Series(s) => s.is_sorted_flag(),
1215            Column::Partitioned(s) => s.partitions().is_sorted_flag(),
1216            Column::Scalar(_) => IsSorted::Ascending,
1217        }
1218    }
1219
1220    pub fn unique(&self) -> PolarsResult<Column> {
1221        match self {
1222            Column::Series(s) => s.unique().map(Column::from),
1223            // @partition-opt
1224            Column::Partitioned(s) => s.as_materialized_series().unique().map(Column::from),
1225            Column::Scalar(s) => {
1226                _ = s.as_single_value_series().unique()?;
1227                if s.is_empty() {
1228                    return Ok(s.clone().into_column());
1229                }
1230
1231                Ok(s.resize(1).into_column())
1232            },
1233        }
1234    }
1235    pub fn unique_stable(&self) -> PolarsResult<Column> {
1236        match self {
1237            Column::Series(s) => s.unique_stable().map(Column::from),
1238            // @partition-opt
1239            Column::Partitioned(s) => s.as_materialized_series().unique_stable().map(Column::from),
1240            Column::Scalar(s) => {
1241                _ = s.as_single_value_series().unique_stable()?;
1242                if s.is_empty() {
1243                    return Ok(s.clone().into_column());
1244                }
1245
1246                Ok(s.resize(1).into_column())
1247            },
1248        }
1249    }
1250
1251    pub fn reshape_list(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
1252        // @scalar-opt
1253        self.as_materialized_series()
1254            .reshape_list(dimensions)
1255            .map(Self::from)
1256    }
1257
1258    #[cfg(feature = "dtype-array")]
1259    pub fn reshape_array(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
1260        // @scalar-opt
1261        self.as_materialized_series()
1262            .reshape_array(dimensions)
1263            .map(Self::from)
1264    }
1265
1266    pub fn sort(&self, sort_options: SortOptions) -> PolarsResult<Self> {
1267        // @scalar-opt
1268        self.as_materialized_series()
1269            .sort(sort_options)
1270            .map(Self::from)
1271    }
1272
1273    pub fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Self> {
1274        match self {
1275            Column::Series(s) => s.filter(filter).map(Column::from),
1276            Column::Partitioned(s) => s.as_materialized_series().filter(filter).map(Column::from),
1277            Column::Scalar(s) => {
1278                if s.is_empty() {
1279                    return Ok(s.clone().into_column());
1280                }
1281
1282                // Broadcasting
1283                if filter.len() == 1 {
1284                    return match filter.get(0) {
1285                        Some(true) => Ok(s.clone().into_column()),
1286                        _ => Ok(s.resize(0).into_column()),
1287                    };
1288                }
1289
1290                Ok(s.resize(filter.sum().unwrap() as usize).into_column())
1291            },
1292        }
1293    }
1294
1295    #[cfg(feature = "random")]
1296    pub fn shuffle(&self, seed: Option<u64>) -> Self {
1297        // @scalar-opt
1298        self.as_materialized_series().shuffle(seed).into()
1299    }
1300
1301    #[cfg(feature = "random")]
1302    pub fn sample_frac(
1303        &self,
1304        frac: f64,
1305        with_replacement: bool,
1306        shuffle: bool,
1307        seed: Option<u64>,
1308    ) -> PolarsResult<Self> {
1309        self.as_materialized_series()
1310            .sample_frac(frac, with_replacement, shuffle, seed)
1311            .map(Self::from)
1312    }
1313
1314    #[cfg(feature = "random")]
1315    pub fn sample_n(
1316        &self,
1317        n: usize,
1318        with_replacement: bool,
1319        shuffle: bool,
1320        seed: Option<u64>,
1321    ) -> PolarsResult<Self> {
1322        self.as_materialized_series()
1323            .sample_n(n, with_replacement, shuffle, seed)
1324            .map(Self::from)
1325    }
1326
1327    pub fn gather_every(&self, n: usize, offset: usize) -> Column {
1328        if self.len().saturating_sub(offset) == 0 {
1329            return self.clear();
1330        }
1331
1332        match self {
1333            Column::Series(s) => s.gather_every(n, offset).into(),
1334            Column::Partitioned(s) => s.as_materialized_series().gather_every(n, offset).into(),
1335            Column::Scalar(s) => {
1336                let total = s.len() - offset;
1337                s.resize(1 + (total - 1) / n).into()
1338            },
1339        }
1340    }
1341
1342    pub fn extend_constant(&self, value: AnyValue, n: usize) -> PolarsResult<Self> {
1343        if self.is_empty() {
1344            return Ok(Self::new_scalar(
1345                self.name().clone(),
1346                Scalar::new(self.dtype().clone(), value.into_static()),
1347                n,
1348            ));
1349        }
1350
1351        match self {
1352            Column::Series(s) => s.extend_constant(value, n).map(Column::from),
1353            Column::Partitioned(s) => s.extend_constant(value, n).map(Column::from),
1354            Column::Scalar(s) => {
1355                if s.scalar().as_any_value() == value {
1356                    Ok(s.resize(s.len() + n).into())
1357                } else {
1358                    s.as_materialized_series()
1359                        .extend_constant(value, n)
1360                        .map(Column::from)
1361                }
1362            },
1363        }
1364    }
1365
1366    pub fn is_finite(&self) -> PolarsResult<BooleanChunked> {
1367        self.try_map_unary_elementwise_to_bool(|s| s.is_finite())
1368    }
1369    pub fn is_infinite(&self) -> PolarsResult<BooleanChunked> {
1370        self.try_map_unary_elementwise_to_bool(|s| s.is_infinite())
1371    }
1372    pub fn is_nan(&self) -> PolarsResult<BooleanChunked> {
1373        self.try_map_unary_elementwise_to_bool(|s| s.is_nan())
1374    }
1375    pub fn is_not_nan(&self) -> PolarsResult<BooleanChunked> {
1376        self.try_map_unary_elementwise_to_bool(|s| s.is_not_nan())
1377    }
1378
1379    pub fn wrapping_trunc_div_scalar<T>(&self, rhs: T) -> Self
1380    where
1381        T: Num + NumCast,
1382    {
1383        // @scalar-opt
1384        self.as_materialized_series()
1385            .wrapping_trunc_div_scalar(rhs)
1386            .into()
1387    }
1388
1389    pub fn product(&self) -> PolarsResult<Scalar> {
1390        // @scalar-opt
1391        self.as_materialized_series().product()
1392    }
1393
1394    pub fn phys_iter(&self) -> SeriesPhysIter<'_> {
1395        // @scalar-opt
1396        self.as_materialized_series().phys_iter()
1397    }
1398
1399    #[inline]
1400    pub fn get(&self, index: usize) -> PolarsResult<AnyValue> {
1401        polars_ensure!(index < self.len(), oob = index, self.len());
1402
1403        // SAFETY: Bounds check done just before.
1404        Ok(unsafe { self.get_unchecked(index) })
1405    }
1406    /// # Safety
1407    ///
1408    /// Does not perform bounds check on `index`
1409    #[inline(always)]
1410    pub unsafe fn get_unchecked(&self, index: usize) -> AnyValue {
1411        debug_assert!(index < self.len());
1412
1413        match self {
1414            Column::Series(s) => unsafe { s.get_unchecked(index) },
1415            Column::Partitioned(s) => unsafe { s.get_unchecked(index) },
1416            Column::Scalar(s) => s.scalar().as_any_value(),
1417        }
1418    }
1419
1420    #[cfg(feature = "object")]
1421    pub fn get_object(
1422        &self,
1423        index: usize,
1424    ) -> Option<&dyn crate::chunked_array::object::PolarsObjectSafe> {
1425        self.as_materialized_series().get_object(index)
1426    }
1427
1428    pub fn bitand(&self, rhs: &Self) -> PolarsResult<Self> {
1429        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l & r)
1430    }
1431    pub fn bitor(&self, rhs: &Self) -> PolarsResult<Self> {
1432        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l | r)
1433    }
1434    pub fn bitxor(&self, rhs: &Self) -> PolarsResult<Self> {
1435        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l ^ r)
1436    }
1437
1438    pub fn try_add_owned(self, other: Self) -> PolarsResult<Self> {
1439        match (self, other) {
1440            (Column::Series(lhs), Column::Series(rhs)) => {
1441                lhs.take().try_add_owned(rhs.take()).map(Column::from)
1442            },
1443            (lhs, rhs) => lhs + rhs,
1444        }
1445    }
1446    pub fn try_sub_owned(self, other: Self) -> PolarsResult<Self> {
1447        match (self, other) {
1448            (Column::Series(lhs), Column::Series(rhs)) => {
1449                lhs.take().try_sub_owned(rhs.take()).map(Column::from)
1450            },
1451            (lhs, rhs) => lhs - rhs,
1452        }
1453    }
1454    pub fn try_mul_owned(self, other: Self) -> PolarsResult<Self> {
1455        match (self, other) {
1456            (Column::Series(lhs), Column::Series(rhs)) => {
1457                lhs.take().try_mul_owned(rhs.take()).map(Column::from)
1458            },
1459            (lhs, rhs) => lhs * rhs,
1460        }
1461    }
1462
1463    pub(crate) fn str_value(&self, index: usize) -> PolarsResult<Cow<str>> {
1464        Ok(self.get(index)?.str_value())
1465    }
1466
1467    pub fn min_reduce(&self) -> PolarsResult<Scalar> {
1468        match self {
1469            Column::Series(s) => s.min_reduce(),
1470            Column::Partitioned(s) => s.min_reduce(),
1471            Column::Scalar(s) => {
1472                // We don't really want to deal with handling the full semantics here so we just
1473                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1474                s.as_single_value_series().min_reduce()
1475            },
1476        }
1477    }
1478    pub fn max_reduce(&self) -> PolarsResult<Scalar> {
1479        match self {
1480            Column::Series(s) => s.max_reduce(),
1481            Column::Partitioned(s) => s.max_reduce(),
1482            Column::Scalar(s) => {
1483                // We don't really want to deal with handling the full semantics here so we just
1484                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1485                s.as_single_value_series().max_reduce()
1486            },
1487        }
1488    }
1489    pub fn median_reduce(&self) -> PolarsResult<Scalar> {
1490        match self {
1491            Column::Series(s) => s.median_reduce(),
1492            Column::Partitioned(s) => s.as_materialized_series().median_reduce(),
1493            Column::Scalar(s) => {
1494                // We don't really want to deal with handling the full semantics here so we just
1495                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1496                s.as_single_value_series().median_reduce()
1497            },
1498        }
1499    }
1500    pub fn mean_reduce(&self) -> Scalar {
1501        match self {
1502            Column::Series(s) => s.mean_reduce(),
1503            Column::Partitioned(s) => s.as_materialized_series().mean_reduce(),
1504            Column::Scalar(s) => {
1505                // We don't really want to deal with handling the full semantics here so we just
1506                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1507                s.as_single_value_series().mean_reduce()
1508            },
1509        }
1510    }
1511    pub fn std_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
1512        match self {
1513            Column::Series(s) => s.std_reduce(ddof),
1514            Column::Partitioned(s) => s.as_materialized_series().std_reduce(ddof),
1515            Column::Scalar(s) => {
1516                // We don't really want to deal with handling the full semantics here so we just
1517                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1518                s.as_single_value_series().std_reduce(ddof)
1519            },
1520        }
1521    }
1522    pub fn var_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
1523        match self {
1524            Column::Series(s) => s.var_reduce(ddof),
1525            Column::Partitioned(s) => s.as_materialized_series().var_reduce(ddof),
1526            Column::Scalar(s) => {
1527                // We don't really want to deal with handling the full semantics here so we just
1528                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1529                s.as_single_value_series().var_reduce(ddof)
1530            },
1531        }
1532    }
1533    pub fn sum_reduce(&self) -> PolarsResult<Scalar> {
1534        // @partition-opt
1535        // @scalar-opt
1536        self.as_materialized_series().sum_reduce()
1537    }
1538    pub fn and_reduce(&self) -> PolarsResult<Scalar> {
1539        match self {
1540            Column::Series(s) => s.and_reduce(),
1541            Column::Partitioned(s) => s.and_reduce(),
1542            Column::Scalar(s) => {
1543                // We don't really want to deal with handling the full semantics here so we just
1544                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1545                s.as_single_value_series().and_reduce()
1546            },
1547        }
1548    }
1549    pub fn or_reduce(&self) -> PolarsResult<Scalar> {
1550        match self {
1551            Column::Series(s) => s.or_reduce(),
1552            Column::Partitioned(s) => s.or_reduce(),
1553            Column::Scalar(s) => {
1554                // We don't really want to deal with handling the full semantics here so we just
1555                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1556                s.as_single_value_series().or_reduce()
1557            },
1558        }
1559    }
1560    pub fn xor_reduce(&self) -> PolarsResult<Scalar> {
1561        match self {
1562            Column::Series(s) => s.xor_reduce(),
1563            // @partition-opt
1564            Column::Partitioned(s) => s.as_materialized_series().xor_reduce(),
1565            Column::Scalar(s) => {
1566                // We don't really want to deal with handling the full semantics here so we just
1567                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1568                //
1569                // We have to deal with the fact that xor is 0 if there is an even number of
1570                // elements and the value if there is an odd number of elements. If there are zero
1571                // elements the result should be `null`.
1572                s.as_n_values_series(2 - s.len() % 2).xor_reduce()
1573            },
1574        }
1575    }
1576    pub fn n_unique(&self) -> PolarsResult<usize> {
1577        match self {
1578            Column::Series(s) => s.n_unique(),
1579            Column::Partitioned(s) => s.partitions().n_unique(),
1580            Column::Scalar(s) => s.as_single_value_series().n_unique(),
1581        }
1582    }
1583    pub fn quantile_reduce(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Scalar> {
1584        self.as_materialized_series()
1585            .quantile_reduce(quantile, method)
1586    }
1587
1588    pub(crate) fn estimated_size(&self) -> usize {
1589        // @scalar-opt
1590        self.as_materialized_series().estimated_size()
1591    }
1592
1593    pub fn sort_with(&self, options: SortOptions) -> PolarsResult<Self> {
1594        match self {
1595            Column::Series(s) => s.sort_with(options).map(Self::from),
1596            // @partition-opt
1597            Column::Partitioned(s) => s
1598                .as_materialized_series()
1599                .sort_with(options)
1600                .map(Self::from),
1601            Column::Scalar(s) => {
1602                // This makes this function throw the same errors as Series::sort_with
1603                _ = s.as_single_value_series().sort_with(options)?;
1604
1605                Ok(self.clone())
1606            },
1607        }
1608    }
1609
1610    pub fn map_unary_elementwise_to_bool(
1611        &self,
1612        f: impl Fn(&Series) -> BooleanChunked,
1613    ) -> BooleanChunked {
1614        self.try_map_unary_elementwise_to_bool(|s| Ok(f(s)))
1615            .unwrap()
1616    }
1617    pub fn try_map_unary_elementwise_to_bool(
1618        &self,
1619        f: impl Fn(&Series) -> PolarsResult<BooleanChunked>,
1620    ) -> PolarsResult<BooleanChunked> {
1621        match self {
1622            Column::Series(s) => f(s),
1623            Column::Partitioned(s) => f(s.as_materialized_series()),
1624            Column::Scalar(s) => Ok(f(&s.as_single_value_series())?.new_from_index(0, s.len())),
1625        }
1626    }
1627
1628    pub fn apply_unary_elementwise(&self, f: impl Fn(&Series) -> Series) -> Column {
1629        self.try_apply_unary_elementwise(|s| Ok(f(s))).unwrap()
1630    }
1631    pub fn try_apply_unary_elementwise(
1632        &self,
1633        f: impl Fn(&Series) -> PolarsResult<Series>,
1634    ) -> PolarsResult<Column> {
1635        match self {
1636            Column::Series(s) => f(s).map(Column::from),
1637            Column::Partitioned(s) => s.try_apply_unary_elementwise(f).map(Self::from),
1638            Column::Scalar(s) => Ok(ScalarColumn::from_single_value_series(
1639                f(&s.as_single_value_series())?,
1640                s.len(),
1641            )
1642            .into()),
1643        }
1644    }
1645
1646    pub fn apply_broadcasting_binary_elementwise(
1647        &self,
1648        other: &Self,
1649        op: impl Fn(&Series, &Series) -> Series,
1650    ) -> PolarsResult<Column> {
1651        self.try_apply_broadcasting_binary_elementwise(other, |lhs, rhs| Ok(op(lhs, rhs)))
1652    }
1653    pub fn try_apply_broadcasting_binary_elementwise(
1654        &self,
1655        other: &Self,
1656        op: impl Fn(&Series, &Series) -> PolarsResult<Series>,
1657    ) -> PolarsResult<Column> {
1658        fn output_length(a: &Column, b: &Column) -> PolarsResult<usize> {
1659            match (a.len(), b.len()) {
1660                // broadcasting
1661                (1, o) | (o, 1) => Ok(o),
1662                // equal
1663                (a, b) if a == b => Ok(a),
1664                // unequal
1665                (a, b) => {
1666                    polars_bail!(InvalidOperation: "cannot do a binary operation on columns of different lengths: got {} and {}", a, b)
1667                },
1668            }
1669        }
1670
1671        // Here we rely on the underlying broadcast operations.
1672        let length = output_length(self, other)?;
1673        match (self, other) {
1674            (Column::Series(lhs), Column::Series(rhs)) => op(lhs, rhs).map(Column::from),
1675            (Column::Series(lhs), Column::Scalar(rhs)) => {
1676                op(lhs, &rhs.as_single_value_series()).map(Column::from)
1677            },
1678            (Column::Scalar(lhs), Column::Series(rhs)) => {
1679                op(&lhs.as_single_value_series(), rhs).map(Column::from)
1680            },
1681            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
1682                let lhs = lhs.as_single_value_series();
1683                let rhs = rhs.as_single_value_series();
1684
1685                Ok(ScalarColumn::from_single_value_series(op(&lhs, &rhs)?, length).into_column())
1686            },
1687            // @partition-opt
1688            (lhs, rhs) => {
1689                op(lhs.as_materialized_series(), rhs.as_materialized_series()).map(Column::from)
1690            },
1691        }
1692    }
1693
1694    pub fn apply_binary_elementwise(
1695        &self,
1696        other: &Self,
1697        f: impl Fn(&Series, &Series) -> Series,
1698        f_lb: impl Fn(&Scalar, &Series) -> Series,
1699        f_rb: impl Fn(&Series, &Scalar) -> Series,
1700    ) -> Column {
1701        self.try_apply_binary_elementwise(
1702            other,
1703            |lhs, rhs| Ok(f(lhs, rhs)),
1704            |lhs, rhs| Ok(f_lb(lhs, rhs)),
1705            |lhs, rhs| Ok(f_rb(lhs, rhs)),
1706        )
1707        .unwrap()
1708    }
1709    pub fn try_apply_binary_elementwise(
1710        &self,
1711        other: &Self,
1712        f: impl Fn(&Series, &Series) -> PolarsResult<Series>,
1713        f_lb: impl Fn(&Scalar, &Series) -> PolarsResult<Series>,
1714        f_rb: impl Fn(&Series, &Scalar) -> PolarsResult<Series>,
1715    ) -> PolarsResult<Column> {
1716        debug_assert_eq!(self.len(), other.len());
1717
1718        match (self, other) {
1719            (Column::Series(lhs), Column::Series(rhs)) => f(lhs, rhs).map(Column::from),
1720            (Column::Series(lhs), Column::Scalar(rhs)) => f_rb(lhs, rhs.scalar()).map(Column::from),
1721            (Column::Scalar(lhs), Column::Series(rhs)) => f_lb(lhs.scalar(), rhs).map(Column::from),
1722            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
1723                let lhs = lhs.as_single_value_series();
1724                let rhs = rhs.as_single_value_series();
1725
1726                Ok(
1727                    ScalarColumn::from_single_value_series(f(&lhs, &rhs)?, self.len())
1728                        .into_column(),
1729                )
1730            },
1731            // @partition-opt
1732            (lhs, rhs) => {
1733                f(lhs.as_materialized_series(), rhs.as_materialized_series()).map(Column::from)
1734            },
1735        }
1736    }
1737
1738    #[cfg(feature = "approx_unique")]
1739    pub fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
1740        match self {
1741            Column::Series(s) => s.approx_n_unique(),
1742            // @partition-opt
1743            Column::Partitioned(s) => s.as_materialized_series().approx_n_unique(),
1744            Column::Scalar(s) => {
1745                // @NOTE: We do this for the error handling.
1746                s.as_single_value_series().approx_n_unique()?;
1747                Ok(1)
1748            },
1749        }
1750    }
1751
1752    pub fn n_chunks(&self) -> usize {
1753        match self {
1754            Column::Series(s) => s.n_chunks(),
1755            Column::Scalar(s) => s.lazy_as_materialized_series().map_or(1, |x| x.n_chunks()),
1756            Column::Partitioned(s) => {
1757                if let Some(s) = s.lazy_as_materialized_series() {
1758                    // This should always hold for partitioned.
1759                    debug_assert_eq!(s.n_chunks(), 1)
1760                }
1761                1
1762            },
1763        }
1764    }
1765
1766    #[expect(clippy::wrong_self_convention)]
1767    pub(crate) fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
1768        // @scalar-opt
1769        self.as_materialized_series().into_total_ord_inner()
1770    }
1771    #[expect(unused, clippy::wrong_self_convention)]
1772    pub(crate) fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
1773        // @scalar-opt
1774        self.as_materialized_series().into_total_eq_inner()
1775    }
1776
1777    pub fn rechunk_to_arrow(self, compat_level: CompatLevel) -> Box<dyn Array> {
1778        // Rechunk to one chunk if necessary
1779        let mut series = self.take_materialized_series();
1780        if series.n_chunks() > 1 {
1781            series = series.rechunk();
1782        }
1783        series.to_arrow(0, compat_level)
1784    }
1785}
1786
1787impl Default for Column {
1788    fn default() -> Self {
1789        Self::new_scalar(
1790            PlSmallStr::EMPTY,
1791            Scalar::new(DataType::Int64, AnyValue::Null),
1792            0,
1793        )
1794    }
1795}
1796
1797impl PartialEq for Column {
1798    fn eq(&self, other: &Self) -> bool {
1799        // @scalar-opt
1800        self.as_materialized_series()
1801            .eq(other.as_materialized_series())
1802    }
1803}
1804
1805impl From<Series> for Column {
1806    #[inline]
1807    fn from(series: Series) -> Self {
1808        // We instantiate a Scalar Column if the Series is length is 1. This makes it possible for
1809        // future operations to be faster.
1810        if series.len() == 1 {
1811            return Self::Scalar(ScalarColumn::unit_scalar_from_series(series));
1812        }
1813
1814        Self::Series(SeriesColumn::new(series))
1815    }
1816}
1817
1818impl<T: IntoSeries> IntoColumn for T {
1819    #[inline]
1820    fn into_column(self) -> Column {
1821        self.into_series().into()
1822    }
1823}
1824
/// A [`Column`] converts into itself unchanged.
impl IntoColumn for Column {
    #[inline(always)]
    fn into_column(self) -> Column {
        // Identity conversion: the column is returned as-is.
        self
    }
}
1831
/// Serialization helper that wraps a single [`Series`].
///
/// We don't want to serialize the scalar columns. So this helps pretend that columns are always
/// initialized without implementing `From<Column>` for [`Series`].
///
/// Those casts should be explicit.
///
/// With the `serde` feature enabled, `#[serde(into = "Series")]` makes this type serialize by
/// first converting itself into a [`Series`].
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(into = "Series"))]
struct _SerdeSeries(Series);
1840
1841impl From<Column> for _SerdeSeries {
1842    #[inline]
1843    fn from(value: Column) -> Self {
1844        Self(value.take_materialized_series())
1845    }
1846}
1847
1848impl From<_SerdeSeries> for Series {
1849    #[inline]
1850    fn from(value: _SerdeSeries) -> Self {
1851        value.0
1852    }
1853}
polars_core/frame/column/mod.rs

polars_core/frame/column/
mod.rs