// polars_core/frame/column/mod.rs

1use std::borrow::Cow;
2
3use arrow::bitmap::BitmapBuilder;
4use arrow::trusted_len::TrustMyLength;
5use num_traits::{Num, NumCast};
6use polars_compute::rolling::QuantileMethod;
7use polars_error::PolarsResult;
8use polars_utils::aliases::PlSeedableRandomStateQuality;
9use polars_utils::index::check_bounds;
10use polars_utils::pl_str::PlSmallStr;
11pub use scalar::ScalarColumn;
12
13use self::compare_inner::{TotalEqInner, TotalOrdInner};
14use self::gather::check_bounds_ca;
15use self::partitioned::PartitionedColumn;
16use self::series::SeriesColumn;
17use crate::chunked_array::cast::CastOptions;
18use crate::chunked_array::flags::StatisticsFlags;
19use crate::datatypes::ReshapeDimension;
20use crate::prelude::*;
21use crate::series::{BitRepr, IsSorted, SeriesPhysIter};
22use crate::utils::{Container, slice_offsets};
23use crate::{HEAD_DEFAULT_LENGTH, TAIL_DEFAULT_LENGTH};
24
25mod arithmetic;
26mod compare;
27mod partitioned;
28mod scalar;
29mod series;
30
/// A column within a [`DataFrame`].
///
/// This is lazily initialized to a [`Series`] with methods like
/// [`as_materialized_series`][Column::as_materialized_series] and
/// [`take_materialized_series`][Column::take_materialized_series].
///
/// Currently, there are three ways to represent a [`Column`].
/// 1. A [`Series`] of values
/// 2. A [`PartitionedColumn`]
/// 3. A [`ScalarColumn`] that repeats a single [`Scalar`]
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub enum Column {
    Series(SeriesColumn),
    Partitioned(PartitionedColumn),
    Scalar(ScalarColumn),
}
47
/// Convert `Self` into a [`Column`]
pub trait IntoColumn: Sized {
    /// Convert `self` into a [`Column`].
    fn into_column(self) -> Column;
}
52
53impl Column {
    /// Create a new [`Column`] backed by a [`Series`] built from `values`.
    ///
    /// Delegates to the [`NamedFrom`] implementation for `T`; `#[track_caller]`
    /// attributes any panic from that construction to the caller.
    #[inline]
    #[track_caller]
    pub fn new<T, Phantom>(name: PlSmallStr, values: T) -> Self
    where
        Phantom: ?Sized,
        Series: NamedFrom<T, Phantom>,
    {
        Self::Series(SeriesColumn::new(NamedFrom::new(name, values)))
    }
63
    /// Create an empty (zero-length) [`Column`] with the given `dtype`.
    ///
    /// Represented as a zero-length scalar column holding a null scalar.
    #[inline]
    pub fn new_empty(name: PlSmallStr, dtype: &DataType) -> Self {
        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), 0)
    }

    /// Create a [`Column`] that repeats `scalar` `length` times.
    #[inline]
    pub fn new_scalar(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
        Self::Scalar(ScalarColumn::new(name, scalar, length))
    }
73
    /// Create a new "partitioned" [`Column`] that repeats `scalar` `length` times.
    ///
    /// NOTE(review): despite the name, this currently constructs a
    /// [`Column::Scalar`], making it behaviorally identical to
    /// [`Column::new_scalar`] — confirm whether constructing a
    /// [`Column::Partitioned`] was intended here.
    #[inline]
    pub fn new_partitioned(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
        Self::Scalar(ScalarColumn::new(name, scalar, length))
    }
78
79    pub fn new_row_index(name: PlSmallStr, offset: IdxSize, length: usize) -> PolarsResult<Column> {
80        let length = IdxSize::try_from(length).unwrap_or(IdxSize::MAX);
81
82        if offset.checked_add(length).is_none() {
83            polars_bail!(
84                ComputeError:
85                "row index with offset {} overflows on dataframe with height {}",
86                offset, length
87            )
88        }
89
90        let range = offset..offset + length;
91
92        let mut ca = IdxCa::from_vec(name, range.collect());
93        ca.set_sorted_flag(IsSorted::Ascending);
94        let col = ca.into_series().into();
95
96        Ok(col)
97    }
98
    // # Materialize
    /// Get a reference to a [`Series`] for this [`Column`]
    ///
    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
    #[inline]
    pub fn as_materialized_series(&self) -> &Series {
        match self {
            // A series-backed column is already materialized.
            Column::Series(s) => s,
            Column::Partitioned(s) => s.as_materialized_series(),
            Column::Scalar(s) => s.as_materialized_series(),
        }
    }
111
112    /// If the memory repr of this Column is a scalar, a unit-length Series will
113    /// be returned.
114    #[inline]
115    pub fn as_materialized_series_maintain_scalar(&self) -> Series {
116        match self {
117            Column::Scalar(s) => s.as_single_value_series(),
118            v => v.as_materialized_series().clone(),
119        }
120    }
121
    /// Turn [`Column`] into a [`Column::Series`].
    ///
    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
    #[inline]
    pub fn into_materialized_series(&mut self) -> &mut Series {
        match self {
            Column::Series(s) => s,
            Column::Partitioned(s) => {
                // Swap a cheap empty placeholder into `self` so we can take
                // ownership of the partitioned column and materialize it.
                let series = std::mem::replace(
                    s,
                    PartitionedColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),
                )
                .take_materialized_series();
                *self = Column::Series(series.into());
                // Re-borrow the series we just stored in `self`.
                let Column::Series(s) = self else {
                    unreachable!();
                };
                s
            },
            Column::Scalar(s) => {
                // Same placeholder trick as the `Partitioned` arm above.
                let series = std::mem::replace(
                    s,
                    ScalarColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),
                )
                .take_materialized_series();
                *self = Column::Series(series.into());
                let Column::Series(s) = self else {
                    unreachable!();
                };
                s
            },
        }
    }
    /// Take [`Series`] from a [`Column`]
    ///
    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
    #[inline]
    pub fn take_materialized_series(self) -> Series {
        match self {
            // Consumes `self`; non-series representations materialize first.
            Column::Series(s) => s.take(),
            Column::Partitioned(s) => s.take_materialized_series(),
            Column::Scalar(s) => s.take_materialized_series(),
        }
    }
166
    /// The [`DataType`] of this column.
    #[inline]
    pub fn dtype(&self) -> &DataType {
        match self {
            Column::Series(s) => s.dtype(),
            Column::Partitioned(s) => s.dtype(),
            Column::Scalar(s) => s.dtype(),
        }
    }

    /// The name/dtype [`Field`] describing this column.
    #[inline]
    pub fn field(&self) -> Cow<Field> {
        match self {
            Column::Series(s) => s.field(),
            Column::Partitioned(s) => s.field(),
            Column::Scalar(s) => match s.lazy_as_materialized_series() {
                // Not yet materialized: build the field without materializing.
                None => Cow::Owned(Field::new(s.name().clone(), s.dtype().clone())),
                Some(s) => s.field(),
            },
        }
    }

    /// The name of this column.
    #[inline]
    pub fn name(&self) -> &PlSmallStr {
        match self {
            Column::Series(s) => s.name(),
            Column::Partitioned(s) => s.name(),
            Column::Scalar(s) => s.name(),
        }
    }

    /// The number of rows in this column.
    #[inline]
    pub fn len(&self) -> usize {
        match self {
            Column::Series(s) => s.len(),
            Column::Partitioned(s) => s.len(),
            Column::Scalar(s) => s.len(),
        }
    }

    /// Return this column renamed to `name`.
    #[inline]
    pub fn with_name(mut self, name: PlSmallStr) -> Column {
        self.rename(name);
        self
    }

    /// Rename this column in place.
    #[inline]
    pub fn rename(&mut self, name: PlSmallStr) {
        match self {
            // `_ =` discards whatever the inner `rename` returns.
            Column::Series(s) => _ = s.rename(name),
            Column::Partitioned(s) => _ = s.rename(name),
            Column::Scalar(s) => _ = s.rename(name),
        }
    }
220
    // # Downcasting
    /// Return the backing [`Series`] if this is a [`Column::Series`].
    #[inline]
    pub fn as_series(&self) -> Option<&Series> {
        match self {
            Column::Series(s) => Some(s),
            _ => None,
        }
    }
    /// Return the [`PartitionedColumn`] if this is a [`Column::Partitioned`].
    #[inline]
    pub fn as_partitioned_column(&self) -> Option<&PartitionedColumn> {
        match self {
            Column::Partitioned(s) => Some(s),
            _ => None,
        }
    }
    /// Return the [`ScalarColumn`] if this is a [`Column::Scalar`].
    #[inline]
    pub fn as_scalar_column(&self) -> Option<&ScalarColumn> {
        match self {
            Column::Scalar(s) => Some(s),
            _ => None,
        }
    }
    /// Mutable variant of [`Column::as_scalar_column`].
    #[inline]
    pub fn as_scalar_column_mut(&mut self) -> Option<&mut ScalarColumn> {
        match self {
            Column::Scalar(s) => Some(s),
            _ => None,
        }
    }
250
    // # Try to Chunked Arrays
    //
    // Each `try_*` accessor materializes the column into a `Series` and then
    // attempts the corresponding dtype-specific downcast, yielding `None` on
    // failure.
    pub fn try_bool(&self) -> Option<&BooleanChunked> {
        self.as_materialized_series().try_bool()
    }
    pub fn try_i8(&self) -> Option<&Int8Chunked> {
        self.as_materialized_series().try_i8()
    }
    pub fn try_i16(&self) -> Option<&Int16Chunked> {
        self.as_materialized_series().try_i16()
    }
    pub fn try_i32(&self) -> Option<&Int32Chunked> {
        self.as_materialized_series().try_i32()
    }
    pub fn try_i64(&self) -> Option<&Int64Chunked> {
        self.as_materialized_series().try_i64()
    }
    pub fn try_u8(&self) -> Option<&UInt8Chunked> {
        self.as_materialized_series().try_u8()
    }
    pub fn try_u16(&self) -> Option<&UInt16Chunked> {
        self.as_materialized_series().try_u16()
    }
    pub fn try_u32(&self) -> Option<&UInt32Chunked> {
        self.as_materialized_series().try_u32()
    }
    pub fn try_u64(&self) -> Option<&UInt64Chunked> {
        self.as_materialized_series().try_u64()
    }
    pub fn try_f32(&self) -> Option<&Float32Chunked> {
        self.as_materialized_series().try_f32()
    }
    pub fn try_f64(&self) -> Option<&Float64Chunked> {
        self.as_materialized_series().try_f64()
    }
    pub fn try_str(&self) -> Option<&StringChunked> {
        self.as_materialized_series().try_str()
    }
    pub fn try_list(&self) -> Option<&ListChunked> {
        self.as_materialized_series().try_list()
    }
    pub fn try_binary(&self) -> Option<&BinaryChunked> {
        self.as_materialized_series().try_binary()
    }
    pub fn try_idx(&self) -> Option<&IdxCa> {
        self.as_materialized_series().try_idx()
    }
    pub fn try_binary_offset(&self) -> Option<&BinaryOffsetChunked> {
        self.as_materialized_series().try_binary_offset()
    }
    #[cfg(feature = "dtype-datetime")]
    pub fn try_datetime(&self) -> Option<&DatetimeChunked> {
        self.as_materialized_series().try_datetime()
    }
    #[cfg(feature = "dtype-struct")]
    pub fn try_struct(&self) -> Option<&StructChunked> {
        self.as_materialized_series().try_struct()
    }
    #[cfg(feature = "dtype-decimal")]
    pub fn try_decimal(&self) -> Option<&DecimalChunked> {
        self.as_materialized_series().try_decimal()
    }
    #[cfg(feature = "dtype-array")]
    pub fn try_array(&self) -> Option<&ArrayChunked> {
        self.as_materialized_series().try_array()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn try_categorical(&self) -> Option<&CategoricalChunked> {
        self.as_materialized_series().try_categorical()
    }
    #[cfg(feature = "dtype-date")]
    pub fn try_date(&self) -> Option<&DateChunked> {
        self.as_materialized_series().try_date()
    }
    #[cfg(feature = "dtype-duration")]
    pub fn try_duration(&self) -> Option<&DurationChunked> {
        self.as_materialized_series().try_duration()
    }
328
    // # To Chunked Arrays
    //
    // Fallible counterparts of the `try_*` accessors above: each materializes
    // the column into a `Series` and performs the dtype-specific downcast,
    // returning a `PolarsResult` error on failure.
    pub fn bool(&self) -> PolarsResult<&BooleanChunked> {
        self.as_materialized_series().bool()
    }
    pub fn i8(&self) -> PolarsResult<&Int8Chunked> {
        self.as_materialized_series().i8()
    }
    pub fn i16(&self) -> PolarsResult<&Int16Chunked> {
        self.as_materialized_series().i16()
    }
    pub fn i32(&self) -> PolarsResult<&Int32Chunked> {
        self.as_materialized_series().i32()
    }
    pub fn i64(&self) -> PolarsResult<&Int64Chunked> {
        self.as_materialized_series().i64()
    }
    #[cfg(feature = "dtype-i128")]
    pub fn i128(&self) -> PolarsResult<&Int128Chunked> {
        self.as_materialized_series().i128()
    }
    pub fn u8(&self) -> PolarsResult<&UInt8Chunked> {
        self.as_materialized_series().u8()
    }
    pub fn u16(&self) -> PolarsResult<&UInt16Chunked> {
        self.as_materialized_series().u16()
    }
    pub fn u32(&self) -> PolarsResult<&UInt32Chunked> {
        self.as_materialized_series().u32()
    }
    pub fn u64(&self) -> PolarsResult<&UInt64Chunked> {
        self.as_materialized_series().u64()
    }
    pub fn f32(&self) -> PolarsResult<&Float32Chunked> {
        self.as_materialized_series().f32()
    }
    pub fn f64(&self) -> PolarsResult<&Float64Chunked> {
        self.as_materialized_series().f64()
    }
    pub fn str(&self) -> PolarsResult<&StringChunked> {
        self.as_materialized_series().str()
    }
    pub fn list(&self) -> PolarsResult<&ListChunked> {
        self.as_materialized_series().list()
    }
    pub fn binary(&self) -> PolarsResult<&BinaryChunked> {
        self.as_materialized_series().binary()
    }
    pub fn idx(&self) -> PolarsResult<&IdxCa> {
        self.as_materialized_series().idx()
    }
    pub fn binary_offset(&self) -> PolarsResult<&BinaryOffsetChunked> {
        self.as_materialized_series().binary_offset()
    }
    #[cfg(feature = "dtype-datetime")]
    pub fn datetime(&self) -> PolarsResult<&DatetimeChunked> {
        self.as_materialized_series().datetime()
    }
    #[cfg(feature = "dtype-struct")]
    pub fn struct_(&self) -> PolarsResult<&StructChunked> {
        self.as_materialized_series().struct_()
    }
    #[cfg(feature = "dtype-decimal")]
    pub fn decimal(&self) -> PolarsResult<&DecimalChunked> {
        self.as_materialized_series().decimal()
    }
    #[cfg(feature = "dtype-array")]
    pub fn array(&self) -> PolarsResult<&ArrayChunked> {
        self.as_materialized_series().array()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn categorical(&self) -> PolarsResult<&CategoricalChunked> {
        self.as_materialized_series().categorical()
    }
    #[cfg(feature = "dtype-date")]
    pub fn date(&self) -> PolarsResult<&DateChunked> {
        self.as_materialized_series().date()
    }
    #[cfg(feature = "dtype-duration")]
    pub fn duration(&self) -> PolarsResult<&DurationChunked> {
        self.as_materialized_series().duration()
    }
410
    // # Casting
    /// Cast this column to `dtype` with explicit [`CastOptions`].
    pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {
        match self {
            Column::Series(s) => s.cast_with_options(dtype, options).map(Column::from),
            Column::Partitioned(s) => s.cast_with_options(dtype, options).map(Column::from),
            Column::Scalar(s) => s.cast_with_options(dtype, options).map(Column::from),
        }
    }
    /// Strictly cast this column to `dtype`.
    pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Self> {
        match self {
            Column::Series(s) => s.strict_cast(dtype).map(Column::from),
            Column::Partitioned(s) => s.strict_cast(dtype).map(Column::from),
            Column::Scalar(s) => s.strict_cast(dtype).map(Column::from),
        }
    }
    /// Cast this column to `dtype`.
    pub fn cast(&self, dtype: &DataType) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => s.cast(dtype).map(Column::from),
            Column::Partitioned(s) => s.cast(dtype).map(Column::from),
            Column::Scalar(s) => s.cast(dtype).map(Column::from),
        }
    }
    /// Cast to `dtype` without checking that the cast is valid.
    ///
    /// # Safety
    ///
    /// This can lead to invalid memory access in downstream code.
    pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
            Column::Partitioned(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
            Column::Scalar(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
        }
    }
443
    /// Return an empty (zero-length) column with the same name and dtype.
    pub fn clear(&self) -> Self {
        match self {
            Column::Series(s) => s.clear().into(),
            Column::Partitioned(s) => s.clear().into(),
            // A scalar column resized to zero rows is already "cleared".
            Column::Scalar(s) => s.resize(0).into(),
        }
    }

    /// Shrink backing memory to fit; no-op for non-series representations.
    #[inline]
    pub fn shrink_to_fit(&mut self) {
        match self {
            Column::Series(s) => s.shrink_to_fit(),
            // @partition-opt
            Column::Partitioned(_) => {},
            Column::Scalar(_) => {},
        }
    }
461
    /// Broadcast the value at row `index` into a new column of `length` rows.
    ///
    /// An out-of-bounds `index` yields a full-null column of `length` rows.
    #[inline]
    pub fn new_from_index(&self, index: usize, length: usize) -> Self {
        if index >= self.len() {
            return Self::full_null(self.name().clone(), length, self.dtype());
        }

        match self {
            Column::Series(s) => {
                // SAFETY: Bounds check done before.
                let av = unsafe { s.get_unchecked(index) };
                let scalar = Scalar::new(self.dtype().clone(), av.into_static());
                Self::new_scalar(self.name().clone(), scalar, length)
            },
            Column::Partitioned(s) => {
                // SAFETY: Bounds check done before.
                let av = unsafe { s.get_unchecked(index) };
                let scalar = Scalar::new(self.dtype().clone(), av.into_static());
                Self::new_scalar(self.name().clone(), scalar, length)
            },
            // Every position holds the same value, so resizing suffices.
            Column::Scalar(s) => s.resize(length).into(),
        }
    }
484
    /// Whether this column contains any null value.
    #[inline]
    pub fn has_nulls(&self) -> bool {
        match self {
            Self::Series(s) => s.has_nulls(),
            // @partition-opt
            Self::Partitioned(s) => s.as_materialized_series().has_nulls(),
            Self::Scalar(s) => s.has_nulls(),
        }
    }

    /// Per-row null mask of this column.
    #[inline]
    pub fn is_null(&self) -> BooleanChunked {
        match self {
            Self::Series(s) => s.is_null(),
            // @partition-opt
            Self::Partitioned(s) => s.as_materialized_series().is_null(),
            Self::Scalar(s) => {
                // The mask is constant: the scalar is either null everywhere or nowhere.
                BooleanChunked::full(s.name().clone(), s.scalar().is_null(), s.len())
            },
        }
    }
    /// Per-row non-null mask of this column.
    #[inline]
    pub fn is_not_null(&self) -> BooleanChunked {
        match self {
            Self::Series(s) => s.is_not_null(),
            // @partition-opt
            Self::Partitioned(s) => s.as_materialized_series().is_not_null(),
            Self::Scalar(s) => {
                BooleanChunked::full(s.name().clone(), !s.scalar().is_null(), s.len())
            },
        }
    }
517
    /// Convert this column to the physical representation of its dtype.
    pub fn to_physical_repr(&self) -> Column {
        // @scalar-opt
        self.as_materialized_series()
            .to_physical_repr()
            .into_owned()
            .into()
    }
    /// Reinterpret a physical column as logical `dtype` without validation.
    ///
    /// # Safety
    ///
    /// This can lead to invalid memory access in downstream code.
    pub unsafe fn from_physical_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
        // @scalar-opt
        self.as_materialized_series()
            .from_physical_unchecked(dtype)
            .map(Column::from)
    }
534
535    pub fn head(&self, length: Option<usize>) -> Column {
536        let len = length.unwrap_or(HEAD_DEFAULT_LENGTH);
537        let len = usize::min(len, self.len());
538        self.slice(0, len)
539    }
540    pub fn tail(&self, length: Option<usize>) -> Column {
541        let len = length.unwrap_or(TAIL_DEFAULT_LENGTH);
542        let len = usize::min(len, self.len());
543        debug_assert!(len <= i64::MAX as usize);
544        self.slice(-(len as i64), len)
545    }
    /// Slice this column: up to `length` rows starting at `offset`
    /// (negative offsets count from the end).
    pub fn slice(&self, offset: i64, length: usize) -> Column {
        match self {
            Column::Series(s) => s.slice(offset, length).into(),
            // @partition-opt
            Column::Partitioned(s) => s.as_materialized_series().slice(offset, length).into(),
            Column::Scalar(s) => {
                // Only the resulting length matters for a repeated scalar.
                let (_, length) = slice_offsets(offset, length, s.len());
                s.resize(length).into()
            },
        }
    }
557
    /// Split this column into two parts at `offset`.
    pub fn split_at(&self, offset: i64) -> (Column, Column) {
        // @scalar-opt
        let (l, r) = self.as_materialized_series().split_at(offset);
        (l.into(), r.into())
    }
563
564    #[inline]
565    pub fn null_count(&self) -> usize {
566        match self {
567            Self::Series(s) => s.null_count(),
568            Self::Partitioned(s) => s.null_count(),
569            Self::Scalar(s) if s.scalar().is_null() => s.len(),
570            Self::Scalar(_) => 0,
571        }
572    }
573
    /// Bounds-checked gather; errors on any out-of-bounds index.
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Column> {
        check_bounds_ca(indices, self.len() as IdxSize)?;
        // SAFETY: all indices were bounds-checked above.
        Ok(unsafe { self.take_unchecked(indices) })
    }
    /// Bounds-checked gather from a plain index slice.
    pub fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Column> {
        check_bounds(indices, self.len() as IdxSize)?;
        // SAFETY: all indices were bounds-checked above.
        Ok(unsafe { self.take_slice_unchecked(indices) })
    }
    /// Gather rows by index without bounds checks.
    ///
    /// # Safety
    ///
    /// No bounds on the indexes are performed.
    pub unsafe fn take_unchecked(&self, indices: &IdxCa) -> Column {
        debug_assert!(check_bounds_ca(indices, self.len() as IdxSize).is_ok());

        match self {
            Self::Series(s) => unsafe { s.take_unchecked(indices) }.into(),
            Self::Partitioned(s) => {
                let s = s.as_materialized_series();
                unsafe { s.take_unchecked(indices) }.into()
            },
            Self::Scalar(s) => {
                let idxs_length = indices.len();
                let idxs_null_count = indices.null_count();

                // Gather from a unit-length series and broadcast the single
                // result to `idxs_length` rows. The `[0][..s.len().min(1)]`
                // slice is empty iff `self` is empty.
                let scalar = ScalarColumn::from_single_value_series(
                    s.as_single_value_series().take_unchecked(&IdxCa::new(
                        indices.name().clone(),
                        &[0][..s.len().min(1)],
                    )),
                    idxs_length,
                );

                // We need to make sure that null values in `idx` become null values in the result
                if idxs_null_count == 0 || scalar.has_nulls() {
                    scalar.into_column()
                } else if idxs_null_count == idxs_length {
                    // Every index is null, so the whole result is null.
                    scalar.into_nulls().into_column()
                } else {
                    // Mixed case: transplant the indices' validity mask onto the
                    // materialized (single-chunk) result.
                    let validity = indices.rechunk_validity();
                    let series = scalar.take_materialized_series();
                    let name = series.name().clone();
                    let dtype = series.dtype().clone();
                    let mut chunks = series.into_chunks();
                    assert_eq!(chunks.len(), 1);
                    chunks[0] = chunks[0].with_validity(validity);
                    unsafe { Series::from_chunks_and_dtype_unchecked(name, chunks, &dtype) }
                        .into_column()
                }
            },
        }
    }
    /// Gather rows by a plain index slice without bounds checks.
    ///
    /// # Safety
    ///
    /// No bounds on the indexes are performed.
    pub unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Column {
        debug_assert!(check_bounds(indices, self.len() as IdxSize).is_ok());

        match self {
            Self::Series(s) => unsafe { s.take_slice_unchecked(indices) }.into(),
            Self::Partitioned(s) => {
                let s = s.as_materialized_series();
                unsafe { s.take_slice_unchecked(indices) }.into()
            },
            // A slice of indices has no validity mask, so the scalar path is
            // simpler than `take_unchecked`: broadcast the single value.
            Self::Scalar(s) => ScalarColumn::from_single_value_series(
                s.as_single_value_series()
                    .take_slice_unchecked(&[0][..s.len().min(1)]),
                indices.len(),
            )
            .into(),
        }
    }
645
    /// General implementation for aggregation where a non-missing scalar would map to itself.
    ///
    /// For the scalar representation this avoids materializing the full column:
    /// the aggregation is probed on a unit-length series to learn the output
    /// dtype (and whether the aggregation is defined at all), then the scalar is
    /// broadcast to one value per group, with empty groups masked out as nulls.
    #[inline(always)]
    #[cfg(any(feature = "algorithm_group_by", feature = "bitwise"))]
    fn agg_with_unit_scalar(
        &self,
        groups: &GroupsType,
        series_agg: impl Fn(&Series, &GroupsType) -> Series,
    ) -> Column {
        match self {
            Column::Series(s) => series_agg(s, groups).into_column(),
            // @partition-opt
            Column::Partitioned(s) => series_agg(s.as_materialized_series(), groups).into_column(),
            Column::Scalar(s) => {
                if s.is_empty() {
                    return series_agg(s.as_materialized_series(), groups).into_column();
                }

                // We utilize the aggregation on Series to see:
                // 1. the output datatype of the aggregation
                // 2. whether this aggregation is even defined
                let series_aggregation = series_agg(
                    &s.as_single_value_series(),
                    &GroupsType::Slice {
                        // @NOTE: this group is always valid since s is non-empty.
                        groups: vec![[0, 1]],
                        rolling: false,
                    },
                );

                // If the aggregation is not defined, just return all nulls.
                if series_aggregation.has_nulls() {
                    return Self::new_scalar(
                        series_aggregation.name().clone(),
                        Scalar::new(series_aggregation.dtype().clone(), AnyValue::Null),
                        groups.len(),
                    );
                }

                let mut scalar_col = s.resize(groups.len());
                // The aggregation might change the type (e.g. mean changes int -> float), so we do
                // a cast here to the output type.
                if series_aggregation.dtype() != s.dtype() {
                    scalar_col = scalar_col.cast(series_aggregation.dtype()).unwrap();
                }

                let Some(first_empty_idx) = groups.iter().position(|g| g.is_empty()) else {
                    // Fast path: no empty groups. keep the scalar intact.
                    return scalar_col.into_column();
                };

                // All empty groups produce a *missing* or `null` value.
                let mut validity = BitmapBuilder::with_capacity(groups.len());
                // Every group before the first empty one is valid.
                validity.extend_constant(first_empty_idx, true);
                // SAFETY: We trust the length of this iterator.
                let iter = unsafe {
                    TrustMyLength::new(
                        groups.iter().skip(first_empty_idx).map(|g| !g.is_empty()),
                        groups.len() - first_empty_idx,
                    )
                };
                validity.extend_trusted_len_iter(iter);

                let mut s = scalar_col.take_materialized_series().rechunk();
                // SAFETY: We perform a compute_len afterwards.
                let chunks = unsafe { s.chunks_mut() };
                let arr = &mut chunks[0];
                *arr = arr.with_validity(validity.into_opt_validity());
                s.compute_len();

                s.into_column()
            },
        }
    }
719
720    /// # Safety
721    ///
722    /// Does no bounds checks, groups must be correct.
723    #[cfg(feature = "algorithm_group_by")]
724    pub unsafe fn agg_min(&self, groups: &GroupsType) -> Self {
725        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_min(g) })
726    }
727
728    /// # Safety
729    ///
730    /// Does no bounds checks, groups must be correct.
731    #[cfg(feature = "algorithm_group_by")]
732    pub unsafe fn agg_max(&self, groups: &GroupsType) -> Self {
733        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_max(g) })
734    }
735
736    /// # Safety
737    ///
738    /// Does no bounds checks, groups must be correct.
739    #[cfg(feature = "algorithm_group_by")]
740    pub unsafe fn agg_mean(&self, groups: &GroupsType) -> Self {
741        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_mean(g) })
742    }
743
744    /// # Safety
745    ///
746    /// Does no bounds checks, groups must be correct.
747    #[cfg(feature = "algorithm_group_by")]
748    pub unsafe fn agg_sum(&self, groups: &GroupsType) -> Self {
749        // @scalar-opt
750        unsafe { self.as_materialized_series().agg_sum(groups) }.into()
751    }
752
753    /// # Safety
754    ///
755    /// Does no bounds checks, groups must be correct.
756    #[cfg(feature = "algorithm_group_by")]
757    pub unsafe fn agg_first(&self, groups: &GroupsType) -> Self {
758        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_first(g) })
759    }
760
761    /// # Safety
762    ///
763    /// Does no bounds checks, groups must be correct.
764    #[cfg(feature = "algorithm_group_by")]
765    pub unsafe fn agg_last(&self, groups: &GroupsType) -> Self {
766        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_last(g) })
767    }
768
769    /// # Safety
770    ///
771    /// Does no bounds checks, groups must be correct.
772    #[cfg(feature = "algorithm_group_by")]
773    pub unsafe fn agg_n_unique(&self, groups: &GroupsType) -> Self {
774        // @scalar-opt
775        unsafe { self.as_materialized_series().agg_n_unique(groups) }.into()
776    }
777
778    /// # Safety
779    ///
780    /// Does no bounds checks, groups must be correct.
781    #[cfg(feature = "algorithm_group_by")]
782    pub unsafe fn agg_quantile(
783        &self,
784        groups: &GroupsType,
785        quantile: f64,
786        method: QuantileMethod,
787    ) -> Self {
788        // @scalar-opt
789
790        unsafe {
791            self.as_materialized_series()
792                .agg_quantile(groups, quantile, method)
793        }
794        .into()
795    }
796
797    /// # Safety
798    ///
799    /// Does no bounds checks, groups must be correct.
800    #[cfg(feature = "algorithm_group_by")]
801    pub unsafe fn agg_median(&self, groups: &GroupsType) -> Self {
802        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_median(g) })
803    }
804
805    /// # Safety
806    ///
807    /// Does no bounds checks, groups must be correct.
808    #[cfg(feature = "algorithm_group_by")]
809    pub unsafe fn agg_var(&self, groups: &GroupsType, ddof: u8) -> Self {
810        // @scalar-opt
811        unsafe { self.as_materialized_series().agg_var(groups, ddof) }.into()
812    }
813
814    /// # Safety
815    ///
816    /// Does no bounds checks, groups must be correct.
817    #[cfg(feature = "algorithm_group_by")]
818    pub unsafe fn agg_std(&self, groups: &GroupsType, ddof: u8) -> Self {
819        // @scalar-opt
820        unsafe { self.as_materialized_series().agg_std(groups, ddof) }.into()
821    }
822
823    /// # Safety
824    ///
825    /// Does no bounds checks, groups must be correct.
826    #[cfg(feature = "algorithm_group_by")]
827    pub unsafe fn agg_list(&self, groups: &GroupsType) -> Self {
828        // @scalar-opt
829        unsafe { self.as_materialized_series().agg_list(groups) }.into()
830    }
831
    /// Count the valid (non-null) values per group.
    ///
    /// NOTE(review): this function is not `unsafe`, yet it performs no bounds
    /// checks on `groups` — the caller must pass correct, in-bounds groups.
    /// Arguably it should be `unsafe fn` like its siblings; kept safe here to
    /// preserve the public API.
    #[cfg(feature = "algorithm_group_by")]
    pub fn agg_valid_count(&self, groups: &GroupsType) -> Self {
        // @partition-opt
        // @scalar-opt
        // SAFETY: relies on the caller upholding the in-bounds contract above.
        unsafe { self.as_materialized_series().agg_valid_count(groups) }.into()
    }
841
    /// Bitwise/logical AND aggregation per group.
    ///
    /// NOTE(review): not an `unsafe fn`, but no bounds checks are done on
    /// `groups`; the caller must ensure the groups are correct.
    #[cfg(feature = "bitwise")]
    pub fn agg_and(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_and(g) })
    }
    /// Bitwise/logical OR aggregation per group.
    ///
    /// NOTE(review): not an `unsafe fn`, but no bounds checks are done on
    /// `groups`; the caller must ensure the groups are correct.
    #[cfg(feature = "bitwise")]
    pub fn agg_or(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_or(g) })
    }
    /// Bitwise/logical XOR aggregation per group.
    ///
    /// NOTE(review): not an `unsafe fn`, but no bounds checks are done on
    /// `groups`; the caller must ensure the groups are correct.
    #[cfg(feature = "bitwise")]
    pub fn agg_xor(&self, groups: &GroupsType) -> Self {
        // @partition-opt
        // @scalar-opt
        // SAFETY: relies on the caller upholding the in-bounds contract above.
        unsafe { self.as_materialized_series().agg_xor(groups) }.into()
    }
865
866    pub fn full_null(name: PlSmallStr, size: usize, dtype: &DataType) -> Self {
867        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), size)
868    }
869
870    pub fn is_empty(&self) -> bool {
871        self.len() == 0
872    }
873
874    pub fn reverse(&self) -> Column {
875        match self {
876            Column::Series(s) => s.reverse().into(),
877            Column::Partitioned(s) => s.reverse().into(),
878            Column::Scalar(_) => self.clone(),
879        }
880    }
881
882    pub fn equals(&self, other: &Column) -> bool {
883        // @scalar-opt
884        self.as_materialized_series()
885            .equals(other.as_materialized_series())
886    }
887
888    pub fn equals_missing(&self, other: &Column) -> bool {
889        // @scalar-opt
890        self.as_materialized_series()
891            .equals_missing(other.as_materialized_series())
892    }
893
894    pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
895        // @scalar-opt
896        match self {
897            Column::Series(s) => s.set_sorted_flag(sorted),
898            Column::Partitioned(s) => s.set_sorted_flag(sorted),
899            Column::Scalar(_) => {},
900        }
901    }
902
    /// Statistics flags currently known for this column.
    pub fn get_flags(&self) -> StatisticsFlags {
        match self {
            Column::Series(s) => s.get_flags(),
            // @partition-opt
            Column::Partitioned(_) => StatisticsFlags::empty(),
            // A scalar column is constant, hence sorted ascending.
            Column::Scalar(_) => {
                StatisticsFlags::IS_SORTED_ASC | StatisticsFlags::CAN_FAST_EXPLODE_LIST
            },
        }
    }
913
914    /// Returns whether the flags were set
915    pub fn set_flags(&mut self, flags: StatisticsFlags) -> bool {
916        match self {
917            Column::Series(s) => {
918                s.set_flags(flags);
919                true
920            },
921            // @partition-opt
922            Column::Partitioned(_) => false,
923            Column::Scalar(_) => false,
924        }
925    }
926
    /// Compute a hash per row into `buf`, using `build_hasher`.
    pub fn vec_hash(
        &self,
        build_hasher: PlSeedableRandomStateQuality,
        buf: &mut Vec<u64>,
    ) -> PolarsResult<()> {
        // @scalar-opt?
        self.as_materialized_series().vec_hash(build_hasher, buf)
    }
935
    /// Combine the per-row hash of this column into the existing `hashes`.
    pub fn vec_hash_combine(
        &self,
        build_hasher: PlSeedableRandomStateQuality,
        hashes: &mut [u64],
    ) -> PolarsResult<()> {
        // @scalar-opt?
        self.as_materialized_series()
            .vec_hash_combine(build_hasher, hashes)
    }
945
    /// Append the values of `other` to this column in place.
    ///
    /// Materializes `self` first (scalar/partitioned representations are lost).
    pub fn append(&mut self, other: &Column) -> PolarsResult<&mut Self> {
        // @scalar-opt
        self.into_materialized_series()
            .append(other.as_materialized_series())?;
        Ok(self)
    }
    /// Append `other` by value, allowing its buffers to be moved rather than copied.
    pub fn append_owned(&mut self, other: Column) -> PolarsResult<&mut Self> {
        self.into_materialized_series()
            .append_owned(other.take_materialized_series())?;
        Ok(self)
    }
957
    /// Compute the row indices that would sort this column according to `options`.
    ///
    /// Fast paths: empty columns, all-null columns, and columns whose sortedness
    /// flag is already known. Otherwise delegates to the materialized series.
    pub fn arg_sort(&self, options: SortOptions) -> IdxCa {
        if self.is_empty() {
            return IdxCa::from_vec(self.name().clone(), Vec::new());
        }

        if self.null_count() == self.len() {
            // We might need to maintain order so just respect the descending parameter.
            let values = if options.descending {
                (0..self.len() as IdxSize).rev().collect()
            } else {
                (0..self.len() as IdxSize).collect()
            };

            return IdxCa::from_vec(self.name().clone(), values);
        }

        // If sortedness is unknown, fall back to the generic implementation.
        let is_sorted = Some(self.is_sorted_flag());
        let Some(is_sorted) = is_sorted.filter(|v| !matches!(v, IsSorted::Not)) else {
            return self.as_materialized_series().arg_sort(options);
        };

        // Fast path: the data is sorted.
        let is_sorted_dsc = matches!(is_sorted, IsSorted::Descending);
        let invert = options.descending != is_sorted_dsc;

        let mut values = Vec::with_capacity(self.len());

        // Push indices `start..end` into `values`; when `invert` is set, emit them
        // reversed — either wholesale, or per run of equal values when
        // `maintain_order` must be respected.
        #[inline(never)]
        fn extend(
            start: IdxSize,
            end: IdxSize,
            slf: &Column,
            values: &mut Vec<IdxSize>,
            is_only_nulls: bool,
            invert: bool,
            maintain_order: bool,
        ) {
            debug_assert!(start <= end);
            debug_assert!(start as usize <= slf.len());
            debug_assert!(end as usize <= slf.len());

            // Null segments always keep their original order.
            if !invert || is_only_nulls {
                values.extend(start..end);
                return;
            }

            // If we don't have to maintain order but we have to invert, just flip it around.
            if !maintain_order {
                values.extend((start..end).rev());
                return;
            }

            // If we want to maintain order but we also need to invert, we need to invert
            // per group of items.
            //
            // @NOTE: Since the column is sorted, arg_unique can also take a fast path and
            // just do a single traversal.
            let arg_unique = slf
                .slice(start as i64, (end - start) as usize)
                .arg_unique()
                .unwrap();

            assert!(!arg_unique.has_nulls());

            let num_unique = arg_unique.len();

            // Fast path: all items are unique.
            if num_unique == (end - start) as usize {
                values.extend((start..end).rev());
                return;
            }

            if num_unique == 1 {
                values.extend(start..end);
                return;
            }

            // Walk the unique-value start offsets back to front, emitting each run
            // of equal values in its original (forward) order.
            let mut prev_idx = end - start;
            for chunk in arg_unique.downcast_iter() {
                for &idx in chunk.values().as_slice().iter().rev() {
                    values.extend(start + idx..start + prev_idx);
                    prev_idx = idx;
                }
            }
        }
        macro_rules! extend {
            ($start:expr, $end:expr) => {
                extend!($start, $end, is_only_nulls = false);
            };
            ($start:expr, $end:expr, is_only_nulls = $is_only_nulls:expr) => {
                extend(
                    $start,
                    $end,
                    self,
                    &mut values,
                    $is_only_nulls,
                    invert,
                    options.maintain_order,
                );
            };
        }

        let length = self.len() as IdxSize;
        let null_count = self.null_count() as IdxSize;

        if null_count == 0 {
            extend!(0, length);
        } else {
            // Sorted data keeps its nulls contiguous at one end; find which end.
            let has_nulls_last = self.get(self.len() - 1).unwrap().is_null();
            match (options.nulls_last, has_nulls_last) {
                (true, true) => {
                    // Current: Nulls last, Wanted: Nulls last
                    extend!(0, length - null_count);
                    extend!(length - null_count, length, is_only_nulls = true);
                },
                (true, false) => {
                    // Current: Nulls first, Wanted: Nulls last
                    extend!(null_count, length);
                    extend!(0, null_count, is_only_nulls = true);
                },
                (false, true) => {
                    // Current: Nulls last, Wanted: Nulls first
                    extend!(length - null_count, length, is_only_nulls = true);
                    extend!(0, length - null_count);
                },
                (false, false) => {
                    // Current: Nulls first, Wanted: Nulls first
                    extend!(0, null_count, is_only_nulls = true);
                    extend!(null_count, length);
                },
            }
        }

        // @NOTE: This could theoretically be pushed into the previous operation, but
        // it is questionable whether that is really worth it... probably not...
        if let Some(limit) = options.limit {
            let limit = limit.min(length);
            values.truncate(limit as usize);
        }

        IdxCa::from_vec(self.name().clone(), values)
    }
1100
    /// Indices that would sort this column, breaking ties with the `by` columns.
    pub fn arg_sort_multiple(
        &self,
        by: &[Column],
        options: &SortMultipleOptions,
    ) -> PolarsResult<IdxCa> {
        // @scalar-opt
        self.as_materialized_series().arg_sort_multiple(by, options)
    }
1109
1110    pub fn arg_unique(&self) -> PolarsResult<IdxCa> {
1111        match self {
1112            Column::Scalar(s) => Ok(IdxCa::new_vec(s.name().clone(), vec![0])),
1113            _ => self.as_materialized_series().arg_unique(),
1114        }
1115    }
1116
    /// Bit representation of the values, when the dtype supports one.
    pub fn bit_repr(&self) -> Option<BitRepr> {
        // @scalar-opt
        self.as_materialized_series().bit_repr()
    }
1121
    /// Wrap this column in a single-column [`DataFrame`].
    pub fn into_frame(self) -> DataFrame {
        // SAFETY: A single-column dataframe cannot have length mismatches or duplicate names
        unsafe { DataFrame::new_no_checks(self.len(), vec![self]) }
    }
1126
    /// Extend this column in place with the values of `other`.
    ///
    /// Materializes `self` first (scalar/partitioned representations are lost).
    pub fn extend(&mut self, other: &Column) -> PolarsResult<&mut Self> {
        // @scalar-opt
        self.into_materialized_series()
            .extend(other.as_materialized_series())?;
        Ok(self)
    }
1133
    /// Return a column whose materialization is a single contiguous chunk.
    pub fn rechunk(&self) -> Column {
        match self {
            Column::Series(s) => s.rechunk().into(),
            Column::Partitioned(s) => {
                if let Some(s) = s.lazy_as_materialized_series() {
                    // This should always hold for partitioned.
                    debug_assert_eq!(s.n_chunks(), 1)
                }
                self.clone()
            },
            Column::Scalar(s) => {
                // Only rebuild when a multi-chunk materialization is already cached.
                // NOTE(review): this assumes the freshly built ScalarColumn does not
                // carry the cached materialization over — confirm in ScalarColumn::new.
                if s.lazy_as_materialized_series()
                    .filter(|x| x.n_chunks() > 1)
                    .is_some()
                {
                    Column::Scalar(ScalarColumn::new(
                        s.name().clone(),
                        s.scalar().clone(),
                        s.len(),
                    ))
                } else {
                    self.clone()
                }
            },
        }
    }
1160
1161    pub fn explode(&self, skip_empty: bool) -> PolarsResult<Column> {
1162        self.as_materialized_series()
1163            .explode(skip_empty)
1164            .map(Column::from)
1165    }
    /// Collect all values of the column into a single list value.
    pub fn implode(&self) -> PolarsResult<ListChunked> {
        self.as_materialized_series().implode()
    }
1169
1170    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
1171        // @scalar-opt
1172        self.as_materialized_series()
1173            .fill_null(strategy)
1174            .map(Column::from)
1175    }
1176
1177    pub fn divide(&self, rhs: &Column) -> PolarsResult<Self> {
1178        // @scalar-opt
1179        self.as_materialized_series()
1180            .divide(rhs.as_materialized_series())
1181            .map(Column::from)
1182    }
1183
1184    pub fn shift(&self, periods: i64) -> Column {
1185        // @scalar-opt
1186        self.as_materialized_series().shift(periods).into()
1187    }
1188
1189    #[cfg(feature = "zip_with")]
1190    pub fn zip_with(&self, mask: &BooleanChunked, other: &Self) -> PolarsResult<Self> {
1191        // @scalar-opt
1192        self.as_materialized_series()
1193            .zip_with(mask, other.as_materialized_series())
1194            .map(Self::from)
1195    }
1196
1197    #[cfg(feature = "zip_with")]
1198    pub fn zip_with_same_type(
1199        &self,
1200        mask: &ChunkedArray<BooleanType>,
1201        other: &Column,
1202    ) -> PolarsResult<Column> {
1203        // @scalar-opt
1204        self.as_materialized_series()
1205            .zip_with_same_type(mask, other.as_materialized_series())
1206            .map(Column::from)
1207    }
1208
    /// Return a new column with all null rows removed.
    pub fn drop_nulls(&self) -> Column {
        match self {
            Column::Series(s) => s.drop_nulls().into_column(),
            // @partition-opt
            Column::Partitioned(s) => s.as_materialized_series().drop_nulls().into_column(),
            Column::Scalar(s) => s.drop_nulls().into_column(),
        }
    }
1217
    /// Packs every element into a list.
    ///
    /// Always goes through the materialized series.
    pub fn as_list(&self) -> ListChunked {
        // @scalar-opt
        // @partition-opt
        self.as_materialized_series().as_list()
    }
1224
1225    pub fn is_sorted_flag(&self) -> IsSorted {
1226        match self {
1227            Column::Series(s) => s.is_sorted_flag(),
1228            Column::Partitioned(s) => s.partitions().is_sorted_flag(),
1229            Column::Scalar(_) => IsSorted::Ascending,
1230        }
1231    }
1232
    /// Return the unique values of this column (order unspecified).
    pub fn unique(&self) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => s.unique().map(Column::from),
            // @partition-opt
            Column::Partitioned(s) => s.as_materialized_series().unique().map(Column::from),
            Column::Scalar(s) => {
                // Run `unique` on a single-value series first so this path surfaces
                // the same errors as the materialized one.
                _ = s.as_single_value_series().unique()?;
                if s.is_empty() {
                    return Ok(s.clone().into_column());
                }

                // A non-empty scalar column has exactly one unique value.
                Ok(s.resize(1).into_column())
            },
        }
    }
    /// Return the unique values, preserving the order of first occurrence.
    pub fn unique_stable(&self) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => s.unique_stable().map(Column::from),
            // @partition-opt
            Column::Partitioned(s) => s.as_materialized_series().unique_stable().map(Column::from),
            Column::Scalar(s) => {
                // Run on a single-value series first to surface the same errors as
                // the materialized path.
                _ = s.as_single_value_series().unique_stable()?;
                if s.is_empty() {
                    return Ok(s.clone().into_column());
                }

                // A non-empty scalar column has exactly one unique value.
                Ok(s.resize(1).into_column())
            },
        }
    }
1263
1264    pub fn reshape_list(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
1265        // @scalar-opt
1266        self.as_materialized_series()
1267            .reshape_list(dimensions)
1268            .map(Self::from)
1269    }
1270
1271    #[cfg(feature = "dtype-array")]
1272    pub fn reshape_array(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
1273        // @scalar-opt
1274        self.as_materialized_series()
1275            .reshape_array(dimensions)
1276            .map(Self::from)
1277    }
1278
1279    pub fn sort(&self, sort_options: SortOptions) -> PolarsResult<Self> {
1280        // @scalar-opt
1281        self.as_materialized_series()
1282            .sort(sort_options)
1283            .map(Self::from)
1284    }
1285
    /// Keep only the rows where `filter` is true.
    ///
    /// A unit-length `filter` broadcasts: `true` keeps the whole column,
    /// `false` or null clears it.
    pub fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Self> {
        match self {
            Column::Series(s) => s.filter(filter).map(Column::from),
            Column::Partitioned(s) => s.as_materialized_series().filter(filter).map(Column::from),
            Column::Scalar(s) => {
                if s.is_empty() {
                    return Ok(s.clone().into_column());
                }

                // Broadcasting
                if filter.len() == 1 {
                    return match filter.get(0) {
                        Some(true) => Ok(s.clone().into_column()),
                        _ => Ok(s.resize(0).into_column()),
                    };
                }

                // For a scalar column only the length changes: the number of kept
                // rows equals the number of set bits in the mask.
                Ok(s.resize(filter.sum().unwrap() as usize).into_column())
            },
        }
    }
1307
1308    #[cfg(feature = "random")]
1309    pub fn shuffle(&self, seed: Option<u64>) -> Self {
1310        // @scalar-opt
1311        self.as_materialized_series().shuffle(seed).into()
1312    }
1313
1314    #[cfg(feature = "random")]
1315    pub fn sample_frac(
1316        &self,
1317        frac: f64,
1318        with_replacement: bool,
1319        shuffle: bool,
1320        seed: Option<u64>,
1321    ) -> PolarsResult<Self> {
1322        self.as_materialized_series()
1323            .sample_frac(frac, with_replacement, shuffle, seed)
1324            .map(Self::from)
1325    }
1326
1327    #[cfg(feature = "random")]
1328    pub fn sample_n(
1329        &self,
1330        n: usize,
1331        with_replacement: bool,
1332        shuffle: bool,
1333        seed: Option<u64>,
1334    ) -> PolarsResult<Self> {
1335        self.as_materialized_series()
1336            .sample_n(n, with_replacement, shuffle, seed)
1337            .map(Self::from)
1338    }
1339
1340    pub fn gather_every(&self, n: usize, offset: usize) -> PolarsResult<Column> {
1341        polars_ensure!(n > 0, InvalidOperation: "gather_every(n): n should be positive");
1342        if self.len().saturating_sub(offset) == 0 {
1343            return Ok(self.clear());
1344        }
1345
1346        match self {
1347            Column::Series(s) => Ok(s.gather_every(n, offset)?.into()),
1348            Column::Partitioned(s) => {
1349                Ok(s.as_materialized_series().gather_every(n, offset)?.into())
1350            },
1351            Column::Scalar(s) => {
1352                let total = s.len() - offset;
1353                Ok(s.resize(1 + (total - 1) / n).into())
1354            },
1355        }
1356    }
1357
    /// Return this column extended with `n` copies of `value`.
    pub fn extend_constant(&self, value: AnyValue, n: usize) -> PolarsResult<Self> {
        if self.is_empty() {
            // Extending an empty column yields a pure scalar column of `value`.
            return Ok(Self::new_scalar(
                self.name().clone(),
                Scalar::new(self.dtype().clone(), value.into_static()),
                n,
            ));
        }

        match self {
            Column::Series(s) => s.extend_constant(value, n).map(Column::from),
            Column::Partitioned(s) => s.extend_constant(value, n).map(Column::from),
            Column::Scalar(s) => {
                if s.scalar().as_any_value() == value {
                    // Same value: stay in the scalar representation, just grow it.
                    Ok(s.resize(s.len() + n).into())
                } else {
                    s.as_materialized_series()
                        .extend_constant(value, n)
                        .map(Column::from)
                }
            },
        }
    }
1381
    /// Elementwise `is_finite` check; see [`Series::is_finite`].
    pub fn is_finite(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|s| s.is_finite())
    }
    /// Elementwise `is_infinite` check; see [`Series::is_infinite`].
    pub fn is_infinite(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|s| s.is_infinite())
    }
    /// Elementwise `is_nan` check; see [`Series::is_nan`].
    pub fn is_nan(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|s| s.is_nan())
    }
    /// Elementwise `is_not_nan` check; see [`Series::is_not_nan`].
    pub fn is_not_nan(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|s| s.is_not_nan())
    }
1394
    /// Truncated (toward zero) division by a scalar, with wrapping overflow semantics.
    pub fn wrapping_trunc_div_scalar<T>(&self, rhs: T) -> Self
    where
        T: Num + NumCast,
    {
        // @scalar-opt
        self.as_materialized_series()
            .wrapping_trunc_div_scalar(rhs)
            .into()
    }
1404
    /// Reduce this column to the product of its values.
    pub fn product(&self) -> PolarsResult<Scalar> {
        // @scalar-opt
        self.as_materialized_series().product()
    }
1409
    /// Iterator over the physical (underlying representation) values.
    pub fn phys_iter(&self) -> SeriesPhysIter<'_> {
        // @scalar-opt
        self.as_materialized_series().phys_iter()
    }
1414
    /// Get the value at `index`, erroring when out of bounds.
    #[inline]
    pub fn get(&self, index: usize) -> PolarsResult<AnyValue> {
        polars_ensure!(index < self.len(), oob = index, self.len());

        // SAFETY: Bounds check done just before.
        Ok(unsafe { self.get_unchecked(index) })
    }
    /// Get the value at `index` without a bounds check.
    ///
    /// # Safety
    ///
    /// Does not perform bounds check on `index`
    #[inline(always)]
    pub unsafe fn get_unchecked(&self, index: usize) -> AnyValue {
        debug_assert!(index < self.len());

        match self {
            Column::Series(s) => unsafe { s.get_unchecked(index) },
            Column::Partitioned(s) => unsafe { s.get_unchecked(index) },
            // Every row of a scalar column holds the same value; `index` is irrelevant.
            Column::Scalar(s) => s.scalar().as_any_value(),
        }
    }
1435
    /// Get the object value at `index` for object-dtype columns.
    #[cfg(feature = "object")]
    pub fn get_object(
        &self,
        index: usize,
    ) -> Option<&dyn crate::chunked_array::object::PolarsObjectSafe> {
        self.as_materialized_series().get_object(index)
    }
1443
    /// Elementwise AND (via `Series & Series`), with length-1 broadcasting.
    pub fn bitand(&self, rhs: &Self) -> PolarsResult<Self> {
        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l & r)
    }
    /// Elementwise OR (via `Series | Series`), with length-1 broadcasting.
    pub fn bitor(&self, rhs: &Self) -> PolarsResult<Self> {
        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l | r)
    }
    /// Elementwise XOR (via `Series ^ Series`), with length-1 broadcasting.
    pub fn bitxor(&self, rhs: &Self) -> PolarsResult<Self> {
        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l ^ r)
    }
1453
    /// Owned addition: when both sides are plain series, forwards to
    /// `Series::try_add_owned` (which may reuse buffers); otherwise falls back to `+`.
    pub fn try_add_owned(self, other: Self) -> PolarsResult<Self> {
        match (self, other) {
            (Column::Series(lhs), Column::Series(rhs)) => {
                lhs.take().try_add_owned(rhs.take()).map(Column::from)
            },
            (lhs, rhs) => lhs + rhs,
        }
    }
    /// Owned subtraction; see [`try_add_owned`][Self::try_add_owned].
    pub fn try_sub_owned(self, other: Self) -> PolarsResult<Self> {
        match (self, other) {
            (Column::Series(lhs), Column::Series(rhs)) => {
                lhs.take().try_sub_owned(rhs.take()).map(Column::from)
            },
            (lhs, rhs) => lhs - rhs,
        }
    }
    /// Owned multiplication; see [`try_add_owned`][Self::try_add_owned].
    pub fn try_mul_owned(self, other: Self) -> PolarsResult<Self> {
        match (self, other) {
            (Column::Series(lhs), Column::Series(rhs)) => {
                lhs.take().try_mul_owned(rhs.take()).map(Column::from)
            },
            (lhs, rhs) => lhs * rhs,
        }
    }
1478
    /// String rendering of the value at `index` (errors when out of bounds).
    pub(crate) fn str_value(&self, index: usize) -> PolarsResult<Cow<str>> {
        Ok(self.get(index)?.str_value())
    }
1482
    /// Reduce this column to its minimum value as a [`Scalar`].
    pub fn min_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.min_reduce(),
            Column::Partitioned(s) => s.min_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().min_reduce()
            },
        }
    }
    /// Reduce this column to its maximum value as a [`Scalar`].
    pub fn max_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.max_reduce(),
            Column::Partitioned(s) => s.max_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().max_reduce()
            },
        }
    }
    /// Reduce this column to its median as a [`Scalar`].
    pub fn median_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.median_reduce(),
            Column::Partitioned(s) => s.as_materialized_series().median_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().median_reduce()
            },
        }
    }
    /// Reduce this column to its mean as a [`Scalar`].
    pub fn mean_reduce(&self) -> Scalar {
        match self {
            Column::Series(s) => s.mean_reduce(),
            Column::Partitioned(s) => s.as_materialized_series().mean_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().mean_reduce()
            },
        }
    }
    /// Reduce this column to its standard deviation (`ddof` degrees of freedom).
    pub fn std_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.std_reduce(ddof),
            Column::Partitioned(s) => s.as_materialized_series().std_reduce(ddof),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().std_reduce(ddof)
            },
        }
    }
    /// Reduce this column to its variance (`ddof` degrees of freedom).
    pub fn var_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.var_reduce(ddof),
            Column::Partitioned(s) => s.as_materialized_series().var_reduce(ddof),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().var_reduce(ddof)
            },
        }
    }
    /// Reduce this column to the sum of its values as a [`Scalar`].
    pub fn sum_reduce(&self) -> PolarsResult<Scalar> {
        // @partition-opt
        // @scalar-opt
        self.as_materialized_series().sum_reduce()
    }
    /// Reduce with bitwise/logical AND over all values.
    pub fn and_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.and_reduce(),
            Column::Partitioned(s) => s.and_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().and_reduce()
            },
        }
    }
    /// Reduce with bitwise/logical OR over all values.
    pub fn or_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.or_reduce(),
            Column::Partitioned(s) => s.or_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().or_reduce()
            },
        }
    }
    /// Reduce with bitwise/logical XOR over all values.
    pub fn xor_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.xor_reduce(),
            // @partition-opt
            Column::Partitioned(s) => s.as_materialized_series().xor_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                //
                // We have to deal with the fact that xor is 0 if there is an even number of
                // elements and the value if there is an odd number of elements. If there are zero
                // elements the result should be `null`.
                //
                // `2 - len % 2` maps odd lengths to a 1-value series and even lengths
                // (including 0) to a 2-value series. NOTE(review): for the empty column
                // this appears to yield x^x rather than null — confirm against the
                // intent stated above.
                s.as_n_values_series(2 - s.len() % 2).xor_reduce()
            },
        }
    }
    /// Number of unique values in this column.
    ///
    /// NOTE(review): the scalar branch computes this on a single-value series,
    /// so an empty scalar column presumably reports 1 rather than 0 — confirm.
    pub fn n_unique(&self) -> PolarsResult<usize> {
        match self {
            Column::Series(s) => s.n_unique(),
            Column::Partitioned(s) => s.partitions().n_unique(),
            Column::Scalar(s) => s.as_single_value_series().n_unique(),
        }
    }
    /// Reduce this column to the given quantile, computed with `method`.
    pub fn quantile_reduce(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Scalar> {
        self.as_materialized_series()
            .quantile_reduce(quantile, method)
    }
1603
    /// Estimated heap size of this column in bytes (of the materialized series).
    pub(crate) fn estimated_size(&self) -> usize {
        // @scalar-opt
        self.as_materialized_series().estimated_size()
    }
1608
    /// Return a sorted copy of this column using the full [`SortOptions`].
    pub fn sort_with(&self, options: SortOptions) -> PolarsResult<Self> {
        match self {
            Column::Series(s) => s.sort_with(options).map(Self::from),
            // @partition-opt
            Column::Partitioned(s) => s
                .as_materialized_series()
                .sort_with(options)
                .map(Self::from),
            Column::Scalar(s) => {
                // This makes this function throw the same errors as Series::sort_with
                _ = s.as_single_value_series().sort_with(options)?;

                // A scalar column is already sorted.
                Ok(self.clone())
            },
        }
    }
1625
    /// Apply an infallible elementwise `Series -> BooleanChunked` function.
    pub fn map_unary_elementwise_to_bool(
        &self,
        f: impl Fn(&Series) -> BooleanChunked,
    ) -> BooleanChunked {
        self.try_map_unary_elementwise_to_bool(|s| Ok(f(s)))
            .unwrap()
    }
    /// Apply a fallible elementwise `Series -> BooleanChunked` function.
    ///
    /// For scalar columns, `f` runs on a single-value series and the result is
    /// broadcast back to the column's length.
    pub fn try_map_unary_elementwise_to_bool(
        &self,
        f: impl Fn(&Series) -> PolarsResult<BooleanChunked>,
    ) -> PolarsResult<BooleanChunked> {
        match self {
            Column::Series(s) => f(s),
            Column::Partitioned(s) => f(s.as_materialized_series()),
            Column::Scalar(s) => Ok(f(&s.as_single_value_series())?.new_from_index(0, s.len())),
        }
    }
1643
    /// Apply an infallible elementwise `Series -> Series` function.
    pub fn apply_unary_elementwise(&self, f: impl Fn(&Series) -> Series) -> Column {
        self.try_apply_unary_elementwise(|s| Ok(f(s))).unwrap()
    }
    /// Apply a fallible elementwise `Series -> Series` function.
    ///
    /// For scalar columns, `f` runs on a single-value series and the result is
    /// rebuilt as a scalar column of the original length.
    pub fn try_apply_unary_elementwise(
        &self,
        f: impl Fn(&Series) -> PolarsResult<Series>,
    ) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => f(s).map(Column::from),
            Column::Partitioned(s) => s.try_apply_unary_elementwise(f).map(Self::from),
            Column::Scalar(s) => Ok(ScalarColumn::from_single_value_series(
                f(&s.as_single_value_series())?,
                s.len(),
            )
            .into()),
        }
    }
1661
    /// Apply an infallible binary elementwise operation with length-1 broadcasting.
    pub fn apply_broadcasting_binary_elementwise(
        &self,
        other: &Self,
        op: impl Fn(&Series, &Series) -> Series,
    ) -> PolarsResult<Column> {
        self.try_apply_broadcasting_binary_elementwise(other, |lhs, rhs| Ok(op(lhs, rhs)))
    }
1669    pub fn try_apply_broadcasting_binary_elementwise(
1670        &self,
1671        other: &Self,
1672        op: impl Fn(&Series, &Series) -> PolarsResult<Series>,
1673    ) -> PolarsResult<Column> {
1674        fn output_length(a: &Column, b: &Column) -> PolarsResult<usize> {
1675            match (a.len(), b.len()) {
1676                // broadcasting
1677                (1, o) | (o, 1) => Ok(o),
1678                // equal
1679                (a, b) if a == b => Ok(a),
1680                // unequal
1681                (a, b) => {
1682                    polars_bail!(InvalidOperation: "cannot do a binary operation on columns of different lengths: got {} and {}", a, b)
1683                },
1684            }
1685        }
1686
1687        // Here we rely on the underlying broadcast operations.
1688        let length = output_length(self, other)?;
1689        match (self, other) {
1690            (Column::Series(lhs), Column::Series(rhs)) => op(lhs, rhs).map(Column::from),
1691            (Column::Series(lhs), Column::Scalar(rhs)) => {
1692                op(lhs, &rhs.as_single_value_series()).map(Column::from)
1693            },
1694            (Column::Scalar(lhs), Column::Series(rhs)) => {
1695                op(&lhs.as_single_value_series(), rhs).map(Column::from)
1696            },
1697            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
1698                let lhs = lhs.as_single_value_series();
1699                let rhs = rhs.as_single_value_series();
1700
1701                Ok(ScalarColumn::from_single_value_series(op(&lhs, &rhs)?, length).into_column())
1702            },
1703            // @partition-opt
1704            (lhs, rhs) => {
1705                op(lhs.as_materialized_series(), rhs.as_materialized_series()).map(Column::from)
1706            },
1707        }
1708    }
1709
1710    pub fn apply_binary_elementwise(
1711        &self,
1712        other: &Self,
1713        f: impl Fn(&Series, &Series) -> Series,
1714        f_lb: impl Fn(&Scalar, &Series) -> Series,
1715        f_rb: impl Fn(&Series, &Scalar) -> Series,
1716    ) -> Column {
1717        self.try_apply_binary_elementwise(
1718            other,
1719            |lhs, rhs| Ok(f(lhs, rhs)),
1720            |lhs, rhs| Ok(f_lb(lhs, rhs)),
1721            |lhs, rhs| Ok(f_rb(lhs, rhs)),
1722        )
1723        .unwrap()
1724    }
1725    pub fn try_apply_binary_elementwise(
1726        &self,
1727        other: &Self,
1728        f: impl Fn(&Series, &Series) -> PolarsResult<Series>,
1729        f_lb: impl Fn(&Scalar, &Series) -> PolarsResult<Series>,
1730        f_rb: impl Fn(&Series, &Scalar) -> PolarsResult<Series>,
1731    ) -> PolarsResult<Column> {
1732        debug_assert_eq!(self.len(), other.len());
1733
1734        match (self, other) {
1735            (Column::Series(lhs), Column::Series(rhs)) => f(lhs, rhs).map(Column::from),
1736            (Column::Series(lhs), Column::Scalar(rhs)) => f_rb(lhs, rhs.scalar()).map(Column::from),
1737            (Column::Scalar(lhs), Column::Series(rhs)) => f_lb(lhs.scalar(), rhs).map(Column::from),
1738            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
1739                let lhs = lhs.as_single_value_series();
1740                let rhs = rhs.as_single_value_series();
1741
1742                Ok(
1743                    ScalarColumn::from_single_value_series(f(&lhs, &rhs)?, self.len())
1744                        .into_column(),
1745                )
1746            },
1747            // @partition-opt
1748            (lhs, rhs) => {
1749                f(lhs.as_materialized_series(), rhs.as_materialized_series()).map(Column::from)
1750            },
1751        }
1752    }
1753
1754    #[cfg(feature = "approx_unique")]
1755    pub fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
1756        match self {
1757            Column::Series(s) => s.approx_n_unique(),
1758            // @partition-opt
1759            Column::Partitioned(s) => s.as_materialized_series().approx_n_unique(),
1760            Column::Scalar(s) => {
1761                // @NOTE: We do this for the error handling.
1762                s.as_single_value_series().approx_n_unique()?;
1763                Ok(1)
1764            },
1765        }
1766    }
1767
1768    pub fn n_chunks(&self) -> usize {
1769        match self {
1770            Column::Series(s) => s.n_chunks(),
1771            Column::Scalar(s) => s.lazy_as_materialized_series().map_or(1, |x| x.n_chunks()),
1772            Column::Partitioned(s) => {
1773                if let Some(s) = s.lazy_as_materialized_series() {
1774                    // This should always hold for partitioned.
1775                    debug_assert_eq!(s.n_chunks(), 1)
1776                }
1777                1
1778            },
1779        }
1780    }
1781
    /// Get a type-erased total-ordering comparator over this column's values.
    ///
    /// Currently always materializes the column first.
    #[expect(clippy::wrong_self_convention)]
    pub(crate) fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
        // @scalar-opt
        self.as_materialized_series().into_total_ord_inner()
    }
    /// Get a type-erased total-equality comparator over this column's values.
    ///
    /// Currently always materializes the column first.
    #[expect(unused, clippy::wrong_self_convention)]
    pub(crate) fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
        // @scalar-opt
        self.as_materialized_series().into_total_eq_inner()
    }
1792
1793    pub fn rechunk_to_arrow(self, compat_level: CompatLevel) -> Box<dyn Array> {
1794        // Rechunk to one chunk if necessary
1795        let mut series = self.take_materialized_series();
1796        if series.n_chunks() > 1 {
1797            series = series.rechunk();
1798        }
1799        series.to_arrow(0, compat_level)
1800    }
1801
1802    pub fn trim_lists_to_normalized_offsets(&self) -> Option<Column> {
1803        self.as_materialized_series()
1804            .trim_lists_to_normalized_offsets()
1805            .map(Column::from)
1806    }
1807
1808    pub fn propagate_nulls(&self) -> Option<Column> {
1809        self.as_materialized_series()
1810            .propagate_nulls()
1811            .map(Column::from)
1812    }
1813}
1814
1815impl Default for Column {
1816    fn default() -> Self {
1817        Self::new_scalar(
1818            PlSmallStr::EMPTY,
1819            Scalar::new(DataType::Int64, AnyValue::Null),
1820            0,
1821        )
1822    }
1823}
1824
1825impl PartialEq for Column {
1826    fn eq(&self, other: &Self) -> bool {
1827        // @scalar-opt
1828        self.as_materialized_series()
1829            .eq(other.as_materialized_series())
1830    }
1831}
1832
1833impl From<Series> for Column {
1834    #[inline]
1835    fn from(series: Series) -> Self {
1836        // We instantiate a Scalar Column if the Series is length is 1. This makes it possible for
1837        // future operations to be faster.
1838        if series.len() == 1 {
1839            return Self::Scalar(ScalarColumn::unit_scalar_from_series(series));
1840        }
1841
1842        Self::Series(SeriesColumn::new(series))
1843    }
1844}
1845
1846impl<T: IntoSeries> IntoColumn for T {
1847    #[inline]
1848    fn into_column(self) -> Column {
1849        self.into_series().into()
1850    }
1851}
1852
/// Identity conversion so generic code can take `impl IntoColumn` and accept a `Column`.
impl IntoColumn for Column {
    #[inline(always)]
    fn into_column(self) -> Column {
        self
    }
}
1859
/// Serialization helper that serializes a [`Column`] as its materialized [`Series`].
///
/// We don't want to serialize scalar columns directly, and we also don't want to implement
/// `From<Column>` for [`Series`] — such casts should stay explicit. This newtype bridges the
/// two for serde only.
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(into = "Series"))]
struct _SerdeSeries(Series);
1868
1869impl From<Column> for _SerdeSeries {
1870    #[inline]
1871    fn from(value: Column) -> Self {
1872        Self(value.take_materialized_series())
1873    }
1874}
1875
1876impl From<_SerdeSeries> for Series {
1877    #[inline]
1878    fn from(value: _SerdeSeries) -> Self {
1879        value.0
1880    }
1881}