Skip to main content

polars_core/frame/column/
mod.rs

1use std::borrow::Cow;
2
3use arrow::bitmap::{Bitmap, BitmapBuilder};
4use arrow::trusted_len::TrustMyLength;
5use num_traits::{Num, NumCast};
6use polars_compute::rolling::QuantileMethod;
7use polars_error::PolarsResult;
8use polars_utils::aliases::PlSeedableRandomStateQuality;
9use polars_utils::index::check_bounds;
10use polars_utils::pl_str::PlSmallStr;
11pub use scalar::ScalarColumn;
12
13use self::compare_inner::{TotalEqInner, TotalOrdInner};
14use self::gather::check_bounds_ca;
15use self::series::SeriesColumn;
16use crate::chunked_array::cast::CastOptions;
17use crate::chunked_array::flags::StatisticsFlags;
18use crate::datatypes::ReshapeDimension;
19use crate::prelude::*;
20use crate::series::{BitRepr, IsSorted, SeriesPhysIter};
21use crate::utils::{Container, slice_offsets};
22use crate::{HEAD_DEFAULT_LENGTH, TAIL_DEFAULT_LENGTH};
23
24mod arithmetic;
25mod compare;
26mod scalar;
27mod series;
28
/// A column within a [`DataFrame`].
///
/// This is lazily initialized to a [`Series`] with methods like
/// [`as_materialized_series`][Column::as_materialized_series] and
/// [`take_materialized_series`][Column::take_materialized_series].
///
/// Currently, there are two ways to represent a [`Column`].
/// 1. A [`Series`] of values
/// 2. A [`ScalarColumn`] that repeats a single [`Scalar`]
// NOTE(review): the `///` text above is intentionally left untouched. With the `dsl-schema`
// feature, `schemars` presumably uses doc comments as schema descriptions — confirm before
// rewording it or adding `///` docs to the variants.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum Column {
    // Representation 1 from the type docs: the values are held by a `SeriesColumn`.
    Series(SeriesColumn),
    // Representation 2 from the type docs: a `ScalarColumn` repeating a single `Scalar`.
    Scalar(ScalarColumn),
}
45
/// Convert `Self` into a [`Column`]
pub trait IntoColumn: Sized {
    /// Consumes `self` and returns it as a [`Column`].
    fn into_column(self) -> Column;
}
50
51impl Column {
52    #[inline]
53    #[track_caller]
54    pub fn new<T, Phantom>(name: PlSmallStr, values: T) -> Self
55    where
56        Phantom: ?Sized,
57        Series: NamedFrom<T, Phantom>,
58    {
59        Self::Series(SeriesColumn::new(NamedFrom::new(name, values)))
60    }
61
62    #[inline]
63    pub fn new_empty(name: PlSmallStr, dtype: &DataType) -> Self {
64        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), 0)
65    }
66
67    #[inline]
68    pub fn new_scalar(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
69        Self::Scalar(ScalarColumn::new(name, scalar, length))
70    }
71
72    pub fn new_row_index(name: PlSmallStr, offset: IdxSize, length: usize) -> PolarsResult<Column> {
73        let Ok(length) = IdxSize::try_from(length) else {
74            polars_bail!(
75                ComputeError:
76                "row index length {} overflows IdxSize::MAX ({})",
77                length,
78                IdxSize::MAX,
79            )
80        };
81
82        if offset.checked_add(length).is_none() {
83            polars_bail!(
84                ComputeError:
85                "row index with offset {} overflows on dataframe with height {}",
86                offset, length
87            )
88        }
89
90        let range = offset..offset + length;
91
92        let mut ca = IdxCa::from_vec(name, range.collect());
93        ca.set_sorted_flag(IsSorted::Ascending);
94        let col = ca.into_series().into();
95
96        Ok(col)
97    }
98
99    // # Materialize
100    /// Get a reference to a [`Series`] for this [`Column`]
101    ///
102    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
103    #[inline]
104    pub fn as_materialized_series(&self) -> &Series {
105        match self {
106            Column::Series(s) => s,
107            Column::Scalar(s) => s.as_materialized_series(),
108        }
109    }
110
111    /// If the memory repr of this Column is a scalar, a unit-length Series will
112    /// be returned.
113    #[inline]
114    pub fn as_materialized_series_maintain_scalar(&self) -> Series {
115        match self {
116            Column::Scalar(s) => s.as_single_value_series(),
117            v => v.as_materialized_series().clone(),
118        }
119    }
120
121    /// Returns the backing `Series` for the values of this column.
122    ///
123    /// * For `Column::Series` columns, simply returns the inner `Series`.
124    /// * For `Column::Scalar` columns, returns an empty or unit length series.
125    ///
126    /// # Note
127    /// This method is safe to use. However, care must be taken when operating on the returned
128    /// `Series` to ensure result correctness. E.g. It is suitable to perform elementwise operations
129    /// on it, however e.g. aggregations will return unspecified results.
130    pub fn _get_backing_series(&self) -> Series {
131        match self {
132            Column::Series(s) => (**s).clone(),
133            Column::Scalar(s) => s.as_single_value_series(),
134        }
135    }
136
137    /// Constructs a new `Column` of the same variant as `self` from a backing `Series` representing
138    /// the values.
139    ///
140    /// # Panics
141    /// Panics if:
142    /// * `self` is `Column::Series` and the length of `new_s` does not match that of `self`.
143    /// * `self` is `Column::Scalar` and if either:
144    ///   * `self` is not empty and `new_s` is not of unit length.
145    ///   * `self` is empty and `new_s` is not empty.
146    pub fn _to_new_from_backing(&self, new_s: Series) -> Self {
147        match self {
148            Column::Series(s) => {
149                assert_eq!(new_s.len(), s.len());
150                Column::Series(SeriesColumn::new(new_s))
151            },
152            Column::Scalar(s) => {
153                assert_eq!(new_s.len(), s.as_single_value_series().len());
154                Column::Scalar(ScalarColumn::from_single_value_series(new_s, self.len()))
155            },
156        }
157    }
158
159    /// Turn [`Column`] into a [`Column::Series`].
160    ///
161    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
162    #[inline]
163    pub fn into_materialized_series(&mut self) -> &mut Series {
164        match self {
165            Column::Series(s) => s,
166            Column::Scalar(s) => {
167                let series = std::mem::replace(
168                    s,
169                    ScalarColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),
170                )
171                .take_materialized_series();
172                *self = Column::Series(series.into());
173                let Column::Series(s) = self else {
174                    unreachable!();
175                };
176                s
177            },
178        }
179    }
180    /// Take [`Series`] from a [`Column`]
181    ///
182    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
183    #[inline]
184    pub fn take_materialized_series(self) -> Series {
185        match self {
186            Column::Series(s) => s.take(),
187            Column::Scalar(s) => s.take_materialized_series(),
188        }
189    }
190
191    #[inline]
192    pub fn dtype(&self) -> &DataType {
193        match self {
194            Column::Series(s) => s.dtype(),
195            Column::Scalar(s) => s.dtype(),
196        }
197    }
198
199    #[inline]
200    pub fn field(&self) -> Cow<'_, Field> {
201        match self {
202            Column::Series(s) => s.field(),
203            Column::Scalar(s) => match s.lazy_as_materialized_series() {
204                None => Cow::Owned(Field::new(s.name().clone(), s.dtype().clone())),
205                Some(s) => s.field(),
206            },
207        }
208    }
209
210    #[inline]
211    pub fn name(&self) -> &PlSmallStr {
212        match self {
213            Column::Series(s) => s.name(),
214            Column::Scalar(s) => s.name(),
215        }
216    }
217
218    #[inline]
219    pub fn len(&self) -> usize {
220        match self {
221            Column::Series(s) => s.len(),
222            Column::Scalar(s) => s.len(),
223        }
224    }
225
226    #[inline]
227    pub fn with_name(mut self, name: PlSmallStr) -> Column {
228        self.rename(name);
229        self
230    }
231
232    #[inline]
233    pub fn rename(&mut self, name: PlSmallStr) {
234        match self {
235            Column::Series(s) => _ = s.rename(name),
236            Column::Scalar(s) => _ = s.rename(name),
237        }
238    }
239
240    // # Downcasting
241    #[inline]
242    pub fn as_series(&self) -> Option<&Series> {
243        match self {
244            Column::Series(s) => Some(s),
245            _ => None,
246        }
247    }
248    #[inline]
249    pub fn as_scalar_column(&self) -> Option<&ScalarColumn> {
250        match self {
251            Column::Scalar(s) => Some(s),
252            _ => None,
253        }
254    }
255    #[inline]
256    pub fn as_scalar_column_mut(&mut self) -> Option<&mut ScalarColumn> {
257        match self {
258            Column::Scalar(s) => Some(s),
259            _ => None,
260        }
261    }
262
263    // # Try to Chunked Arrays
264    pub fn try_bool(&self) -> Option<&BooleanChunked> {
265        self.as_materialized_series().try_bool()
266    }
267    pub fn try_i8(&self) -> Option<&Int8Chunked> {
268        self.as_materialized_series().try_i8()
269    }
270    pub fn try_i16(&self) -> Option<&Int16Chunked> {
271        self.as_materialized_series().try_i16()
272    }
273    pub fn try_i32(&self) -> Option<&Int32Chunked> {
274        self.as_materialized_series().try_i32()
275    }
276    pub fn try_i64(&self) -> Option<&Int64Chunked> {
277        self.as_materialized_series().try_i64()
278    }
279    pub fn try_u8(&self) -> Option<&UInt8Chunked> {
280        self.as_materialized_series().try_u8()
281    }
282    pub fn try_u16(&self) -> Option<&UInt16Chunked> {
283        self.as_materialized_series().try_u16()
284    }
285    pub fn try_u32(&self) -> Option<&UInt32Chunked> {
286        self.as_materialized_series().try_u32()
287    }
288    pub fn try_u64(&self) -> Option<&UInt64Chunked> {
289        self.as_materialized_series().try_u64()
290    }
291    #[cfg(feature = "dtype-u128")]
292    pub fn try_u128(&self) -> Option<&UInt128Chunked> {
293        self.as_materialized_series().try_u128()
294    }
295    #[cfg(feature = "dtype-f16")]
296    pub fn try_f16(&self) -> Option<&Float16Chunked> {
297        self.as_materialized_series().try_f16()
298    }
299    pub fn try_f32(&self) -> Option<&Float32Chunked> {
300        self.as_materialized_series().try_f32()
301    }
302    pub fn try_f64(&self) -> Option<&Float64Chunked> {
303        self.as_materialized_series().try_f64()
304    }
305    pub fn try_str(&self) -> Option<&StringChunked> {
306        self.as_materialized_series().try_str()
307    }
308    pub fn try_list(&self) -> Option<&ListChunked> {
309        self.as_materialized_series().try_list()
310    }
311    pub fn try_binary(&self) -> Option<&BinaryChunked> {
312        self.as_materialized_series().try_binary()
313    }
314    pub fn try_idx(&self) -> Option<&IdxCa> {
315        self.as_materialized_series().try_idx()
316    }
317    pub fn try_binary_offset(&self) -> Option<&BinaryOffsetChunked> {
318        self.as_materialized_series().try_binary_offset()
319    }
320    #[cfg(feature = "dtype-datetime")]
321    pub fn try_datetime(&self) -> Option<&DatetimeChunked> {
322        self.as_materialized_series().try_datetime()
323    }
324    #[cfg(feature = "dtype-struct")]
325    pub fn try_struct(&self) -> Option<&StructChunked> {
326        self.as_materialized_series().try_struct()
327    }
328    #[cfg(feature = "dtype-decimal")]
329    pub fn try_decimal(&self) -> Option<&DecimalChunked> {
330        self.as_materialized_series().try_decimal()
331    }
332    #[cfg(feature = "dtype-array")]
333    pub fn try_array(&self) -> Option<&ArrayChunked> {
334        self.as_materialized_series().try_array()
335    }
336    #[cfg(feature = "dtype-categorical")]
337    pub fn try_cat<T: PolarsCategoricalType>(&self) -> Option<&CategoricalChunked<T>> {
338        self.as_materialized_series().try_cat::<T>()
339    }
340    #[cfg(feature = "dtype-categorical")]
341    pub fn try_cat8(&self) -> Option<&Categorical8Chunked> {
342        self.as_materialized_series().try_cat8()
343    }
344    #[cfg(feature = "dtype-categorical")]
345    pub fn try_cat16(&self) -> Option<&Categorical16Chunked> {
346        self.as_materialized_series().try_cat16()
347    }
348    #[cfg(feature = "dtype-categorical")]
349    pub fn try_cat32(&self) -> Option<&Categorical32Chunked> {
350        self.as_materialized_series().try_cat32()
351    }
352    #[cfg(feature = "dtype-date")]
353    pub fn try_date(&self) -> Option<&DateChunked> {
354        self.as_materialized_series().try_date()
355    }
356    #[cfg(feature = "dtype-duration")]
357    pub fn try_duration(&self) -> Option<&DurationChunked> {
358        self.as_materialized_series().try_duration()
359    }
360
361    // # To Chunked Arrays
362    pub fn bool(&self) -> PolarsResult<&BooleanChunked> {
363        self.as_materialized_series().bool()
364    }
365    pub fn i8(&self) -> PolarsResult<&Int8Chunked> {
366        self.as_materialized_series().i8()
367    }
368    pub fn i16(&self) -> PolarsResult<&Int16Chunked> {
369        self.as_materialized_series().i16()
370    }
371    pub fn i32(&self) -> PolarsResult<&Int32Chunked> {
372        self.as_materialized_series().i32()
373    }
374    pub fn i64(&self) -> PolarsResult<&Int64Chunked> {
375        self.as_materialized_series().i64()
376    }
377    #[cfg(feature = "dtype-i128")]
378    pub fn i128(&self) -> PolarsResult<&Int128Chunked> {
379        self.as_materialized_series().i128()
380    }
381    pub fn u8(&self) -> PolarsResult<&UInt8Chunked> {
382        self.as_materialized_series().u8()
383    }
384    pub fn u16(&self) -> PolarsResult<&UInt16Chunked> {
385        self.as_materialized_series().u16()
386    }
387    pub fn u32(&self) -> PolarsResult<&UInt32Chunked> {
388        self.as_materialized_series().u32()
389    }
390    pub fn u64(&self) -> PolarsResult<&UInt64Chunked> {
391        self.as_materialized_series().u64()
392    }
393    #[cfg(feature = "dtype-u128")]
394    pub fn u128(&self) -> PolarsResult<&UInt128Chunked> {
395        self.as_materialized_series().u128()
396    }
397    #[cfg(feature = "dtype-f16")]
398    pub fn f16(&self) -> PolarsResult<&Float16Chunked> {
399        self.as_materialized_series().f16()
400    }
401    pub fn f32(&self) -> PolarsResult<&Float32Chunked> {
402        self.as_materialized_series().f32()
403    }
404    pub fn f64(&self) -> PolarsResult<&Float64Chunked> {
405        self.as_materialized_series().f64()
406    }
407    pub fn str(&self) -> PolarsResult<&StringChunked> {
408        self.as_materialized_series().str()
409    }
410    pub fn list(&self) -> PolarsResult<&ListChunked> {
411        self.as_materialized_series().list()
412    }
413    pub fn binary(&self) -> PolarsResult<&BinaryChunked> {
414        self.as_materialized_series().binary()
415    }
416    pub fn idx(&self) -> PolarsResult<&IdxCa> {
417        self.as_materialized_series().idx()
418    }
419    pub fn binary_offset(&self) -> PolarsResult<&BinaryOffsetChunked> {
420        self.as_materialized_series().binary_offset()
421    }
422    #[cfg(feature = "dtype-datetime")]
423    pub fn datetime(&self) -> PolarsResult<&DatetimeChunked> {
424        self.as_materialized_series().datetime()
425    }
426    #[cfg(feature = "dtype-struct")]
427    pub fn struct_(&self) -> PolarsResult<&StructChunked> {
428        self.as_materialized_series().struct_()
429    }
430    #[cfg(feature = "dtype-decimal")]
431    pub fn decimal(&self) -> PolarsResult<&DecimalChunked> {
432        self.as_materialized_series().decimal()
433    }
434    #[cfg(feature = "dtype-array")]
435    pub fn array(&self) -> PolarsResult<&ArrayChunked> {
436        self.as_materialized_series().array()
437    }
438    #[cfg(feature = "dtype-categorical")]
439    pub fn cat<T: PolarsCategoricalType>(&self) -> PolarsResult<&CategoricalChunked<T>> {
440        self.as_materialized_series().cat::<T>()
441    }
442    #[cfg(feature = "dtype-categorical")]
443    pub fn cat8(&self) -> PolarsResult<&Categorical8Chunked> {
444        self.as_materialized_series().cat8()
445    }
446    #[cfg(feature = "dtype-categorical")]
447    pub fn cat16(&self) -> PolarsResult<&Categorical16Chunked> {
448        self.as_materialized_series().cat16()
449    }
450    #[cfg(feature = "dtype-categorical")]
451    pub fn cat32(&self) -> PolarsResult<&Categorical32Chunked> {
452        self.as_materialized_series().cat32()
453    }
454    #[cfg(feature = "dtype-date")]
455    pub fn date(&self) -> PolarsResult<&DateChunked> {
456        self.as_materialized_series().date()
457    }
458    #[cfg(feature = "dtype-duration")]
459    pub fn duration(&self) -> PolarsResult<&DurationChunked> {
460        self.as_materialized_series().duration()
461    }
462
463    // # Casting
464    pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {
465        match self {
466            Column::Series(s) => s.cast_with_options(dtype, options).map(Column::from),
467            Column::Scalar(s) => s.cast_with_options(dtype, options).map(Column::from),
468        }
469    }
470    pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Self> {
471        match self {
472            Column::Series(s) => s.strict_cast(dtype).map(Column::from),
473            Column::Scalar(s) => s.strict_cast(dtype).map(Column::from),
474        }
475    }
476    pub fn cast(&self, dtype: &DataType) -> PolarsResult<Column> {
477        match self {
478            Column::Series(s) => s.cast(dtype).map(Column::from),
479            Column::Scalar(s) => s.cast(dtype).map(Column::from),
480        }
481    }
482    /// # Safety
483    ///
484    /// This can lead to invalid memory access in downstream code.
485    pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
486        match self {
487            Column::Series(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
488            Column::Scalar(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
489        }
490    }
491
492    #[must_use]
493    pub fn clear(&self) -> Self {
494        match self {
495            Column::Series(s) => s.clear().into(),
496            Column::Scalar(s) => s.resize(0).into(),
497        }
498    }
499
500    #[inline]
501    pub fn shrink_to_fit(&mut self) {
502        match self {
503            Column::Series(s) => s.shrink_to_fit(),
504            Column::Scalar(_) => {},
505        }
506    }
507
508    #[inline]
509    pub fn new_from_index(&self, index: usize, length: usize) -> Self {
510        if index >= self.len() {
511            return Self::full_null(self.name().clone(), length, self.dtype());
512        }
513
514        match self {
515            Column::Series(s) => {
516                // SAFETY: Bounds check done before.
517                let av = unsafe { s.get_unchecked(index) };
518                let scalar = Scalar::new(self.dtype().clone(), av.into_static());
519                Self::new_scalar(self.name().clone(), scalar, length)
520            },
521            Column::Scalar(s) => s.resize(length).into(),
522        }
523    }
524
525    #[inline]
526    pub fn has_nulls(&self) -> bool {
527        match self {
528            Self::Series(s) => s.has_nulls(),
529            Self::Scalar(s) => s.has_nulls(),
530        }
531    }
532
533    #[inline]
534    pub fn is_null(&self) -> BooleanChunked {
535        match self {
536            Self::Series(s) => s.is_null(),
537            Self::Scalar(s) => {
538                BooleanChunked::full(s.name().clone(), s.scalar().is_null(), s.len())
539            },
540        }
541    }
542    #[inline]
543    pub fn is_not_null(&self) -> BooleanChunked {
544        match self {
545            Self::Series(s) => s.is_not_null(),
546            Self::Scalar(s) => {
547                BooleanChunked::full(s.name().clone(), !s.scalar().is_null(), s.len())
548            },
549        }
550    }
551
552    pub fn to_physical_repr(&self) -> Column {
553        // @scalar-opt
554        self.as_materialized_series()
555            .to_physical_repr()
556            .into_owned()
557            .into()
558    }
559    /// # Safety
560    ///
561    /// This can lead to invalid memory access in downstream code.
562    pub unsafe fn from_physical_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
563        // @scalar-opt
564        self.as_materialized_series()
565            .from_physical_unchecked(dtype)
566            .map(Column::from)
567    }
568
569    pub fn head(&self, length: Option<usize>) -> Column {
570        let len = length.unwrap_or(HEAD_DEFAULT_LENGTH);
571        let len = usize::min(len, self.len());
572        self.slice(0, len)
573    }
574    pub fn tail(&self, length: Option<usize>) -> Column {
575        let len = length.unwrap_or(TAIL_DEFAULT_LENGTH);
576        let len = usize::min(len, self.len());
577        debug_assert!(len <= i64::MAX as usize);
578        self.slice(-(len as i64), len)
579    }
580    pub fn slice(&self, offset: i64, length: usize) -> Column {
581        match self {
582            Column::Series(s) => s.slice(offset, length).into(),
583            Column::Scalar(s) => {
584                let (_, length) = slice_offsets(offset, length, s.len());
585                s.resize(length).into()
586            },
587        }
588    }
589
590    pub fn split_at(&self, offset: i64) -> (Column, Column) {
591        // @scalar-opt
592        let (l, r) = self.as_materialized_series().split_at(offset);
593        (l.into(), r.into())
594    }
595
596    #[inline]
597    pub fn null_count(&self) -> usize {
598        match self {
599            Self::Series(s) => s.null_count(),
600            Self::Scalar(s) if s.scalar().is_null() => s.len(),
601            Self::Scalar(_) => 0,
602        }
603    }
604
605    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Column> {
606        check_bounds_ca(indices, self.len() as IdxSize)?;
607        Ok(unsafe { self.take_unchecked(indices) })
608    }
609    pub fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Column> {
610        check_bounds(indices, self.len() as IdxSize)?;
611        Ok(unsafe { self.take_slice_unchecked(indices) })
612    }
613    /// # Safety
614    ///
615    /// No bounds on the indexes are performed.
616    pub unsafe fn take_unchecked(&self, indices: &IdxCa) -> Column {
617        debug_assert!(check_bounds_ca(indices, self.len() as IdxSize).is_ok());
618
619        match self {
620            Self::Series(s) => unsafe { s.take_unchecked(indices) }.into(),
621            Self::Scalar(s) => {
622                let idxs_length = indices.len();
623                let idxs_null_count = indices.null_count();
624
625                let scalar = ScalarColumn::from_single_value_series(
626                    s.as_single_value_series().take_unchecked(&IdxCa::new(
627                        indices.name().clone(),
628                        &[0][..s.len().min(1)],
629                    )),
630                    idxs_length,
631                );
632
633                // We need to make sure that null values in `idx` become null values in the result
634                if idxs_null_count == 0 || scalar.has_nulls() {
635                    scalar.into_column()
636                } else if idxs_null_count == idxs_length {
637                    scalar.into_nulls().into_column()
638                } else {
639                    let validity = indices.rechunk_validity();
640                    let series = scalar.take_materialized_series();
641                    let name = series.name().clone();
642                    let dtype = series.dtype().clone();
643                    let mut chunks = series.into_chunks();
644                    assert_eq!(chunks.len(), 1);
645                    chunks[0] = chunks[0].with_validity(validity);
646                    unsafe { Series::from_chunks_and_dtype_unchecked(name, chunks, &dtype) }
647                        .into_column()
648                }
649            },
650        }
651    }
652    /// # Safety
653    ///
654    /// No bounds on the indexes are performed.
655    pub unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Column {
656        debug_assert!(check_bounds(indices, self.len() as IdxSize).is_ok());
657
658        match self {
659            Self::Series(s) => unsafe { s.take_slice_unchecked(indices) }.into(),
660            Self::Scalar(s) => ScalarColumn::from_single_value_series(
661                s.as_single_value_series()
662                    .take_slice_unchecked(&[0][..s.len().min(1)]),
663                indices.len(),
664            )
665            .into(),
666        }
667    }
668
    /// General implementation for aggregation where a non-missing scalar would map to itself.
    ///
    /// * `groups` - the groups to aggregate over; the scalar path produces `groups.len()` rows.
    /// * `series_agg` - the aggregation on a [`Series`]. It is applied directly for series
    ///   columns and for empty scalar columns. For non-empty scalar columns it is only probed
    ///   on the single-value series, to learn the output dtype and whether the result is null.
    ///
    /// In the scalar path, empty groups yield null and all other groups yield the scalar
    /// (cast to the output dtype of the aggregation where that differs).
    #[inline(always)]
    #[cfg(any(feature = "algorithm_group_by", feature = "bitwise"))]
    fn agg_with_scalar_identity(
        &self,
        groups: &GroupsType,
        series_agg: impl Fn(&Series, &GroupsType) -> Series,
    ) -> Column {
        match self {
            Column::Series(s) => series_agg(s, groups).into_column(),
            Column::Scalar(s) => {
                // An empty scalar column has no value to repeat; defer to the series aggregation.
                if s.is_empty() {
                    return series_agg(s.as_materialized_series(), groups).into_column();
                }

                // We utilize the aggregation on Series to see:
                // 1. the output datatype of the aggregation
                // 2. whether this aggregation is even defined
                let series_aggregation = series_agg(
                    &s.as_single_value_series(),
                    // @NOTE: this group is always valid since s is non-empty.
                    &GroupsType::new_slice(vec![[0, 1]], false, true),
                );

                // If the aggregation is not defined, just return all nulls.
                if series_aggregation.has_nulls() {
                    return Self::new_scalar(
                        series_aggregation.name().clone(),
                        Scalar::new(series_aggregation.dtype().clone(), AnyValue::Null),
                        groups.len(),
                    );
                }

                // One output row per group, all holding the scalar.
                let mut scalar_col = s.resize(groups.len());
                // The aggregation might change the type (e.g. mean changes int -> float), so we do
                // a cast here to the output type.
                if series_aggregation.dtype() != s.dtype() {
                    scalar_col = scalar_col.cast(series_aggregation.dtype()).unwrap();
                }

                let Some(first_empty_idx) = groups.iter().position(|g| g.is_empty()) else {
                    // Fast path: no empty groups. keep the scalar intact.
                    return scalar_col.into_column();
                };

                // All empty groups produce a *missing* or `null` value.
                let mut validity = BitmapBuilder::with_capacity(groups.len());
                // Every group before the first empty one is known to be non-empty, hence valid.
                validity.extend_constant(first_empty_idx, true);
                // SAFETY: We trust the length of this iterator.
                let iter = unsafe {
                    TrustMyLength::new(
                        groups.iter().skip(first_empty_idx).map(|g| !g.is_empty()),
                        groups.len() - first_empty_idx,
                    )
                };
                validity.extend_trusted_len_iter(iter);

                // Materialize into a single chunk so the validity mask can be attached to it.
                let mut s = scalar_col.take_materialized_series().rechunk();
                // SAFETY: We perform a compute_len afterwards.
                let chunks = unsafe { s.chunks_mut() };
                let arr = &mut chunks[0];
                *arr = arr.with_validity(validity.into_opt_validity());
                s.compute_len();

                s.into_column()
            },
        }
    }
737
738    /// # Safety
739    ///
740    /// Does no bounds checks, groups must be correct.
741    #[cfg(feature = "algorithm_group_by")]
742    pub unsafe fn agg_min(&self, groups: &GroupsType) -> Self {
743        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_min(g) })
744    }
745
746    /// # Safety
747    ///
748    /// Does no bounds checks, groups must be correct.
749    #[cfg(feature = "algorithm_group_by")]
750    pub unsafe fn agg_max(&self, groups: &GroupsType) -> Self {
751        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_max(g) })
752    }
753
754    /// # Safety
755    ///
756    /// Does no bounds checks, groups must be correct.
757    #[cfg(feature = "algorithm_group_by")]
758    pub unsafe fn agg_mean(&self, groups: &GroupsType) -> Self {
759        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_mean(g) })
760    }
761
762    /// # Safety
763    ///
764    /// Does no bounds checks, groups must be correct.
765    #[cfg(feature = "algorithm_group_by")]
766    pub unsafe fn agg_arg_min(&self, groups: &GroupsType) -> Self {
767        match self {
768            Column::Series(s) => unsafe { Column::from(s.agg_arg_min(groups)) },
769            Column::Scalar(sc) => {
770                let scalar = if sc.is_empty() || sc.has_nulls() {
771                    Scalar::null(IDX_DTYPE)
772                } else {
773                    Scalar::new_idxsize(0)
774                };
775                Column::new_scalar(self.name().clone(), scalar, 1)
776            },
777        }
778    }
779
780    /// # Safety
781    ///
782    /// Does no bounds checks, groups must be correct.
783    #[cfg(feature = "algorithm_group_by")]
784    pub unsafe fn agg_arg_max(&self, groups: &GroupsType) -> Self {
785        match self {
786            Column::Series(s) => unsafe { Column::from(s.agg_arg_max(groups)) },
787            Column::Scalar(sc) => {
788                let scalar = if sc.is_empty() || sc.has_nulls() {
789                    Scalar::null(IDX_DTYPE)
790                } else {
791                    Scalar::new_idxsize(0)
792                };
793                Column::new_scalar(self.name().clone(), scalar, 1)
794            },
795        }
796    }
797
798    /// # Safety
799    ///
800    /// Does no bounds checks, groups must be correct.
801    #[cfg(feature = "algorithm_group_by")]
802    pub unsafe fn agg_sum(&self, groups: &GroupsType) -> Self {
803        // @scalar-opt
804        unsafe { self.as_materialized_series().agg_sum(groups) }.into()
805    }
806
807    /// # Safety
808    ///
809    /// Does no bounds checks, groups must be correct.
810    #[cfg(feature = "algorithm_group_by")]
811    pub unsafe fn agg_first(&self, groups: &GroupsType) -> Self {
812        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_first(g) })
813    }
814
815    /// # Safety
816    ///
817    /// Does no bounds checks, groups must be correct.
818    #[cfg(feature = "algorithm_group_by")]
819    pub unsafe fn agg_first_non_null(&self, groups: &GroupsType) -> Self {
820        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_first_non_null(g) })
821    }
822
823    /// # Safety
824    ///
825    /// Does no bounds checks, groups must be correct.
826    #[cfg(feature = "algorithm_group_by")]
827    pub unsafe fn agg_last(&self, groups: &GroupsType) -> Self {
828        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_last(g) })
829    }
830
831    /// # Safety
832    ///
833    /// Does no bounds checks, groups must be correct.
834    #[cfg(feature = "algorithm_group_by")]
835    pub unsafe fn agg_last_non_null(&self, groups: &GroupsType) -> Self {
836        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_last_non_null(g) })
837    }
838
839    /// # Safety
840    ///
841    /// Does no bounds checks, groups must be correct.
842    #[cfg(feature = "algorithm_group_by")]
843    pub unsafe fn agg_n_unique(&self, groups: &GroupsType) -> Self {
844        // @scalar-opt
845        unsafe { self.as_materialized_series().agg_n_unique(groups) }.into()
846    }
847
848    /// # Safety
849    ///
850    /// Does no bounds checks, groups must be correct.
851    #[cfg(feature = "algorithm_group_by")]
852    pub unsafe fn agg_quantile(
853        &self,
854        groups: &GroupsType,
855        quantile: f64,
856        method: QuantileMethod,
857    ) -> Self {
858        // @scalar-opt
859
860        unsafe {
861            self.as_materialized_series()
862                .agg_quantile(groups, quantile, method)
863        }
864        .into()
865    }
866
867    /// # Safety
868    ///
869    /// Does no bounds checks, groups must be correct.
870    #[cfg(feature = "algorithm_group_by")]
871    pub unsafe fn agg_median(&self, groups: &GroupsType) -> Self {
872        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_median(g) })
873    }
874
875    /// # Safety
876    ///
877    /// Does no bounds checks, groups must be correct.
878    #[cfg(feature = "algorithm_group_by")]
879    pub unsafe fn agg_var(&self, groups: &GroupsType, ddof: u8) -> Self {
880        // @scalar-opt
881        unsafe { self.as_materialized_series().agg_var(groups, ddof) }.into()
882    }
883
884    /// # Safety
885    ///
886    /// Does no bounds checks, groups must be correct.
887    #[cfg(feature = "algorithm_group_by")]
888    pub unsafe fn agg_std(&self, groups: &GroupsType, ddof: u8) -> Self {
889        // @scalar-opt
890        unsafe { self.as_materialized_series().agg_std(groups, ddof) }.into()
891    }
892
893    /// # Safety
894    ///
895    /// Does no bounds checks, groups must be correct.
896    #[cfg(feature = "algorithm_group_by")]
897    pub unsafe fn agg_list(&self, groups: &GroupsType) -> Self {
898        // @scalar-opt
899        unsafe { self.as_materialized_series().agg_list(groups) }.into()
900    }
901
902    /// # Safety
903    ///
904    /// Does no bounds checks, groups must be correct.
905    #[cfg(feature = "algorithm_group_by")]
906    pub fn agg_valid_count(&self, groups: &GroupsType) -> Self {
907        // @scalar-opt
908        unsafe { self.as_materialized_series().agg_valid_count(groups) }.into()
909    }
910
911    /// # Safety
912    ///
913    /// Does no bounds checks, groups must be correct.
914    #[cfg(feature = "bitwise")]
915    pub unsafe fn agg_and(&self, groups: &GroupsType) -> Self {
916        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_and(g) })
917    }
918    /// # Safety
919    ///
920    /// Does no bounds checks, groups must be correct.
921    #[cfg(feature = "bitwise")]
922    pub unsafe fn agg_or(&self, groups: &GroupsType) -> Self {
923        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_or(g) })
924    }
925    /// # Safety
926    ///
927    /// Does no bounds checks, groups must be correct.
928    #[cfg(feature = "bitwise")]
929    pub unsafe fn agg_xor(&self, groups: &GroupsType) -> Self {
930        // @scalar-opt
931        unsafe { self.as_materialized_series().agg_xor(groups) }.into()
932    }
933
934    pub fn full_null(name: PlSmallStr, size: usize, dtype: &DataType) -> Self {
935        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), size)
936    }
937
938    pub fn is_empty(&self) -> bool {
939        self.len() == 0
940    }
941
942    pub fn reverse(&self) -> Column {
943        match self {
944            Column::Series(s) => s.reverse().into(),
945            Column::Scalar(_) => self.clone(),
946        }
947    }
948
949    pub fn equals(&self, other: &Column) -> bool {
950        // @scalar-opt
951        self.as_materialized_series()
952            .equals(other.as_materialized_series())
953    }
954
955    pub fn equals_missing(&self, other: &Column) -> bool {
956        // @scalar-opt
957        self.as_materialized_series()
958            .equals_missing(other.as_materialized_series())
959    }
960
961    pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
962        // @scalar-opt
963        match self {
964            Column::Series(s) => s.set_sorted_flag(sorted),
965            Column::Scalar(_) => {},
966        }
967    }
968
969    pub fn get_flags(&self) -> StatisticsFlags {
970        match self {
971            Column::Series(s) => s.get_flags(),
972            Column::Scalar(_) => {
973                StatisticsFlags::IS_SORTED_ASC | StatisticsFlags::CAN_FAST_EXPLODE_LIST
974            },
975        }
976    }
977
978    /// Returns whether the flags were set
979    pub fn set_flags(&mut self, flags: StatisticsFlags) -> bool {
980        match self {
981            Column::Series(s) => {
982                s.set_flags(flags);
983                true
984            },
985            Column::Scalar(_) => false,
986        }
987    }
988
989    pub fn vec_hash(
990        &self,
991        build_hasher: PlSeedableRandomStateQuality,
992        buf: &mut Vec<u64>,
993    ) -> PolarsResult<()> {
994        // @scalar-opt?
995        self.as_materialized_series().vec_hash(build_hasher, buf)
996    }
997
998    pub fn vec_hash_combine(
999        &self,
1000        build_hasher: PlSeedableRandomStateQuality,
1001        hashes: &mut [u64],
1002    ) -> PolarsResult<()> {
1003        // @scalar-opt?
1004        self.as_materialized_series()
1005            .vec_hash_combine(build_hasher, hashes)
1006    }
1007
1008    pub fn append(&mut self, other: &Column) -> PolarsResult<&mut Self> {
1009        // @scalar-opt
1010        self.into_materialized_series()
1011            .append(other.as_materialized_series())?;
1012        Ok(self)
1013    }
1014    pub fn append_owned(&mut self, other: Column) -> PolarsResult<&mut Self> {
1015        self.into_materialized_series()
1016            .append_owned(other.take_materialized_series())?;
1017        Ok(self)
1018    }
1019
    /// Computes the indices that order this column according to `options`.
    ///
    /// Several shortcuts are taken before falling back to `Series::arg_sort`:
    /// - an empty column yields an empty index array;
    /// - a column consisting only of nulls yields `0..len`, reversed when
    ///   `options.descending` is set;
    /// - a column whose sorted flag is not `IsSorted::Not` is handled without sorting, by
    ///   emitting index ranges (possibly reversed) for the null and the non-null parts.
    ///
    /// When `options.limit` is set, the result of the sorted fast path is truncated to at most
    /// that many indices.
    pub fn arg_sort(&self, options: SortOptions) -> IdxCa {
        if self.is_empty() {
            return IdxCa::from_vec(self.name().clone(), Vec::new());
        }

        if self.null_count() == self.len() {
            // We might need to maintain order so just respect the descending parameter.
            let values = if options.descending {
                (0..self.len() as IdxSize).rev().collect()
            } else {
                (0..self.len() as IdxSize).collect()
            };

            return IdxCa::from_vec(self.name().clone(), values);
        }

        // Without a sorted flag there is no shortcut: defer to the series implementation.
        let is_sorted = Some(self.is_sorted_flag());
        let Some(is_sorted) = is_sorted.filter(|v| !matches!(v, IsSorted::Not)) else {
            return self.as_materialized_series().arg_sort(options);
        };

        // Fast path: the data is sorted.
        let is_sorted_dsc = matches!(is_sorted, IsSorted::Descending);
        // The current order has to be flipped when it differs from the requested one.
        let invert = options.descending != is_sorted_dsc;

        let mut values = Vec::with_capacity(self.len());

        // Pushes the indices `start..end` onto `values` in the order required by `invert` and
        // `maintain_order`. `is_only_nulls` marks a range that is emitted in its current order
        // regardless of `invert`.
        #[inline(never)]
        fn extend(
            start: IdxSize,
            end: IdxSize,
            slf: &Column,
            values: &mut Vec<IdxSize>,
            is_only_nulls: bool,
            invert: bool,
            maintain_order: bool,
        ) {
            debug_assert!(start <= end);
            debug_assert!(start as usize <= slf.len());
            debug_assert!(end as usize <= slf.len());

            if !invert || is_only_nulls {
                values.extend(start..end);
                return;
            }

            // If we don't have to maintain order but we have to invert. Just flip it around.
            if !maintain_order {
                values.extend((start..end).rev());
                return;
            }

            // If we want to maintain order but we also need to invert, we need to invert
            // per group of items.
            //
            // @NOTE: Since the column is sorted, arg_unique can also take a fast path and
            // just do a single traversal.
            let arg_unique = slf
                .slice(start as i64, (end - start) as usize)
                .arg_unique()
                .unwrap();

            assert!(!arg_unique.has_nulls());

            let num_unique = arg_unique.len();

            // Fast path: all items are unique.
            if num_unique == (end - start) as usize {
                values.extend((start..end).rev());
                return;
            }

            // A single distinct value: there is nothing to invert.
            if num_unique == 1 {
                values.extend(start..end);
                return;
            }

            // Walk the group start offsets back to front; each group spans `idx..prev_idx`
            // (relative to `start`) and is emitted in its original order.
            let mut prev_idx = end - start;
            for chunk in arg_unique.downcast_iter() {
                for &idx in chunk.values().as_slice().iter().rev() {
                    values.extend(start + idx..start + prev_idx);
                    prev_idx = idx;
                }
            }
        }
        // Shorthand that calls `extend` with the captured `self`, `values`, `invert` and
        // `options.maintain_order`; `is_only_nulls` defaults to `false`.
        macro_rules! extend {
            ($start:expr, $end:expr) => {
                extend!($start, $end, is_only_nulls = false);
            };
            ($start:expr, $end:expr, is_only_nulls = $is_only_nulls:expr) => {
                extend(
                    $start,
                    $end,
                    self,
                    &mut values,
                    $is_only_nulls,
                    invert,
                    options.maintain_order,
                );
            };
        }

        let length = self.len() as IdxSize;
        let null_count = self.null_count() as IdxSize;

        if null_count == 0 {
            extend!(0, length);
        } else {
            // The last value tells us at which end the nulls currently are.
            let has_nulls_last = self.get(self.len() - 1).unwrap().is_null();
            match (options.nulls_last, has_nulls_last) {
                (true, true) => {
                    // Current: Nulls last, Wanted: Nulls last
                    extend!(0, length - null_count);
                    extend!(length - null_count, length, is_only_nulls = true);
                },
                (true, false) => {
                    // Current: Nulls first, Wanted: Nulls last
                    extend!(null_count, length);
                    extend!(0, null_count, is_only_nulls = true);
                },
                (false, true) => {
                    // Current: Nulls last, Wanted: Nulls first
                    extend!(length - null_count, length, is_only_nulls = true);
                    extend!(0, length - null_count);
                },
                (false, false) => {
                    // Current: Nulls first, Wanted: Nulls first
                    extend!(0, null_count, is_only_nulls = true);
                    extend!(null_count, length);
                },
            }
        }

        // @NOTE: This can theoretically be pushed into the previous operation, but is it
        // really worth it? Probably not.
        if let Some(limit) = options.limit {
            let limit = limit.min(length);
            values.truncate(limit as usize);
        }

        IdxCa::from_vec(self.name().clone(), values)
    }
1162
1163    pub fn arg_sort_multiple(
1164        &self,
1165        by: &[Column],
1166        options: &SortMultipleOptions,
1167    ) -> PolarsResult<IdxCa> {
1168        // @scalar-opt
1169        self.as_materialized_series().arg_sort_multiple(by, options)
1170    }
1171
1172    pub fn arg_unique(&self) -> PolarsResult<IdxCa> {
1173        match self {
1174            Column::Scalar(s) => Ok(IdxCa::new_vec(s.name().clone(), vec![0])),
1175            _ => self.as_materialized_series().arg_unique(),
1176        }
1177    }
1178
1179    pub fn bit_repr(&self) -> Option<BitRepr> {
1180        // @scalar-opt
1181        self.as_materialized_series().bit_repr()
1182    }
1183
1184    pub fn into_frame(self) -> DataFrame {
1185        // SAFETY: A single-column dataframe cannot have length mismatches or duplicate names
1186        unsafe { DataFrame::new_unchecked(self.len(), vec![self]) }
1187    }
1188
1189    pub fn extend(&mut self, other: &Column) -> PolarsResult<&mut Self> {
1190        // @scalar-opt
1191        self.into_materialized_series()
1192            .extend(other.as_materialized_series())?;
1193        Ok(self)
1194    }
1195
1196    pub fn rechunk(&self) -> Column {
1197        match self {
1198            Column::Series(s) => s.rechunk().into(),
1199            Column::Scalar(s) => {
1200                if s.lazy_as_materialized_series()
1201                    .filter(|x| x.n_chunks() > 1)
1202                    .is_some()
1203                {
1204                    Column::Scalar(ScalarColumn::new(
1205                        s.name().clone(),
1206                        s.scalar().clone(),
1207                        s.len(),
1208                    ))
1209                } else {
1210                    self.clone()
1211                }
1212            },
1213        }
1214    }
1215
1216    pub fn explode(&self, options: ExplodeOptions) -> PolarsResult<Column> {
1217        self.as_materialized_series()
1218            .explode(options)
1219            .map(Column::from)
1220    }
1221    pub fn implode(&self) -> PolarsResult<ListChunked> {
1222        self.as_materialized_series().implode()
1223    }
1224
1225    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
1226        // @scalar-opt
1227        self.as_materialized_series()
1228            .fill_null(strategy)
1229            .map(Column::from)
1230    }
1231
1232    pub fn divide(&self, rhs: &Column) -> PolarsResult<Self> {
1233        // @scalar-opt
1234        self.as_materialized_series()
1235            .divide(rhs.as_materialized_series())
1236            .map(Column::from)
1237    }
1238
1239    pub fn shift(&self, periods: i64) -> Column {
1240        // @scalar-opt
1241        self.as_materialized_series().shift(periods).into()
1242    }
1243
1244    #[cfg(feature = "zip_with")]
1245    pub fn zip_with(&self, mask: &BooleanChunked, other: &Self) -> PolarsResult<Self> {
1246        // @scalar-opt
1247        self.as_materialized_series()
1248            .zip_with(mask, other.as_materialized_series())
1249            .map(Self::from)
1250    }
1251
1252    #[cfg(feature = "zip_with")]
1253    pub fn zip_with_same_type(
1254        &self,
1255        mask: &ChunkedArray<BooleanType>,
1256        other: &Column,
1257    ) -> PolarsResult<Column> {
1258        // @scalar-opt
1259        self.as_materialized_series()
1260            .zip_with_same_type(mask, other.as_materialized_series())
1261            .map(Column::from)
1262    }
1263
1264    pub fn drop_nulls(&self) -> Column {
1265        match self {
1266            Column::Series(s) => s.drop_nulls().into_column(),
1267            Column::Scalar(s) => s.drop_nulls().into_column(),
1268        }
1269    }
1270
1271    /// Packs every element into a list.
1272    pub fn as_list(&self) -> ListChunked {
1273        // @scalar-opt
1274        self.as_materialized_series().as_list()
1275    }
1276
1277    pub fn is_sorted_flag(&self) -> IsSorted {
1278        match self {
1279            Column::Series(s) => s.is_sorted_flag(),
1280            Column::Scalar(_) => IsSorted::Ascending,
1281        }
1282    }
1283
1284    pub fn unique(&self) -> PolarsResult<Column> {
1285        match self {
1286            Column::Series(s) => s.unique().map(Column::from),
1287            Column::Scalar(s) => {
1288                _ = s.as_single_value_series().unique()?;
1289                if s.is_empty() {
1290                    return Ok(s.clone().into_column());
1291                }
1292
1293                Ok(s.resize(1).into_column())
1294            },
1295        }
1296    }
1297    pub fn unique_stable(&self) -> PolarsResult<Column> {
1298        match self {
1299            Column::Series(s) => s.unique_stable().map(Column::from),
1300            Column::Scalar(s) => {
1301                _ = s.as_single_value_series().unique_stable()?;
1302                if s.is_empty() {
1303                    return Ok(s.clone().into_column());
1304                }
1305
1306                Ok(s.resize(1).into_column())
1307            },
1308        }
1309    }
1310
1311    pub fn reshape_list(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
1312        // @scalar-opt
1313        self.as_materialized_series()
1314            .reshape_list(dimensions)
1315            .map(Self::from)
1316    }
1317
1318    #[cfg(feature = "dtype-array")]
1319    pub fn reshape_array(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
1320        // @scalar-opt
1321        self.as_materialized_series()
1322            .reshape_array(dimensions)
1323            .map(Self::from)
1324    }
1325
1326    pub fn sort(&self, sort_options: SortOptions) -> PolarsResult<Self> {
1327        // @scalar-opt
1328        self.as_materialized_series()
1329            .sort(sort_options)
1330            .map(Self::from)
1331    }
1332
1333    pub fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Self> {
1334        match self {
1335            Column::Series(s) => s.filter(filter).map(Column::from),
1336            Column::Scalar(s) => {
1337                if s.is_empty() {
1338                    return Ok(s.clone().into_column());
1339                }
1340
1341                // Broadcasting
1342                if filter.len() == 1 {
1343                    return match filter.get(0) {
1344                        Some(true) => Ok(s.clone().into_column()),
1345                        _ => Ok(s.resize(0).into_column()),
1346                    };
1347                }
1348
1349                Ok(s.resize(filter.sum().unwrap() as usize).into_column())
1350            },
1351        }
1352    }
1353
1354    #[cfg(feature = "random")]
1355    pub fn shuffle(&self, seed: Option<u64>) -> Self {
1356        // @scalar-opt
1357        self.as_materialized_series().shuffle(seed).into()
1358    }
1359
1360    #[cfg(feature = "random")]
1361    pub fn sample_frac(
1362        &self,
1363        frac: f64,
1364        with_replacement: bool,
1365        shuffle: bool,
1366        seed: Option<u64>,
1367    ) -> PolarsResult<Self> {
1368        self.as_materialized_series()
1369            .sample_frac(frac, with_replacement, shuffle, seed)
1370            .map(Self::from)
1371    }
1372
1373    #[cfg(feature = "random")]
1374    pub fn sample_n(
1375        &self,
1376        n: usize,
1377        with_replacement: bool,
1378        shuffle: bool,
1379        seed: Option<u64>,
1380    ) -> PolarsResult<Self> {
1381        self.as_materialized_series()
1382            .sample_n(n, with_replacement, shuffle, seed)
1383            .map(Self::from)
1384    }
1385
1386    pub fn gather_every(&self, n: usize, offset: usize) -> PolarsResult<Column> {
1387        polars_ensure!(n > 0, InvalidOperation: "gather_every(n): n should be positive");
1388        if self.len().saturating_sub(offset) == 0 {
1389            return Ok(self.clear());
1390        }
1391
1392        match self {
1393            Column::Series(s) => Ok(s.gather_every(n, offset)?.into()),
1394            Column::Scalar(s) => {
1395                let total = s.len() - offset;
1396                Ok(s.resize(1 + (total - 1) / n).into())
1397            },
1398        }
1399    }
1400
1401    pub fn extend_constant(&self, value: AnyValue, n: usize) -> PolarsResult<Self> {
1402        if self.is_empty() {
1403            return Ok(Self::new_scalar(
1404                self.name().clone(),
1405                Scalar::new(self.dtype().clone(), value.into_static()),
1406                n,
1407            ));
1408        }
1409
1410        match self {
1411            Column::Series(s) => s.extend_constant(value, n).map(Column::from),
1412            Column::Scalar(s) => {
1413                if s.scalar().as_any_value() == value {
1414                    Ok(s.resize(s.len() + n).into())
1415                } else {
1416                    s.as_materialized_series()
1417                        .extend_constant(value, n)
1418                        .map(Column::from)
1419                }
1420            },
1421        }
1422    }
1423
1424    pub fn is_finite(&self) -> PolarsResult<BooleanChunked> {
1425        self.try_map_unary_elementwise_to_bool(|s| s.is_finite())
1426    }
1427    pub fn is_infinite(&self) -> PolarsResult<BooleanChunked> {
1428        self.try_map_unary_elementwise_to_bool(|s| s.is_infinite())
1429    }
1430    pub fn is_nan(&self) -> PolarsResult<BooleanChunked> {
1431        self.try_map_unary_elementwise_to_bool(|s| s.is_nan())
1432    }
1433    pub fn is_not_nan(&self) -> PolarsResult<BooleanChunked> {
1434        self.try_map_unary_elementwise_to_bool(|s| s.is_not_nan())
1435    }
1436
1437    pub fn wrapping_trunc_div_scalar<T>(&self, rhs: T) -> Self
1438    where
1439        T: Num + NumCast,
1440    {
1441        // @scalar-opt
1442        self.as_materialized_series()
1443            .wrapping_trunc_div_scalar(rhs)
1444            .into()
1445    }
1446
1447    pub fn product(&self) -> PolarsResult<Scalar> {
1448        // @scalar-opt
1449        self.as_materialized_series().product()
1450    }
1451
1452    pub fn phys_iter(&self) -> SeriesPhysIter<'_> {
1453        // @scalar-opt
1454        self.as_materialized_series().phys_iter()
1455    }
1456
1457    #[inline]
1458    pub fn get(&self, index: usize) -> PolarsResult<AnyValue<'_>> {
1459        polars_ensure!(index < self.len(), oob = index, self.len());
1460
1461        // SAFETY: Bounds check done just before.
1462        Ok(unsafe { self.get_unchecked(index) })
1463    }
1464    /// # Safety
1465    ///
1466    /// Does not perform bounds check on `index`
1467    #[inline(always)]
1468    pub unsafe fn get_unchecked(&self, index: usize) -> AnyValue<'_> {
1469        debug_assert!(index < self.len());
1470
1471        match self {
1472            Column::Series(s) => unsafe { s.get_unchecked(index) },
1473            Column::Scalar(s) => s.scalar().as_any_value(),
1474        }
1475    }
1476
1477    #[cfg(feature = "object")]
1478    pub fn get_object(
1479        &self,
1480        index: usize,
1481    ) -> Option<&dyn crate::chunked_array::object::PolarsObjectSafe> {
1482        self.as_materialized_series().get_object(index)
1483    }
1484
1485    pub fn bitand(&self, rhs: &Self) -> PolarsResult<Self> {
1486        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l & r)
1487    }
1488    pub fn bitor(&self, rhs: &Self) -> PolarsResult<Self> {
1489        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l | r)
1490    }
1491    pub fn bitxor(&self, rhs: &Self) -> PolarsResult<Self> {
1492        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l ^ r)
1493    }
1494
1495    pub fn try_add_owned(self, other: Self) -> PolarsResult<Self> {
1496        match (self, other) {
1497            (Column::Series(lhs), Column::Series(rhs)) => {
1498                lhs.take().try_add_owned(rhs.take()).map(Column::from)
1499            },
1500            (lhs, rhs) => lhs + rhs,
1501        }
1502    }
1503    pub fn try_sub_owned(self, other: Self) -> PolarsResult<Self> {
1504        match (self, other) {
1505            (Column::Series(lhs), Column::Series(rhs)) => {
1506                lhs.take().try_sub_owned(rhs.take()).map(Column::from)
1507            },
1508            (lhs, rhs) => lhs - rhs,
1509        }
1510    }
1511    pub fn try_mul_owned(self, other: Self) -> PolarsResult<Self> {
1512        match (self, other) {
1513            (Column::Series(lhs), Column::Series(rhs)) => {
1514                lhs.take().try_mul_owned(rhs.take()).map(Column::from)
1515            },
1516            (lhs, rhs) => lhs * rhs,
1517        }
1518    }
1519
1520    pub(crate) fn str_value(&self, index: usize) -> PolarsResult<Cow<'_, str>> {
1521        Ok(self.get(index)?.str_value())
1522    }
1523
1524    pub fn min_reduce(&self) -> PolarsResult<Scalar> {
1525        match self {
1526            Column::Series(s) => s.min_reduce(),
1527            Column::Scalar(s) => {
1528                // We don't really want to deal with handling the full semantics here so we just
1529                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1530                s.as_single_value_series().min_reduce()
1531            },
1532        }
1533    }
1534    pub fn max_reduce(&self) -> PolarsResult<Scalar> {
1535        match self {
1536            Column::Series(s) => s.max_reduce(),
1537            Column::Scalar(s) => {
1538                // We don't really want to deal with handling the full semantics here so we just
1539                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1540                s.as_single_value_series().max_reduce()
1541            },
1542        }
1543    }
1544    pub fn median_reduce(&self) -> PolarsResult<Scalar> {
1545        match self {
1546            Column::Series(s) => s.median_reduce(),
1547            Column::Scalar(s) => {
1548                // We don't really want to deal with handling the full semantics here so we just
1549                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1550                s.as_single_value_series().median_reduce()
1551            },
1552        }
1553    }
1554    pub fn mean_reduce(&self) -> PolarsResult<Scalar> {
1555        match self {
1556            Column::Series(s) => s.mean_reduce(),
1557            Column::Scalar(s) => {
1558                // We don't really want to deal with handling the full semantics here so we just
1559                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1560                s.as_single_value_series().mean_reduce()
1561            },
1562        }
1563    }
1564    pub fn std_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
1565        match self {
1566            Column::Series(s) => s.std_reduce(ddof),
1567            Column::Scalar(s) => {
1568                // We don't really want to deal with handling the full semantics here so we just
1569                // cast to a small series. This is a tiny bit wasteful, but probably fine.
1570                let n = s.len().min(ddof as usize + 1);
1571                s.as_n_values_series(n).std_reduce(ddof)
1572            },
1573        }
1574    }
1575    pub fn var_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
1576        match self {
1577            Column::Series(s) => s.var_reduce(ddof),
1578            Column::Scalar(s) => {
1579                // We don't really want to deal with handling the full semantics here so we just
1580                // cast to a small series. This is a tiny bit wasteful, but probably fine.
1581                let n = s.len().min(ddof as usize + 1);
1582                s.as_n_values_series(n).var_reduce(ddof)
1583            },
1584        }
1585    }
1586    pub fn sum_reduce(&self) -> PolarsResult<Scalar> {
1587        // @scalar-opt
1588        self.as_materialized_series().sum_reduce()
1589    }
1590    pub fn and_reduce(&self) -> PolarsResult<Scalar> {
1591        match self {
1592            Column::Series(s) => s.and_reduce(),
1593            Column::Scalar(s) => {
1594                // We don't really want to deal with handling the full semantics here so we just
1595                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1596                s.as_single_value_series().and_reduce()
1597            },
1598        }
1599    }
1600    pub fn or_reduce(&self) -> PolarsResult<Scalar> {
1601        match self {
1602            Column::Series(s) => s.or_reduce(),
1603            Column::Scalar(s) => {
1604                // We don't really want to deal with handling the full semantics here so we just
1605                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1606                s.as_single_value_series().or_reduce()
1607            },
1608        }
1609    }
1610    pub fn xor_reduce(&self) -> PolarsResult<Scalar> {
1611        match self {
1612            Column::Series(s) => s.xor_reduce(),
1613            Column::Scalar(s) => {
1614                // We don't really want to deal with handling the full semantics here so we just
1615                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1616                //
1617                // We have to deal with the fact that xor is 0 if there is an even number of
1618                // elements and the value if there is an odd number of elements. If there are zero
1619                // elements the result should be `null`.
1620                s.as_n_values_series(2 - s.len() % 2).xor_reduce()
1621            },
1622        }
1623    }
1624    pub fn n_unique(&self) -> PolarsResult<usize> {
1625        match self {
1626            Column::Series(s) => s.n_unique(),
1627            Column::Scalar(s) => s.as_single_value_series().n_unique(),
1628        }
1629    }
1630
1631    pub fn quantile_reduce(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Scalar> {
1632        self.as_materialized_series()
1633            .quantile_reduce(quantile, method)
1634    }
1635
1636    pub fn quantiles_reduce(
1637        &self,
1638        quantiles: &[f64],
1639        method: QuantileMethod,
1640    ) -> PolarsResult<Scalar> {
1641        self.as_materialized_series()
1642            .quantiles_reduce(quantiles, method)
1643    }
1644
1645    pub(crate) fn estimated_size(&self) -> usize {
1646        // @scalar-opt
1647        self.as_materialized_series().estimated_size()
1648    }
1649
1650    pub fn sort_with(&self, options: SortOptions) -> PolarsResult<Self> {
1651        match self {
1652            Column::Series(s) => s.sort_with(options).map(Self::from),
1653            Column::Scalar(s) => {
1654                // This makes this function throw the same errors as Series::sort_with
1655                _ = s.as_single_value_series().sort_with(options)?;
1656
1657                Ok(self.clone())
1658            },
1659        }
1660    }
1661
1662    pub fn map_unary_elementwise_to_bool(
1663        &self,
1664        f: impl Fn(&Series) -> BooleanChunked,
1665    ) -> BooleanChunked {
1666        self.try_map_unary_elementwise_to_bool(|s| Ok(f(s)))
1667            .unwrap()
1668    }
1669    pub fn try_map_unary_elementwise_to_bool(
1670        &self,
1671        f: impl Fn(&Series) -> PolarsResult<BooleanChunked>,
1672    ) -> PolarsResult<BooleanChunked> {
1673        match self {
1674            Column::Series(s) => f(s),
1675            Column::Scalar(s) => Ok(f(&s.as_single_value_series())?.new_from_index(0, s.len())),
1676        }
1677    }
1678
1679    pub fn apply_unary_elementwise(&self, f: impl Fn(&Series) -> Series) -> Column {
1680        self.try_apply_unary_elementwise(|s| Ok(f(s))).unwrap()
1681    }
1682    pub fn try_apply_unary_elementwise(
1683        &self,
1684        f: impl Fn(&Series) -> PolarsResult<Series>,
1685    ) -> PolarsResult<Column> {
1686        match self {
1687            Column::Series(s) => f(s).map(Column::from),
1688            Column::Scalar(s) => Ok(ScalarColumn::from_single_value_series(
1689                f(&s.as_single_value_series())?,
1690                s.len(),
1691            )
1692            .into()),
1693        }
1694    }
1695
1696    pub fn apply_broadcasting_binary_elementwise(
1697        &self,
1698        other: &Self,
1699        op: impl Fn(&Series, &Series) -> Series,
1700    ) -> PolarsResult<Column> {
1701        self.try_apply_broadcasting_binary_elementwise(other, |lhs, rhs| Ok(op(lhs, rhs)))
1702    }
1703    pub fn try_apply_broadcasting_binary_elementwise(
1704        &self,
1705        other: &Self,
1706        op: impl Fn(&Series, &Series) -> PolarsResult<Series>,
1707    ) -> PolarsResult<Column> {
1708        fn output_length(a: &Column, b: &Column) -> PolarsResult<usize> {
1709            match (a.len(), b.len()) {
1710                // broadcasting
1711                (1, o) | (o, 1) => Ok(o),
1712                // equal
1713                (a, b) if a == b => Ok(a),
1714                // unequal
1715                (a, b) => {
1716                    polars_bail!(InvalidOperation: "cannot do a binary operation on columns of different lengths: got {} and {}", a, b)
1717                },
1718            }
1719        }
1720
1721        // Here we rely on the underlying broadcast operations.
1722        let length = output_length(self, other)?;
1723        match (self, other) {
1724            (Column::Series(lhs), Column::Series(rhs)) => op(lhs, rhs).map(Column::from),
1725            (Column::Series(lhs), Column::Scalar(rhs)) => {
1726                op(lhs, &rhs.as_single_value_series()).map(Column::from)
1727            },
1728            (Column::Scalar(lhs), Column::Series(rhs)) => {
1729                op(&lhs.as_single_value_series(), rhs).map(Column::from)
1730            },
1731            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
1732                let lhs = lhs.as_single_value_series();
1733                let rhs = rhs.as_single_value_series();
1734
1735                Ok(ScalarColumn::from_single_value_series(op(&lhs, &rhs)?, length).into_column())
1736            },
1737        }
1738    }
1739
1740    pub fn apply_binary_elementwise(
1741        &self,
1742        other: &Self,
1743        f: impl Fn(&Series, &Series) -> Series,
1744        f_lb: impl Fn(&Scalar, &Series) -> Series,
1745        f_rb: impl Fn(&Series, &Scalar) -> Series,
1746    ) -> Column {
1747        self.try_apply_binary_elementwise(
1748            other,
1749            |lhs, rhs| Ok(f(lhs, rhs)),
1750            |lhs, rhs| Ok(f_lb(lhs, rhs)),
1751            |lhs, rhs| Ok(f_rb(lhs, rhs)),
1752        )
1753        .unwrap()
1754    }
1755    pub fn try_apply_binary_elementwise(
1756        &self,
1757        other: &Self,
1758        f: impl Fn(&Series, &Series) -> PolarsResult<Series>,
1759        f_lb: impl Fn(&Scalar, &Series) -> PolarsResult<Series>,
1760        f_rb: impl Fn(&Series, &Scalar) -> PolarsResult<Series>,
1761    ) -> PolarsResult<Column> {
1762        debug_assert_eq!(self.len(), other.len());
1763
1764        match (self, other) {
1765            (Column::Series(lhs), Column::Series(rhs)) => f(lhs, rhs).map(Column::from),
1766            (Column::Series(lhs), Column::Scalar(rhs)) => f_rb(lhs, rhs.scalar()).map(Column::from),
1767            (Column::Scalar(lhs), Column::Series(rhs)) => f_lb(lhs.scalar(), rhs).map(Column::from),
1768            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
1769                let lhs = lhs.as_single_value_series();
1770                let rhs = rhs.as_single_value_series();
1771
1772                Ok(
1773                    ScalarColumn::from_single_value_series(f(&lhs, &rhs)?, self.len())
1774                        .into_column(),
1775                )
1776            },
1777        }
1778    }
1779
1780    #[cfg(feature = "approx_unique")]
1781    pub fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
1782        match self {
1783            Column::Series(s) => s.approx_n_unique(),
1784            Column::Scalar(s) => {
1785                // @NOTE: We do this for the error handling.
1786                s.as_single_value_series().approx_n_unique()?;
1787                Ok(1)
1788            },
1789        }
1790    }
1791
1792    pub fn n_chunks(&self) -> usize {
1793        match self {
1794            Column::Series(s) => s.n_chunks(),
1795            Column::Scalar(s) => s.lazy_as_materialized_series().map_or(1, |x| x.n_chunks()),
1796        }
1797    }
1798
1799    #[expect(clippy::wrong_self_convention)]
1800    pub(crate) fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
1801        // @scalar-opt
1802        self.as_materialized_series().into_total_ord_inner()
1803    }
1804    #[expect(unused, clippy::wrong_self_convention)]
1805    pub(crate) fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
1806        // @scalar-opt
1807        self.as_materialized_series().into_total_eq_inner()
1808    }
1809
1810    pub fn rechunk_to_arrow(self, compat_level: CompatLevel) -> Box<dyn Array> {
1811        // Rechunk to one chunk if necessary
1812        let mut series = self.take_materialized_series();
1813        if series.n_chunks() > 1 {
1814            series = series.rechunk();
1815        }
1816        series.to_arrow(0, compat_level)
1817    }
1818
1819    pub fn trim_lists_to_normalized_offsets(&self) -> Option<Column> {
1820        self.as_materialized_series()
1821            .trim_lists_to_normalized_offsets()
1822            .map(Column::from)
1823    }
1824
1825    pub fn propagate_nulls(&self) -> Option<Column> {
1826        self.as_materialized_series()
1827            .propagate_nulls()
1828            .map(Column::from)
1829    }
1830
1831    pub fn deposit(&self, validity: &Bitmap) -> Column {
1832        self.as_materialized_series()
1833            .deposit(validity)
1834            .into_column()
1835    }
1836
1837    pub fn rechunk_validity(&self) -> Option<Bitmap> {
1838        // @scalar-opt
1839        self.as_materialized_series().rechunk_validity()
1840    }
1841
1842    pub fn unique_id(&self) -> PolarsResult<(IdxSize, Vec<IdxSize>)> {
1843        self.as_materialized_series().unique_id()
1844    }
1845}
1846
1847impl Default for Column {
1848    fn default() -> Self {
1849        Self::new_scalar(
1850            PlSmallStr::EMPTY,
1851            Scalar::new(DataType::Int64, AnyValue::Null),
1852            0,
1853        )
1854    }
1855}
1856
1857impl PartialEq for Column {
1858    fn eq(&self, other: &Self) -> bool {
1859        // @scalar-opt
1860        self.as_materialized_series()
1861            .eq(other.as_materialized_series())
1862    }
1863}
1864
1865impl From<Series> for Column {
1866    #[inline]
1867    fn from(series: Series) -> Self {
1868        // We instantiate a Scalar Column if the Series is length is 1. This makes it possible for
1869        // future operations to be faster.
1870        if series.len() == 1 {
1871            return Self::Scalar(ScalarColumn::unit_scalar_from_series(series));
1872        }
1873
1874        Self::Series(SeriesColumn::new(series))
1875    }
1876}
1877
1878impl<T: IntoSeries> IntoColumn for T {
1879    #[inline]
1880    fn into_column(self) -> Column {
1881        self.into_series().into()
1882    }
1883}
1884
1885impl IntoColumn for Column {
1886    #[inline(always)]
1887    fn into_column(self) -> Column {
1888        self
1889    }
1890}
1891
1892/// We don't want to serialize the scalar columns. So this helps pretend that columns are always
1893/// initialized without implementing From<Column> for Series.
1894///
1895/// Those casts should be explicit.
1896#[derive(Clone)]
1897#[cfg_attr(feature = "serde", derive(serde::Serialize))]
1898#[cfg_attr(feature = "serde", serde(into = "Series"))]
1899struct _SerdeSeries(Series);
1900
1901impl From<Column> for _SerdeSeries {
1902    #[inline]
1903    fn from(value: Column) -> Self {
1904        Self(value.take_materialized_series())
1905    }
1906}
1907
1908impl From<_SerdeSeries> for Series {
1909    #[inline]
1910    fn from(value: _SerdeSeries) -> Self {
1911        value.0
1912    }
1913}