polars_core/frame/column/
mod.rs

1use std::borrow::Cow;
2
3use arrow::bitmap::{Bitmap, BitmapBuilder};
4use arrow::trusted_len::TrustMyLength;
5use num_traits::{Num, NumCast};
6use polars_compute::rolling::QuantileMethod;
7use polars_error::PolarsResult;
8use polars_utils::aliases::PlSeedableRandomStateQuality;
9use polars_utils::index::check_bounds;
10use polars_utils::pl_str::PlSmallStr;
11pub use scalar::ScalarColumn;
12
13use self::compare_inner::{TotalEqInner, TotalOrdInner};
14use self::gather::check_bounds_ca;
15use self::series::SeriesColumn;
16use crate::chunked_array::cast::CastOptions;
17use crate::chunked_array::flags::StatisticsFlags;
18use crate::datatypes::ReshapeDimension;
19use crate::prelude::*;
20use crate::series::{BitRepr, IsSorted, SeriesPhysIter};
21use crate::utils::{Container, slice_offsets};
22use crate::{HEAD_DEFAULT_LENGTH, TAIL_DEFAULT_LENGTH};
23
24mod arithmetic;
25mod compare;
26mod scalar;
27mod series;
28
/// A column within a [`DataFrame`].
///
/// This is lazily initialized to a [`Series`] with methods like
/// [`as_materialized_series`][Column::as_materialized_series] and
/// [`take_materialized_series`][Column::take_materialized_series].
///
/// Currently, there are two ways to represent a [`Column`].
/// 1. A [`Series`] of values
/// 2. A [`ScalarColumn`] that repeats a single [`Scalar`]
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum Column {
    // Representation 1: a `Series` of values, wrapped in a `SeriesColumn`.
    Series(SeriesColumn),
    // Representation 2: a `ScalarColumn`, i.e. a single `Scalar` that is repeated.
    Scalar(ScalarColumn),
}
45
/// Convert `Self` into a [`Column`]
pub trait IntoColumn: Sized {
    /// Consumes `self` and returns it as a [`Column`].
    fn into_column(self) -> Column;
}
50
51impl Column {
52    #[inline]
53    #[track_caller]
54    pub fn new<T, Phantom>(name: PlSmallStr, values: T) -> Self
55    where
56        Phantom: ?Sized,
57        Series: NamedFrom<T, Phantom>,
58    {
59        Self::Series(SeriesColumn::new(NamedFrom::new(name, values)))
60    }
61
62    #[inline]
63    pub fn new_empty(name: PlSmallStr, dtype: &DataType) -> Self {
64        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), 0)
65    }
66
67    #[inline]
68    pub fn new_scalar(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
69        Self::Scalar(ScalarColumn::new(name, scalar, length))
70    }
71
72    pub fn new_row_index(name: PlSmallStr, offset: IdxSize, length: usize) -> PolarsResult<Column> {
73        let Ok(length) = IdxSize::try_from(length) else {
74            polars_bail!(
75                ComputeError:
76                "row index length {} overflows IdxSize::MAX ({})",
77                length,
78                IdxSize::MAX,
79            )
80        };
81
82        if offset.checked_add(length).is_none() {
83            polars_bail!(
84                ComputeError:
85                "row index with offset {} overflows on dataframe with height {}",
86                offset, length
87            )
88        }
89
90        let range = offset..offset + length;
91
92        let mut ca = IdxCa::from_vec(name, range.collect());
93        ca.set_sorted_flag(IsSorted::Ascending);
94        let col = ca.into_series().into();
95
96        Ok(col)
97    }
98
99    // # Materialize
100    /// Get a reference to a [`Series`] for this [`Column`]
101    ///
102    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
103    #[inline]
104    pub fn as_materialized_series(&self) -> &Series {
105        match self {
106            Column::Series(s) => s,
107            Column::Scalar(s) => s.as_materialized_series(),
108        }
109    }
110
111    /// If the memory repr of this Column is a scalar, a unit-length Series will
112    /// be returned.
113    #[inline]
114    pub fn as_materialized_series_maintain_scalar(&self) -> Series {
115        match self {
116            Column::Scalar(s) => s.as_single_value_series(),
117            v => v.as_materialized_series().clone(),
118        }
119    }
120
121    /// Returns the backing `Series` for the values of this column.
122    ///
123    /// * For `Column::Series` columns, simply returns the inner `Series`.
124    /// * For `Column::Scalar` columns, returns an empty or unit length series.
125    ///
126    /// # Note
127    /// This method is safe to use. However, care must be taken when operating on the returned
128    /// `Series` to ensure result correctness. E.g. It is suitable to perform elementwise operations
129    /// on it, however e.g. aggregations will return unspecified results.
130    pub fn _get_backing_series(&self) -> Series {
131        match self {
132            Column::Series(s) => (**s).clone(),
133            Column::Scalar(s) => s.as_single_value_series(),
134        }
135    }
136
137    /// Constructs a new `Column` of the same variant as `self` from a backing `Series` representing
138    /// the values.
139    ///
140    /// # Panics
141    /// Panics if:
142    /// * `self` is `Column::Series` and the length of `new_s` does not match that of `self`.
143    /// * `self` is `Column::Scalar` and if either:
144    ///   * `self` is not empty and `new_s` is not of unit length.
145    ///   * `self` is empty and `new_s` is not empty.
146    pub fn _to_new_from_backing(&self, new_s: Series) -> Self {
147        match self {
148            Column::Series(s) => {
149                assert_eq!(new_s.len(), s.len());
150                Column::Series(SeriesColumn::new(new_s))
151            },
152            Column::Scalar(s) => {
153                assert_eq!(new_s.len(), s.as_single_value_series().len());
154                Column::Scalar(ScalarColumn::from_single_value_series(new_s, self.len()))
155            },
156        }
157    }
158
159    /// Turn [`Column`] into a [`Column::Series`].
160    ///
161    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
162    #[inline]
163    pub fn into_materialized_series(&mut self) -> &mut Series {
164        match self {
165            Column::Series(s) => s,
166            Column::Scalar(s) => {
167                let series = std::mem::replace(
168                    s,
169                    ScalarColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),
170                )
171                .take_materialized_series();
172                *self = Column::Series(series.into());
173                let Column::Series(s) = self else {
174                    unreachable!();
175                };
176                s
177            },
178        }
179    }
180    /// Take [`Series`] from a [`Column`]
181    ///
182    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
183    #[inline]
184    pub fn take_materialized_series(self) -> Series {
185        match self {
186            Column::Series(s) => s.take(),
187            Column::Scalar(s) => s.take_materialized_series(),
188        }
189    }
190
191    #[inline]
192    pub fn dtype(&self) -> &DataType {
193        match self {
194            Column::Series(s) => s.dtype(),
195            Column::Scalar(s) => s.dtype(),
196        }
197    }
198
199    #[inline]
200    pub fn field(&self) -> Cow<'_, Field> {
201        match self {
202            Column::Series(s) => s.field(),
203            Column::Scalar(s) => match s.lazy_as_materialized_series() {
204                None => Cow::Owned(Field::new(s.name().clone(), s.dtype().clone())),
205                Some(s) => s.field(),
206            },
207        }
208    }
209
210    #[inline]
211    pub fn name(&self) -> &PlSmallStr {
212        match self {
213            Column::Series(s) => s.name(),
214            Column::Scalar(s) => s.name(),
215        }
216    }
217
218    #[inline]
219    pub fn len(&self) -> usize {
220        match self {
221            Column::Series(s) => s.len(),
222            Column::Scalar(s) => s.len(),
223        }
224    }
225
226    #[inline]
227    pub fn with_name(mut self, name: PlSmallStr) -> Column {
228        self.rename(name);
229        self
230    }
231
232    #[inline]
233    pub fn rename(&mut self, name: PlSmallStr) {
234        match self {
235            Column::Series(s) => _ = s.rename(name),
236            Column::Scalar(s) => _ = s.rename(name),
237        }
238    }
239
240    // # Downcasting
241    #[inline]
242    pub fn as_series(&self) -> Option<&Series> {
243        match self {
244            Column::Series(s) => Some(s),
245            _ => None,
246        }
247    }
248    #[inline]
249    pub fn as_scalar_column(&self) -> Option<&ScalarColumn> {
250        match self {
251            Column::Scalar(s) => Some(s),
252            _ => None,
253        }
254    }
255    #[inline]
256    pub fn as_scalar_column_mut(&mut self) -> Option<&mut ScalarColumn> {
257        match self {
258            Column::Scalar(s) => Some(s),
259            _ => None,
260        }
261    }
262
263    // # Try to Chunked Arrays
264    pub fn try_bool(&self) -> Option<&BooleanChunked> {
265        self.as_materialized_series().try_bool()
266    }
267    pub fn try_i8(&self) -> Option<&Int8Chunked> {
268        self.as_materialized_series().try_i8()
269    }
270    pub fn try_i16(&self) -> Option<&Int16Chunked> {
271        self.as_materialized_series().try_i16()
272    }
273    pub fn try_i32(&self) -> Option<&Int32Chunked> {
274        self.as_materialized_series().try_i32()
275    }
276    pub fn try_i64(&self) -> Option<&Int64Chunked> {
277        self.as_materialized_series().try_i64()
278    }
279    pub fn try_u8(&self) -> Option<&UInt8Chunked> {
280        self.as_materialized_series().try_u8()
281    }
282    pub fn try_u16(&self) -> Option<&UInt16Chunked> {
283        self.as_materialized_series().try_u16()
284    }
285    pub fn try_u32(&self) -> Option<&UInt32Chunked> {
286        self.as_materialized_series().try_u32()
287    }
288    pub fn try_u64(&self) -> Option<&UInt64Chunked> {
289        self.as_materialized_series().try_u64()
290    }
291    #[cfg(feature = "dtype-u128")]
292    pub fn try_u128(&self) -> Option<&UInt128Chunked> {
293        self.as_materialized_series().try_u128()
294    }
295    #[cfg(feature = "dtype-f16")]
296    pub fn try_f16(&self) -> Option<&Float16Chunked> {
297        self.as_materialized_series().try_f16()
298    }
299    pub fn try_f32(&self) -> Option<&Float32Chunked> {
300        self.as_materialized_series().try_f32()
301    }
302    pub fn try_f64(&self) -> Option<&Float64Chunked> {
303        self.as_materialized_series().try_f64()
304    }
305    pub fn try_str(&self) -> Option<&StringChunked> {
306        self.as_materialized_series().try_str()
307    }
308    pub fn try_list(&self) -> Option<&ListChunked> {
309        self.as_materialized_series().try_list()
310    }
311    pub fn try_binary(&self) -> Option<&BinaryChunked> {
312        self.as_materialized_series().try_binary()
313    }
314    pub fn try_idx(&self) -> Option<&IdxCa> {
315        self.as_materialized_series().try_idx()
316    }
317    pub fn try_binary_offset(&self) -> Option<&BinaryOffsetChunked> {
318        self.as_materialized_series().try_binary_offset()
319    }
320    #[cfg(feature = "dtype-datetime")]
321    pub fn try_datetime(&self) -> Option<&DatetimeChunked> {
322        self.as_materialized_series().try_datetime()
323    }
324    #[cfg(feature = "dtype-struct")]
325    pub fn try_struct(&self) -> Option<&StructChunked> {
326        self.as_materialized_series().try_struct()
327    }
328    #[cfg(feature = "dtype-decimal")]
329    pub fn try_decimal(&self) -> Option<&DecimalChunked> {
330        self.as_materialized_series().try_decimal()
331    }
332    #[cfg(feature = "dtype-array")]
333    pub fn try_array(&self) -> Option<&ArrayChunked> {
334        self.as_materialized_series().try_array()
335    }
336    #[cfg(feature = "dtype-categorical")]
337    pub fn try_cat<T: PolarsCategoricalType>(&self) -> Option<&CategoricalChunked<T>> {
338        self.as_materialized_series().try_cat::<T>()
339    }
340    #[cfg(feature = "dtype-categorical")]
341    pub fn try_cat8(&self) -> Option<&Categorical8Chunked> {
342        self.as_materialized_series().try_cat8()
343    }
344    #[cfg(feature = "dtype-categorical")]
345    pub fn try_cat16(&self) -> Option<&Categorical16Chunked> {
346        self.as_materialized_series().try_cat16()
347    }
348    #[cfg(feature = "dtype-categorical")]
349    pub fn try_cat32(&self) -> Option<&Categorical32Chunked> {
350        self.as_materialized_series().try_cat32()
351    }
352    #[cfg(feature = "dtype-date")]
353    pub fn try_date(&self) -> Option<&DateChunked> {
354        self.as_materialized_series().try_date()
355    }
356    #[cfg(feature = "dtype-duration")]
357    pub fn try_duration(&self) -> Option<&DurationChunked> {
358        self.as_materialized_series().try_duration()
359    }
360
361    // # To Chunked Arrays
362    pub fn bool(&self) -> PolarsResult<&BooleanChunked> {
363        self.as_materialized_series().bool()
364    }
365    pub fn i8(&self) -> PolarsResult<&Int8Chunked> {
366        self.as_materialized_series().i8()
367    }
368    pub fn i16(&self) -> PolarsResult<&Int16Chunked> {
369        self.as_materialized_series().i16()
370    }
371    pub fn i32(&self) -> PolarsResult<&Int32Chunked> {
372        self.as_materialized_series().i32()
373    }
374    pub fn i64(&self) -> PolarsResult<&Int64Chunked> {
375        self.as_materialized_series().i64()
376    }
377    #[cfg(feature = "dtype-i128")]
378    pub fn i128(&self) -> PolarsResult<&Int128Chunked> {
379        self.as_materialized_series().i128()
380    }
381    pub fn u8(&self) -> PolarsResult<&UInt8Chunked> {
382        self.as_materialized_series().u8()
383    }
384    pub fn u16(&self) -> PolarsResult<&UInt16Chunked> {
385        self.as_materialized_series().u16()
386    }
387    pub fn u32(&self) -> PolarsResult<&UInt32Chunked> {
388        self.as_materialized_series().u32()
389    }
390    pub fn u64(&self) -> PolarsResult<&UInt64Chunked> {
391        self.as_materialized_series().u64()
392    }
393    #[cfg(feature = "dtype-u128")]
394    pub fn u128(&self) -> PolarsResult<&UInt128Chunked> {
395        self.as_materialized_series().u128()
396    }
397    #[cfg(feature = "dtype-f16")]
398    pub fn f16(&self) -> PolarsResult<&Float16Chunked> {
399        self.as_materialized_series().f16()
400    }
401    pub fn f32(&self) -> PolarsResult<&Float32Chunked> {
402        self.as_materialized_series().f32()
403    }
404    pub fn f64(&self) -> PolarsResult<&Float64Chunked> {
405        self.as_materialized_series().f64()
406    }
407    pub fn str(&self) -> PolarsResult<&StringChunked> {
408        self.as_materialized_series().str()
409    }
410    pub fn list(&self) -> PolarsResult<&ListChunked> {
411        self.as_materialized_series().list()
412    }
413    pub fn binary(&self) -> PolarsResult<&BinaryChunked> {
414        self.as_materialized_series().binary()
415    }
416    pub fn idx(&self) -> PolarsResult<&IdxCa> {
417        self.as_materialized_series().idx()
418    }
419    pub fn binary_offset(&self) -> PolarsResult<&BinaryOffsetChunked> {
420        self.as_materialized_series().binary_offset()
421    }
422    #[cfg(feature = "dtype-datetime")]
423    pub fn datetime(&self) -> PolarsResult<&DatetimeChunked> {
424        self.as_materialized_series().datetime()
425    }
426    #[cfg(feature = "dtype-struct")]
427    pub fn struct_(&self) -> PolarsResult<&StructChunked> {
428        self.as_materialized_series().struct_()
429    }
430    #[cfg(feature = "dtype-decimal")]
431    pub fn decimal(&self) -> PolarsResult<&DecimalChunked> {
432        self.as_materialized_series().decimal()
433    }
434    #[cfg(feature = "dtype-array")]
435    pub fn array(&self) -> PolarsResult<&ArrayChunked> {
436        self.as_materialized_series().array()
437    }
438    #[cfg(feature = "dtype-categorical")]
439    pub fn cat<T: PolarsCategoricalType>(&self) -> PolarsResult<&CategoricalChunked<T>> {
440        self.as_materialized_series().cat::<T>()
441    }
442    #[cfg(feature = "dtype-categorical")]
443    pub fn cat8(&self) -> PolarsResult<&Categorical8Chunked> {
444        self.as_materialized_series().cat8()
445    }
446    #[cfg(feature = "dtype-categorical")]
447    pub fn cat16(&self) -> PolarsResult<&Categorical16Chunked> {
448        self.as_materialized_series().cat16()
449    }
450    #[cfg(feature = "dtype-categorical")]
451    pub fn cat32(&self) -> PolarsResult<&Categorical32Chunked> {
452        self.as_materialized_series().cat32()
453    }
454    #[cfg(feature = "dtype-date")]
455    pub fn date(&self) -> PolarsResult<&DateChunked> {
456        self.as_materialized_series().date()
457    }
458    #[cfg(feature = "dtype-duration")]
459    pub fn duration(&self) -> PolarsResult<&DurationChunked> {
460        self.as_materialized_series().duration()
461    }
462
463    // # Casting
464    pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {
465        match self {
466            Column::Series(s) => s.cast_with_options(dtype, options).map(Column::from),
467            Column::Scalar(s) => s.cast_with_options(dtype, options).map(Column::from),
468        }
469    }
470    pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Self> {
471        match self {
472            Column::Series(s) => s.strict_cast(dtype).map(Column::from),
473            Column::Scalar(s) => s.strict_cast(dtype).map(Column::from),
474        }
475    }
476    pub fn cast(&self, dtype: &DataType) -> PolarsResult<Column> {
477        match self {
478            Column::Series(s) => s.cast(dtype).map(Column::from),
479            Column::Scalar(s) => s.cast(dtype).map(Column::from),
480        }
481    }
482    /// # Safety
483    ///
484    /// This can lead to invalid memory access in downstream code.
485    pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
486        match self {
487            Column::Series(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
488            Column::Scalar(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
489        }
490    }
491
492    #[must_use]
493    pub fn clear(&self) -> Self {
494        match self {
495            Column::Series(s) => s.clear().into(),
496            Column::Scalar(s) => s.resize(0).into(),
497        }
498    }
499
500    #[inline]
501    pub fn shrink_to_fit(&mut self) {
502        match self {
503            Column::Series(s) => s.shrink_to_fit(),
504            Column::Scalar(_) => {},
505        }
506    }
507
508    #[inline]
509    pub fn new_from_index(&self, index: usize, length: usize) -> Self {
510        if index >= self.len() {
511            return Self::full_null(self.name().clone(), length, self.dtype());
512        }
513
514        match self {
515            Column::Series(s) => {
516                // SAFETY: Bounds check done before.
517                let av = unsafe { s.get_unchecked(index) };
518                let scalar = Scalar::new(self.dtype().clone(), av.into_static());
519                Self::new_scalar(self.name().clone(), scalar, length)
520            },
521            Column::Scalar(s) => s.resize(length).into(),
522        }
523    }
524
525    #[inline]
526    pub fn has_nulls(&self) -> bool {
527        match self {
528            Self::Series(s) => s.has_nulls(),
529            Self::Scalar(s) => s.has_nulls(),
530        }
531    }
532
533    #[inline]
534    pub fn is_null(&self) -> BooleanChunked {
535        match self {
536            Self::Series(s) => s.is_null(),
537            Self::Scalar(s) => {
538                BooleanChunked::full(s.name().clone(), s.scalar().is_null(), s.len())
539            },
540        }
541    }
542    #[inline]
543    pub fn is_not_null(&self) -> BooleanChunked {
544        match self {
545            Self::Series(s) => s.is_not_null(),
546            Self::Scalar(s) => {
547                BooleanChunked::full(s.name().clone(), !s.scalar().is_null(), s.len())
548            },
549        }
550    }
551
552    pub fn to_physical_repr(&self) -> Column {
553        // @scalar-opt
554        self.as_materialized_series()
555            .to_physical_repr()
556            .into_owned()
557            .into()
558    }
559    /// # Safety
560    ///
561    /// This can lead to invalid memory access in downstream code.
562    pub unsafe fn from_physical_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
563        // @scalar-opt
564        self.as_materialized_series()
565            .from_physical_unchecked(dtype)
566            .map(Column::from)
567    }
568
569    pub fn head(&self, length: Option<usize>) -> Column {
570        let len = length.unwrap_or(HEAD_DEFAULT_LENGTH);
571        let len = usize::min(len, self.len());
572        self.slice(0, len)
573    }
574    pub fn tail(&self, length: Option<usize>) -> Column {
575        let len = length.unwrap_or(TAIL_DEFAULT_LENGTH);
576        let len = usize::min(len, self.len());
577        debug_assert!(len <= i64::MAX as usize);
578        self.slice(-(len as i64), len)
579    }
580    pub fn slice(&self, offset: i64, length: usize) -> Column {
581        match self {
582            Column::Series(s) => s.slice(offset, length).into(),
583            Column::Scalar(s) => {
584                let (_, length) = slice_offsets(offset, length, s.len());
585                s.resize(length).into()
586            },
587        }
588    }
589
590    pub fn split_at(&self, offset: i64) -> (Column, Column) {
591        // @scalar-opt
592        let (l, r) = self.as_materialized_series().split_at(offset);
593        (l.into(), r.into())
594    }
595
596    #[inline]
597    pub fn null_count(&self) -> usize {
598        match self {
599            Self::Series(s) => s.null_count(),
600            Self::Scalar(s) if s.scalar().is_null() => s.len(),
601            Self::Scalar(_) => 0,
602        }
603    }
604
605    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Column> {
606        check_bounds_ca(indices, self.len() as IdxSize)?;
607        Ok(unsafe { self.take_unchecked(indices) })
608    }
609    pub fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Column> {
610        check_bounds(indices, self.len() as IdxSize)?;
611        Ok(unsafe { self.take_slice_unchecked(indices) })
612    }
613    /// # Safety
614    ///
615    /// No bounds on the indexes are performed.
616    pub unsafe fn take_unchecked(&self, indices: &IdxCa) -> Column {
617        debug_assert!(check_bounds_ca(indices, self.len() as IdxSize).is_ok());
618
619        match self {
620            Self::Series(s) => unsafe { s.take_unchecked(indices) }.into(),
621            Self::Scalar(s) => {
622                let idxs_length = indices.len();
623                let idxs_null_count = indices.null_count();
624
625                let scalar = ScalarColumn::from_single_value_series(
626                    s.as_single_value_series().take_unchecked(&IdxCa::new(
627                        indices.name().clone(),
628                        &[0][..s.len().min(1)],
629                    )),
630                    idxs_length,
631                );
632
633                // We need to make sure that null values in `idx` become null values in the result
634                if idxs_null_count == 0 || scalar.has_nulls() {
635                    scalar.into_column()
636                } else if idxs_null_count == idxs_length {
637                    scalar.into_nulls().into_column()
638                } else {
639                    let validity = indices.rechunk_validity();
640                    let series = scalar.take_materialized_series();
641                    let name = series.name().clone();
642                    let dtype = series.dtype().clone();
643                    let mut chunks = series.into_chunks();
644                    assert_eq!(chunks.len(), 1);
645                    chunks[0] = chunks[0].with_validity(validity);
646                    unsafe { Series::from_chunks_and_dtype_unchecked(name, chunks, &dtype) }
647                        .into_column()
648                }
649            },
650        }
651    }
652    /// # Safety
653    ///
654    /// No bounds on the indexes are performed.
655    pub unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Column {
656        debug_assert!(check_bounds(indices, self.len() as IdxSize).is_ok());
657
658        match self {
659            Self::Series(s) => unsafe { s.take_slice_unchecked(indices) }.into(),
660            Self::Scalar(s) => ScalarColumn::from_single_value_series(
661                s.as_single_value_series()
662                    .take_slice_unchecked(&[0][..s.len().min(1)]),
663                indices.len(),
664            )
665            .into(),
666        }
667    }
668
669    /// General implementation for aggregation where a non-missing scalar would map to itself.
670    #[inline(always)]
671    #[cfg(any(feature = "algorithm_group_by", feature = "bitwise"))]
672    fn agg_with_unit_scalar(
673        &self,
674        groups: &GroupsType,
675        series_agg: impl Fn(&Series, &GroupsType) -> Series,
676    ) -> Column {
677        match self {
678            Column::Series(s) => series_agg(s, groups).into_column(),
679            Column::Scalar(s) => {
680                if s.is_empty() {
681                    return series_agg(s.as_materialized_series(), groups).into_column();
682                }
683
684                // We utilize the aggregation on Series to see:
685                // 1. the output datatype of the aggregation
686                // 2. whether this aggregation is even defined
687                let series_aggregation = series_agg(
688                    &s.as_single_value_series(),
689                    // @NOTE: this group is always valid since s is non-empty.
690                    &GroupsType::new_slice(vec![[0, 1]], false, true),
691                );
692
693                // If the aggregation is not defined, just return all nulls.
694                if series_aggregation.has_nulls() {
695                    return Self::new_scalar(
696                        series_aggregation.name().clone(),
697                        Scalar::new(series_aggregation.dtype().clone(), AnyValue::Null),
698                        groups.len(),
699                    );
700                }
701
702                let mut scalar_col = s.resize(groups.len());
703                // The aggregation might change the type (e.g. mean changes int -> float), so we do
704                // a cast here to the output type.
705                if series_aggregation.dtype() != s.dtype() {
706                    scalar_col = scalar_col.cast(series_aggregation.dtype()).unwrap();
707                }
708
709                let Some(first_empty_idx) = groups.iter().position(|g| g.is_empty()) else {
710                    // Fast path: no empty groups. keep the scalar intact.
711                    return scalar_col.into_column();
712                };
713
714                // All empty groups produce a *missing* or `null` value.
715                let mut validity = BitmapBuilder::with_capacity(groups.len());
716                validity.extend_constant(first_empty_idx, true);
717                // SAFETY: We trust the length of this iterator.
718                let iter = unsafe {
719                    TrustMyLength::new(
720                        groups.iter().skip(first_empty_idx).map(|g| !g.is_empty()),
721                        groups.len() - first_empty_idx,
722                    )
723                };
724                validity.extend_trusted_len_iter(iter);
725
726                let mut s = scalar_col.take_materialized_series().rechunk();
727                // SAFETY: We perform a compute_len afterwards.
728                let chunks = unsafe { s.chunks_mut() };
729                let arr = &mut chunks[0];
730                *arr = arr.with_validity(validity.into_opt_validity());
731                s.compute_len();
732
733                s.into_column()
734            },
735        }
736    }
737
738    /// # Safety
739    ///
740    /// Does no bounds checks, groups must be correct.
741    #[cfg(feature = "algorithm_group_by")]
742    pub unsafe fn agg_min(&self, groups: &GroupsType) -> Self {
743        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_min(g) })
744    }
745
746    /// # Safety
747    ///
748    /// Does no bounds checks, groups must be correct.
749    #[cfg(feature = "algorithm_group_by")]
750    pub unsafe fn agg_max(&self, groups: &GroupsType) -> Self {
751        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_max(g) })
752    }
753
754    /// # Safety
755    ///
756    /// Does no bounds checks, groups must be correct.
757    #[cfg(feature = "algorithm_group_by")]
758    pub unsafe fn agg_mean(&self, groups: &GroupsType) -> Self {
759        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_mean(g) })
760    }
761
762    /// # Safety
763    ///
764    /// Does no bounds checks, groups must be correct.
765    #[cfg(feature = "algorithm_group_by")]
766    pub unsafe fn agg_sum(&self, groups: &GroupsType) -> Self {
767        // @scalar-opt
768        unsafe { self.as_materialized_series().agg_sum(groups) }.into()
769    }
770
771    /// # Safety
772    ///
773    /// Does no bounds checks, groups must be correct.
774    #[cfg(feature = "algorithm_group_by")]
775    pub unsafe fn agg_first(&self, groups: &GroupsType) -> Self {
776        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_first(g) })
777    }
778
779    /// # Safety
780    ///
781    /// Does no bounds checks, groups must be correct.
782    #[cfg(feature = "algorithm_group_by")]
783    pub unsafe fn agg_first_non_null(&self, groups: &GroupsType) -> Self {
784        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_first_non_null(g) })
785    }
786
787    /// # Safety
788    ///
789    /// Does no bounds checks, groups must be correct.
790    #[cfg(feature = "algorithm_group_by")]
791    pub unsafe fn agg_last(&self, groups: &GroupsType) -> Self {
792        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_last(g) })
793    }
794
795    /// # Safety
796    ///
797    /// Does no bounds checks, groups must be correct.
798    #[cfg(feature = "algorithm_group_by")]
799    pub unsafe fn agg_last_non_null(&self, groups: &GroupsType) -> Self {
800        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_last_non_null(g) })
801    }
802
803    /// # Safety
804    ///
805    /// Does no bounds checks, groups must be correct.
806    #[cfg(feature = "algorithm_group_by")]
807    pub unsafe fn agg_n_unique(&self, groups: &GroupsType) -> Self {
808        // @scalar-opt
809        unsafe { self.as_materialized_series().agg_n_unique(groups) }.into()
810    }
811
812    /// # Safety
813    ///
814    /// Does no bounds checks, groups must be correct.
815    #[cfg(feature = "algorithm_group_by")]
816    pub unsafe fn agg_quantile(
817        &self,
818        groups: &GroupsType,
819        quantile: f64,
820        method: QuantileMethod,
821    ) -> Self {
822        // @scalar-opt
823
824        unsafe {
825            self.as_materialized_series()
826                .agg_quantile(groups, quantile, method)
827        }
828        .into()
829    }
830
831    /// # Safety
832    ///
833    /// Does no bounds checks, groups must be correct.
834    #[cfg(feature = "algorithm_group_by")]
835    pub unsafe fn agg_median(&self, groups: &GroupsType) -> Self {
836        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_median(g) })
837    }
838
839    /// # Safety
840    ///
841    /// Does no bounds checks, groups must be correct.
842    #[cfg(feature = "algorithm_group_by")]
843    pub unsafe fn agg_var(&self, groups: &GroupsType, ddof: u8) -> Self {
844        // @scalar-opt
845        unsafe { self.as_materialized_series().agg_var(groups, ddof) }.into()
846    }
847
848    /// # Safety
849    ///
850    /// Does no bounds checks, groups must be correct.
851    #[cfg(feature = "algorithm_group_by")]
852    pub unsafe fn agg_std(&self, groups: &GroupsType, ddof: u8) -> Self {
853        // @scalar-opt
854        unsafe { self.as_materialized_series().agg_std(groups, ddof) }.into()
855    }
856
857    /// # Safety
858    ///
859    /// Does no bounds checks, groups must be correct.
860    #[cfg(feature = "algorithm_group_by")]
861    pub unsafe fn agg_list(&self, groups: &GroupsType) -> Self {
862        // @scalar-opt
863        unsafe { self.as_materialized_series().agg_list(groups) }.into()
864    }
865
    /// Runs the `agg_valid_count` group aggregation on the materialized series.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    ///
    /// NOTE(review): unlike the sibling `agg_*` methods this function is *not* declared
    /// `unsafe`, even though it wraps an `unsafe` call and documents a safety contract.
    /// Callers are therefore not forced by the compiler to acknowledge the requirement on
    /// `groups`. Confirm whether this is intentional.
    #[cfg(feature = "algorithm_group_by")]
    pub fn agg_valid_count(&self, groups: &GroupsType) -> Self {
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_valid_count(groups) }.into()
    }
874
875    /// # Safety
876    ///
877    /// Does no bounds checks, groups must be correct.
878    #[cfg(feature = "bitwise")]
879    pub unsafe fn agg_and(&self, groups: &GroupsType) -> Self {
880        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_and(g) })
881    }
882    /// # Safety
883    ///
884    /// Does no bounds checks, groups must be correct.
885    #[cfg(feature = "bitwise")]
886    pub unsafe fn agg_or(&self, groups: &GroupsType) -> Self {
887        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_or(g) })
888    }
889    /// # Safety
890    ///
891    /// Does no bounds checks, groups must be correct.
892    #[cfg(feature = "bitwise")]
893    pub unsafe fn agg_xor(&self, groups: &GroupsType) -> Self {
894        // @scalar-opt
895        unsafe { self.as_materialized_series().agg_xor(groups) }.into()
896    }
897
898    pub fn full_null(name: PlSmallStr, size: usize, dtype: &DataType) -> Self {
899        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), size)
900    }
901
902    pub fn is_empty(&self) -> bool {
903        self.len() == 0
904    }
905
906    pub fn reverse(&self) -> Column {
907        match self {
908            Column::Series(s) => s.reverse().into(),
909            Column::Scalar(_) => self.clone(),
910        }
911    }
912
913    pub fn equals(&self, other: &Column) -> bool {
914        // @scalar-opt
915        self.as_materialized_series()
916            .equals(other.as_materialized_series())
917    }
918
919    pub fn equals_missing(&self, other: &Column) -> bool {
920        // @scalar-opt
921        self.as_materialized_series()
922            .equals_missing(other.as_materialized_series())
923    }
924
925    pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
926        // @scalar-opt
927        match self {
928            Column::Series(s) => s.set_sorted_flag(sorted),
929            Column::Scalar(_) => {},
930        }
931    }
932
933    pub fn get_flags(&self) -> StatisticsFlags {
934        match self {
935            Column::Series(s) => s.get_flags(),
936            Column::Scalar(_) => {
937                StatisticsFlags::IS_SORTED_ASC | StatisticsFlags::CAN_FAST_EXPLODE_LIST
938            },
939        }
940    }
941
942    /// Returns whether the flags were set
943    pub fn set_flags(&mut self, flags: StatisticsFlags) -> bool {
944        match self {
945            Column::Series(s) => {
946                s.set_flags(flags);
947                true
948            },
949            Column::Scalar(_) => false,
950        }
951    }
952
953    pub fn vec_hash(
954        &self,
955        build_hasher: PlSeedableRandomStateQuality,
956        buf: &mut Vec<u64>,
957    ) -> PolarsResult<()> {
958        // @scalar-opt?
959        self.as_materialized_series().vec_hash(build_hasher, buf)
960    }
961
962    pub fn vec_hash_combine(
963        &self,
964        build_hasher: PlSeedableRandomStateQuality,
965        hashes: &mut [u64],
966    ) -> PolarsResult<()> {
967        // @scalar-opt?
968        self.as_materialized_series()
969            .vec_hash_combine(build_hasher, hashes)
970    }
971
972    pub fn append(&mut self, other: &Column) -> PolarsResult<&mut Self> {
973        // @scalar-opt
974        self.into_materialized_series()
975            .append(other.as_materialized_series())?;
976        Ok(self)
977    }
978    pub fn append_owned(&mut self, other: Column) -> PolarsResult<&mut Self> {
979        self.into_materialized_series()
980            .append_owned(other.take_materialized_series())?;
981        Ok(self)
982    }
983
    /// Returns the indices that would sort this column according to `options`.
    ///
    /// The implementation has several paths:
    /// 1. An empty column yields an empty index array.
    /// 2. A column consisting only of nulls yields `0..len`, reversed when
    ///    `options.descending` is set.
    /// 3. A column whose sorted flag is `IsSorted::Not` is delegated to `Series::arg_sort`.
    /// 4. Otherwise the sorted flag is trusted and the indices are generated directly, taking
    ///    `descending`, `nulls_last` and `maintain_order` into account. `options.limit` is
    ///    applied at the end of this path by truncating the result.
    ///
    /// NOTE(review): paths 1 and 2 return without looking at `options.limit` — confirm that
    /// the limit is only an optimization hint and may be ignored there.
    pub fn arg_sort(&self, options: SortOptions) -> IdxCa {
        if self.is_empty() {
            return IdxCa::from_vec(self.name().clone(), Vec::new());
        }

        if self.null_count() == self.len() {
            // We might need to maintain order so just respect the descending parameter.
            let values = if options.descending {
                (0..self.len() as IdxSize).rev().collect()
            } else {
                (0..self.len() as IdxSize).collect()
            };

            return IdxCa::from_vec(self.name().clone(), values);
        }

        // Fall back to the generic sort unless the column is flagged as sorted (in either
        // direction).
        let is_sorted = Some(self.is_sorted_flag());
        let Some(is_sorted) = is_sorted.filter(|v| !matches!(v, IsSorted::Not)) else {
            return self.as_materialized_series().arg_sort(options);
        };

        // Fast path: the data is sorted.
        let is_sorted_dsc = matches!(is_sorted, IsSorted::Descending);
        // The current order has to be flipped when it differs from the requested direction.
        let invert = options.descending != is_sorted_dsc;

        let mut values = Vec::with_capacity(self.len());

        // Appends the output indices for the input range `start..end` of `slf` to `values`.
        //
        // `is_only_nulls` marks a range that is emitted in its original order regardless of
        // `invert`.
        #[inline(never)]
        fn extend(
            start: IdxSize,
            end: IdxSize,
            slf: &Column,
            values: &mut Vec<IdxSize>,
            is_only_nulls: bool,
            invert: bool,
            maintain_order: bool,
        ) {
            debug_assert!(start <= end);
            debug_assert!(start as usize <= slf.len());
            debug_assert!(end as usize <= slf.len());

            if !invert || is_only_nulls {
                values.extend(start..end);
                return;
            }

            // If we don't have to maintain order but we have to invert. Just flip it around.
            if !maintain_order {
                values.extend((start..end).rev());
                return;
            }

            // If we want to maintain order but we also need to invert, we need to invert
            // per group of items.
            //
            // @NOTE: Since the column is sorted, arg_unique can also take a fast path and
            // just do a single traversal.
            let arg_unique = slf
                .slice(start as i64, (end - start) as usize)
                .arg_unique()
                .unwrap();

            assert!(!arg_unique.has_nulls());

            let num_unique = arg_unique.len();

            // Fast path: all items are unique.
            if num_unique == (end - start) as usize {
                values.extend((start..end).rev());
                return;
            }

            // A single distinct value: inverting changes nothing.
            if num_unique == 1 {
                values.extend(start..end);
                return;
            }

            // Walk the group starts from back to front and emit each group
            // (`idx..prev_idx`, relative to `start`) in its original order.
            //
            // NOTE(review): chunks are visited front to back while the values inside each
            // chunk are visited in reverse — presumably `arg_unique` yields a single chunk
            // here; confirm.
            let mut prev_idx = end - start;
            for chunk in arg_unique.downcast_iter() {
                for &idx in chunk.values().as_slice().iter().rev() {
                    values.extend(start + idx..start + prev_idx);
                    prev_idx = idx;
                }
            }
        }
        // Convenience wrapper around `extend` that supplies the captured state
        // (`self`, `values`, `invert`, `options.maintain_order`).
        macro_rules! extend {
            ($start:expr, $end:expr) => {
                extend!($start, $end, is_only_nulls = false);
            };
            ($start:expr, $end:expr, is_only_nulls = $is_only_nulls:expr) => {
                extend(
                    $start,
                    $end,
                    self,
                    &mut values,
                    $is_only_nulls,
                    invert,
                    options.maintain_order,
                );
            };
        }

        let length = self.len() as IdxSize;
        let null_count = self.null_count() as IdxSize;

        if null_count == 0 {
            extend!(0, length);
        } else {
            // The nulls sit at the end when the last value is null, otherwise they are
            // treated as sitting at the front.
            let has_nulls_last = self.get(self.len() - 1).unwrap().is_null();
            match (options.nulls_last, has_nulls_last) {
                (true, true) => {
                    // Current: Nulls last, Wanted: Nulls last
                    extend!(0, length - null_count);
                    extend!(length - null_count, length, is_only_nulls = true);
                },
                (true, false) => {
                    // Current: Nulls first, Wanted: Nulls last
                    extend!(null_count, length);
                    extend!(0, null_count, is_only_nulls = true);
                },
                (false, true) => {
                    // Current: Nulls last, Wanted: Nulls first
                    extend!(length - null_count, length, is_only_nulls = true);
                    extend!(0, length - null_count);
                },
                (false, false) => {
                    // Current: Nulls first, Wanted: Nulls first
                    extend!(0, null_count, is_only_nulls = true);
                    extend!(null_count, length);
                },
            }
        }

        // @NOTE: This can theoretically be pushed into the previous operation but is it really
        // worth it... probably not...
        if let Some(limit) = options.limit {
            let limit = limit.min(length);
            values.truncate(limit as usize);
        }

        IdxCa::from_vec(self.name().clone(), values)
    }
1126
1127    pub fn arg_sort_multiple(
1128        &self,
1129        by: &[Column],
1130        options: &SortMultipleOptions,
1131    ) -> PolarsResult<IdxCa> {
1132        // @scalar-opt
1133        self.as_materialized_series().arg_sort_multiple(by, options)
1134    }
1135
1136    pub fn arg_unique(&self) -> PolarsResult<IdxCa> {
1137        match self {
1138            Column::Scalar(s) => Ok(IdxCa::new_vec(s.name().clone(), vec![0])),
1139            _ => self.as_materialized_series().arg_unique(),
1140        }
1141    }
1142
1143    pub fn bit_repr(&self) -> Option<BitRepr> {
1144        // @scalar-opt
1145        self.as_materialized_series().bit_repr()
1146    }
1147
1148    pub fn into_frame(self) -> DataFrame {
1149        // SAFETY: A single-column dataframe cannot have length mismatches or duplicate names
1150        unsafe { DataFrame::new_no_checks(self.len(), vec![self]) }
1151    }
1152
1153    pub fn extend(&mut self, other: &Column) -> PolarsResult<&mut Self> {
1154        // @scalar-opt
1155        self.into_materialized_series()
1156            .extend(other.as_materialized_series())?;
1157        Ok(self)
1158    }
1159
1160    pub fn rechunk(&self) -> Column {
1161        match self {
1162            Column::Series(s) => s.rechunk().into(),
1163            Column::Scalar(s) => {
1164                if s.lazy_as_materialized_series()
1165                    .filter(|x| x.n_chunks() > 1)
1166                    .is_some()
1167                {
1168                    Column::Scalar(ScalarColumn::new(
1169                        s.name().clone(),
1170                        s.scalar().clone(),
1171                        s.len(),
1172                    ))
1173                } else {
1174                    self.clone()
1175                }
1176            },
1177        }
1178    }
1179
1180    pub fn explode(&self, options: ExplodeOptions) -> PolarsResult<Column> {
1181        self.as_materialized_series()
1182            .explode(options)
1183            .map(Column::from)
1184    }
1185    pub fn implode(&self) -> PolarsResult<ListChunked> {
1186        self.as_materialized_series().implode()
1187    }
1188
1189    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
1190        // @scalar-opt
1191        self.as_materialized_series()
1192            .fill_null(strategy)
1193            .map(Column::from)
1194    }
1195
1196    pub fn divide(&self, rhs: &Column) -> PolarsResult<Self> {
1197        // @scalar-opt
1198        self.as_materialized_series()
1199            .divide(rhs.as_materialized_series())
1200            .map(Column::from)
1201    }
1202
1203    pub fn shift(&self, periods: i64) -> Column {
1204        // @scalar-opt
1205        self.as_materialized_series().shift(periods).into()
1206    }
1207
1208    #[cfg(feature = "zip_with")]
1209    pub fn zip_with(&self, mask: &BooleanChunked, other: &Self) -> PolarsResult<Self> {
1210        // @scalar-opt
1211        self.as_materialized_series()
1212            .zip_with(mask, other.as_materialized_series())
1213            .map(Self::from)
1214    }
1215
1216    #[cfg(feature = "zip_with")]
1217    pub fn zip_with_same_type(
1218        &self,
1219        mask: &ChunkedArray<BooleanType>,
1220        other: &Column,
1221    ) -> PolarsResult<Column> {
1222        // @scalar-opt
1223        self.as_materialized_series()
1224            .zip_with_same_type(mask, other.as_materialized_series())
1225            .map(Column::from)
1226    }
1227
1228    pub fn drop_nulls(&self) -> Column {
1229        match self {
1230            Column::Series(s) => s.drop_nulls().into_column(),
1231            Column::Scalar(s) => s.drop_nulls().into_column(),
1232        }
1233    }
1234
1235    /// Packs every element into a list.
1236    pub fn as_list(&self) -> ListChunked {
1237        // @scalar-opt
1238        self.as_materialized_series().as_list()
1239    }
1240
1241    pub fn is_sorted_flag(&self) -> IsSorted {
1242        match self {
1243            Column::Series(s) => s.is_sorted_flag(),
1244            Column::Scalar(_) => IsSorted::Ascending,
1245        }
1246    }
1247
1248    pub fn unique(&self) -> PolarsResult<Column> {
1249        match self {
1250            Column::Series(s) => s.unique().map(Column::from),
1251            Column::Scalar(s) => {
1252                _ = s.as_single_value_series().unique()?;
1253                if s.is_empty() {
1254                    return Ok(s.clone().into_column());
1255                }
1256
1257                Ok(s.resize(1).into_column())
1258            },
1259        }
1260    }
1261    pub fn unique_stable(&self) -> PolarsResult<Column> {
1262        match self {
1263            Column::Series(s) => s.unique_stable().map(Column::from),
1264            Column::Scalar(s) => {
1265                _ = s.as_single_value_series().unique_stable()?;
1266                if s.is_empty() {
1267                    return Ok(s.clone().into_column());
1268                }
1269
1270                Ok(s.resize(1).into_column())
1271            },
1272        }
1273    }
1274
1275    pub fn reshape_list(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
1276        // @scalar-opt
1277        self.as_materialized_series()
1278            .reshape_list(dimensions)
1279            .map(Self::from)
1280    }
1281
1282    #[cfg(feature = "dtype-array")]
1283    pub fn reshape_array(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
1284        // @scalar-opt
1285        self.as_materialized_series()
1286            .reshape_array(dimensions)
1287            .map(Self::from)
1288    }
1289
1290    pub fn sort(&self, sort_options: SortOptions) -> PolarsResult<Self> {
1291        // @scalar-opt
1292        self.as_materialized_series()
1293            .sort(sort_options)
1294            .map(Self::from)
1295    }
1296
1297    pub fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Self> {
1298        match self {
1299            Column::Series(s) => s.filter(filter).map(Column::from),
1300            Column::Scalar(s) => {
1301                if s.is_empty() {
1302                    return Ok(s.clone().into_column());
1303                }
1304
1305                // Broadcasting
1306                if filter.len() == 1 {
1307                    return match filter.get(0) {
1308                        Some(true) => Ok(s.clone().into_column()),
1309                        _ => Ok(s.resize(0).into_column()),
1310                    };
1311                }
1312
1313                Ok(s.resize(filter.sum().unwrap() as usize).into_column())
1314            },
1315        }
1316    }
1317
1318    #[cfg(feature = "random")]
1319    pub fn shuffle(&self, seed: Option<u64>) -> Self {
1320        // @scalar-opt
1321        self.as_materialized_series().shuffle(seed).into()
1322    }
1323
1324    #[cfg(feature = "random")]
1325    pub fn sample_frac(
1326        &self,
1327        frac: f64,
1328        with_replacement: bool,
1329        shuffle: bool,
1330        seed: Option<u64>,
1331    ) -> PolarsResult<Self> {
1332        self.as_materialized_series()
1333            .sample_frac(frac, with_replacement, shuffle, seed)
1334            .map(Self::from)
1335    }
1336
1337    #[cfg(feature = "random")]
1338    pub fn sample_n(
1339        &self,
1340        n: usize,
1341        with_replacement: bool,
1342        shuffle: bool,
1343        seed: Option<u64>,
1344    ) -> PolarsResult<Self> {
1345        self.as_materialized_series()
1346            .sample_n(n, with_replacement, shuffle, seed)
1347            .map(Self::from)
1348    }
1349
1350    pub fn gather_every(&self, n: usize, offset: usize) -> PolarsResult<Column> {
1351        polars_ensure!(n > 0, InvalidOperation: "gather_every(n): n should be positive");
1352        if self.len().saturating_sub(offset) == 0 {
1353            return Ok(self.clear());
1354        }
1355
1356        match self {
1357            Column::Series(s) => Ok(s.gather_every(n, offset)?.into()),
1358            Column::Scalar(s) => {
1359                let total = s.len() - offset;
1360                Ok(s.resize(1 + (total - 1) / n).into())
1361            },
1362        }
1363    }
1364
1365    pub fn extend_constant(&self, value: AnyValue, n: usize) -> PolarsResult<Self> {
1366        if self.is_empty() {
1367            return Ok(Self::new_scalar(
1368                self.name().clone(),
1369                Scalar::new(self.dtype().clone(), value.into_static()),
1370                n,
1371            ));
1372        }
1373
1374        match self {
1375            Column::Series(s) => s.extend_constant(value, n).map(Column::from),
1376            Column::Scalar(s) => {
1377                if s.scalar().as_any_value() == value {
1378                    Ok(s.resize(s.len() + n).into())
1379                } else {
1380                    s.as_materialized_series()
1381                        .extend_constant(value, n)
1382                        .map(Column::from)
1383                }
1384            },
1385        }
1386    }
1387
1388    pub fn is_finite(&self) -> PolarsResult<BooleanChunked> {
1389        self.try_map_unary_elementwise_to_bool(|s| s.is_finite())
1390    }
1391    pub fn is_infinite(&self) -> PolarsResult<BooleanChunked> {
1392        self.try_map_unary_elementwise_to_bool(|s| s.is_infinite())
1393    }
1394    pub fn is_nan(&self) -> PolarsResult<BooleanChunked> {
1395        self.try_map_unary_elementwise_to_bool(|s| s.is_nan())
1396    }
1397    pub fn is_not_nan(&self) -> PolarsResult<BooleanChunked> {
1398        self.try_map_unary_elementwise_to_bool(|s| s.is_not_nan())
1399    }
1400
1401    pub fn wrapping_trunc_div_scalar<T>(&self, rhs: T) -> Self
1402    where
1403        T: Num + NumCast,
1404    {
1405        // @scalar-opt
1406        self.as_materialized_series()
1407            .wrapping_trunc_div_scalar(rhs)
1408            .into()
1409    }
1410
1411    pub fn product(&self) -> PolarsResult<Scalar> {
1412        // @scalar-opt
1413        self.as_materialized_series().product()
1414    }
1415
1416    pub fn phys_iter(&self) -> SeriesPhysIter<'_> {
1417        // @scalar-opt
1418        self.as_materialized_series().phys_iter()
1419    }
1420
1421    #[inline]
1422    pub fn get(&self, index: usize) -> PolarsResult<AnyValue<'_>> {
1423        polars_ensure!(index < self.len(), oob = index, self.len());
1424
1425        // SAFETY: Bounds check done just before.
1426        Ok(unsafe { self.get_unchecked(index) })
1427    }
1428    /// # Safety
1429    ///
1430    /// Does not perform bounds check on `index`
1431    #[inline(always)]
1432    pub unsafe fn get_unchecked(&self, index: usize) -> AnyValue<'_> {
1433        debug_assert!(index < self.len());
1434
1435        match self {
1436            Column::Series(s) => unsafe { s.get_unchecked(index) },
1437            Column::Scalar(s) => s.scalar().as_any_value(),
1438        }
1439    }
1440
1441    #[cfg(feature = "object")]
1442    pub fn get_object(
1443        &self,
1444        index: usize,
1445    ) -> Option<&dyn crate::chunked_array::object::PolarsObjectSafe> {
1446        self.as_materialized_series().get_object(index)
1447    }
1448
1449    pub fn bitand(&self, rhs: &Self) -> PolarsResult<Self> {
1450        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l & r)
1451    }
1452    pub fn bitor(&self, rhs: &Self) -> PolarsResult<Self> {
1453        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l | r)
1454    }
1455    pub fn bitxor(&self, rhs: &Self) -> PolarsResult<Self> {
1456        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l ^ r)
1457    }
1458
1459    pub fn try_add_owned(self, other: Self) -> PolarsResult<Self> {
1460        match (self, other) {
1461            (Column::Series(lhs), Column::Series(rhs)) => {
1462                lhs.take().try_add_owned(rhs.take()).map(Column::from)
1463            },
1464            (lhs, rhs) => lhs + rhs,
1465        }
1466    }
1467    pub fn try_sub_owned(self, other: Self) -> PolarsResult<Self> {
1468        match (self, other) {
1469            (Column::Series(lhs), Column::Series(rhs)) => {
1470                lhs.take().try_sub_owned(rhs.take()).map(Column::from)
1471            },
1472            (lhs, rhs) => lhs - rhs,
1473        }
1474    }
1475    pub fn try_mul_owned(self, other: Self) -> PolarsResult<Self> {
1476        match (self, other) {
1477            (Column::Series(lhs), Column::Series(rhs)) => {
1478                lhs.take().try_mul_owned(rhs.take()).map(Column::from)
1479            },
1480            (lhs, rhs) => lhs * rhs,
1481        }
1482    }
1483
1484    pub(crate) fn str_value(&self, index: usize) -> PolarsResult<Cow<'_, str>> {
1485        Ok(self.get(index)?.str_value())
1486    }
1487
1488    pub fn min_reduce(&self) -> PolarsResult<Scalar> {
1489        match self {
1490            Column::Series(s) => s.min_reduce(),
1491            Column::Scalar(s) => {
1492                // We don't really want to deal with handling the full semantics here so we just
1493                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1494                s.as_single_value_series().min_reduce()
1495            },
1496        }
1497    }
1498    pub fn max_reduce(&self) -> PolarsResult<Scalar> {
1499        match self {
1500            Column::Series(s) => s.max_reduce(),
1501            Column::Scalar(s) => {
1502                // We don't really want to deal with handling the full semantics here so we just
1503                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1504                s.as_single_value_series().max_reduce()
1505            },
1506        }
1507    }
1508    pub fn median_reduce(&self) -> PolarsResult<Scalar> {
1509        match self {
1510            Column::Series(s) => s.median_reduce(),
1511            Column::Scalar(s) => {
1512                // We don't really want to deal with handling the full semantics here so we just
1513                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1514                s.as_single_value_series().median_reduce()
1515            },
1516        }
1517    }
1518    pub fn mean_reduce(&self) -> PolarsResult<Scalar> {
1519        match self {
1520            Column::Series(s) => s.mean_reduce(),
1521            Column::Scalar(s) => {
1522                // We don't really want to deal with handling the full semantics here so we just
1523                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1524                s.as_single_value_series().mean_reduce()
1525            },
1526        }
1527    }
1528    pub fn std_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
1529        match self {
1530            Column::Series(s) => s.std_reduce(ddof),
1531            Column::Scalar(s) => {
1532                // We don't really want to deal with handling the full semantics here so we just
1533                // cast to a small series. This is a tiny bit wasteful, but probably fine.
1534                let n = s.len().min(ddof as usize + 1);
1535                s.as_n_values_series(n).std_reduce(ddof)
1536            },
1537        }
1538    }
1539    pub fn var_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
1540        match self {
1541            Column::Series(s) => s.var_reduce(ddof),
1542            Column::Scalar(s) => {
1543                // We don't really want to deal with handling the full semantics here so we just
1544                // cast to a small series. This is a tiny bit wasteful, but probably fine.
1545                let n = s.len().min(ddof as usize + 1);
1546                s.as_n_values_series(n).var_reduce(ddof)
1547            },
1548        }
1549    }
1550    pub fn sum_reduce(&self) -> PolarsResult<Scalar> {
1551        // @scalar-opt
1552        self.as_materialized_series().sum_reduce()
1553    }
1554    pub fn and_reduce(&self) -> PolarsResult<Scalar> {
1555        match self {
1556            Column::Series(s) => s.and_reduce(),
1557            Column::Scalar(s) => {
1558                // We don't really want to deal with handling the full semantics here so we just
1559                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1560                s.as_single_value_series().and_reduce()
1561            },
1562        }
1563    }
1564    pub fn or_reduce(&self) -> PolarsResult<Scalar> {
1565        match self {
1566            Column::Series(s) => s.or_reduce(),
1567            Column::Scalar(s) => {
1568                // We don't really want to deal with handling the full semantics here so we just
1569                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1570                s.as_single_value_series().or_reduce()
1571            },
1572        }
1573    }
1574    pub fn xor_reduce(&self) -> PolarsResult<Scalar> {
1575        match self {
1576            Column::Series(s) => s.xor_reduce(),
1577            Column::Scalar(s) => {
1578                // We don't really want to deal with handling the full semantics here so we just
1579                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1580                //
1581                // We have to deal with the fact that xor is 0 if there is an even number of
1582                // elements and the value if there is an odd number of elements. If there are zero
1583                // elements the result should be `null`.
1584                s.as_n_values_series(2 - s.len() % 2).xor_reduce()
1585            },
1586        }
1587    }
1588    pub fn n_unique(&self) -> PolarsResult<usize> {
1589        match self {
1590            Column::Series(s) => s.n_unique(),
1591            Column::Scalar(s) => s.as_single_value_series().n_unique(),
1592        }
1593    }
1594    pub fn quantile_reduce(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Scalar> {
1595        self.as_materialized_series()
1596            .quantile_reduce(quantile, method)
1597    }
1598
1599    pub(crate) fn estimated_size(&self) -> usize {
1600        // @scalar-opt
1601        self.as_materialized_series().estimated_size()
1602    }
1603
1604    pub fn sort_with(&self, options: SortOptions) -> PolarsResult<Self> {
1605        match self {
1606            Column::Series(s) => s.sort_with(options).map(Self::from),
1607            Column::Scalar(s) => {
1608                // This makes this function throw the same errors as Series::sort_with
1609                _ = s.as_single_value_series().sort_with(options)?;
1610
1611                Ok(self.clone())
1612            },
1613        }
1614    }
1615
1616    pub fn map_unary_elementwise_to_bool(
1617        &self,
1618        f: impl Fn(&Series) -> BooleanChunked,
1619    ) -> BooleanChunked {
1620        self.try_map_unary_elementwise_to_bool(|s| Ok(f(s)))
1621            .unwrap()
1622    }
1623    pub fn try_map_unary_elementwise_to_bool(
1624        &self,
1625        f: impl Fn(&Series) -> PolarsResult<BooleanChunked>,
1626    ) -> PolarsResult<BooleanChunked> {
1627        match self {
1628            Column::Series(s) => f(s),
1629            Column::Scalar(s) => Ok(f(&s.as_single_value_series())?.new_from_index(0, s.len())),
1630        }
1631    }
1632
1633    pub fn apply_unary_elementwise(&self, f: impl Fn(&Series) -> Series) -> Column {
1634        self.try_apply_unary_elementwise(|s| Ok(f(s))).unwrap()
1635    }
1636    pub fn try_apply_unary_elementwise(
1637        &self,
1638        f: impl Fn(&Series) -> PolarsResult<Series>,
1639    ) -> PolarsResult<Column> {
1640        match self {
1641            Column::Series(s) => f(s).map(Column::from),
1642            Column::Scalar(s) => Ok(ScalarColumn::from_single_value_series(
1643                f(&s.as_single_value_series())?,
1644                s.len(),
1645            )
1646            .into()),
1647        }
1648    }
1649
1650    pub fn apply_broadcasting_binary_elementwise(
1651        &self,
1652        other: &Self,
1653        op: impl Fn(&Series, &Series) -> Series,
1654    ) -> PolarsResult<Column> {
1655        self.try_apply_broadcasting_binary_elementwise(other, |lhs, rhs| Ok(op(lhs, rhs)))
1656    }
1657    pub fn try_apply_broadcasting_binary_elementwise(
1658        &self,
1659        other: &Self,
1660        op: impl Fn(&Series, &Series) -> PolarsResult<Series>,
1661    ) -> PolarsResult<Column> {
1662        fn output_length(a: &Column, b: &Column) -> PolarsResult<usize> {
1663            match (a.len(), b.len()) {
1664                // broadcasting
1665                (1, o) | (o, 1) => Ok(o),
1666                // equal
1667                (a, b) if a == b => Ok(a),
1668                // unequal
1669                (a, b) => {
1670                    polars_bail!(InvalidOperation: "cannot do a binary operation on columns of different lengths: got {} and {}", a, b)
1671                },
1672            }
1673        }
1674
1675        // Here we rely on the underlying broadcast operations.
1676        let length = output_length(self, other)?;
1677        match (self, other) {
1678            (Column::Series(lhs), Column::Series(rhs)) => op(lhs, rhs).map(Column::from),
1679            (Column::Series(lhs), Column::Scalar(rhs)) => {
1680                op(lhs, &rhs.as_single_value_series()).map(Column::from)
1681            },
1682            (Column::Scalar(lhs), Column::Series(rhs)) => {
1683                op(&lhs.as_single_value_series(), rhs).map(Column::from)
1684            },
1685            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
1686                let lhs = lhs.as_single_value_series();
1687                let rhs = rhs.as_single_value_series();
1688
1689                Ok(ScalarColumn::from_single_value_series(op(&lhs, &rhs)?, length).into_column())
1690            },
1691        }
1692    }
1693
1694    pub fn apply_binary_elementwise(
1695        &self,
1696        other: &Self,
1697        f: impl Fn(&Series, &Series) -> Series,
1698        f_lb: impl Fn(&Scalar, &Series) -> Series,
1699        f_rb: impl Fn(&Series, &Scalar) -> Series,
1700    ) -> Column {
1701        self.try_apply_binary_elementwise(
1702            other,
1703            |lhs, rhs| Ok(f(lhs, rhs)),
1704            |lhs, rhs| Ok(f_lb(lhs, rhs)),
1705            |lhs, rhs| Ok(f_rb(lhs, rhs)),
1706        )
1707        .unwrap()
1708    }
1709    pub fn try_apply_binary_elementwise(
1710        &self,
1711        other: &Self,
1712        f: impl Fn(&Series, &Series) -> PolarsResult<Series>,
1713        f_lb: impl Fn(&Scalar, &Series) -> PolarsResult<Series>,
1714        f_rb: impl Fn(&Series, &Scalar) -> PolarsResult<Series>,
1715    ) -> PolarsResult<Column> {
1716        debug_assert_eq!(self.len(), other.len());
1717
1718        match (self, other) {
1719            (Column::Series(lhs), Column::Series(rhs)) => f(lhs, rhs).map(Column::from),
1720            (Column::Series(lhs), Column::Scalar(rhs)) => f_rb(lhs, rhs.scalar()).map(Column::from),
1721            (Column::Scalar(lhs), Column::Series(rhs)) => f_lb(lhs.scalar(), rhs).map(Column::from),
1722            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
1723                let lhs = lhs.as_single_value_series();
1724                let rhs = rhs.as_single_value_series();
1725
1726                Ok(
1727                    ScalarColumn::from_single_value_series(f(&lhs, &rhs)?, self.len())
1728                        .into_column(),
1729                )
1730            },
1731        }
1732    }
1733
1734    #[cfg(feature = "approx_unique")]
1735    pub fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
1736        match self {
1737            Column::Series(s) => s.approx_n_unique(),
1738            Column::Scalar(s) => {
1739                // @NOTE: We do this for the error handling.
1740                s.as_single_value_series().approx_n_unique()?;
1741                Ok(1)
1742            },
1743        }
1744    }
1745
1746    pub fn n_chunks(&self) -> usize {
1747        match self {
1748            Column::Series(s) => s.n_chunks(),
1749            Column::Scalar(s) => s.lazy_as_materialized_series().map_or(1, |x| x.n_chunks()),
1750        }
1751    }
1752
1753    #[expect(clippy::wrong_self_convention)]
1754    pub(crate) fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
1755        // @scalar-opt
1756        self.as_materialized_series().into_total_ord_inner()
1757    }
1758    #[expect(unused, clippy::wrong_self_convention)]
1759    pub(crate) fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
1760        // @scalar-opt
1761        self.as_materialized_series().into_total_eq_inner()
1762    }
1763
1764    pub fn rechunk_to_arrow(self, compat_level: CompatLevel) -> Box<dyn Array> {
1765        // Rechunk to one chunk if necessary
1766        let mut series = self.take_materialized_series();
1767        if series.n_chunks() > 1 {
1768            series = series.rechunk();
1769        }
1770        series.to_arrow(0, compat_level)
1771    }
1772
1773    pub fn trim_lists_to_normalized_offsets(&self) -> Option<Column> {
1774        self.as_materialized_series()
1775            .trim_lists_to_normalized_offsets()
1776            .map(Column::from)
1777    }
1778
1779    pub fn propagate_nulls(&self) -> Option<Column> {
1780        self.as_materialized_series()
1781            .propagate_nulls()
1782            .map(Column::from)
1783    }
1784
1785    pub fn deposit(&self, validity: &Bitmap) -> Column {
1786        self.as_materialized_series()
1787            .deposit(validity)
1788            .into_column()
1789    }
1790
1791    pub fn rechunk_validity(&self) -> Option<Bitmap> {
1792        // @scalar-opt
1793        self.as_materialized_series().rechunk_validity()
1794    }
1795
1796    pub fn unique_id(&self) -> PolarsResult<(IdxSize, Vec<IdxSize>)> {
1797        self.as_materialized_series().unique_id()
1798    }
1799}
1800
1801impl Default for Column {
1802    fn default() -> Self {
1803        Self::new_scalar(
1804            PlSmallStr::EMPTY,
1805            Scalar::new(DataType::Int64, AnyValue::Null),
1806            0,
1807        )
1808    }
1809}
1810
1811impl PartialEq for Column {
1812    fn eq(&self, other: &Self) -> bool {
1813        // @scalar-opt
1814        self.as_materialized_series()
1815            .eq(other.as_materialized_series())
1816    }
1817}
1818
1819impl From<Series> for Column {
1820    #[inline]
1821    fn from(series: Series) -> Self {
1822        // We instantiate a Scalar Column if the Series is length is 1. This makes it possible for
1823        // future operations to be faster.
1824        if series.len() == 1 {
1825            return Self::Scalar(ScalarColumn::unit_scalar_from_series(series));
1826        }
1827
1828        Self::Series(SeriesColumn::new(series))
1829    }
1830}
1831
1832impl<T: IntoSeries> IntoColumn for T {
1833    #[inline]
1834    fn into_column(self) -> Column {
1835        self.into_series().into()
1836    }
1837}
1838
impl IntoColumn for Column {
    /// A [`Column`] is already a column: returned unchanged.
    #[inline(always)]
    fn into_column(self) -> Column {
        self
    }
}
1845
/// Serialization helper wrapping a [`Series`]; serialized by converting into that `Series`
/// (see the `serde(into = "Series")` attribute below).
///
/// We don't want to serialize the scalar columns. So this helps pretend that columns are always
/// initialized without implementing `From<Column>` for `Series`.
///
/// Those casts should be explicit.
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(into = "Series"))]
struct _SerdeSeries(Series);
1854
1855impl From<Column> for _SerdeSeries {
1856    #[inline]
1857    fn from(value: Column) -> Self {
1858        Self(value.take_materialized_series())
1859    }
1860}
1861
1862impl From<_SerdeSeries> for Series {
1863    #[inline]
1864    fn from(value: _SerdeSeries) -> Self {
1865        value.0
1866    }
1867}