// polars_core/frame/column/mod.rs — the `Column` abstraction over a
// materialized `Series` or a repeated-`Scalar` representation.
1use std::borrow::Cow;
2
3use arrow::bitmap::{Bitmap, BitmapBuilder};
4use arrow::trusted_len::TrustMyLength;
5use num_traits::{Num, NumCast};
6use polars_compute::rolling::QuantileMethod;
7use polars_error::PolarsResult;
8use polars_utils::aliases::PlSeedableRandomStateQuality;
9use polars_utils::index::check_bounds;
10use polars_utils::pl_str::PlSmallStr;
11pub use scalar::ScalarColumn;
12
13use self::compare_inner::{TotalEqInner, TotalOrdInner};
14use self::gather::check_bounds_ca;
15use self::series::SeriesColumn;
16use crate::chunked_array::cast::CastOptions;
17use crate::chunked_array::flags::StatisticsFlags;
18use crate::datatypes::ReshapeDimension;
19use crate::prelude::*;
20use crate::series::{BitRepr, IsSorted, SeriesPhysIter};
21use crate::utils::{Container, slice_offsets};
22use crate::{HEAD_DEFAULT_LENGTH, TAIL_DEFAULT_LENGTH};
23
24mod arithmetic;
25mod compare;
26mod scalar;
27mod series;
28
/// A column within a [`DataFrame`].
///
/// This is lazily initialized to a [`Series`] with methods like
/// [`as_materialized_series`][Column::as_materialized_series] and
/// [`take_materialized_series`][Column::take_materialized_series].
///
/// Currently, there are two ways to represent a [`Column`].
/// 1. A [`Series`] of values
/// 2. A [`ScalarColumn`] that repeats a single [`Scalar`]
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum Column {
    /// Fully materialized values backed by a [`Series`].
    Series(SeriesColumn),
    /// A single [`Scalar`] logically repeated for the column's length.
    Scalar(ScalarColumn),
}
45
/// Convert `Self` into a [`Column`]
pub trait IntoColumn: Sized {
    /// Consume `self` and produce a [`Column`].
    fn into_column(self) -> Column;
}
50
51impl Column {
    /// Construct a new [`Column::Series`] from `values`, analogous to [`Series::new`].
    ///
    /// `Phantom` only disambiguates the [`NamedFrom`] impl and is never instantiated.
    #[inline]
    #[track_caller]
    pub fn new<T, Phantom>(name: PlSmallStr, values: T) -> Self
    where
        Phantom: ?Sized,
        Series: NamedFrom<T, Phantom>,
    {
        Self::Series(SeriesColumn::new(NamedFrom::new(name, values)))
    }
61
    /// Create a zero-length column of `dtype`, represented as a null scalar of length 0.
    #[inline]
    pub fn new_empty(name: PlSmallStr, dtype: &DataType) -> Self {
        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), 0)
    }
66
    /// Create a [`Column::Scalar`] that logically repeats `scalar` `length` times.
    #[inline]
    pub fn new_scalar(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
        Self::Scalar(ScalarColumn::new(name, scalar, length))
    }
71
72    pub fn new_row_index(name: PlSmallStr, offset: IdxSize, length: usize) -> PolarsResult<Column> {
73        let Ok(length) = IdxSize::try_from(length) else {
74            polars_bail!(
75                ComputeError:
76                "row index length {} overflows IdxSize::MAX ({})",
77                length,
78                IdxSize::MAX,
79            )
80        };
81
82        if offset.checked_add(length).is_none() {
83            polars_bail!(
84                ComputeError:
85                "row index with offset {} overflows on dataframe with height {}",
86                offset, length
87            )
88        }
89
90        let range = offset..offset + length;
91
92        let mut ca = IdxCa::from_vec(name, range.collect());
93        ca.set_sorted_flag(IsSorted::Ascending);
94        let col = ca.into_series().into();
95
96        Ok(col)
97    }
98
    // # Materialize
    /// Get a reference to a [`Series`] for this [`Column`]
    ///
    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
    #[inline]
    pub fn as_materialized_series(&self) -> &Series {
        match self {
            // `SeriesColumn` dereferences to `Series`.
            Column::Series(s) => s,
            Column::Scalar(s) => s.as_materialized_series(),
        }
    }
110
111    /// If the memory repr of this Column is a scalar, a unit-length Series will
112    /// be returned.
113    #[inline]
114    pub fn as_materialized_series_maintain_scalar(&self) -> Series {
115        match self {
116            Column::Scalar(s) => s.as_single_value_series(),
117            v => v.as_materialized_series().clone(),
118        }
119    }
120
    /// Returns the backing `Series` for the values of this column.
    ///
    /// * For `Column::Series` columns, simply returns the inner `Series`.
    /// * For `Column::Scalar` columns, returns an empty or unit length series.
    ///
    /// # Note
    /// This method is safe to use. However, care must be taken when operating on the returned
    /// `Series` to ensure result correctness. E.g. It is suitable to perform elementwise operations
    /// on it, however e.g. aggregations will return unspecified results.
    pub fn _get_backing_series(&self) -> Series {
        match self {
            Column::Series(s) => (**s).clone(),
            Column::Scalar(s) => s.as_single_value_series(),
        }
    }
136
    /// Constructs a new `Column` of the same variant as `self` from a backing `Series` representing
    /// the values.
    ///
    /// # Panics
    /// Panics if:
    /// * `self` is `Column::Series` and the length of `new_s` does not match that of `self`.
    /// * `self` is `Column::Scalar` and if either:
    ///   * `self` is not empty and `new_s` is not of unit length.
    ///   * `self` is empty and `new_s` is not empty.
    pub fn _to_new_from_backing(&self, new_s: Series) -> Self {
        match self {
            Column::Series(s) => {
                assert_eq!(new_s.len(), s.len());
                Column::Series(SeriesColumn::new(new_s))
            },
            Column::Scalar(s) => {
                // `as_single_value_series()` has length 0 or 1, matching the panic contract above.
                assert_eq!(new_s.len(), s.as_single_value_series().len());
                Column::Scalar(ScalarColumn::from_single_value_series(new_s, self.len()))
            },
        }
    }
158
    /// Turn [`Column`] into a [`Column::Series`].
    ///
    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
    #[inline]
    pub fn into_materialized_series(&mut self) -> &mut Series {
        match self {
            Column::Series(s) => s,
            Column::Scalar(s) => {
                // Swap a cheap placeholder into place so the scalar column can be
                // taken by value and materialized into a full Series.
                let series = std::mem::replace(
                    s,
                    ScalarColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),
                )
                .take_materialized_series();
                *self = Column::Series(series.into());
                // We just stored the Series variant, so this pattern cannot fail.
                let Column::Series(s) = self else {
                    unreachable!();
                };
                s
            },
        }
    }
    /// Take [`Series`] from a [`Column`]
    ///
    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
    #[inline]
    pub fn take_materialized_series(self) -> Series {
        match self {
            Column::Series(s) => s.take(),
            Column::Scalar(s) => s.take_materialized_series(),
        }
    }
190
    /// The data type of this column.
    #[inline]
    pub fn dtype(&self) -> &DataType {
        match self {
            Column::Series(s) => s.dtype(),
            Column::Scalar(s) => s.dtype(),
        }
    }

    /// The `(name, dtype)` field of this column.
    #[inline]
    pub fn field(&self) -> Cow<'_, Field> {
        match self {
            Column::Series(s) => s.field(),
            Column::Scalar(s) => match s.lazy_as_materialized_series() {
                // Not yet materialized: build the field without forcing materialization.
                None => Cow::Owned(Field::new(s.name().clone(), s.dtype().clone())),
                Some(s) => s.field(),
            },
        }
    }

    /// The name of this column.
    #[inline]
    pub fn name(&self) -> &PlSmallStr {
        match self {
            Column::Series(s) => s.name(),
            Column::Scalar(s) => s.name(),
        }
    }

    /// The number of rows in this column.
    #[inline]
    pub fn len(&self) -> usize {
        match self {
            Column::Series(s) => s.len(),
            Column::Scalar(s) => s.len(),
        }
    }
225
    /// Return `self` renamed to `name` (builder-style convenience over [`Column::rename`]).
    #[inline]
    pub fn with_name(mut self, name: PlSmallStr) -> Column {
        self.rename(name);
        self
    }

    /// Rename this column in place.
    #[inline]
    pub fn rename(&mut self, name: PlSmallStr) {
        match self {
            // `_ =` discards the inner rename's return value.
            Column::Series(s) => _ = s.rename(name),
            Column::Scalar(s) => _ = s.rename(name),
        }
    }
239
    // # Downcasting
    /// Borrow the inner [`Series`], or `None` if this is not a [`Column::Series`].
    #[inline]
    pub fn as_series(&self) -> Option<&Series> {
        match self {
            Column::Series(s) => Some(s),
            _ => None,
        }
    }
    /// Borrow the inner [`ScalarColumn`], or `None` if this is not a [`Column::Scalar`].
    #[inline]
    pub fn as_scalar_column(&self) -> Option<&ScalarColumn> {
        match self {
            Column::Scalar(s) => Some(s),
            _ => None,
        }
    }
    /// Mutably borrow the inner [`ScalarColumn`], or `None` if this is not a [`Column::Scalar`].
    #[inline]
    pub fn as_scalar_column_mut(&mut self) -> Option<&mut ScalarColumn> {
        match self {
            Column::Scalar(s) => Some(s),
            _ => None,
        }
    }
262
    // # Try to Chunked Arrays
    //
    // Infallible downcasts to a concrete `ChunkedArray`: each returns `None` when the
    // column's dtype does not match. All of these go through `as_materialized_series`,
    // so a scalar column is materialized first.
    pub fn try_bool(&self) -> Option<&BooleanChunked> {
        self.as_materialized_series().try_bool()
    }
    pub fn try_i8(&self) -> Option<&Int8Chunked> {
        self.as_materialized_series().try_i8()
    }
    pub fn try_i16(&self) -> Option<&Int16Chunked> {
        self.as_materialized_series().try_i16()
    }
    pub fn try_i32(&self) -> Option<&Int32Chunked> {
        self.as_materialized_series().try_i32()
    }
    pub fn try_i64(&self) -> Option<&Int64Chunked> {
        self.as_materialized_series().try_i64()
    }
    pub fn try_u8(&self) -> Option<&UInt8Chunked> {
        self.as_materialized_series().try_u8()
    }
    pub fn try_u16(&self) -> Option<&UInt16Chunked> {
        self.as_materialized_series().try_u16()
    }
    pub fn try_u32(&self) -> Option<&UInt32Chunked> {
        self.as_materialized_series().try_u32()
    }
    pub fn try_u64(&self) -> Option<&UInt64Chunked> {
        self.as_materialized_series().try_u64()
    }
    #[cfg(feature = "dtype-u128")]
    pub fn try_u128(&self) -> Option<&UInt128Chunked> {
        self.as_materialized_series().try_u128()
    }
    #[cfg(feature = "dtype-f16")]
    pub fn try_f16(&self) -> Option<&Float16Chunked> {
        self.as_materialized_series().try_f16()
    }
    pub fn try_f32(&self) -> Option<&Float32Chunked> {
        self.as_materialized_series().try_f32()
    }
    pub fn try_f64(&self) -> Option<&Float64Chunked> {
        self.as_materialized_series().try_f64()
    }
    pub fn try_str(&self) -> Option<&StringChunked> {
        self.as_materialized_series().try_str()
    }
    pub fn try_list(&self) -> Option<&ListChunked> {
        self.as_materialized_series().try_list()
    }
    pub fn try_binary(&self) -> Option<&BinaryChunked> {
        self.as_materialized_series().try_binary()
    }
    pub fn try_idx(&self) -> Option<&IdxCa> {
        self.as_materialized_series().try_idx()
    }
    pub fn try_binary_offset(&self) -> Option<&BinaryOffsetChunked> {
        self.as_materialized_series().try_binary_offset()
    }
    #[cfg(feature = "dtype-datetime")]
    pub fn try_datetime(&self) -> Option<&DatetimeChunked> {
        self.as_materialized_series().try_datetime()
    }
    #[cfg(feature = "dtype-struct")]
    pub fn try_struct(&self) -> Option<&StructChunked> {
        self.as_materialized_series().try_struct()
    }
    #[cfg(feature = "dtype-decimal")]
    pub fn try_decimal(&self) -> Option<&DecimalChunked> {
        self.as_materialized_series().try_decimal()
    }
    #[cfg(feature = "dtype-array")]
    pub fn try_array(&self) -> Option<&ArrayChunked> {
        self.as_materialized_series().try_array()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn try_cat<T: PolarsCategoricalType>(&self) -> Option<&CategoricalChunked<T>> {
        self.as_materialized_series().try_cat::<T>()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn try_cat8(&self) -> Option<&Categorical8Chunked> {
        self.as_materialized_series().try_cat8()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn try_cat16(&self) -> Option<&Categorical16Chunked> {
        self.as_materialized_series().try_cat16()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn try_cat32(&self) -> Option<&Categorical32Chunked> {
        self.as_materialized_series().try_cat32()
    }
    #[cfg(feature = "dtype-date")]
    pub fn try_date(&self) -> Option<&DateChunked> {
        self.as_materialized_series().try_date()
    }
    #[cfg(feature = "dtype-duration")]
    pub fn try_duration(&self) -> Option<&DurationChunked> {
        self.as_materialized_series().try_duration()
    }
360
    // # To Chunked Arrays
    //
    // Fallible downcasts to a concrete `ChunkedArray`: each errors when the column's
    // dtype does not match. All of these materialize a scalar column first.
    pub fn bool(&self) -> PolarsResult<&BooleanChunked> {
        self.as_materialized_series().bool()
    }
    pub fn i8(&self) -> PolarsResult<&Int8Chunked> {
        self.as_materialized_series().i8()
    }
    pub fn i16(&self) -> PolarsResult<&Int16Chunked> {
        self.as_materialized_series().i16()
    }
    pub fn i32(&self) -> PolarsResult<&Int32Chunked> {
        self.as_materialized_series().i32()
    }
    pub fn i64(&self) -> PolarsResult<&Int64Chunked> {
        self.as_materialized_series().i64()
    }
    #[cfg(feature = "dtype-i128")]
    pub fn i128(&self) -> PolarsResult<&Int128Chunked> {
        self.as_materialized_series().i128()
    }
    pub fn u8(&self) -> PolarsResult<&UInt8Chunked> {
        self.as_materialized_series().u8()
    }
    pub fn u16(&self) -> PolarsResult<&UInt16Chunked> {
        self.as_materialized_series().u16()
    }
    pub fn u32(&self) -> PolarsResult<&UInt32Chunked> {
        self.as_materialized_series().u32()
    }
    pub fn u64(&self) -> PolarsResult<&UInt64Chunked> {
        self.as_materialized_series().u64()
    }
    #[cfg(feature = "dtype-u128")]
    pub fn u128(&self) -> PolarsResult<&UInt128Chunked> {
        self.as_materialized_series().u128()
    }
    #[cfg(feature = "dtype-f16")]
    pub fn f16(&self) -> PolarsResult<&Float16Chunked> {
        self.as_materialized_series().f16()
    }
    pub fn f32(&self) -> PolarsResult<&Float32Chunked> {
        self.as_materialized_series().f32()
    }
    pub fn f64(&self) -> PolarsResult<&Float64Chunked> {
        self.as_materialized_series().f64()
    }
    pub fn str(&self) -> PolarsResult<&StringChunked> {
        self.as_materialized_series().str()
    }
    pub fn list(&self) -> PolarsResult<&ListChunked> {
        self.as_materialized_series().list()
    }
    pub fn binary(&self) -> PolarsResult<&BinaryChunked> {
        self.as_materialized_series().binary()
    }
    pub fn idx(&self) -> PolarsResult<&IdxCa> {
        self.as_materialized_series().idx()
    }
    pub fn binary_offset(&self) -> PolarsResult<&BinaryOffsetChunked> {
        self.as_materialized_series().binary_offset()
    }
    #[cfg(feature = "dtype-datetime")]
    pub fn datetime(&self) -> PolarsResult<&DatetimeChunked> {
        self.as_materialized_series().datetime()
    }
    #[cfg(feature = "dtype-struct")]
    pub fn struct_(&self) -> PolarsResult<&StructChunked> {
        self.as_materialized_series().struct_()
    }
    #[cfg(feature = "dtype-decimal")]
    pub fn decimal(&self) -> PolarsResult<&DecimalChunked> {
        self.as_materialized_series().decimal()
    }
    #[cfg(feature = "dtype-array")]
    pub fn array(&self) -> PolarsResult<&ArrayChunked> {
        self.as_materialized_series().array()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn cat<T: PolarsCategoricalType>(&self) -> PolarsResult<&CategoricalChunked<T>> {
        self.as_materialized_series().cat::<T>()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn cat8(&self) -> PolarsResult<&Categorical8Chunked> {
        self.as_materialized_series().cat8()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn cat16(&self) -> PolarsResult<&Categorical16Chunked> {
        self.as_materialized_series().cat16()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn cat32(&self) -> PolarsResult<&Categorical32Chunked> {
        self.as_materialized_series().cat32()
    }
    #[cfg(feature = "dtype-date")]
    pub fn date(&self) -> PolarsResult<&DateChunked> {
        self.as_materialized_series().date()
    }
    #[cfg(feature = "dtype-duration")]
    pub fn duration(&self) -> PolarsResult<&DurationChunked> {
        self.as_materialized_series().duration()
    }
462
    // # Casting
    /// Cast to `dtype` with explicit [`CastOptions`], preserving the scalar/series repr.
    pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {
        match self {
            Column::Series(s) => s.cast_with_options(dtype, options).map(Column::from),
            Column::Scalar(s) => s.cast_with_options(dtype, options).map(Column::from),
        }
    }
    /// Strictly cast to `dtype` (see [`Series::strict_cast`]), preserving the repr.
    pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Self> {
        match self {
            Column::Series(s) => s.strict_cast(dtype).map(Column::from),
            Column::Scalar(s) => s.strict_cast(dtype).map(Column::from),
        }
    }
    /// Cast to `dtype` (see [`Series::cast`]), preserving the repr.
    pub fn cast(&self, dtype: &DataType) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => s.cast(dtype).map(Column::from),
            Column::Scalar(s) => s.cast(dtype).map(Column::from),
        }
    }
    /// # Safety
    ///
    /// This can lead to invalid memory access in downstream code.
    pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
            Column::Scalar(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
        }
    }
491
    /// Return an empty (zero-row) column with the same name and dtype.
    #[must_use]
    pub fn clear(&self) -> Self {
        match self {
            Column::Series(s) => s.clear().into(),
            // Resizing a scalar column to 0 keeps the cheap scalar repr.
            Column::Scalar(s) => s.resize(0).into(),
        }
    }
499
    /// Shrink the backing memory to fit; a no-op for scalar columns.
    #[inline]
    pub fn shrink_to_fit(&mut self) {
        match self {
            Column::Series(s) => s.shrink_to_fit(),
            // A scalar column holds no per-row buffers to shrink.
            Column::Scalar(_) => {},
        }
    }
507
    /// Create a column of `length` rows, each equal to `self[index]`.
    ///
    /// If `index` is out of bounds, a full-null column of `length` rows is returned.
    #[inline]
    pub fn new_from_index(&self, index: usize, length: usize) -> Self {
        if index >= self.len() {
            return Self::full_null(self.name().clone(), length, self.dtype());
        }

        match self {
            Column::Series(s) => {
                // SAFETY: Bounds check done before.
                let av = unsafe { s.get_unchecked(index) };
                let scalar = Scalar::new(self.dtype().clone(), av.into_static());
                Self::new_scalar(self.name().clone(), scalar, length)
            },
            // Every row of a scalar column equals `self[index]` already.
            Column::Scalar(s) => s.resize(length).into(),
        }
    }
524
    /// Whether this column contains at least one null value.
    #[inline]
    pub fn has_nulls(&self) -> bool {
        match self {
            Self::Series(s) => s.has_nulls(),
            Self::Scalar(s) => s.has_nulls(),
        }
    }
532
    /// Per-row null mask: `true` where the value is null.
    #[inline]
    pub fn is_null(&self) -> BooleanChunked {
        match self {
            Self::Series(s) => s.is_null(),
            // A scalar column is uniformly null or uniformly non-null.
            Self::Scalar(s) => {
                BooleanChunked::full(s.name().clone(), s.scalar().is_null(), s.len())
            },
        }
    }
    /// Per-row mask: `true` where the value is not null.
    #[inline]
    pub fn is_not_null(&self) -> BooleanChunked {
        match self {
            Self::Series(s) => s.is_not_null(),
            Self::Scalar(s) => {
                BooleanChunked::full(s.name().clone(), !s.scalar().is_null(), s.len())
            },
        }
    }
551
    /// Convert to the physical (memory) representation of the dtype.
    pub fn to_physical_repr(&self) -> Column {
        // @scalar-opt
        self.as_materialized_series()
            .to_physical_repr()
            .into_owned()
            .into()
    }
    /// # Safety
    ///
    /// This can lead to invalid memory access in downstream code.
    pub unsafe fn from_physical_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
        // @scalar-opt
        self.as_materialized_series()
            .from_physical_unchecked(dtype)
            .map(Column::from)
    }
568
569    pub fn head(&self, length: Option<usize>) -> Column {
570        let len = length.unwrap_or(HEAD_DEFAULT_LENGTH);
571        let len = usize::min(len, self.len());
572        self.slice(0, len)
573    }
574    pub fn tail(&self, length: Option<usize>) -> Column {
575        let len = length.unwrap_or(TAIL_DEFAULT_LENGTH);
576        let len = usize::min(len, self.len());
577        debug_assert!(len <= i64::MAX as usize);
578        self.slice(-(len as i64), len)
579    }
580    pub fn slice(&self, offset: i64, length: usize) -> Column {
581        match self {
582            Column::Series(s) => s.slice(offset, length).into(),
583            Column::Scalar(s) => {
584                let (_, length) = slice_offsets(offset, length, s.len());
585                s.resize(length).into()
586            },
587        }
588    }
589
    /// Split into two columns at `offset` (negative counts from the end).
    pub fn split_at(&self, offset: i64) -> (Column, Column) {
        // @scalar-opt
        let (l, r) = self.as_materialized_series().split_at(offset);
        (l.into(), r.into())
    }
595
596    #[inline]
597    pub fn null_count(&self) -> usize {
598        match self {
599            Self::Series(s) => s.null_count(),
600            Self::Scalar(s) if s.scalar().is_null() => s.len(),
601            Self::Scalar(_) => 0,
602        }
603    }
604
    /// Index of the first non-null value, or `None` if all values are null (or empty).
    pub fn first_non_null(&self) -> Option<usize> {
        match self {
            Self::Series(s) => crate::utils::first_non_null(s.chunks().iter().map(|a| a.as_ref())),
            // A non-null, non-empty scalar column has its first value at index 0.
            Self::Scalar(s) => (!s.scalar().is_null() && !s.is_empty()).then_some(0),
        }
    }

    /// Index of the last non-null value, or `None` if all values are null (or empty).
    pub fn last_non_null(&self) -> Option<usize> {
        match self {
            Self::Series(s) => {
                crate::utils::last_non_null(s.chunks().iter().map(|a| a.as_ref()), s.len())
            },
            Self::Scalar(s) => (!s.scalar().is_null() && !s.is_empty()).then(|| s.len() - 1),
        }
    }
620
    /// Take rows by index, validating that all indices are in bounds first.
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Column> {
        check_bounds_ca(indices, self.len() as IdxSize)?;
        // SAFETY: bounds were just verified above.
        Ok(unsafe { self.take_unchecked(indices) })
    }
    /// Take rows by a slice of indices, validating that all indices are in bounds first.
    pub fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Column> {
        check_bounds(indices, self.len() as IdxSize)?;
        // SAFETY: bounds were just verified above.
        Ok(unsafe { self.take_slice_unchecked(indices) })
    }
    /// # Safety
    ///
    /// No bounds on the indexes are performed.
    pub unsafe fn take_unchecked(&self, indices: &IdxCa) -> Column {
        debug_assert!(check_bounds_ca(indices, self.len() as IdxSize).is_ok());

        match self {
            Self::Series(s) => unsafe { s.take_unchecked(indices) }.into(),
            Self::Scalar(s) => {
                let idxs_length = indices.len();
                let idxs_null_count = indices.null_count();

                // Gather at most one element from the unit-length series; this keeps
                // the scalar repr (and yields an empty result when `s` is empty).
                let scalar = ScalarColumn::from_single_value_series(
                    s.as_single_value_series().take_unchecked(&IdxCa::new(
                        indices.name().clone(),
                        &[0][..s.len().min(1)],
                    )),
                    idxs_length,
                );

                // We need to make sure that null values in `idx` become null values in the result
                if idxs_null_count == 0 || scalar.has_nulls() {
                    // No index nulls to propagate (or the result is already all-null).
                    scalar.into_column()
                } else if idxs_null_count == idxs_length {
                    // Every index is null: the whole result is null.
                    scalar.into_nulls().into_column()
                } else {
                    // Mixed case: materialize and overwrite the validity with the
                    // indices' validity mask.
                    let validity = indices.rechunk_validity();
                    let series = scalar.take_materialized_series();
                    let name = series.name().clone();
                    let dtype = series.dtype().clone();
                    let mut chunks = series.into_chunks();
                    assert_eq!(chunks.len(), 1);
                    chunks[0] = chunks[0].with_validity(validity);
                    unsafe { Series::from_chunks_and_dtype_unchecked(name, chunks, &dtype) }
                        .into_column()
                }
            },
        }
    }
    /// # Safety
    ///
    /// No bounds on the indexes are performed.
    pub unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Column {
        debug_assert!(check_bounds(indices, self.len() as IdxSize).is_ok());

        match self {
            Self::Series(s) => unsafe { s.take_slice_unchecked(indices) }.into(),
            // Slice indices cannot be null, so unlike `take_unchecked` no validity
            // propagation is required; just resize the gathered unit series.
            Self::Scalar(s) => ScalarColumn::from_single_value_series(
                s.as_single_value_series()
                    .take_slice_unchecked(&[0][..s.len().min(1)]),
                indices.len(),
            )
            .into(),
        }
    }
684
    /// General implementation for aggregation where a non-missing scalar would map to itself.
    ///
    /// `series_agg` performs the real aggregation on a materialized [`Series`]. For
    /// scalar columns we avoid materializing the full column: the aggregation is run
    /// once on a unit-length series to learn the output dtype and whether the
    /// aggregation is defined, and the result is then broadcast over the groups.
    #[inline(always)]
    #[cfg(any(feature = "algorithm_group_by", feature = "bitwise"))]
    fn agg_with_scalar_identity(
        &self,
        groups: &GroupsType,
        series_agg: impl Fn(&Series, &GroupsType) -> Series,
    ) -> Column {
        match self {
            Column::Series(s) => series_agg(s, groups).into_column(),
            Column::Scalar(s) => {
                if s.is_empty() {
                    return series_agg(s.as_materialized_series(), groups).into_column();
                }

                // We utilize the aggregation on Series to see:
                // 1. the output datatype of the aggregation
                // 2. whether this aggregation is even defined
                let series_aggregation = series_agg(
                    &s.as_single_value_series(),
                    // @NOTE: this group is always valid since s is non-empty.
                    &GroupsType::new_slice(vec![[0, 1]], false, true),
                );

                // If the aggregation is not defined, just return all nulls.
                if series_aggregation.has_nulls() {
                    return Self::new_scalar(
                        series_aggregation.name().clone(),
                        Scalar::new(series_aggregation.dtype().clone(), AnyValue::Null),
                        groups.len(),
                    );
                }

                let mut scalar_col = s.resize(groups.len());
                // The aggregation might change the type (e.g. mean changes int -> float), so we do
                // a cast here to the output type.
                if series_aggregation.dtype() != s.dtype() {
                    scalar_col = scalar_col.cast(series_aggregation.dtype()).unwrap();
                }

                let Some(first_empty_idx) = groups.iter().position(|g| g.is_empty()) else {
                    // Fast path: no empty groups. keep the scalar intact.
                    return scalar_col.into_column();
                };

                // All empty groups produce a *missing* or `null` value.
                let mut validity = BitmapBuilder::with_capacity(groups.len());
                validity.extend_constant(first_empty_idx, true);
                // SAFETY: We trust the length of this iterator.
                let iter = unsafe {
                    TrustMyLength::new(
                        groups.iter().skip(first_empty_idx).map(|g| !g.is_empty()),
                        groups.len() - first_empty_idx,
                    )
                };
                validity.extend_trusted_len_iter(iter);

                let mut s = scalar_col.take_materialized_series().rechunk();
                // SAFETY: We perform a compute_len afterwards.
                let chunks = unsafe { s.chunks_mut() };
                let arr = &mut chunks[0];
                *arr = arr.with_validity(validity.into_opt_validity());
                s.compute_len();

                s.into_column()
            },
        }
    }
753
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_min(&self, groups: &GroupsType) -> Self {
        // min of a constant group is the constant itself -> scalar identity applies.
        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_min(g) })
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_max(&self, groups: &GroupsType) -> Self {
        // max of a constant group is the constant itself -> scalar identity applies.
        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_max(g) })
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_mean(&self, groups: &GroupsType) -> Self {
        // mean of a constant group is the constant (dtype may widen to float).
        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_mean(g) })
    }
777
778    /// # Safety
779    ///
780    /// Does no bounds checks, groups must be correct.
781    #[cfg(feature = "algorithm_group_by")]
782    pub unsafe fn agg_arg_min(&self, groups: &GroupsType) -> Self {
783        match self {
784            Column::Series(s) => unsafe { Column::from(s.agg_arg_min(groups)) },
785            Column::Scalar(sc) => {
786                let scalar = if sc.is_empty() || sc.has_nulls() {
787                    Scalar::null(IDX_DTYPE)
788                } else {
789                    Scalar::new_idxsize(0)
790                };
791                Column::new_scalar(self.name().clone(), scalar, 1)
792            },
793        }
794    }
795
796    /// # Safety
797    ///
798    /// Does no bounds checks, groups must be correct.
799    #[cfg(feature = "algorithm_group_by")]
800    pub unsafe fn agg_arg_max(&self, groups: &GroupsType) -> Self {
801        match self {
802            Column::Series(s) => unsafe { Column::from(s.agg_arg_max(groups)) },
803            Column::Scalar(sc) => {
804                let scalar = if sc.is_empty() || sc.has_nulls() {
805                    Scalar::null(IDX_DTYPE)
806                } else {
807                    Scalar::new_idxsize(0)
808                };
809                Column::new_scalar(self.name().clone(), scalar, 1)
810            },
811        }
812    }
813
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_sum(&self, groups: &GroupsType) -> Self {
        // @scalar-opt
        // sum is NOT a scalar identity (sum of k copies is k * scalar), so we
        // materialize and delegate to the Series implementation.
        unsafe { self.as_materialized_series().agg_sum(groups) }.into()
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_first(&self, groups: &GroupsType) -> Self {
        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_first(g) })
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_first_non_null(&self, groups: &GroupsType) -> Self {
        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_first_non_null(g) })
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_last(&self, groups: &GroupsType) -> Self {
        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_last(g) })
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_last_non_null(&self, groups: &GroupsType) -> Self {
        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_last_non_null(g) })
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_n_unique(&self, groups: &GroupsType) -> Self {
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_n_unique(groups) }.into()
    }
863
864    /// # Safety
865    ///
866    /// Does no bounds checks, groups must be correct.
867    #[cfg(feature = "algorithm_group_by")]
868    pub unsafe fn agg_quantile(
869        &self,
870        groups: &GroupsType,
871        quantile: f64,
872        method: QuantileMethod,
873    ) -> Self {
874        // @scalar-opt
875
876        unsafe {
877            self.as_materialized_series()
878                .agg_quantile(groups, quantile, method)
879        }
880        .into()
881    }
882
    /// Aggregate the median of every group.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_median(&self, groups: &GroupsType) -> Self {
        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_median(g) })
    }

    /// Aggregate the variance of every group with the given `ddof`.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_var(&self, groups: &GroupsType, ddof: u8) -> Self {
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_var(groups, ddof) }.into()
    }

    /// Aggregate the standard deviation of every group with the given `ddof`.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_std(&self, groups: &GroupsType, ddof: u8) -> Self {
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_std(groups, ddof) }.into()
    }

    /// Collect the values of every group into a list.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_list(&self, groups: &GroupsType) -> Self {
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_list(groups) }.into()
    }
917
    /// Count the non-null values of every group.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub fn agg_valid_count(&self, groups: &GroupsType) -> Self {
        // @scalar-opt
        // NOTE(review): this is the only `agg_*` method here that is NOT
        // declared `unsafe fn`, even though its docs state an unchecked
        // `groups` contract and the body performs the same unchecked call as
        // its siblings. Consider marking it `unsafe` (a breaking change for
        // safe callers) or documenting why it is sound as a safe fn.
        unsafe { self.as_materialized_series().agg_valid_count(groups) }.into()
    }
926
    /// Aggregate the bitwise AND of every group.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "bitwise")]
    pub unsafe fn agg_and(&self, groups: &GroupsType) -> Self {
        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_and(g) })
    }
    /// Aggregate the bitwise OR of every group.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "bitwise")]
    pub unsafe fn agg_or(&self, groups: &GroupsType) -> Self {
        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_or(g) })
    }
    /// Aggregate the bitwise XOR of every group.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "bitwise")]
    pub unsafe fn agg_xor(&self, groups: &GroupsType) -> Self {
        // @scalar-opt
        // XOR of a constant depends on group-length parity (cf. `xor_reduce`),
        // so the scalar-identity shortcut used by AND/OR does not apply.
        unsafe { self.as_materialized_series().agg_xor(groups) }.into()
    }
949
950    pub fn full_null(name: PlSmallStr, size: usize, dtype: &DataType) -> Self {
951        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), size)
952    }
953
    /// Whether this column has a length of 0.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Return a column with the order of the elements reversed.
    pub fn reverse(&self) -> Column {
        match self {
            Column::Series(s) => s.reverse().into(),
            // A scalar column is constant, so reversing it is a no-op.
            Column::Scalar(_) => self.clone(),
        }
    }
964
    /// Check value equality with `other`; delegates to `Series::equals`.
    pub fn equals(&self, other: &Column) -> bool {
        // @scalar-opt
        self.as_materialized_series()
            .equals(other.as_materialized_series())
    }

    /// Check value equality with `other`; delegates to
    /// `Series::equals_missing`.
    pub fn equals_missing(&self, other: &Column) -> bool {
        // @scalar-opt
        self.as_materialized_series()
            .equals_missing(other.as_materialized_series())
    }
976
    /// Set the sortedness flag on the underlying series.
    pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
        // @scalar-opt
        match self {
            Column::Series(s) => s.set_sorted_flag(sorted),
            // A scalar column's sortedness is implied; nothing to store.
            Column::Scalar(_) => {},
        }
    }

    /// Get the statistics flags for this column.
    pub fn get_flags(&self) -> StatisticsFlags {
        match self {
            Column::Series(s) => s.get_flags(),
            Column::Scalar(_) => {
                // A constant column is trivially sorted ascending.
                StatisticsFlags::IS_SORTED_ASC | StatisticsFlags::CAN_FAST_EXPLODE_LIST
            },
        }
    }

    /// Returns whether the flags were set
    pub fn set_flags(&mut self, flags: StatisticsFlags) -> bool {
        match self {
            Column::Series(s) => {
                s.set_flags(flags);
                true
            },
            // Scalar columns derive their flags; nothing is stored.
            Column::Scalar(_) => false,
        }
    }
1004
    /// Hash the values of this column into `buf`; delegates to
    /// `Series::vec_hash`.
    pub fn vec_hash(
        &self,
        build_hasher: PlSeedableRandomStateQuality,
        buf: &mut Vec<u64>,
    ) -> PolarsResult<()> {
        // @scalar-opt?
        self.as_materialized_series().vec_hash(build_hasher, buf)
    }

    /// Combine the hashes of this column's values into the existing `hashes`;
    /// delegates to `Series::vec_hash_combine`.
    pub fn vec_hash_combine(
        &self,
        build_hasher: PlSeedableRandomStateQuality,
        hashes: &mut [u64],
    ) -> PolarsResult<()> {
        // @scalar-opt?
        self.as_materialized_series()
            .vec_hash_combine(build_hasher, hashes)
    }
1023
    /// Append the values of `other` to this column in place, materializing
    /// `self` first if it is a scalar column.
    pub fn append(&mut self, other: &Column) -> PolarsResult<&mut Self> {
        // @scalar-opt
        self.into_materialized_series()
            .append(other.as_materialized_series())?;
        Ok(self)
    }
    /// Like [`Column::append`], but takes `other` by value.
    pub fn append_owned(&mut self, other: Column) -> PolarsResult<&mut Self> {
        self.into_materialized_series()
            .append_owned(other.take_materialized_series())?;
        Ok(self)
    }
1035
    /// Compute the row indices that would sort this column according to
    /// `options`, with fast paths for empty, all-null, and already-sorted
    /// columns.
    pub fn arg_sort(&self, options: SortOptions) -> IdxCa {
        if self.is_empty() {
            return IdxCa::from_vec(self.name().clone(), Vec::new());
        }

        if self.null_count() == self.len() {
            // We might need to maintain order so just respect the descending parameter.
            let values = if options.descending {
                (0..self.len() as IdxSize).rev().collect()
            } else {
                (0..self.len() as IdxSize).collect()
            };

            return IdxCa::from_vec(self.name().clone(), values);
        }

        // Only take the fast path below when the column is known-sorted;
        // otherwise delegate to the generic Series implementation.
        let is_sorted = Some(self.is_sorted_flag());
        let Some(is_sorted) = is_sorted.filter(|v| !matches!(v, IsSorted::Not)) else {
            return self.as_materialized_series().arg_sort(options);
        };

        // Fast path: the data is sorted.
        let is_sorted_dsc = matches!(is_sorted, IsSorted::Descending);
        let invert = options.descending != is_sorted_dsc;

        let mut values = Vec::with_capacity(self.len());

        // Push the indices `start..end` into `values` in the order dictated
        // by `invert` / `maintain_order`; `is_only_nulls` marks a run of
        // nulls, which is always emitted in ascending order.
        #[inline(never)]
        fn extend(
            start: IdxSize,
            end: IdxSize,
            slf: &Column,
            values: &mut Vec<IdxSize>,
            is_only_nulls: bool,
            invert: bool,
            maintain_order: bool,
        ) {
            debug_assert!(start <= end);
            debug_assert!(start as usize <= slf.len());
            debug_assert!(end as usize <= slf.len());

            if !invert || is_only_nulls {
                values.extend(start..end);
                return;
            }

            // If we don't have to maintain order but we have to invert, just flip it around.
            if !maintain_order {
                values.extend((start..end).rev());
                return;
            }

            // If we want to maintain order but we also need to invert, we need to invert
            // per group of items.
            //
            // @NOTE: Since the column is sorted, arg_unique can also take a fast path and
            // just do a single traversal.
            let arg_unique = slf
                .slice(start as i64, (end - start) as usize)
                .arg_unique()
                .unwrap();

            assert!(!arg_unique.has_nulls());

            let num_unique = arg_unique.len();

            // Fast path: all items are unique.
            if num_unique == (end - start) as usize {
                values.extend((start..end).rev());
                return;
            }

            if num_unique == 1 {
                values.extend(start..end);
                return;
            }

            // Walk the unique-start offsets back to front, emitting each
            // equal-value run in its original (ascending) order.
            let mut prev_idx = end - start;
            for chunk in arg_unique.downcast_iter() {
                for &idx in chunk.values().as_slice().iter().rev() {
                    values.extend(start + idx..start + prev_idx);
                    prev_idx = idx;
                }
            }
        }
        macro_rules! extend {
            ($start:expr, $end:expr) => {
                extend!($start, $end, is_only_nulls = false);
            };
            ($start:expr, $end:expr, is_only_nulls = $is_only_nulls:expr) => {
                extend(
                    $start,
                    $end,
                    self,
                    &mut values,
                    $is_only_nulls,
                    invert,
                    options.maintain_order,
                );
            };
        }

        let length = self.len() as IdxSize;
        let null_count = self.null_count() as IdxSize;

        if null_count == 0 {
            extend!(0, length);
        } else {
            // Sorted data keeps its nulls contiguously at one end; check which.
            let has_nulls_last = self.get(self.len() - 1).unwrap().is_null();
            match (options.nulls_last, has_nulls_last) {
                (true, true) => {
                    // Current: Nulls last, Wanted: Nulls last
                    extend!(0, length - null_count);
                    extend!(length - null_count, length, is_only_nulls = true);
                },
                (true, false) => {
                    // Current: Nulls first, Wanted: Nulls last
                    extend!(null_count, length);
                    extend!(0, null_count, is_only_nulls = true);
                },
                (false, true) => {
                    // Current: Nulls last, Wanted: Nulls first
                    extend!(length - null_count, length, is_only_nulls = true);
                    extend!(0, length - null_count);
                },
                (false, false) => {
                    // Current: Nulls first, Wanted: Nulls first
                    extend!(0, null_count, is_only_nulls = true);
                    extend!(null_count, length);
                },
            }
        }

        // @NOTE: This can theoretically be pushed into the previous operation but is it really
        // worth it... probably not...
        if let Some(limit) = options.limit {
            let limit = limit.min(length);
            values.truncate(limit as usize);
        }

        IdxCa::from_vec(self.name().clone(), values)
    }
1178
    /// Argsort with the `by` columns as tie-breakers; delegates to
    /// `Series::arg_sort_multiple`.
    pub fn arg_sort_multiple(
        &self,
        by: &[Column],
        options: &SortMultipleOptions,
    ) -> PolarsResult<IdxCa> {
        // @scalar-opt
        self.as_materialized_series().arg_sort_multiple(by, options)
    }
1187
1188    pub fn arg_unique(&self) -> PolarsResult<IdxCa> {
1189        match self {
1190            Column::Scalar(s) => Ok(IdxCa::new_vec(s.name().clone(), vec![0])),
1191            _ => self.as_materialized_series().arg_unique(),
1192        }
1193    }
1194
    /// Bit representation of the materialized series, if it has one.
    pub fn bit_repr(&self) -> Option<BitRepr> {
        // @scalar-opt
        self.as_materialized_series().bit_repr()
    }

    /// Wrap this column into a single-column [`DataFrame`].
    pub fn into_frame(self) -> DataFrame {
        // SAFETY: A single-column dataframe cannot have length mismatches or duplicate names
        unsafe { DataFrame::new_unchecked(self.len(), vec![self]) }
    }
1204
    /// Extend this column in place with the values of `other`; see
    /// `Series::extend` for how this differs from `append`.
    pub fn extend(&mut self, other: &Column) -> PolarsResult<&mut Self> {
        // @scalar-opt
        self.into_materialized_series()
            .extend(other.as_materialized_series())?;
        Ok(self)
    }
1211
1212    pub fn rechunk(&self) -> Column {
1213        match self {
1214            Column::Series(s) => s.rechunk().into(),
1215            Column::Scalar(s) => {
1216                if s.lazy_as_materialized_series()
1217                    .filter(|x| x.n_chunks() > 1)
1218                    .is_some()
1219                {
1220                    Column::Scalar(ScalarColumn::new(
1221                        s.name().clone(),
1222                        s.scalar().clone(),
1223                        s.len(),
1224                    ))
1225                } else {
1226                    self.clone()
1227                }
1228            },
1229        }
1230    }
1231
    /// Explode nested values into multiple rows; delegates to
    /// `Series::explode`.
    pub fn explode(&self, options: ExplodeOptions) -> PolarsResult<Column> {
        self.as_materialized_series()
            .explode(options)
            .map(Column::from)
    }
    /// Collect the values into a list; delegates to `Series::implode`.
    pub fn implode(&self) -> PolarsResult<ListChunked> {
        self.as_materialized_series().implode()
    }

    /// Fill null values using the given strategy.
    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
        // @scalar-opt
        self.as_materialized_series()
            .fill_null(strategy)
            .map(Column::from)
    }
1247
    /// Elementwise division by `rhs`; delegates to `Series::divide`.
    pub fn divide(&self, rhs: &Column) -> PolarsResult<Self> {
        // @scalar-opt
        self.as_materialized_series()
            .divide(rhs.as_materialized_series())
            .map(Column::from)
    }

    /// Shift the values by `periods`; delegates to `Series::shift`.
    pub fn shift(&self, periods: i64) -> Column {
        // @scalar-opt
        self.as_materialized_series().shift(periods).into()
    }
1259
    /// Combine `self` and `other` according to `mask`; delegates to
    /// `Series::zip_with`.
    #[cfg(feature = "zip_with")]
    pub fn zip_with(&self, mask: &BooleanChunked, other: &Self) -> PolarsResult<Self> {
        // @scalar-opt
        self.as_materialized_series()
            .zip_with(mask, other.as_materialized_series())
            .map(Self::from)
    }

    /// Like [`Column::zip_with`], but both sides must have the same dtype;
    /// delegates to `Series::zip_with_same_type`.
    #[cfg(feature = "zip_with")]
    pub fn zip_with_same_type(
        &self,
        mask: &ChunkedArray<BooleanType>,
        other: &Column,
    ) -> PolarsResult<Column> {
        // @scalar-opt
        self.as_materialized_series()
            .zip_with_same_type(mask, other.as_materialized_series())
            .map(Column::from)
    }
1279
    /// Return this column with all null values removed.
    pub fn drop_nulls(&self) -> Column {
        match self {
            Column::Series(s) => s.drop_nulls().into_column(),
            Column::Scalar(s) => s.drop_nulls().into_column(),
        }
    }

    /// Packs every element into a list.
    pub fn as_list(&self) -> ListChunked {
        // @scalar-opt
        self.as_materialized_series().as_list()
    }

    /// The sortedness flag of this column.
    pub fn is_sorted_flag(&self) -> IsSorted {
        match self {
            Column::Series(s) => s.is_sorted_flag(),
            // A constant column is trivially sorted in ascending order.
            Column::Scalar(_) => IsSorted::Ascending,
        }
    }
1299
    /// The unique values of this column.
    pub fn unique(&self) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => s.unique().map(Column::from),
            Column::Scalar(s) => {
                // Run `unique` on a single-value view purely so that
                // unsupported dtypes raise the same error as `Series::unique`.
                _ = s.as_single_value_series().unique()?;
                if s.is_empty() {
                    return Ok(s.clone().into_column());
                }

                // A non-empty scalar column has exactly one unique value.
                Ok(s.resize(1).into_column())
            },
        }
    }
    /// The unique values, preserving order of first occurrence.
    pub fn unique_stable(&self) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => s.unique_stable().map(Column::from),
            Column::Scalar(s) => {
                // As in `unique`: the call exists only for error parity.
                _ = s.as_single_value_series().unique_stable()?;
                if s.is_empty() {
                    return Ok(s.clone().into_column());
                }

                Ok(s.resize(1).into_column())
            },
        }
    }
1326
    /// Reshape into a list column with the given dimensions.
    pub fn reshape_list(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
        // @scalar-opt
        self.as_materialized_series()
            .reshape_list(dimensions)
            .map(Self::from)
    }

    /// Reshape into a fixed-size array column with the given dimensions.
    #[cfg(feature = "dtype-array")]
    pub fn reshape_array(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
        // @scalar-opt
        self.as_materialized_series()
            .reshape_array(dimensions)
            .map(Self::from)
    }

    /// Sort this column with `sort_options`; see also [`Column::sort_with`].
    pub fn sort(&self, sort_options: SortOptions) -> PolarsResult<Self> {
        // @scalar-opt
        self.as_materialized_series()
            .sort(sort_options)
            .map(Self::from)
    }
1348
1349    pub fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Self> {
1350        match self {
1351            Column::Series(s) => s.filter(filter).map(Column::from),
1352            Column::Scalar(s) => {
1353                if s.is_empty() {
1354                    return Ok(s.clone().into_column());
1355                }
1356
1357                // Broadcasting
1358                if filter.len() == 1 {
1359                    return match filter.get(0) {
1360                        Some(true) => Ok(s.clone().into_column()),
1361                        _ => Ok(s.resize(0).into_column()),
1362                    };
1363                }
1364
1365                Ok(s.resize(filter.sum().unwrap() as usize).into_column())
1366            },
1367        }
1368    }
1369
    /// Randomly shuffle the values, optionally seeded.
    #[cfg(feature = "random")]
    pub fn shuffle(&self, seed: Option<u64>) -> Self {
        // @scalar-opt
        self.as_materialized_series().shuffle(seed).into()
    }

    /// Sample a fraction `frac` of the rows; delegates to
    /// `Series::sample_frac`.
    #[cfg(feature = "random")]
    pub fn sample_frac(
        &self,
        frac: f64,
        with_replacement: bool,
        shuffle: bool,
        seed: Option<u64>,
    ) -> PolarsResult<Self> {
        self.as_materialized_series()
            .sample_frac(frac, with_replacement, shuffle, seed)
            .map(Self::from)
    }

    /// Sample `n` rows; delegates to `Series::sample_n`.
    #[cfg(feature = "random")]
    pub fn sample_n(
        &self,
        n: usize,
        with_replacement: bool,
        shuffle: bool,
        seed: Option<u64>,
    ) -> PolarsResult<Self> {
        self.as_materialized_series()
            .sample_n(n, with_replacement, shuffle, seed)
            .map(Self::from)
    }
1401
1402    pub fn gather_every(&self, n: usize, offset: usize) -> PolarsResult<Column> {
1403        polars_ensure!(n > 0, InvalidOperation: "gather_every(n): n should be positive");
1404        if self.len().saturating_sub(offset) == 0 {
1405            return Ok(self.clear());
1406        }
1407
1408        match self {
1409            Column::Series(s) => Ok(s.gather_every(n, offset)?.into()),
1410            Column::Scalar(s) => {
1411                let total = s.len() - offset;
1412                Ok(s.resize(1 + (total - 1) / n).into())
1413            },
1414        }
1415    }
1416
    /// Append `n` copies of `value` to this column.
    pub fn extend_constant(&self, value: AnyValue, n: usize) -> PolarsResult<Self> {
        if self.is_empty() {
            // Extending an empty column yields a pure scalar column of
            // `value` with this column's dtype.
            return Ok(Self::new_scalar(
                self.name().clone(),
                Scalar::new(self.dtype().clone(), value.into_static()),
                n,
            ));
        }

        match self {
            Column::Series(s) => s.extend_constant(value, n).map(Column::from),
            Column::Scalar(s) => {
                // Same value: the column stays scalar, just longer.
                if s.scalar().as_any_value() == value {
                    Ok(s.resize(s.len() + n).into())
                } else {
                    // Different value: fall back to a materialized series.
                    s.as_materialized_series()
                        .extend_constant(value, n)
                        .map(Column::from)
                }
            },
        }
    }
1439
    /// Elementwise finiteness mask; delegates to `Series::is_finite`.
    pub fn is_finite(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|s| s.is_finite())
    }
    /// Elementwise infinity mask; delegates to `Series::is_infinite`.
    pub fn is_infinite(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|s| s.is_infinite())
    }
    /// Elementwise NaN mask; delegates to `Series::is_nan`.
    pub fn is_nan(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|s| s.is_nan())
    }
    /// Elementwise not-NaN mask; delegates to `Series::is_not_nan`.
    pub fn is_not_nan(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|s| s.is_not_nan())
    }
1452
    /// Wrapping truncated division by the scalar `rhs`; delegates to
    /// `Series::wrapping_trunc_div_scalar`.
    pub fn wrapping_trunc_div_scalar<T>(&self, rhs: T) -> Self
    where
        T: Num + NumCast,
    {
        // @scalar-opt
        self.as_materialized_series()
            .wrapping_trunc_div_scalar(rhs)
            .into()
    }

    /// Product of all values, reduced to a [`Scalar`].
    pub fn product(&self) -> PolarsResult<Scalar> {
        // @scalar-opt
        self.as_materialized_series().product()
    }

    /// Iterator over the physical values of the materialized series.
    pub fn phys_iter(&self) -> SeriesPhysIter<'_> {
        // @scalar-opt
        self.as_materialized_series().phys_iter()
    }
1472
    /// Get the value at `index` as an [`AnyValue`], with a bounds check.
    #[inline]
    pub fn get(&self, index: usize) -> PolarsResult<AnyValue<'_>> {
        polars_ensure!(index < self.len(), oob = index, self.len());

        // SAFETY: Bounds check done just before.
        Ok(unsafe { self.get_unchecked(index) })
    }
    /// Get the value at `index` as an [`AnyValue`] without a bounds check.
    ///
    /// # Safety
    ///
    /// Does not perform bounds check on `index`
    #[inline(always)]
    pub unsafe fn get_unchecked(&self, index: usize) -> AnyValue<'_> {
        debug_assert!(index < self.len());

        match self {
            Column::Series(s) => unsafe { s.get_unchecked(index) },
            // Every row of a scalar column holds the same value, so `index`
            // is irrelevant here.
            Column::Scalar(s) => s.scalar().as_any_value(),
        }
    }

    /// Get the object at `index`; delegates to `Series::get_object`.
    #[cfg(feature = "object")]
    pub fn get_object(
        &self,
        index: usize,
    ) -> Option<&dyn crate::chunked_array::object::PolarsObjectSafe> {
        self.as_materialized_series().get_object(index)
    }
1500
    /// Elementwise bitwise AND with length-1 broadcasting.
    pub fn bitand(&self, rhs: &Self) -> PolarsResult<Self> {
        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l & r)
    }
    /// Elementwise bitwise OR with length-1 broadcasting.
    pub fn bitor(&self, rhs: &Self) -> PolarsResult<Self> {
        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l | r)
    }
    /// Elementwise bitwise XOR with length-1 broadcasting.
    pub fn bitxor(&self, rhs: &Self) -> PolarsResult<Self> {
        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l ^ r)
    }
1510
    /// Consuming addition; series-backed operands delegate to
    /// `Series::try_add_owned`, others fall back to the `+` operator.
    pub fn try_add_owned(self, other: Self) -> PolarsResult<Self> {
        match (self, other) {
            (Column::Series(lhs), Column::Series(rhs)) => {
                lhs.take().try_add_owned(rhs.take()).map(Column::from)
            },
            (lhs, rhs) => lhs + rhs,
        }
    }
    /// Consuming subtraction; series-backed operands delegate to
    /// `Series::try_sub_owned`, others fall back to the `-` operator.
    pub fn try_sub_owned(self, other: Self) -> PolarsResult<Self> {
        match (self, other) {
            (Column::Series(lhs), Column::Series(rhs)) => {
                lhs.take().try_sub_owned(rhs.take()).map(Column::from)
            },
            (lhs, rhs) => lhs - rhs,
        }
    }
    /// Consuming multiplication; series-backed operands delegate to
    /// `Series::try_mul_owned`, others fall back to the `*` operator.
    pub fn try_mul_owned(self, other: Self) -> PolarsResult<Self> {
        match (self, other) {
            (Column::Series(lhs), Column::Series(rhs)) => {
                lhs.take().try_mul_owned(rhs.take()).map(Column::from)
            },
            (lhs, rhs) => lhs * rhs,
        }
    }

    /// String representation of the value at `index`.
    pub(crate) fn str_value(&self, index: usize) -> PolarsResult<Cow<'_, str>> {
        Ok(self.get(index)?.str_value())
    }
1539
    /// Reduce to the minimum value as a [`Scalar`].
    pub fn min_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.min_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().min_reduce()
            },
        }
    }
    /// Reduce to the maximum value as a [`Scalar`].
    pub fn max_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.max_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().max_reduce()
            },
        }
    }
    /// Reduce to the median as a [`Scalar`].
    pub fn median_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.median_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().median_reduce()
            },
        }
    }
    /// Reduce to the mean as a [`Scalar`].
    pub fn mean_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.mean_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().mean_reduce()
            },
        }
    }
    /// Reduce to the standard deviation (with `ddof`) as a [`Scalar`].
    pub fn std_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.std_reduce(ddof),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a small series. This is a tiny bit wasteful, but probably fine.
                // `ddof + 1` rows suffice to reproduce the degrees-of-freedom
                // behavior of the full-length computation on a constant column.
                let n = s.len().min(ddof as usize + 1);
                s.as_n_values_series(n).std_reduce(ddof)
            },
        }
    }
    /// Reduce to the variance (with `ddof`) as a [`Scalar`].
    pub fn var_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.var_reduce(ddof),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a small series. This is a tiny bit wasteful, but probably fine.
                let n = s.len().min(ddof as usize + 1);
                s.as_n_values_series(n).var_reduce(ddof)
            },
        }
    }
    /// Reduce to the sum as a [`Scalar`].
    pub fn sum_reduce(&self) -> PolarsResult<Scalar> {
        // @scalar-opt
        self.as_materialized_series().sum_reduce()
    }
    /// Reduce to the bitwise AND of all values as a [`Scalar`].
    pub fn and_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.and_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().and_reduce()
            },
        }
    }
    /// Reduce to the bitwise OR of all values as a [`Scalar`].
    pub fn or_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.or_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().or_reduce()
            },
        }
    }
    /// Reduce to the bitwise XOR of all values as a [`Scalar`].
    pub fn xor_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.xor_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                //
                // We have to deal with the fact that xor is 0 if there is an even number of
                // elements and the value if there is an odd number of elements. If there are zero
                // elements the result should be `null`.
                s.as_n_values_series(2 - s.len() % 2).xor_reduce()
            },
        }
    }
    /// Number of unique values in this column.
    pub fn n_unique(&self) -> PolarsResult<usize> {
        match self {
            Column::Series(s) => s.n_unique(),
            // NOTE(review): for an EMPTY scalar column this counts the unique
            // values of a single-value stand-in — confirm this reports the
            // intended count (compare the `is_empty` guard in `unique`).
            Column::Scalar(s) => s.as_single_value_series().n_unique(),
        }
    }
1646
    /// Reduce to the `quantile` of the column using `method`.
    pub fn quantile_reduce(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Scalar> {
        self.as_materialized_series()
            .quantile_reduce(quantile, method)
    }

    /// Reduce to several `quantiles` at once using `method`.
    pub fn quantiles_reduce(
        &self,
        quantiles: &[f64],
        method: QuantileMethod,
    ) -> PolarsResult<Scalar> {
        self.as_materialized_series()
            .quantiles_reduce(quantiles, method)
    }

    /// Rough size estimate for this column; delegates to
    /// `Series::estimated_size`.
    pub(crate) fn estimated_size(&self) -> usize {
        // @scalar-opt
        self.as_materialized_series().estimated_size()
    }
1665
    /// Sort this column with the given options.
    pub fn sort_with(&self, options: SortOptions) -> PolarsResult<Self> {
        match self {
            Column::Series(s) => s.sort_with(options).map(Self::from),
            Column::Scalar(s) => {
                // This makes this function throw the same errors as Series::sort_with
                _ = s.as_single_value_series().sort_with(options)?;

                // A constant column is already sorted.
                Ok(self.clone())
            },
        }
    }
1677
    /// Apply `f` elementwise to produce a boolean mask. Infallible wrapper:
    /// the closure is wrapped in `Ok`, so the `unwrap` cannot fail.
    pub fn map_unary_elementwise_to_bool(
        &self,
        f: impl Fn(&Series) -> BooleanChunked,
    ) -> BooleanChunked {
        self.try_map_unary_elementwise_to_bool(|s| Ok(f(s)))
            .unwrap()
    }
    /// Fallible elementwise map to a boolean mask.
    pub fn try_map_unary_elementwise_to_bool(
        &self,
        f: impl Fn(&Series) -> PolarsResult<BooleanChunked>,
    ) -> PolarsResult<BooleanChunked> {
        match self {
            Column::Series(s) => f(s),
            // Scalar column: evaluate `f` once on a single value and
            // broadcast the result back to the column's length.
            Column::Scalar(s) => Ok(f(&s.as_single_value_series())?.new_from_index(0, s.len())),
        }
    }

    /// Apply `f` elementwise. Infallible wrapper: the closure is wrapped in
    /// `Ok`, so the `unwrap` cannot fail.
    pub fn apply_unary_elementwise(&self, f: impl Fn(&Series) -> Series) -> Column {
        self.try_apply_unary_elementwise(|s| Ok(f(s))).unwrap()
    }
    /// Fallible elementwise apply; scalar columns stay scalar.
    pub fn try_apply_unary_elementwise(
        &self,
        f: impl Fn(&Series) -> PolarsResult<Series>,
    ) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => f(s).map(Column::from),
            // Evaluate `f` once on a single value and rebuild a scalar column
            // of the original length from the result.
            Column::Scalar(s) => Ok(ScalarColumn::from_single_value_series(
                f(&s.as_single_value_series())?,
                s.len(),
            )
            .into()),
        }
    }
1711
1712    pub fn apply_broadcasting_binary_elementwise(
1713        &self,
1714        other: &Self,
1715        op: impl Fn(&Series, &Series) -> Series,
1716    ) -> PolarsResult<Column> {
1717        self.try_apply_broadcasting_binary_elementwise(other, |lhs, rhs| Ok(op(lhs, rhs)))
1718    }
    /// Apply a fallible binary elementwise function to two columns, broadcasting a
    /// unit-length side against the other.
    ///
    /// Errors if the lengths are neither equal nor broadcastable (one side of
    /// length 1), or if `op` itself fails.
    pub fn try_apply_broadcasting_binary_elementwise(
        &self,
        other: &Self,
        op: impl Fn(&Series, &Series) -> PolarsResult<Series>,
    ) -> PolarsResult<Column> {
        // Resolve the output length up front so the scalar/scalar case knows how
        // far to broadcast, and so mismatches error before `op` runs.
        fn output_length(a: &Column, b: &Column) -> PolarsResult<usize> {
            match (a.len(), b.len()) {
                // broadcasting
                (1, o) | (o, 1) => Ok(o),
                // equal
                (a, b) if a == b => Ok(a),
                // unequal
                (a, b) => {
                    polars_bail!(InvalidOperation: "cannot do a binary operation on columns of different lengths: got {} and {}", a, b)
                },
            }
        }

        // Here we rely on the underlying broadcast operations.
        let length = output_length(self, other)?;
        match (self, other) {
            (Column::Series(lhs), Column::Series(rhs)) => op(lhs, rhs).map(Column::from),
            // Mixed cases: hand `op` a unit series for the scalar side and let the
            // underlying Series operation do the broadcasting.
            (Column::Series(lhs), Column::Scalar(rhs)) => {
                op(lhs, &rhs.as_single_value_series()).map(Column::from)
            },
            (Column::Scalar(lhs), Column::Series(rhs)) => {
                op(&lhs.as_single_value_series(), rhs).map(Column::from)
            },
            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
                // Both scalar: evaluate `op` once on unit series and repeat the
                // result as a scalar column of the broadcasted length.
                let lhs = lhs.as_single_value_series();
                let rhs = rhs.as_single_value_series();

                Ok(ScalarColumn::from_single_value_series(op(&lhs, &rhs)?, length).into_column())
            },
        }
    }
1755
1756    pub fn apply_binary_elementwise(
1757        &self,
1758        other: &Self,
1759        f: impl Fn(&Series, &Series) -> Series,
1760        f_lb: impl Fn(&Scalar, &Series) -> Series,
1761        f_rb: impl Fn(&Series, &Scalar) -> Series,
1762    ) -> Column {
1763        self.try_apply_binary_elementwise(
1764            other,
1765            |lhs, rhs| Ok(f(lhs, rhs)),
1766            |lhs, rhs| Ok(f_lb(lhs, rhs)),
1767            |lhs, rhs| Ok(f_rb(lhs, rhs)),
1768        )
1769        .unwrap()
1770    }
1771    pub fn try_apply_binary_elementwise(
1772        &self,
1773        other: &Self,
1774        f: impl Fn(&Series, &Series) -> PolarsResult<Series>,
1775        f_lb: impl Fn(&Scalar, &Series) -> PolarsResult<Series>,
1776        f_rb: impl Fn(&Series, &Scalar) -> PolarsResult<Series>,
1777    ) -> PolarsResult<Column> {
1778        debug_assert_eq!(self.len(), other.len());
1779
1780        match (self, other) {
1781            (Column::Series(lhs), Column::Series(rhs)) => f(lhs, rhs).map(Column::from),
1782            (Column::Series(lhs), Column::Scalar(rhs)) => f_rb(lhs, rhs.scalar()).map(Column::from),
1783            (Column::Scalar(lhs), Column::Series(rhs)) => f_lb(lhs.scalar(), rhs).map(Column::from),
1784            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
1785                let lhs = lhs.as_single_value_series();
1786                let rhs = rhs.as_single_value_series();
1787
1788                Ok(
1789                    ScalarColumn::from_single_value_series(f(&lhs, &rhs)?, self.len())
1790                        .into_column(),
1791                )
1792            },
1793        }
1794    }
1795
1796    #[cfg(feature = "approx_unique")]
1797    pub fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
1798        match self {
1799            Column::Series(s) => s.approx_n_unique(),
1800            Column::Scalar(s) => {
1801                // @NOTE: We do this for the error handling.
1802                s.as_single_value_series().approx_n_unique()?;
1803                Ok(1)
1804            },
1805        }
1806    }
1807
1808    pub fn n_chunks(&self) -> usize {
1809        match self {
1810            Column::Series(s) => s.n_chunks(),
1811            Column::Scalar(s) => s.lazy_as_materialized_series().map_or(1, |x| x.n_chunks()),
1812        }
1813    }
1814
1815    #[expect(clippy::wrong_self_convention)]
1816    pub(crate) fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
1817        // @scalar-opt
1818        self.as_materialized_series().into_total_ord_inner()
1819    }
1820    #[expect(unused, clippy::wrong_self_convention)]
1821    pub(crate) fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
1822        // @scalar-opt
1823        self.as_materialized_series().into_total_eq_inner()
1824    }
1825
1826    pub fn rechunk_to_arrow(self, compat_level: CompatLevel) -> Box<dyn Array> {
1827        // Rechunk to one chunk if necessary
1828        let mut series = self.take_materialized_series();
1829        if series.n_chunks() > 1 {
1830            series = series.rechunk();
1831        }
1832        series.to_arrow(0, compat_level)
1833    }
1834
1835    pub fn trim_lists_to_normalized_offsets(&self) -> Option<Column> {
1836        self.as_materialized_series()
1837            .trim_lists_to_normalized_offsets()
1838            .map(Column::from)
1839    }
1840
1841    pub fn propagate_nulls(&self) -> Option<Column> {
1842        self.as_materialized_series()
1843            .propagate_nulls()
1844            .map(Column::from)
1845    }
1846
1847    pub fn deposit(&self, validity: &Bitmap) -> Column {
1848        self.as_materialized_series()
1849            .deposit(validity)
1850            .into_column()
1851    }
1852
1853    pub fn rechunk_validity(&self) -> Option<Bitmap> {
1854        // @scalar-opt
1855        self.as_materialized_series().rechunk_validity()
1856    }
1857
1858    pub fn unique_id(&self) -> PolarsResult<(IdxSize, Vec<IdxSize>)> {
1859        self.as_materialized_series().unique_id()
1860    }
1861}
1862
1863impl Default for Column {
1864    fn default() -> Self {
1865        Self::new_scalar(
1866            PlSmallStr::EMPTY,
1867            Scalar::new(DataType::Int64, AnyValue::Null),
1868            0,
1869        )
1870    }
1871}
1872
1873impl PartialEq for Column {
1874    fn eq(&self, other: &Self) -> bool {
1875        // @scalar-opt
1876        self.as_materialized_series()
1877            .eq(other.as_materialized_series())
1878    }
1879}
1880
1881impl From<Series> for Column {
1882    #[inline]
1883    fn from(series: Series) -> Self {
1884        // We instantiate a Scalar Column if the Series is length is 1. This makes it possible for
1885        // future operations to be faster.
1886        if series.len() == 1 {
1887            return Self::Scalar(ScalarColumn::unit_scalar_from_series(series));
1888        }
1889
1890        Self::Series(SeriesColumn::new(series))
1891    }
1892}
1893
1894impl<T: IntoSeries> IntoColumn for T {
1895    #[inline]
1896    fn into_column(self) -> Column {
1897        self.into_series().into()
1898    }
1899}
1900
impl IntoColumn for Column {
    /// Identity conversion: a [`Column`] is already a column.
    #[inline(always)]
    fn into_column(self) -> Column {
        self
    }
}
1907
/// Serialization shim: we don't want to serialize the scalar columns, so this
/// lets a [`Column`] be serialized as its materialized [`Series`] without
/// implementing `From<Column> for Series`.
///
/// Those casts should be explicit.
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(into = "Series"))]
struct _SerdeSeries(Series);
1916
1917impl From<Column> for _SerdeSeries {
1918    #[inline]
1919    fn from(value: Column) -> Self {
1920        Self(value.take_materialized_series())
1921    }
1922}
1923
1924impl From<_SerdeSeries> for Series {
1925    #[inline]
1926    fn from(value: _SerdeSeries) -> Self {
1927        value.0
1928    }
1929}