polars_core/frame/column/
mod.rs

1use std::borrow::Cow;
2
3use arrow::bitmap::{Bitmap, BitmapBuilder};
4use arrow::trusted_len::TrustMyLength;
5use num_traits::{Num, NumCast};
6use polars_compute::rolling::QuantileMethod;
7use polars_error::PolarsResult;
8use polars_utils::aliases::PlSeedableRandomStateQuality;
9use polars_utils::index::check_bounds;
10use polars_utils::pl_str::PlSmallStr;
11pub use scalar::ScalarColumn;
12
13use self::compare_inner::{TotalEqInner, TotalOrdInner};
14use self::gather::check_bounds_ca;
15use self::series::SeriesColumn;
16use crate::chunked_array::cast::CastOptions;
17use crate::chunked_array::flags::StatisticsFlags;
18use crate::datatypes::ReshapeDimension;
19use crate::prelude::*;
20use crate::series::{BitRepr, IsSorted, SeriesPhysIter};
21use crate::utils::{Container, slice_offsets};
22use crate::{HEAD_DEFAULT_LENGTH, TAIL_DEFAULT_LENGTH};
23
24mod arithmetic;
25mod compare;
26mod scalar;
27mod series;
28
/// A column within a [`DataFrame`].
///
/// This is lazily initialized to a [`Series`] with methods like
/// [`as_materialized_series`][Column::as_materialized_series] and
/// [`take_materialized_series`][Column::take_materialized_series].
///
/// Currently, there are two ways to represent a [`Column`].
/// 1. A [`Series`] of values
/// 2. A [`ScalarColumn`] that repeats a single [`Scalar`]
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum Column {
    /// A column backed by a [`Series`] of values.
    Series(SeriesColumn),
    /// A column that repeats a single [`Scalar`].
    Scalar(ScalarColumn),
}
45
/// Convert `Self` into a [`Column`]
pub trait IntoColumn: Sized {
    /// Consumes `self` and returns it as a [`Column`].
    fn into_column(self) -> Column;
}
50
51impl Column {
52    #[inline]
53    #[track_caller]
54    pub fn new<T, Phantom>(name: PlSmallStr, values: T) -> Self
55    where
56        Phantom: ?Sized,
57        Series: NamedFrom<T, Phantom>,
58    {
59        Self::Series(SeriesColumn::new(NamedFrom::new(name, values)))
60    }
61
62    #[inline]
63    pub fn new_empty(name: PlSmallStr, dtype: &DataType) -> Self {
64        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), 0)
65    }
66
67    #[inline]
68    pub fn new_scalar(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
69        Self::Scalar(ScalarColumn::new(name, scalar, length))
70    }
71
72    pub fn new_row_index(name: PlSmallStr, offset: IdxSize, length: usize) -> PolarsResult<Column> {
73        let Ok(length) = IdxSize::try_from(length) else {
74            polars_bail!(
75                ComputeError:
76                "row index length {} overflows IdxSize::MAX ({})",
77                length,
78                IdxSize::MAX,
79            )
80        };
81
82        if offset.checked_add(length).is_none() {
83            polars_bail!(
84                ComputeError:
85                "row index with offset {} overflows on dataframe with height {}",
86                offset, length
87            )
88        }
89
90        let range = offset..offset + length;
91
92        let mut ca = IdxCa::from_vec(name, range.collect());
93        ca.set_sorted_flag(IsSorted::Ascending);
94        let col = ca.into_series().into();
95
96        Ok(col)
97    }
98
99    // # Materialize
    /// Get a reference to a [`Series`] for this [`Column`]
    ///
    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
    ///
    /// Series columns hand out their inner series directly; scalar columns delegate to
    /// `ScalarColumn::as_materialized_series`.
    #[inline]
    pub fn as_materialized_series(&self) -> &Series {
        match self {
            // `&SeriesColumn` is coerced to `&Series` here.
            Column::Series(s) => s,
            Column::Scalar(s) => s.as_materialized_series(),
        }
    }
110
111    /// If the memory repr of this Column is a scalar, a unit-length Series will
112    /// be returned.
113    #[inline]
114    pub fn as_materialized_series_maintain_scalar(&self) -> Series {
115        match self {
116            Column::Scalar(s) => s.as_single_value_series(),
117            v => v.as_materialized_series().clone(),
118        }
119    }
120
    /// Returns the backing `Series` for the values of this column.
    ///
    /// * For `Column::Series` columns, simply returns the inner `Series`.
    /// * For `Column::Scalar` columns, returns an empty or unit length series.
    ///
    /// # Note
    /// This method is safe to use. However, care must be taken when operating on the returned
    /// `Series` to ensure result correctness. E.g. It is suitable to perform elementwise operations
    /// on it, however e.g. aggregations will return unspecified results.
    pub fn _get_backing_series(&self) -> Series {
        match self {
            // Deref through `&SeriesColumn` to the inner `Series` and clone that.
            Column::Series(s) => (**s).clone(),
            // The scalar is not expanded to the column's full length.
            Column::Scalar(s) => s.as_single_value_series(),
        }
    }
136
137    /// Constructs a new `Column` of the same variant as `self` from a backing `Series` representing
138    /// the values.
139    ///
140    /// # Panics
141    /// Panics if:
142    /// * `self` is `Column::Series` and the length of `new_s` does not match that of `self`.
143    /// * `self` is `Column::Scalar` and if either:
144    ///   * `self` is not empty and `new_s` is not of unit length.
145    ///   * `self` is empty and `new_s` is not empty.
146    pub fn _to_new_from_backing(&self, new_s: Series) -> Self {
147        match self {
148            Column::Series(s) => {
149                assert_eq!(new_s.len(), s.len());
150                Column::Series(SeriesColumn::new(new_s))
151            },
152            Column::Scalar(s) => {
153                assert_eq!(new_s.len(), s.as_single_value_series().len());
154                Column::Scalar(ScalarColumn::from_single_value_series(new_s, self.len()))
155            },
156        }
157    }
158
159    /// Turn [`Column`] into a [`Column::Series`].
160    ///
161    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
162    #[inline]
163    pub fn into_materialized_series(&mut self) -> &mut Series {
164        match self {
165            Column::Series(s) => s,
166            Column::Scalar(s) => {
167                let series = std::mem::replace(
168                    s,
169                    ScalarColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),
170                )
171                .take_materialized_series();
172                *self = Column::Series(series.into());
173                let Column::Series(s) = self else {
174                    unreachable!();
175                };
176                s
177            },
178        }
179    }
    /// Take [`Series`] from a [`Column`]
    ///
    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
    ///
    /// Consumes `self`; both variants delegate to their own by-value conversion.
    #[inline]
    pub fn take_materialized_series(self) -> Series {
        match self {
            Column::Series(s) => s.take(),
            Column::Scalar(s) => s.take_materialized_series(),
        }
    }
190
    /// Returns the [`DataType`] of this column, as reported by the underlying representation.
    #[inline]
    pub fn dtype(&self) -> &DataType {
        match self {
            Column::Series(s) => s.dtype(),
            Column::Scalar(s) => s.dtype(),
        }
    }
198
199    #[inline]
200    pub fn field(&self) -> Cow<'_, Field> {
201        match self {
202            Column::Series(s) => s.field(),
203            Column::Scalar(s) => match s.lazy_as_materialized_series() {
204                None => Cow::Owned(Field::new(s.name().clone(), s.dtype().clone())),
205                Some(s) => s.field(),
206            },
207        }
208    }
209
    /// Returns the name of this column.
    #[inline]
    pub fn name(&self) -> &PlSmallStr {
        match self {
            Column::Series(s) => s.name(),
            Column::Scalar(s) => s.name(),
        }
    }
217
    /// Returns the length of this column, as reported by the underlying representation.
    #[inline]
    pub fn len(&self) -> usize {
        match self {
            Column::Series(s) => s.len(),
            Column::Scalar(s) => s.len(),
        }
    }
225
    /// Consumes the column and returns it renamed to `name`.
    ///
    /// By-value counterpart of [`Column::rename`].
    #[inline]
    pub fn with_name(mut self, name: PlSmallStr) -> Column {
        self.rename(name);
        self
    }
231
    /// Renames this column in place to `name`.
    #[inline]
    pub fn rename(&mut self, name: PlSmallStr) {
        match self {
            // The value returned by the inner `rename` calls is intentionally discarded.
            Column::Series(s) => _ = s.rename(name),
            Column::Scalar(s) => _ = s.rename(name),
        }
    }
239
240    // # Downcasting
241    #[inline]
242    pub fn as_series(&self) -> Option<&Series> {
243        match self {
244            Column::Series(s) => Some(s),
245            _ => None,
246        }
247    }
248    #[inline]
249    pub fn as_scalar_column(&self) -> Option<&ScalarColumn> {
250        match self {
251            Column::Scalar(s) => Some(s),
252            _ => None,
253        }
254    }
255    #[inline]
256    pub fn as_scalar_column_mut(&mut self) -> Option<&mut ScalarColumn> {
257        match self {
258            Column::Scalar(s) => Some(s),
259            _ => None,
260        }
261    }
262
263    // # Try to Chunked Arrays
    // Every `try_*` accessor below goes through `as_materialized_series`, so calling one on a
    // scalar column may materialize it, and then forwards to the `Series` method of the same
    // name, returning its `Option` unchanged.

    /// Optional downcast to [`BooleanChunked`]; delegates to `Series::try_bool`.
    pub fn try_bool(&self) -> Option<&BooleanChunked> {
        self.as_materialized_series().try_bool()
    }
    /// Optional downcast to [`Int8Chunked`]; delegates to `Series::try_i8`.
    pub fn try_i8(&self) -> Option<&Int8Chunked> {
        self.as_materialized_series().try_i8()
    }
    /// Optional downcast to [`Int16Chunked`]; delegates to `Series::try_i16`.
    pub fn try_i16(&self) -> Option<&Int16Chunked> {
        self.as_materialized_series().try_i16()
    }
    /// Optional downcast to [`Int32Chunked`]; delegates to `Series::try_i32`.
    pub fn try_i32(&self) -> Option<&Int32Chunked> {
        self.as_materialized_series().try_i32()
    }
    /// Optional downcast to [`Int64Chunked`]; delegates to `Series::try_i64`.
    pub fn try_i64(&self) -> Option<&Int64Chunked> {
        self.as_materialized_series().try_i64()
    }
    // NOTE(review): there is no `try_i128` counterpart to `i128` in this section - confirm
    // whether that omission is intentional.
    /// Optional downcast to [`UInt8Chunked`]; delegates to `Series::try_u8`.
    pub fn try_u8(&self) -> Option<&UInt8Chunked> {
        self.as_materialized_series().try_u8()
    }
    /// Optional downcast to [`UInt16Chunked`]; delegates to `Series::try_u16`.
    pub fn try_u16(&self) -> Option<&UInt16Chunked> {
        self.as_materialized_series().try_u16()
    }
    /// Optional downcast to [`UInt32Chunked`]; delegates to `Series::try_u32`.
    pub fn try_u32(&self) -> Option<&UInt32Chunked> {
        self.as_materialized_series().try_u32()
    }
    /// Optional downcast to [`UInt64Chunked`]; delegates to `Series::try_u64`.
    pub fn try_u64(&self) -> Option<&UInt64Chunked> {
        self.as_materialized_series().try_u64()
    }
    /// Optional downcast to [`UInt128Chunked`]; delegates to `Series::try_u128`.
    #[cfg(feature = "dtype-u128")]
    pub fn try_u128(&self) -> Option<&UInt128Chunked> {
        self.as_materialized_series().try_u128()
    }
    /// Optional downcast to [`Float16Chunked`]; delegates to `Series::try_f16`.
    #[cfg(feature = "dtype-f16")]
    pub fn try_f16(&self) -> Option<&Float16Chunked> {
        self.as_materialized_series().try_f16()
    }
    /// Optional downcast to [`Float32Chunked`]; delegates to `Series::try_f32`.
    pub fn try_f32(&self) -> Option<&Float32Chunked> {
        self.as_materialized_series().try_f32()
    }
    /// Optional downcast to [`Float64Chunked`]; delegates to `Series::try_f64`.
    pub fn try_f64(&self) -> Option<&Float64Chunked> {
        self.as_materialized_series().try_f64()
    }
    /// Optional downcast to [`StringChunked`]; delegates to `Series::try_str`.
    pub fn try_str(&self) -> Option<&StringChunked> {
        self.as_materialized_series().try_str()
    }
    /// Optional downcast to [`ListChunked`]; delegates to `Series::try_list`.
    pub fn try_list(&self) -> Option<&ListChunked> {
        self.as_materialized_series().try_list()
    }
    /// Optional downcast to [`BinaryChunked`]; delegates to `Series::try_binary`.
    pub fn try_binary(&self) -> Option<&BinaryChunked> {
        self.as_materialized_series().try_binary()
    }
    /// Optional downcast to [`IdxCa`]; delegates to `Series::try_idx`.
    pub fn try_idx(&self) -> Option<&IdxCa> {
        self.as_materialized_series().try_idx()
    }
    /// Optional downcast to [`BinaryOffsetChunked`]; delegates to `Series::try_binary_offset`.
    pub fn try_binary_offset(&self) -> Option<&BinaryOffsetChunked> {
        self.as_materialized_series().try_binary_offset()
    }
    /// Optional downcast to [`DatetimeChunked`]; delegates to `Series::try_datetime`.
    #[cfg(feature = "dtype-datetime")]
    pub fn try_datetime(&self) -> Option<&DatetimeChunked> {
        self.as_materialized_series().try_datetime()
    }
    /// Optional downcast to [`StructChunked`]; delegates to `Series::try_struct`.
    #[cfg(feature = "dtype-struct")]
    pub fn try_struct(&self) -> Option<&StructChunked> {
        self.as_materialized_series().try_struct()
    }
    /// Optional downcast to [`DecimalChunked`]; delegates to `Series::try_decimal`.
    #[cfg(feature = "dtype-decimal")]
    pub fn try_decimal(&self) -> Option<&DecimalChunked> {
        self.as_materialized_series().try_decimal()
    }
    /// Optional downcast to [`ArrayChunked`]; delegates to `Series::try_array`.
    #[cfg(feature = "dtype-array")]
    pub fn try_array(&self) -> Option<&ArrayChunked> {
        self.as_materialized_series().try_array()
    }
    /// Optional downcast to [`CategoricalChunked<T>`]; delegates to `Series::try_cat`.
    #[cfg(feature = "dtype-categorical")]
    pub fn try_cat<T: PolarsCategoricalType>(&self) -> Option<&CategoricalChunked<T>> {
        self.as_materialized_series().try_cat::<T>()
    }
    /// Optional downcast to [`Categorical8Chunked`]; delegates to `Series::try_cat8`.
    #[cfg(feature = "dtype-categorical")]
    pub fn try_cat8(&self) -> Option<&Categorical8Chunked> {
        self.as_materialized_series().try_cat8()
    }
    /// Optional downcast to [`Categorical16Chunked`]; delegates to `Series::try_cat16`.
    #[cfg(feature = "dtype-categorical")]
    pub fn try_cat16(&self) -> Option<&Categorical16Chunked> {
        self.as_materialized_series().try_cat16()
    }
    /// Optional downcast to [`Categorical32Chunked`]; delegates to `Series::try_cat32`.
    #[cfg(feature = "dtype-categorical")]
    pub fn try_cat32(&self) -> Option<&Categorical32Chunked> {
        self.as_materialized_series().try_cat32()
    }
    /// Optional downcast to [`DateChunked`]; delegates to `Series::try_date`.
    #[cfg(feature = "dtype-date")]
    pub fn try_date(&self) -> Option<&DateChunked> {
        self.as_materialized_series().try_date()
    }
    /// Optional downcast to [`DurationChunked`]; delegates to `Series::try_duration`.
    #[cfg(feature = "dtype-duration")]
    pub fn try_duration(&self) -> Option<&DurationChunked> {
        self.as_materialized_series().try_duration()
    }
360
361    // # To Chunked Arrays
    // Every accessor below goes through `as_materialized_series`, so calling one on a scalar
    // column may materialize it, and then forwards to the `Series` method of the same name,
    // returning its `PolarsResult` unchanged.

    /// Downcast to [`BooleanChunked`]; delegates to `Series::bool`.
    pub fn bool(&self) -> PolarsResult<&BooleanChunked> {
        self.as_materialized_series().bool()
    }
    /// Downcast to [`Int8Chunked`]; delegates to `Series::i8`.
    pub fn i8(&self) -> PolarsResult<&Int8Chunked> {
        self.as_materialized_series().i8()
    }
    /// Downcast to [`Int16Chunked`]; delegates to `Series::i16`.
    pub fn i16(&self) -> PolarsResult<&Int16Chunked> {
        self.as_materialized_series().i16()
    }
    /// Downcast to [`Int32Chunked`]; delegates to `Series::i32`.
    pub fn i32(&self) -> PolarsResult<&Int32Chunked> {
        self.as_materialized_series().i32()
    }
    /// Downcast to [`Int64Chunked`]; delegates to `Series::i64`.
    pub fn i64(&self) -> PolarsResult<&Int64Chunked> {
        self.as_materialized_series().i64()
    }
    /// Downcast to [`Int128Chunked`]; delegates to `Series::i128`.
    #[cfg(feature = "dtype-i128")]
    pub fn i128(&self) -> PolarsResult<&Int128Chunked> {
        self.as_materialized_series().i128()
    }
    /// Downcast to [`UInt8Chunked`]; delegates to `Series::u8`.
    pub fn u8(&self) -> PolarsResult<&UInt8Chunked> {
        self.as_materialized_series().u8()
    }
    /// Downcast to [`UInt16Chunked`]; delegates to `Series::u16`.
    pub fn u16(&self) -> PolarsResult<&UInt16Chunked> {
        self.as_materialized_series().u16()
    }
    /// Downcast to [`UInt32Chunked`]; delegates to `Series::u32`.
    pub fn u32(&self) -> PolarsResult<&UInt32Chunked> {
        self.as_materialized_series().u32()
    }
    /// Downcast to [`UInt64Chunked`]; delegates to `Series::u64`.
    pub fn u64(&self) -> PolarsResult<&UInt64Chunked> {
        self.as_materialized_series().u64()
    }
    /// Downcast to [`UInt128Chunked`]; delegates to `Series::u128`.
    #[cfg(feature = "dtype-u128")]
    pub fn u128(&self) -> PolarsResult<&UInt128Chunked> {
        self.as_materialized_series().u128()
    }
    /// Downcast to [`Float16Chunked`]; delegates to `Series::f16`.
    #[cfg(feature = "dtype-f16")]
    pub fn f16(&self) -> PolarsResult<&Float16Chunked> {
        self.as_materialized_series().f16()
    }
    /// Downcast to [`Float32Chunked`]; delegates to `Series::f32`.
    pub fn f32(&self) -> PolarsResult<&Float32Chunked> {
        self.as_materialized_series().f32()
    }
    /// Downcast to [`Float64Chunked`]; delegates to `Series::f64`.
    pub fn f64(&self) -> PolarsResult<&Float64Chunked> {
        self.as_materialized_series().f64()
    }
    /// Downcast to [`StringChunked`]; delegates to `Series::str`.
    pub fn str(&self) -> PolarsResult<&StringChunked> {
        self.as_materialized_series().str()
    }
    /// Downcast to [`ListChunked`]; delegates to `Series::list`.
    pub fn list(&self) -> PolarsResult<&ListChunked> {
        self.as_materialized_series().list()
    }
    /// Downcast to [`BinaryChunked`]; delegates to `Series::binary`.
    pub fn binary(&self) -> PolarsResult<&BinaryChunked> {
        self.as_materialized_series().binary()
    }
    /// Downcast to [`IdxCa`]; delegates to `Series::idx`.
    pub fn idx(&self) -> PolarsResult<&IdxCa> {
        self.as_materialized_series().idx()
    }
    /// Downcast to [`BinaryOffsetChunked`]; delegates to `Series::binary_offset`.
    pub fn binary_offset(&self) -> PolarsResult<&BinaryOffsetChunked> {
        self.as_materialized_series().binary_offset()
    }
    /// Downcast to [`DatetimeChunked`]; delegates to `Series::datetime`.
    #[cfg(feature = "dtype-datetime")]
    pub fn datetime(&self) -> PolarsResult<&DatetimeChunked> {
        self.as_materialized_series().datetime()
    }
    /// Downcast to [`StructChunked`]; delegates to `Series::struct_`.
    #[cfg(feature = "dtype-struct")]
    pub fn struct_(&self) -> PolarsResult<&StructChunked> {
        self.as_materialized_series().struct_()
    }
    /// Downcast to [`DecimalChunked`]; delegates to `Series::decimal`.
    #[cfg(feature = "dtype-decimal")]
    pub fn decimal(&self) -> PolarsResult<&DecimalChunked> {
        self.as_materialized_series().decimal()
    }
    /// Downcast to [`ArrayChunked`]; delegates to `Series::array`.
    #[cfg(feature = "dtype-array")]
    pub fn array(&self) -> PolarsResult<&ArrayChunked> {
        self.as_materialized_series().array()
    }
    /// Downcast to [`CategoricalChunked<T>`]; delegates to `Series::cat`.
    #[cfg(feature = "dtype-categorical")]
    pub fn cat<T: PolarsCategoricalType>(&self) -> PolarsResult<&CategoricalChunked<T>> {
        self.as_materialized_series().cat::<T>()
    }
    /// Downcast to [`Categorical8Chunked`]; delegates to `Series::cat8`.
    #[cfg(feature = "dtype-categorical")]
    pub fn cat8(&self) -> PolarsResult<&Categorical8Chunked> {
        self.as_materialized_series().cat8()
    }
    /// Downcast to [`Categorical16Chunked`]; delegates to `Series::cat16`.
    #[cfg(feature = "dtype-categorical")]
    pub fn cat16(&self) -> PolarsResult<&Categorical16Chunked> {
        self.as_materialized_series().cat16()
    }
    /// Downcast to [`Categorical32Chunked`]; delegates to `Series::cat32`.
    #[cfg(feature = "dtype-categorical")]
    pub fn cat32(&self) -> PolarsResult<&Categorical32Chunked> {
        self.as_materialized_series().cat32()
    }
    /// Downcast to [`DateChunked`]; delegates to `Series::date`.
    #[cfg(feature = "dtype-date")]
    pub fn date(&self) -> PolarsResult<&DateChunked> {
        self.as_materialized_series().date()
    }
    /// Downcast to [`DurationChunked`]; delegates to `Series::duration`.
    #[cfg(feature = "dtype-duration")]
    pub fn duration(&self) -> PolarsResult<&DurationChunked> {
        self.as_materialized_series().duration()
    }
462
463    // # Casting
    /// Casts this column to `dtype` using the given [`CastOptions`].
    ///
    /// Dispatches to the `cast_with_options` of the underlying representation and wraps the
    /// result back into a [`Column`].
    pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {
        match self {
            Column::Series(s) => s.cast_with_options(dtype, options).map(Column::from),
            Column::Scalar(s) => s.cast_with_options(dtype, options).map(Column::from),
        }
    }
    /// Casts this column to `dtype` by dispatching to the `strict_cast` of the underlying
    /// representation.
    pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Self> {
        match self {
            Column::Series(s) => s.strict_cast(dtype).map(Column::from),
            Column::Scalar(s) => s.strict_cast(dtype).map(Column::from),
        }
    }
    /// Casts this column to `dtype` by dispatching to the `cast` of the underlying
    /// representation.
    pub fn cast(&self, dtype: &DataType) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => s.cast(dtype).map(Column::from),
            Column::Scalar(s) => s.cast(dtype).map(Column::from),
        }
    }
    /// Casts this column to `dtype` by dispatching to the `cast_unchecked` of the underlying
    /// representation.
    ///
    /// # Safety
    ///
    /// This can lead to invalid memory access in downstream code.
    pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
        match self {
            // The caller's obligations are forwarded unchanged to the inner cast.
            Column::Series(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
            Column::Scalar(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
        }
    }
491
    /// Returns a cleared copy of this column.
    ///
    /// Series columns use the series' `clear`; scalar columns are resized to length 0.
    #[must_use]
    pub fn clear(&self) -> Self {
        match self {
            Column::Series(s) => s.clear().into(),
            Column::Scalar(s) => s.resize(0).into(),
        }
    }
499
    /// Forwards to the inner series' `shrink_to_fit`.
    ///
    /// This is a no-op for scalar columns.
    #[inline]
    pub fn shrink_to_fit(&mut self) {
        match self {
            Column::Series(s) => s.shrink_to_fit(),
            Column::Scalar(_) => {},
        }
    }
507
508    #[inline]
509    pub fn new_from_index(&self, index: usize, length: usize) -> Self {
510        if index >= self.len() {
511            return Self::full_null(self.name().clone(), length, self.dtype());
512        }
513
514        match self {
515            Column::Series(s) => {
516                // SAFETY: Bounds check done before.
517                let av = unsafe { s.get_unchecked(index) };
518                let scalar = Scalar::new(self.dtype().clone(), av.into_static());
519                Self::new_scalar(self.name().clone(), scalar, length)
520            },
521            Column::Scalar(s) => s.resize(length).into(),
522        }
523    }
524
    /// Returns whether the underlying representation reports any null values.
    #[inline]
    pub fn has_nulls(&self) -> bool {
        match self {
            Self::Series(s) => s.has_nulls(),
            Self::Scalar(s) => s.has_nulls(),
        }
    }
532
533    #[inline]
534    pub fn is_null(&self) -> BooleanChunked {
535        match self {
536            Self::Series(s) => s.is_null(),
537            Self::Scalar(s) => {
538                BooleanChunked::full(s.name().clone(), s.scalar().is_null(), s.len())
539            },
540        }
541    }
542    #[inline]
543    pub fn is_not_null(&self) -> BooleanChunked {
544        match self {
545            Self::Series(s) => s.is_not_null(),
546            Self::Scalar(s) => {
547                BooleanChunked::full(s.name().clone(), !s.scalar().is_null(), s.len())
548            },
549        }
550    }
551
552    pub fn to_physical_repr(&self) -> Column {
553        // @scalar-opt
554        self.as_materialized_series()
555            .to_physical_repr()
556            .into_owned()
557            .into()
558    }
    /// Materializes the column and forwards to `Series::from_physical_unchecked` with `dtype`.
    ///
    /// # Safety
    ///
    /// This can lead to invalid memory access in downstream code.
    pub unsafe fn from_physical_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
        // @scalar-opt
        self.as_materialized_series()
            .from_physical_unchecked(dtype)
            .map(Column::from)
    }
568
569    pub fn head(&self, length: Option<usize>) -> Column {
570        let len = length.unwrap_or(HEAD_DEFAULT_LENGTH);
571        let len = usize::min(len, self.len());
572        self.slice(0, len)
573    }
574    pub fn tail(&self, length: Option<usize>) -> Column {
575        let len = length.unwrap_or(TAIL_DEFAULT_LENGTH);
576        let len = usize::min(len, self.len());
577        debug_assert!(len <= i64::MAX as usize);
578        self.slice(-(len as i64), len)
579    }
580    pub fn slice(&self, offset: i64, length: usize) -> Column {
581        match self {
582            Column::Series(s) => s.slice(offset, length).into(),
583            Column::Scalar(s) => {
584                let (_, length) = slice_offsets(offset, length, s.len());
585                s.resize(length).into()
586            },
587        }
588    }
589
590    pub fn split_at(&self, offset: i64) -> (Column, Column) {
591        // @scalar-opt
592        let (l, r) = self.as_materialized_series().split_at(offset);
593        (l.into(), r.into())
594    }
595
596    #[inline]
597    pub fn null_count(&self) -> usize {
598        match self {
599            Self::Series(s) => s.null_count(),
600            Self::Scalar(s) if s.scalar().is_null() => s.len(),
601            Self::Scalar(_) => 0,
602        }
603    }
604
605    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Column> {
606        check_bounds_ca(indices, self.len() as IdxSize)?;
607        Ok(unsafe { self.take_unchecked(indices) })
608    }
609    pub fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Column> {
610        check_bounds(indices, self.len() as IdxSize)?;
611        Ok(unsafe { self.take_slice_unchecked(indices) })
612    }
613    /// # Safety
614    ///
615    /// No bounds on the indexes are performed.
616    pub unsafe fn take_unchecked(&self, indices: &IdxCa) -> Column {
617        debug_assert!(check_bounds_ca(indices, self.len() as IdxSize).is_ok());
618
619        match self {
620            Self::Series(s) => unsafe { s.take_unchecked(indices) }.into(),
621            Self::Scalar(s) => {
622                let idxs_length = indices.len();
623                let idxs_null_count = indices.null_count();
624
625                let scalar = ScalarColumn::from_single_value_series(
626                    s.as_single_value_series().take_unchecked(&IdxCa::new(
627                        indices.name().clone(),
628                        &[0][..s.len().min(1)],
629                    )),
630                    idxs_length,
631                );
632
633                // We need to make sure that null values in `idx` become null values in the result
634                if idxs_null_count == 0 || scalar.has_nulls() {
635                    scalar.into_column()
636                } else if idxs_null_count == idxs_length {
637                    scalar.into_nulls().into_column()
638                } else {
639                    let validity = indices.rechunk_validity();
640                    let series = scalar.take_materialized_series();
641                    let name = series.name().clone();
642                    let dtype = series.dtype().clone();
643                    let mut chunks = series.into_chunks();
644                    assert_eq!(chunks.len(), 1);
645                    chunks[0] = chunks[0].with_validity(validity);
646                    unsafe { Series::from_chunks_and_dtype_unchecked(name, chunks, &dtype) }
647                        .into_column()
648                }
649            },
650        }
651    }
652    /// # Safety
653    ///
654    /// No bounds on the indexes are performed.
655    pub unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Column {
656        debug_assert!(check_bounds(indices, self.len() as IdxSize).is_ok());
657
658        match self {
659            Self::Series(s) => unsafe { s.take_slice_unchecked(indices) }.into(),
660            Self::Scalar(s) => ScalarColumn::from_single_value_series(
661                s.as_single_value_series()
662                    .take_slice_unchecked(&[0][..s.len().min(1)]),
663                indices.len(),
664            )
665            .into(),
666        }
667    }
668
    /// General implementation for aggregation where a non-missing scalar would map to itself.
    ///
    /// Series columns are aggregated directly with `series_agg`. For a non-empty scalar column
    /// the aggregation runs once on the single-value series to learn the output dtype and
    /// whether the result is defined; the scalar is then repeated once per group, and empty
    /// groups are masked out as null.
    #[inline(always)]
    #[cfg(any(feature = "algorithm_group_by", feature = "bitwise"))]
    fn agg_with_unit_scalar(
        &self,
        groups: &GroupsType,
        series_agg: impl Fn(&Series, &GroupsType) -> Series,
    ) -> Column {
        match self {
            Column::Series(s) => series_agg(s, groups).into_column(),
            Column::Scalar(s) => {
                // An empty scalar column has no value to aggregate once, so use the general path.
                if s.is_empty() {
                    return series_agg(s.as_materialized_series(), groups).into_column();
                }

                // We utilize the aggregation on Series to see:
                // 1. the output datatype of the aggregation
                // 2. whether this aggregation is even defined
                let series_aggregation = series_agg(
                    &s.as_single_value_series(),
                    // @NOTE: this group is always valid since s is non-empty.
                    &GroupsType::new_slice(vec![[0, 1]], false, true),
                );

                // If the aggregation is not defined, just return all nulls.
                if series_aggregation.has_nulls() {
                    return Self::new_scalar(
                        series_aggregation.name().clone(),
                        Scalar::new(series_aggregation.dtype().clone(), AnyValue::Null),
                        groups.len(),
                    );
                }

                // One output row per group.
                let mut scalar_col = s.resize(groups.len());
                // The aggregation might change the type (e.g. mean changes int -> float), so we do
                // a cast here to the output type.
                if series_aggregation.dtype() != s.dtype() {
                    scalar_col = scalar_col.cast(series_aggregation.dtype()).unwrap();
                }

                let Some(first_empty_idx) = groups.iter().position(|g| g.is_empty()) else {
                    // Fast path: no empty groups. keep the scalar intact.
                    return scalar_col.into_column();
                };

                // All empty groups produce a *missing* or `null` value.
                let mut validity = BitmapBuilder::with_capacity(groups.len());
                // Every group before the first empty one is known to be non-empty.
                validity.extend_constant(first_empty_idx, true);
                // SAFETY: We trust the length of this iterator.
                let iter = unsafe {
                    TrustMyLength::new(
                        groups.iter().skip(first_empty_idx).map(|g| !g.is_empty()),
                        groups.len() - first_empty_idx,
                    )
                };
                validity.extend_trusted_len_iter(iter);

                // Materialize into a single chunk so the validity can be attached to it.
                let mut s = scalar_col.take_materialized_series().rechunk();
                // SAFETY: We perform a compute_len afterwards.
                let chunks = unsafe { s.chunks_mut() };
                let arr = &mut chunks[0];
                *arr = arr.with_validity(validity.into_opt_validity());
                s.compute_len();

                s.into_column()
            },
        }
    }
737
    /// Aggregates each group with `Series::agg_min`.
    ///
    /// Scalar columns are handled through `agg_with_unit_scalar`.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_min(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_min(g) })
    }
745
    /// Aggregates each group with `Series::agg_max`.
    ///
    /// Scalar columns are handled through `agg_with_unit_scalar`.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_max(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_max(g) })
    }
753
    /// Aggregates each group with `Series::agg_mean`.
    ///
    /// Scalar columns are handled through `agg_with_unit_scalar`.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_mean(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_mean(g) })
    }
761
    /// Aggregates each group with `Series::agg_arg_min`.
    ///
    /// Scalar columns are handled through `agg_with_unit_scalar`.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_arg_min(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_arg_min(g) })
    }
769
    /// Aggregates each group with `Series::agg_arg_max`.
    ///
    /// Scalar columns are handled through `agg_with_unit_scalar`.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_arg_max(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_arg_max(g) })
    }
777
    /// Aggregates each group with `Series::agg_sum`.
    ///
    /// The column is always materialized first.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_sum(&self, groups: &GroupsType) -> Self {
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_sum(groups) }.into()
    }
786
    /// Aggregates each group with `Series::agg_first`.
    ///
    /// Scalar columns are handled through `agg_with_unit_scalar`.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_first(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_first(g) })
    }
794
    /// Aggregates each group with `Series::agg_first_non_null`.
    ///
    /// Scalar columns are handled through `agg_with_unit_scalar`.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_first_non_null(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_first_non_null(g) })
    }
802
    /// Aggregates each group with `Series::agg_last`.
    ///
    /// Scalar columns are handled through `agg_with_unit_scalar`.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_last(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_last(g) })
    }
810
    /// Aggregates each group with `Series::agg_last_non_null`.
    ///
    /// Scalar columns are handled through `agg_with_unit_scalar`.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_last_non_null(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_last_non_null(g) })
    }
818
    /// Aggregates each group with `Series::agg_n_unique`.
    ///
    /// The column is always materialized first.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_n_unique(&self, groups: &GroupsType) -> Self {
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_n_unique(groups) }.into()
    }
827
    /// Aggregates each group with `Series::agg_quantile`, forwarding `quantile` and `method`.
    ///
    /// The column is always materialized first.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_quantile(
        &self,
        groups: &GroupsType,
        quantile: f64,
        method: QuantileMethod,
    ) -> Self {
        // @scalar-opt

        unsafe {
            self.as_materialized_series()
                .agg_quantile(groups, quantile, method)
        }
        .into()
    }
846
847    /// # Safety
848    ///
849    /// Does no bounds checks, groups must be correct.
850    #[cfg(feature = "algorithm_group_by")]
851    pub unsafe fn agg_median(&self, groups: &GroupsType) -> Self {
852        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_median(g) })
853    }
854
855    /// # Safety
856    ///
857    /// Does no bounds checks, groups must be correct.
858    #[cfg(feature = "algorithm_group_by")]
859    pub unsafe fn agg_var(&self, groups: &GroupsType, ddof: u8) -> Self {
860        // @scalar-opt
861        unsafe { self.as_materialized_series().agg_var(groups, ddof) }.into()
862    }
863
864    /// # Safety
865    ///
866    /// Does no bounds checks, groups must be correct.
867    #[cfg(feature = "algorithm_group_by")]
868    pub unsafe fn agg_std(&self, groups: &GroupsType, ddof: u8) -> Self {
869        // @scalar-opt
870        unsafe { self.as_materialized_series().agg_std(groups, ddof) }.into()
871    }
872
873    /// # Safety
874    ///
875    /// Does no bounds checks, groups must be correct.
876    #[cfg(feature = "algorithm_group_by")]
877    pub unsafe fn agg_list(&self, groups: &GroupsType) -> Self {
878        // @scalar-opt
879        unsafe { self.as_materialized_series().agg_list(groups) }.into()
880    }
881
882    /// # Safety
883    ///
884    /// Does no bounds checks, groups must be correct.
885    #[cfg(feature = "algorithm_group_by")]
886    pub fn agg_valid_count(&self, groups: &GroupsType) -> Self {
887        // @scalar-opt
888        unsafe { self.as_materialized_series().agg_valid_count(groups) }.into()
889    }
890
891    /// # Safety
892    ///
893    /// Does no bounds checks, groups must be correct.
894    #[cfg(feature = "bitwise")]
895    pub unsafe fn agg_and(&self, groups: &GroupsType) -> Self {
896        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_and(g) })
897    }
898    /// # Safety
899    ///
900    /// Does no bounds checks, groups must be correct.
901    #[cfg(feature = "bitwise")]
902    pub unsafe fn agg_or(&self, groups: &GroupsType) -> Self {
903        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_or(g) })
904    }
905    /// # Safety
906    ///
907    /// Does no bounds checks, groups must be correct.
908    #[cfg(feature = "bitwise")]
909    pub unsafe fn agg_xor(&self, groups: &GroupsType) -> Self {
910        // @scalar-opt
911        unsafe { self.as_materialized_series().agg_xor(groups) }.into()
912    }
913
914    pub fn full_null(name: PlSmallStr, size: usize, dtype: &DataType) -> Self {
915        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), size)
916    }
917
918    pub fn is_empty(&self) -> bool {
919        self.len() == 0
920    }
921
922    pub fn reverse(&self) -> Column {
923        match self {
924            Column::Series(s) => s.reverse().into(),
925            Column::Scalar(_) => self.clone(),
926        }
927    }
928
929    pub fn equals(&self, other: &Column) -> bool {
930        // @scalar-opt
931        self.as_materialized_series()
932            .equals(other.as_materialized_series())
933    }
934
935    pub fn equals_missing(&self, other: &Column) -> bool {
936        // @scalar-opt
937        self.as_materialized_series()
938            .equals_missing(other.as_materialized_series())
939    }
940
941    pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
942        // @scalar-opt
943        match self {
944            Column::Series(s) => s.set_sorted_flag(sorted),
945            Column::Scalar(_) => {},
946        }
947    }
948
949    pub fn get_flags(&self) -> StatisticsFlags {
950        match self {
951            Column::Series(s) => s.get_flags(),
952            Column::Scalar(_) => {
953                StatisticsFlags::IS_SORTED_ASC | StatisticsFlags::CAN_FAST_EXPLODE_LIST
954            },
955        }
956    }
957
958    /// Returns whether the flags were set
959    pub fn set_flags(&mut self, flags: StatisticsFlags) -> bool {
960        match self {
961            Column::Series(s) => {
962                s.set_flags(flags);
963                true
964            },
965            Column::Scalar(_) => false,
966        }
967    }
968
969    pub fn vec_hash(
970        &self,
971        build_hasher: PlSeedableRandomStateQuality,
972        buf: &mut Vec<u64>,
973    ) -> PolarsResult<()> {
974        // @scalar-opt?
975        self.as_materialized_series().vec_hash(build_hasher, buf)
976    }
977
978    pub fn vec_hash_combine(
979        &self,
980        build_hasher: PlSeedableRandomStateQuality,
981        hashes: &mut [u64],
982    ) -> PolarsResult<()> {
983        // @scalar-opt?
984        self.as_materialized_series()
985            .vec_hash_combine(build_hasher, hashes)
986    }
987
988    pub fn append(&mut self, other: &Column) -> PolarsResult<&mut Self> {
989        // @scalar-opt
990        self.into_materialized_series()
991            .append(other.as_materialized_series())?;
992        Ok(self)
993    }
994    pub fn append_owned(&mut self, other: Column) -> PolarsResult<&mut Self> {
995        self.into_materialized_series()
996            .append_owned(other.take_materialized_series())?;
997        Ok(self)
998    }
999
    /// Computes the index array that sorts this column according to `options`.
    ///
    /// The work is done in stages:
    /// 1. An empty column yields an empty index array.
    /// 2. An all-null column yields `0..len`, reversed when `options.descending` is set.
    /// 3. A column whose sorted flag is `IsSorted::Not` defers to `Series::arg_sort`.
    /// 4. Otherwise the sorted flag is trusted and the indices are generated directly,
    ///    placing the null run first or last as requested by `options.nulls_last`.
    ///
    /// Finally, `options.limit` (if any) truncates the result.
    pub fn arg_sort(&self, options: SortOptions) -> IdxCa {
        if self.is_empty() {
            return IdxCa::from_vec(self.name().clone(), Vec::new());
        }

        if self.null_count() == self.len() {
            // We might need to maintain order so just respect the descending parameter.
            let values = if options.descending {
                (0..self.len() as IdxSize).rev().collect()
            } else {
                (0..self.len() as IdxSize).collect()
            };

            return IdxCa::from_vec(self.name().clone(), values);
        }

        // No usable sorted flag: fall back to the general series implementation.
        let is_sorted = Some(self.is_sorted_flag());
        let Some(is_sorted) = is_sorted.filter(|v| !matches!(v, IsSorted::Not)) else {
            return self.as_materialized_series().arg_sort(options);
        };

        // Fast path: the data is sorted.
        let is_sorted_dsc = matches!(is_sorted, IsSorted::Descending);
        // The requested direction is the opposite of the current one.
        let invert = options.descending != is_sorted_dsc;

        let mut values = Vec::with_capacity(self.len());

        /// Pushes the indices for the range `start..end` of `slf` onto `values`, in the
        /// order required by `invert` / `maintain_order`. Ranges flagged `is_only_nulls`
        /// are always emitted in ascending index order.
        #[inline(never)]
        fn extend(
            start: IdxSize,
            end: IdxSize,
            slf: &Column,
            values: &mut Vec<IdxSize>,
            is_only_nulls: bool,
            invert: bool,
            maintain_order: bool,
        ) {
            debug_assert!(start <= end);
            debug_assert!(start as usize <= slf.len());
            debug_assert!(end as usize <= slf.len());

            if !invert || is_only_nulls {
                values.extend(start..end);
                return;
            }

            // If we don't have to maintain order but we have to invert, just flip it around.
            if !maintain_order {
                values.extend((start..end).rev());
                return;
            }

            // If we want to maintain order but we also need to invert, we need to invert
            // per group of items.
            //
            // @NOTE: Since the column is sorted, arg_unique can also take a fast path and
            // just do a single traversal.
            let arg_unique = slf
                .slice(start as i64, (end - start) as usize)
                .arg_unique()
                .unwrap();

            assert!(!arg_unique.has_nulls());

            let num_unique = arg_unique.len();

            // Fast path: all items are unique.
            if num_unique == (end - start) as usize {
                values.extend((start..end).rev());
                return;
            }

            // Only one distinct value: inverting changes nothing.
            if num_unique == 1 {
                values.extend(start..end);
                return;
            }

            // Walk the first-occurrence offsets back to front; each step emits one run of
            // equal values (`idx..prev_idx`, relative to `start`) in its original order.
            // NOTE(review): chunks are visited front to back while the values inside each
            // chunk are visited back to front — presumably `arg_unique` yields a single
            // chunk here; confirm.
            let mut prev_idx = end - start;
            for chunk in arg_unique.downcast_iter() {
                for &idx in chunk.values().as_slice().iter().rev() {
                    values.extend(start + idx..start + prev_idx);
                    prev_idx = idx;
                }
            }
        }
        // Shorthand that fills in the arguments shared by every `extend` call below.
        macro_rules! extend {
            ($start:expr, $end:expr) => {
                extend!($start, $end, is_only_nulls = false);
            };
            ($start:expr, $end:expr, is_only_nulls = $is_only_nulls:expr) => {
                extend(
                    $start,
                    $end,
                    self,
                    &mut values,
                    $is_only_nulls,
                    invert,
                    options.maintain_order,
                );
            };
        }

        let length = self.len() as IdxSize;
        let null_count = self.null_count() as IdxSize;

        if null_count == 0 {
            extend!(0, length);
        } else {
            // The position of the null run is decided by inspecting the last element.
            let has_nulls_last = self.get(self.len() - 1).unwrap().is_null();
            match (options.nulls_last, has_nulls_last) {
                (true, true) => {
                    // Current: Nulls last, Wanted: Nulls last
                    extend!(0, length - null_count);
                    extend!(length - null_count, length, is_only_nulls = true);
                },
                (true, false) => {
                    // Current: Nulls first, Wanted: Nulls last
                    extend!(null_count, length);
                    extend!(0, null_count, is_only_nulls = true);
                },
                (false, true) => {
                    // Current: Nulls last, Wanted: Nulls first
                    extend!(length - null_count, length, is_only_nulls = true);
                    extend!(0, length - null_count);
                },
                (false, false) => {
                    // Current: Nulls first, Wanted: Nulls first
                    extend!(0, null_count, is_only_nulls = true);
                    extend!(null_count, length);
                },
            }
        }

        // @NOTE: This can theoretically be pushed into the previous operation, but is it
        // really worth it? Probably not.
        if let Some(limit) = options.limit {
            let limit = limit.min(length);
            values.truncate(limit as usize);
        }

        IdxCa::from_vec(self.name().clone(), values)
    }
1142
1143    pub fn arg_sort_multiple(
1144        &self,
1145        by: &[Column],
1146        options: &SortMultipleOptions,
1147    ) -> PolarsResult<IdxCa> {
1148        // @scalar-opt
1149        self.as_materialized_series().arg_sort_multiple(by, options)
1150    }
1151
1152    pub fn arg_unique(&self) -> PolarsResult<IdxCa> {
1153        match self {
1154            Column::Scalar(s) => Ok(IdxCa::new_vec(s.name().clone(), vec![0])),
1155            _ => self.as_materialized_series().arg_unique(),
1156        }
1157    }
1158
1159    pub fn bit_repr(&self) -> Option<BitRepr> {
1160        // @scalar-opt
1161        self.as_materialized_series().bit_repr()
1162    }
1163
1164    pub fn into_frame(self) -> DataFrame {
1165        // SAFETY: A single-column dataframe cannot have length mismatches or duplicate names
1166        unsafe { DataFrame::new_unchecked(self.len(), vec![self]) }
1167    }
1168
1169    pub fn extend(&mut self, other: &Column) -> PolarsResult<&mut Self> {
1170        // @scalar-opt
1171        self.into_materialized_series()
1172            .extend(other.as_materialized_series())?;
1173        Ok(self)
1174    }
1175
1176    pub fn rechunk(&self) -> Column {
1177        match self {
1178            Column::Series(s) => s.rechunk().into(),
1179            Column::Scalar(s) => {
1180                if s.lazy_as_materialized_series()
1181                    .filter(|x| x.n_chunks() > 1)
1182                    .is_some()
1183                {
1184                    Column::Scalar(ScalarColumn::new(
1185                        s.name().clone(),
1186                        s.scalar().clone(),
1187                        s.len(),
1188                    ))
1189                } else {
1190                    self.clone()
1191                }
1192            },
1193        }
1194    }
1195
1196    pub fn explode(&self, options: ExplodeOptions) -> PolarsResult<Column> {
1197        self.as_materialized_series()
1198            .explode(options)
1199            .map(Column::from)
1200    }
1201    pub fn implode(&self) -> PolarsResult<ListChunked> {
1202        self.as_materialized_series().implode()
1203    }
1204
1205    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
1206        // @scalar-opt
1207        self.as_materialized_series()
1208            .fill_null(strategy)
1209            .map(Column::from)
1210    }
1211
1212    pub fn divide(&self, rhs: &Column) -> PolarsResult<Self> {
1213        // @scalar-opt
1214        self.as_materialized_series()
1215            .divide(rhs.as_materialized_series())
1216            .map(Column::from)
1217    }
1218
1219    pub fn shift(&self, periods: i64) -> Column {
1220        // @scalar-opt
1221        self.as_materialized_series().shift(periods).into()
1222    }
1223
1224    #[cfg(feature = "zip_with")]
1225    pub fn zip_with(&self, mask: &BooleanChunked, other: &Self) -> PolarsResult<Self> {
1226        // @scalar-opt
1227        self.as_materialized_series()
1228            .zip_with(mask, other.as_materialized_series())
1229            .map(Self::from)
1230    }
1231
1232    #[cfg(feature = "zip_with")]
1233    pub fn zip_with_same_type(
1234        &self,
1235        mask: &ChunkedArray<BooleanType>,
1236        other: &Column,
1237    ) -> PolarsResult<Column> {
1238        // @scalar-opt
1239        self.as_materialized_series()
1240            .zip_with_same_type(mask, other.as_materialized_series())
1241            .map(Column::from)
1242    }
1243
1244    pub fn drop_nulls(&self) -> Column {
1245        match self {
1246            Column::Series(s) => s.drop_nulls().into_column(),
1247            Column::Scalar(s) => s.drop_nulls().into_column(),
1248        }
1249    }
1250
1251    /// Packs every element into a list.
1252    pub fn as_list(&self) -> ListChunked {
1253        // @scalar-opt
1254        self.as_materialized_series().as_list()
1255    }
1256
1257    pub fn is_sorted_flag(&self) -> IsSorted {
1258        match self {
1259            Column::Series(s) => s.is_sorted_flag(),
1260            Column::Scalar(_) => IsSorted::Ascending,
1261        }
1262    }
1263
1264    pub fn unique(&self) -> PolarsResult<Column> {
1265        match self {
1266            Column::Series(s) => s.unique().map(Column::from),
1267            Column::Scalar(s) => {
1268                _ = s.as_single_value_series().unique()?;
1269                if s.is_empty() {
1270                    return Ok(s.clone().into_column());
1271                }
1272
1273                Ok(s.resize(1).into_column())
1274            },
1275        }
1276    }
1277    pub fn unique_stable(&self) -> PolarsResult<Column> {
1278        match self {
1279            Column::Series(s) => s.unique_stable().map(Column::from),
1280            Column::Scalar(s) => {
1281                _ = s.as_single_value_series().unique_stable()?;
1282                if s.is_empty() {
1283                    return Ok(s.clone().into_column());
1284                }
1285
1286                Ok(s.resize(1).into_column())
1287            },
1288        }
1289    }
1290
1291    pub fn reshape_list(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
1292        // @scalar-opt
1293        self.as_materialized_series()
1294            .reshape_list(dimensions)
1295            .map(Self::from)
1296    }
1297
1298    #[cfg(feature = "dtype-array")]
1299    pub fn reshape_array(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
1300        // @scalar-opt
1301        self.as_materialized_series()
1302            .reshape_array(dimensions)
1303            .map(Self::from)
1304    }
1305
1306    pub fn sort(&self, sort_options: SortOptions) -> PolarsResult<Self> {
1307        // @scalar-opt
1308        self.as_materialized_series()
1309            .sort(sort_options)
1310            .map(Self::from)
1311    }
1312
1313    pub fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Self> {
1314        match self {
1315            Column::Series(s) => s.filter(filter).map(Column::from),
1316            Column::Scalar(s) => {
1317                if s.is_empty() {
1318                    return Ok(s.clone().into_column());
1319                }
1320
1321                // Broadcasting
1322                if filter.len() == 1 {
1323                    return match filter.get(0) {
1324                        Some(true) => Ok(s.clone().into_column()),
1325                        _ => Ok(s.resize(0).into_column()),
1326                    };
1327                }
1328
1329                Ok(s.resize(filter.sum().unwrap() as usize).into_column())
1330            },
1331        }
1332    }
1333
1334    #[cfg(feature = "random")]
1335    pub fn shuffle(&self, seed: Option<u64>) -> Self {
1336        // @scalar-opt
1337        self.as_materialized_series().shuffle(seed).into()
1338    }
1339
1340    #[cfg(feature = "random")]
1341    pub fn sample_frac(
1342        &self,
1343        frac: f64,
1344        with_replacement: bool,
1345        shuffle: bool,
1346        seed: Option<u64>,
1347    ) -> PolarsResult<Self> {
1348        self.as_materialized_series()
1349            .sample_frac(frac, with_replacement, shuffle, seed)
1350            .map(Self::from)
1351    }
1352
1353    #[cfg(feature = "random")]
1354    pub fn sample_n(
1355        &self,
1356        n: usize,
1357        with_replacement: bool,
1358        shuffle: bool,
1359        seed: Option<u64>,
1360    ) -> PolarsResult<Self> {
1361        self.as_materialized_series()
1362            .sample_n(n, with_replacement, shuffle, seed)
1363            .map(Self::from)
1364    }
1365
1366    pub fn gather_every(&self, n: usize, offset: usize) -> PolarsResult<Column> {
1367        polars_ensure!(n > 0, InvalidOperation: "gather_every(n): n should be positive");
1368        if self.len().saturating_sub(offset) == 0 {
1369            return Ok(self.clear());
1370        }
1371
1372        match self {
1373            Column::Series(s) => Ok(s.gather_every(n, offset)?.into()),
1374            Column::Scalar(s) => {
1375                let total = s.len() - offset;
1376                Ok(s.resize(1 + (total - 1) / n).into())
1377            },
1378        }
1379    }
1380
1381    pub fn extend_constant(&self, value: AnyValue, n: usize) -> PolarsResult<Self> {
1382        if self.is_empty() {
1383            return Ok(Self::new_scalar(
1384                self.name().clone(),
1385                Scalar::new(self.dtype().clone(), value.into_static()),
1386                n,
1387            ));
1388        }
1389
1390        match self {
1391            Column::Series(s) => s.extend_constant(value, n).map(Column::from),
1392            Column::Scalar(s) => {
1393                if s.scalar().as_any_value() == value {
1394                    Ok(s.resize(s.len() + n).into())
1395                } else {
1396                    s.as_materialized_series()
1397                        .extend_constant(value, n)
1398                        .map(Column::from)
1399                }
1400            },
1401        }
1402    }
1403
1404    pub fn is_finite(&self) -> PolarsResult<BooleanChunked> {
1405        self.try_map_unary_elementwise_to_bool(|s| s.is_finite())
1406    }
1407    pub fn is_infinite(&self) -> PolarsResult<BooleanChunked> {
1408        self.try_map_unary_elementwise_to_bool(|s| s.is_infinite())
1409    }
1410    pub fn is_nan(&self) -> PolarsResult<BooleanChunked> {
1411        self.try_map_unary_elementwise_to_bool(|s| s.is_nan())
1412    }
1413    pub fn is_not_nan(&self) -> PolarsResult<BooleanChunked> {
1414        self.try_map_unary_elementwise_to_bool(|s| s.is_not_nan())
1415    }
1416
1417    pub fn wrapping_trunc_div_scalar<T>(&self, rhs: T) -> Self
1418    where
1419        T: Num + NumCast,
1420    {
1421        // @scalar-opt
1422        self.as_materialized_series()
1423            .wrapping_trunc_div_scalar(rhs)
1424            .into()
1425    }
1426
1427    pub fn product(&self) -> PolarsResult<Scalar> {
1428        // @scalar-opt
1429        self.as_materialized_series().product()
1430    }
1431
1432    pub fn phys_iter(&self) -> SeriesPhysIter<'_> {
1433        // @scalar-opt
1434        self.as_materialized_series().phys_iter()
1435    }
1436
1437    #[inline]
1438    pub fn get(&self, index: usize) -> PolarsResult<AnyValue<'_>> {
1439        polars_ensure!(index < self.len(), oob = index, self.len());
1440
1441        // SAFETY: Bounds check done just before.
1442        Ok(unsafe { self.get_unchecked(index) })
1443    }
1444    /// # Safety
1445    ///
1446    /// Does not perform bounds check on `index`
1447    #[inline(always)]
1448    pub unsafe fn get_unchecked(&self, index: usize) -> AnyValue<'_> {
1449        debug_assert!(index < self.len());
1450
1451        match self {
1452            Column::Series(s) => unsafe { s.get_unchecked(index) },
1453            Column::Scalar(s) => s.scalar().as_any_value(),
1454        }
1455    }
1456
1457    #[cfg(feature = "object")]
1458    pub fn get_object(
1459        &self,
1460        index: usize,
1461    ) -> Option<&dyn crate::chunked_array::object::PolarsObjectSafe> {
1462        self.as_materialized_series().get_object(index)
1463    }
1464
1465    pub fn bitand(&self, rhs: &Self) -> PolarsResult<Self> {
1466        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l & r)
1467    }
1468    pub fn bitor(&self, rhs: &Self) -> PolarsResult<Self> {
1469        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l | r)
1470    }
1471    pub fn bitxor(&self, rhs: &Self) -> PolarsResult<Self> {
1472        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l ^ r)
1473    }
1474
1475    pub fn try_add_owned(self, other: Self) -> PolarsResult<Self> {
1476        match (self, other) {
1477            (Column::Series(lhs), Column::Series(rhs)) => {
1478                lhs.take().try_add_owned(rhs.take()).map(Column::from)
1479            },
1480            (lhs, rhs) => lhs + rhs,
1481        }
1482    }
1483    pub fn try_sub_owned(self, other: Self) -> PolarsResult<Self> {
1484        match (self, other) {
1485            (Column::Series(lhs), Column::Series(rhs)) => {
1486                lhs.take().try_sub_owned(rhs.take()).map(Column::from)
1487            },
1488            (lhs, rhs) => lhs - rhs,
1489        }
1490    }
1491    pub fn try_mul_owned(self, other: Self) -> PolarsResult<Self> {
1492        match (self, other) {
1493            (Column::Series(lhs), Column::Series(rhs)) => {
1494                lhs.take().try_mul_owned(rhs.take()).map(Column::from)
1495            },
1496            (lhs, rhs) => lhs * rhs,
1497        }
1498    }
1499
1500    pub(crate) fn str_value(&self, index: usize) -> PolarsResult<Cow<'_, str>> {
1501        Ok(self.get(index)?.str_value())
1502    }
1503
1504    pub fn min_reduce(&self) -> PolarsResult<Scalar> {
1505        match self {
1506            Column::Series(s) => s.min_reduce(),
1507            Column::Scalar(s) => {
1508                // We don't really want to deal with handling the full semantics here so we just
1509                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1510                s.as_single_value_series().min_reduce()
1511            },
1512        }
1513    }
1514    pub fn max_reduce(&self) -> PolarsResult<Scalar> {
1515        match self {
1516            Column::Series(s) => s.max_reduce(),
1517            Column::Scalar(s) => {
1518                // We don't really want to deal with handling the full semantics here so we just
1519                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1520                s.as_single_value_series().max_reduce()
1521            },
1522        }
1523    }
1524    pub fn median_reduce(&self) -> PolarsResult<Scalar> {
1525        match self {
1526            Column::Series(s) => s.median_reduce(),
1527            Column::Scalar(s) => {
1528                // We don't really want to deal with handling the full semantics here so we just
1529                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1530                s.as_single_value_series().median_reduce()
1531            },
1532        }
1533    }
1534    pub fn mean_reduce(&self) -> PolarsResult<Scalar> {
1535        match self {
1536            Column::Series(s) => s.mean_reduce(),
1537            Column::Scalar(s) => {
1538                // We don't really want to deal with handling the full semantics here so we just
1539                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1540                s.as_single_value_series().mean_reduce()
1541            },
1542        }
1543    }
1544    pub fn std_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
1545        match self {
1546            Column::Series(s) => s.std_reduce(ddof),
1547            Column::Scalar(s) => {
1548                // We don't really want to deal with handling the full semantics here so we just
1549                // cast to a small series. This is a tiny bit wasteful, but probably fine.
1550                let n = s.len().min(ddof as usize + 1);
1551                s.as_n_values_series(n).std_reduce(ddof)
1552            },
1553        }
1554    }
1555    pub fn var_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
1556        match self {
1557            Column::Series(s) => s.var_reduce(ddof),
1558            Column::Scalar(s) => {
1559                // We don't really want to deal with handling the full semantics here so we just
1560                // cast to a small series. This is a tiny bit wasteful, but probably fine.
1561                let n = s.len().min(ddof as usize + 1);
1562                s.as_n_values_series(n).var_reduce(ddof)
1563            },
1564        }
1565    }
1566    pub fn sum_reduce(&self) -> PolarsResult<Scalar> {
1567        // @scalar-opt
1568        self.as_materialized_series().sum_reduce()
1569    }
1570    pub fn and_reduce(&self) -> PolarsResult<Scalar> {
1571        match self {
1572            Column::Series(s) => s.and_reduce(),
1573            Column::Scalar(s) => {
1574                // We don't really want to deal with handling the full semantics here so we just
1575                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1576                s.as_single_value_series().and_reduce()
1577            },
1578        }
1579    }
1580    pub fn or_reduce(&self) -> PolarsResult<Scalar> {
1581        match self {
1582            Column::Series(s) => s.or_reduce(),
1583            Column::Scalar(s) => {
1584                // We don't really want to deal with handling the full semantics here so we just
1585                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1586                s.as_single_value_series().or_reduce()
1587            },
1588        }
1589    }
1590    pub fn xor_reduce(&self) -> PolarsResult<Scalar> {
1591        match self {
1592            Column::Series(s) => s.xor_reduce(),
1593            Column::Scalar(s) => {
1594                // We don't really want to deal with handling the full semantics here so we just
1595                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1596                //
1597                // We have to deal with the fact that xor is 0 if there is an even number of
1598                // elements and the value if there is an odd number of elements. If there are zero
1599                // elements the result should be `null`.
1600                s.as_n_values_series(2 - s.len() % 2).xor_reduce()
1601            },
1602        }
1603    }
1604    pub fn n_unique(&self) -> PolarsResult<usize> {
1605        match self {
1606            Column::Series(s) => s.n_unique(),
1607            Column::Scalar(s) => s.as_single_value_series().n_unique(),
1608        }
1609    }
1610    pub fn quantile_reduce(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Scalar> {
1611        self.as_materialized_series()
1612            .quantile_reduce(quantile, method)
1613    }
1614
1615    pub(crate) fn estimated_size(&self) -> usize {
1616        // @scalar-opt
1617        self.as_materialized_series().estimated_size()
1618    }
1619
1620    pub fn sort_with(&self, options: SortOptions) -> PolarsResult<Self> {
1621        match self {
1622            Column::Series(s) => s.sort_with(options).map(Self::from),
1623            Column::Scalar(s) => {
1624                // This makes this function throw the same errors as Series::sort_with
1625                _ = s.as_single_value_series().sort_with(options)?;
1626
1627                Ok(self.clone())
1628            },
1629        }
1630    }
1631
1632    pub fn map_unary_elementwise_to_bool(
1633        &self,
1634        f: impl Fn(&Series) -> BooleanChunked,
1635    ) -> BooleanChunked {
1636        self.try_map_unary_elementwise_to_bool(|s| Ok(f(s)))
1637            .unwrap()
1638    }
1639    pub fn try_map_unary_elementwise_to_bool(
1640        &self,
1641        f: impl Fn(&Series) -> PolarsResult<BooleanChunked>,
1642    ) -> PolarsResult<BooleanChunked> {
1643        match self {
1644            Column::Series(s) => f(s),
1645            Column::Scalar(s) => Ok(f(&s.as_single_value_series())?.new_from_index(0, s.len())),
1646        }
1647    }
1648
1649    pub fn apply_unary_elementwise(&self, f: impl Fn(&Series) -> Series) -> Column {
1650        self.try_apply_unary_elementwise(|s| Ok(f(s))).unwrap()
1651    }
1652    pub fn try_apply_unary_elementwise(
1653        &self,
1654        f: impl Fn(&Series) -> PolarsResult<Series>,
1655    ) -> PolarsResult<Column> {
1656        match self {
1657            Column::Series(s) => f(s).map(Column::from),
1658            Column::Scalar(s) => Ok(ScalarColumn::from_single_value_series(
1659                f(&s.as_single_value_series())?,
1660                s.len(),
1661            )
1662            .into()),
1663        }
1664    }
1665
1666    pub fn apply_broadcasting_binary_elementwise(
1667        &self,
1668        other: &Self,
1669        op: impl Fn(&Series, &Series) -> Series,
1670    ) -> PolarsResult<Column> {
1671        self.try_apply_broadcasting_binary_elementwise(other, |lhs, rhs| Ok(op(lhs, rhs)))
1672    }
1673    pub fn try_apply_broadcasting_binary_elementwise(
1674        &self,
1675        other: &Self,
1676        op: impl Fn(&Series, &Series) -> PolarsResult<Series>,
1677    ) -> PolarsResult<Column> {
1678        fn output_length(a: &Column, b: &Column) -> PolarsResult<usize> {
1679            match (a.len(), b.len()) {
1680                // broadcasting
1681                (1, o) | (o, 1) => Ok(o),
1682                // equal
1683                (a, b) if a == b => Ok(a),
1684                // unequal
1685                (a, b) => {
1686                    polars_bail!(InvalidOperation: "cannot do a binary operation on columns of different lengths: got {} and {}", a, b)
1687                },
1688            }
1689        }
1690
1691        // Here we rely on the underlying broadcast operations.
1692        let length = output_length(self, other)?;
1693        match (self, other) {
1694            (Column::Series(lhs), Column::Series(rhs)) => op(lhs, rhs).map(Column::from),
1695            (Column::Series(lhs), Column::Scalar(rhs)) => {
1696                op(lhs, &rhs.as_single_value_series()).map(Column::from)
1697            },
1698            (Column::Scalar(lhs), Column::Series(rhs)) => {
1699                op(&lhs.as_single_value_series(), rhs).map(Column::from)
1700            },
1701            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
1702                let lhs = lhs.as_single_value_series();
1703                let rhs = rhs.as_single_value_series();
1704
1705                Ok(ScalarColumn::from_single_value_series(op(&lhs, &rhs)?, length).into_column())
1706            },
1707        }
1708    }
1709
1710    pub fn apply_binary_elementwise(
1711        &self,
1712        other: &Self,
1713        f: impl Fn(&Series, &Series) -> Series,
1714        f_lb: impl Fn(&Scalar, &Series) -> Series,
1715        f_rb: impl Fn(&Series, &Scalar) -> Series,
1716    ) -> Column {
1717        self.try_apply_binary_elementwise(
1718            other,
1719            |lhs, rhs| Ok(f(lhs, rhs)),
1720            |lhs, rhs| Ok(f_lb(lhs, rhs)),
1721            |lhs, rhs| Ok(f_rb(lhs, rhs)),
1722        )
1723        .unwrap()
1724    }
1725    pub fn try_apply_binary_elementwise(
1726        &self,
1727        other: &Self,
1728        f: impl Fn(&Series, &Series) -> PolarsResult<Series>,
1729        f_lb: impl Fn(&Scalar, &Series) -> PolarsResult<Series>,
1730        f_rb: impl Fn(&Series, &Scalar) -> PolarsResult<Series>,
1731    ) -> PolarsResult<Column> {
1732        debug_assert_eq!(self.len(), other.len());
1733
1734        match (self, other) {
1735            (Column::Series(lhs), Column::Series(rhs)) => f(lhs, rhs).map(Column::from),
1736            (Column::Series(lhs), Column::Scalar(rhs)) => f_rb(lhs, rhs.scalar()).map(Column::from),
1737            (Column::Scalar(lhs), Column::Series(rhs)) => f_lb(lhs.scalar(), rhs).map(Column::from),
1738            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
1739                let lhs = lhs.as_single_value_series();
1740                let rhs = rhs.as_single_value_series();
1741
1742                Ok(
1743                    ScalarColumn::from_single_value_series(f(&lhs, &rhs)?, self.len())
1744                        .into_column(),
1745                )
1746            },
1747        }
1748    }
1749
1750    #[cfg(feature = "approx_unique")]
1751    pub fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
1752        match self {
1753            Column::Series(s) => s.approx_n_unique(),
1754            Column::Scalar(s) => {
1755                // @NOTE: We do this for the error handling.
1756                s.as_single_value_series().approx_n_unique()?;
1757                Ok(1)
1758            },
1759        }
1760    }
1761
1762    pub fn n_chunks(&self) -> usize {
1763        match self {
1764            Column::Series(s) => s.n_chunks(),
1765            Column::Scalar(s) => s.lazy_as_materialized_series().map_or(1, |x| x.n_chunks()),
1766        }
1767    }
1768
1769    #[expect(clippy::wrong_self_convention)]
1770    pub(crate) fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
1771        // @scalar-opt
1772        self.as_materialized_series().into_total_ord_inner()
1773    }
1774    #[expect(unused, clippy::wrong_self_convention)]
1775    pub(crate) fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
1776        // @scalar-opt
1777        self.as_materialized_series().into_total_eq_inner()
1778    }
1779
1780    pub fn rechunk_to_arrow(self, compat_level: CompatLevel) -> Box<dyn Array> {
1781        // Rechunk to one chunk if necessary
1782        let mut series = self.take_materialized_series();
1783        if series.n_chunks() > 1 {
1784            series = series.rechunk();
1785        }
1786        series.to_arrow(0, compat_level)
1787    }
1788
1789    pub fn trim_lists_to_normalized_offsets(&self) -> Option<Column> {
1790        self.as_materialized_series()
1791            .trim_lists_to_normalized_offsets()
1792            .map(Column::from)
1793    }
1794
1795    pub fn propagate_nulls(&self) -> Option<Column> {
1796        self.as_materialized_series()
1797            .propagate_nulls()
1798            .map(Column::from)
1799    }
1800
1801    pub fn deposit(&self, validity: &Bitmap) -> Column {
1802        self.as_materialized_series()
1803            .deposit(validity)
1804            .into_column()
1805    }
1806
1807    pub fn rechunk_validity(&self) -> Option<Bitmap> {
1808        // @scalar-opt
1809        self.as_materialized_series().rechunk_validity()
1810    }
1811
1812    pub fn unique_id(&self) -> PolarsResult<(IdxSize, Vec<IdxSize>)> {
1813        self.as_materialized_series().unique_id()
1814    }
1815}
1816
1817impl Default for Column {
1818    fn default() -> Self {
1819        Self::new_scalar(
1820            PlSmallStr::EMPTY,
1821            Scalar::new(DataType::Int64, AnyValue::Null),
1822            0,
1823        )
1824    }
1825}
1826
1827impl PartialEq for Column {
1828    fn eq(&self, other: &Self) -> bool {
1829        // @scalar-opt
1830        self.as_materialized_series()
1831            .eq(other.as_materialized_series())
1832    }
1833}
1834
1835impl From<Series> for Column {
1836    #[inline]
1837    fn from(series: Series) -> Self {
1838        // We instantiate a Scalar Column if the Series is length is 1. This makes it possible for
1839        // future operations to be faster.
1840        if series.len() == 1 {
1841            return Self::Scalar(ScalarColumn::unit_scalar_from_series(series));
1842        }
1843
1844        Self::Series(SeriesColumn::new(series))
1845    }
1846}
1847
1848impl<T: IntoSeries> IntoColumn for T {
1849    #[inline]
1850    fn into_column(self) -> Column {
1851        self.into_series().into()
1852    }
1853}
1854
1855impl IntoColumn for Column {
1856    #[inline(always)]
1857    fn into_column(self) -> Column {
1858        self
1859    }
1860}
1861
1862/// We don't want to serialize the scalar columns. So this helps pretend that columns are always
1863/// initialized without implementing From<Column> for Series.
1864///
1865/// Those casts should be explicit.
1866#[derive(Clone)]
1867#[cfg_attr(feature = "serde", derive(serde::Serialize))]
1868#[cfg_attr(feature = "serde", serde(into = "Series"))]
1869struct _SerdeSeries(Series);
1870
1871impl From<Column> for _SerdeSeries {
1872    #[inline]
1873    fn from(value: Column) -> Self {
1874        Self(value.take_materialized_series())
1875    }
1876}
1877
1878impl From<_SerdeSeries> for Series {
1879    #[inline]
1880    fn from(value: _SerdeSeries) -> Self {
1881        value.0
1882    }
1883}