polars_core/frame/column/
scalar.rs

1use std::sync::OnceLock;
2
3use polars_error::PolarsResult;
4use polars_utils::pl_str::PlSmallStr;
5
6use super::{AnyValue, Column, DataType, IntoColumn, Scalar, Series};
7use crate::chunked_array::cast::CastOptions;
8
/// A [`Column`] that consists of a repeated [`Scalar`]
///
/// This is lazily materialized into a [`Series`].
#[derive(Debug, Clone)]
pub struct ScalarColumn {
    // Name of the column; mirrored by the name of `materialized` once that exists.
    name: PlSmallStr,
    // The value of this scalar may be incoherent when `length == 0`.
    scalar: Scalar,
    // Number of times `scalar` is logically repeated.
    length: usize,

    // invariants:
    // materialized.name() == name
    // materialized.len() == length
    // materialized.dtype() == scalar.dtype()
    // materialized[i] == scalar, for all 0 <= i < length
    /// A lazily materialized [`Series`] variant of this [`ScalarColumn`]
    materialized: OnceLock<Series>,
}
27
28impl ScalarColumn {
29    #[inline]
30    pub fn new(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
31        Self {
32            name,
33            scalar,
34            length,
35
36            materialized: OnceLock::new(),
37        }
38    }
39
40    #[inline]
41    pub fn new_empty(name: PlSmallStr, dtype: DataType) -> Self {
42        Self {
43            name,
44            scalar: Scalar::new(dtype, AnyValue::Null),
45            length: 0,
46
47            materialized: OnceLock::new(),
48        }
49    }
50
51    pub fn full_null(name: PlSmallStr, length: usize, dtype: DataType) -> Self {
52        Self::new(name, Scalar::null(dtype), length)
53    }
54
55    pub fn name(&self) -> &PlSmallStr {
56        &self.name
57    }
58
59    pub fn scalar(&self) -> &Scalar {
60        &self.scalar
61    }
62
63    pub fn dtype(&self) -> &DataType {
64        self.scalar.dtype()
65    }
66
67    pub fn len(&self) -> usize {
68        self.length
69    }
70
71    pub fn is_empty(&self) -> bool {
72        self.length == 0
73    }
74
75    fn _to_series(name: PlSmallStr, value: Scalar, length: usize) -> Series {
76        let series = if length == 0 {
77            Series::new_empty(name, value.dtype())
78        } else {
79            value.into_series(name).new_from_index(0, length)
80        };
81
82        debug_assert_eq!(series.len(), length);
83
84        series
85    }
86
87    /// Materialize the [`ScalarColumn`] into a [`Series`].
88    pub fn to_series(&self) -> Series {
89        Self::_to_series(self.name.clone(), self.scalar.clone(), self.length)
90    }
91
92    /// Get the [`ScalarColumn`] as [`Series`] if it was already materialized.
93    pub fn lazy_as_materialized_series(&self) -> Option<&Series> {
94        self.materialized.get()
95    }
96
97    /// Get the [`ScalarColumn`] as [`Series`]
98    ///
99    /// This needs to materialize upon the first call. Afterwards, this is cached.
100    pub fn as_materialized_series(&self) -> &Series {
101        self.materialized.get_or_init(|| self.to_series())
102    }
103
104    /// Take the [`ScalarColumn`] and materialize as a [`Series`] if not already done.
105    pub fn take_materialized_series(self) -> Series {
106        self.materialized
107            .into_inner()
108            .unwrap_or_else(|| Self::_to_series(self.name, self.scalar, self.length))
109    }
110
111    /// Take the [`ScalarColumn`] as a series with a single value.
112    ///
113    /// If the [`ScalarColumn`] has `length=0` the resulting `Series` will also have `length=0`.
114    pub fn as_single_value_series(&self) -> Series {
115        self.as_n_values_series(1)
116    }
117
118    /// Take the [`ScalarColumn`] as a series with a `n` values.
119    ///
120    /// If the [`ScalarColumn`] has `length=0` the resulting `Series` will also have `length=0`.
121    pub fn as_n_values_series(&self, n: usize) -> Series {
122        let length = usize::min(n, self.length);
123
124        match self.materialized.get() {
125            // Don't take a refcount if we only want length-1 (or empty) - the materialized series
126            // could be extremely large.
127            Some(s) if length == self.length || length > 1 => s.head(Some(length)),
128            _ => Self::_to_series(self.name.clone(), self.scalar.clone(), length),
129        }
130    }
131
132    /// Create a new [`ScalarColumn`] from a `length=1` Series and expand it `length`.
133    ///
134    /// This will panic if the value cannot be made static or if the series has length `0`.
135    #[inline]
136    pub fn unit_scalar_from_series(series: Series) -> Self {
137        assert_eq!(series.len(), 1);
138        // SAFETY: We just did the bounds check
139        let value = unsafe { series.get_unchecked(0) };
140        let value = value.into_static();
141        let value = Scalar::new(series.dtype().clone(), value);
142        let mut sc = ScalarColumn::new(series.name().clone(), value, 1);
143        sc.materialized = OnceLock::from(series);
144        sc
145    }
146
147    /// Create a new [`ScalarColumn`] from a `length<=1` Series and expand it `length`.
148    ///
149    /// If `series` is empty and `length` is non-zero, a full-NULL column of `length` will be returned.
150    ///
151    /// This will panic if the value cannot be made static.
152    pub fn from_single_value_series(series: Series, length: usize) -> Self {
153        debug_assert!(series.len() <= 1);
154
155        let value = if series.is_empty() {
156            AnyValue::Null
157        } else {
158            unsafe { series.get_unchecked(0) }.into_static()
159        };
160        let value = Scalar::new(series.dtype().clone(), value);
161        ScalarColumn::new(series.name().clone(), value, length)
162    }
163
164    /// Resize the [`ScalarColumn`] to new `length`.
165    ///
166    /// This reuses the materialized [`Series`], if `length <= self.length`.
167    pub fn resize(&self, length: usize) -> ScalarColumn {
168        if self.length == length {
169            return self.clone();
170        }
171
172        // This is violates an invariant if this triggers, the scalar value is undefined if the
173        // self.length == 0 so therefore we should never resize using that value.
174        debug_assert!(length == 0 || self.length > 0);
175
176        let mut resized = Self {
177            name: self.name.clone(),
178            scalar: self.scalar.clone(),
179            length,
180            materialized: OnceLock::new(),
181        };
182
183        if length == self.length || (length < self.length && length > 1) {
184            if let Some(materialized) = self.materialized.get() {
185                resized.materialized = OnceLock::from(materialized.head(Some(length)));
186                debug_assert_eq!(resized.materialized.get().unwrap().len(), length);
187            }
188        }
189
190        resized
191    }
192
193    pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {
194        // @NOTE: We expect that when casting the materialized series mostly does not need change
195        // the physical array. Therefore, we try to cast the entire materialized array if it is
196        // available.
197
198        match self.materialized.get() {
199            Some(s) => {
200                let materialized = s.cast_with_options(dtype, options)?;
201                assert_eq!(self.length, materialized.len());
202
203                let mut casted = if materialized.is_empty() {
204                    Self::new_empty(materialized.name().clone(), materialized.dtype().clone())
205                } else {
206                    // SAFETY: Just did bounds check
207                    let scalar = unsafe { materialized.get_unchecked(0) }.into_static();
208                    Self::new(
209                        materialized.name().clone(),
210                        Scalar::new(materialized.dtype().clone(), scalar),
211                        self.length,
212                    )
213                };
214                casted.materialized = OnceLock::from(materialized);
215                Ok(casted)
216            },
217            None => {
218                let s = self
219                    .as_single_value_series()
220                    .cast_with_options(dtype, options)?;
221
222                if self.length == 0 {
223                    Ok(Self::new_empty(s.name().clone(), s.dtype().clone()))
224                } else {
225                    assert_eq!(1, s.len());
226                    Ok(Self::from_single_value_series(s, self.length))
227                }
228            },
229        }
230    }
231
232    pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Self> {
233        self.cast_with_options(dtype, CastOptions::Strict)
234    }
235    pub fn cast(&self, dtype: &DataType) -> PolarsResult<Self> {
236        self.cast_with_options(dtype, CastOptions::NonStrict)
237    }
238    /// # Safety
239    ///
240    /// This can lead to invalid memory access in downstream code.
241    pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Self> {
242        // @NOTE: We expect that when casting the materialized series mostly does not need change
243        // the physical array. Therefore, we try to cast the entire materialized array if it is
244        // available.
245
246        match self.materialized.get() {
247            Some(s) => {
248                let materialized = s.cast_unchecked(dtype)?;
249                assert_eq!(self.length, materialized.len());
250
251                let mut casted = if materialized.is_empty() {
252                    Self::new_empty(materialized.name().clone(), materialized.dtype().clone())
253                } else {
254                    // SAFETY: Just did bounds check
255                    let scalar = unsafe { materialized.get_unchecked(0) }.into_static();
256                    Self::new(
257                        materialized.name().clone(),
258                        Scalar::new(materialized.dtype().clone(), scalar),
259                        self.length,
260                    )
261                };
262                casted.materialized = OnceLock::from(materialized);
263                Ok(casted)
264            },
265            None => {
266                let s = self.as_single_value_series().cast_unchecked(dtype)?;
267                assert_eq!(1, s.len());
268
269                if self.length == 0 {
270                    Ok(Self::new_empty(s.name().clone(), s.dtype().clone()))
271                } else {
272                    Ok(Self::from_single_value_series(s, self.length))
273                }
274            },
275        }
276    }
277
278    pub fn rename(&mut self, name: PlSmallStr) -> &mut Self {
279        if let Some(series) = self.materialized.get_mut() {
280            series.rename(name.clone());
281        }
282
283        self.name = name;
284        self
285    }
286
287    pub fn has_nulls(&self) -> bool {
288        self.length != 0 && self.scalar.is_null()
289    }
290
291    pub fn drop_nulls(&self) -> Self {
292        if self.scalar.is_null() {
293            self.resize(0)
294        } else {
295            self.clone()
296        }
297    }
298
299    pub fn into_nulls(mut self) -> Self {
300        self.scalar.update(AnyValue::Null);
301        self
302    }
303
304    pub fn map_scalar(&mut self, map_scalar: impl Fn(Scalar) -> Scalar) {
305        self.scalar = map_scalar(std::mem::take(&mut self.scalar));
306        self.materialized.take();
307    }
308    pub fn with_value(&mut self, value: AnyValue<'static>) -> &mut Self {
309        self.scalar.update(value);
310        self.materialized.take();
311        self
312    }
313}
314
315impl IntoColumn for ScalarColumn {
316    #[inline(always)]
317    fn into_column(self) -> Column {
318        self.into()
319    }
320}
321
322impl From<ScalarColumn> for Column {
323    #[inline]
324    fn from(value: ScalarColumn) -> Self {
325        Self::Scalar(value)
326    }
327}
328
#[cfg(feature = "dsl-schema")]
impl schemars::JsonSchema for ScalarColumn {
    /// Short schema name of this type.
    fn schema_name() -> String {
        String::from("ScalarColumn")
    }

    /// Schema identifier: the module path followed by the type name.
    fn schema_id() -> std::borrow::Cow<'static, str> {
        let id: &'static str = concat!(module_path!(), "::", "ScalarColumn");
        std::borrow::Cow::Borrowed(id)
    }

    /// The schema is the one of the serialized wrapper type.
    fn json_schema(generator: &mut schemars::r#gen::SchemaGenerator) -> schemars::schema::Schema {
        <serde_impl::SerializeWrap as schemars::JsonSchema>::json_schema(generator)
    }
}
343
#[cfg(feature = "serde")]
mod serde_impl {
    use std::sync::OnceLock;

    use polars_error::PolarsError;
    use polars_utils::pl_str::PlSmallStr;

    use super::ScalarColumn;
    use crate::frame::{Scalar, Series};

    // Serialized representation of a `ScalarColumn`: the name, the scalar wrapped into a
    // unit-length series, and the logical length. The materialized cache is not serialized.
    #[derive(serde::Serialize, serde::Deserialize)]
    #[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
    pub struct SerializeWrap {
        name: PlSmallStr,
        /// Unit-length series for dispatching to IPC serialize
        unit_series: Series,
        length: usize,
    }

    impl From<&ScalarColumn> for SerializeWrap {
        fn from(value: &ScalarColumn) -> Self {
            // The unit series is unnamed; the column name travels in its own field.
            let unit_series = value.scalar.clone().into_series(PlSmallStr::EMPTY);

            Self {
                name: value.name.clone(),
                unit_series,
                length: value.length,
            }
        }
    }

    impl TryFrom<SerializeWrap> for ScalarColumn {
        type Error = PolarsError;

        fn try_from(value: SerializeWrap) -> Result<Self, Self::Error> {
            let SerializeWrap {
                name,
                unit_series,
                length,
            } = value;

            // Recover the scalar from the first value of the unit series; this fails if the
            // series has no value at index 0.
            let dtype = unit_series.dtype().clone();
            let first = unit_series.get(0)?.into_static();

            Ok(Self {
                name,
                scalar: Scalar::new(dtype, first),
                length,
                materialized: OnceLock::new(),
            })
        }
    }

    impl serde::ser::Serialize for ScalarColumn {
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: serde::Serializer,
        {
            let wrap = SerializeWrap::from(self);
            serde::ser::Serialize::serialize(&wrap, serializer)
        }
    }

    impl<'de> serde::de::Deserialize<'de> for ScalarColumn {
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            use serde::de::Error;

            let wrap = <SerializeWrap as serde::de::Deserialize<'de>>::deserialize(deserializer)?;
            ScalarColumn::try_from(wrap).map_err(D::Error::custom)
        }
    }
}