Skip to main content

polars_core/frame/column/
scalar.rs

1use std::sync::OnceLock;
2
3use polars_error::PolarsResult;
4use polars_utils::pl_str::PlSmallStr;
5
6use super::{AnyValue, Column, DataType, IntoColumn, Scalar, Series};
7use crate::chunked_array::cast::CastOptions;
8
9/// A [`Column`] that consists of a repeated [`Scalar`]
10///
11/// This is lazily materialized into a [`Series`].
12#[derive(Debug, Clone)]
13pub struct ScalarColumn {
14    name: PlSmallStr,
15    // The value of this scalar may be incoherent when `length == 0`.
16    scalar: Scalar,
17    length: usize,
18
19    // invariants:
20    // materialized.name() == name
21    // materialized.len() == length
22    // materialized.dtype() == value.dtype
23    // materialized[i] == value, for all 0 <= i < length
24    /// A lazily materialized [`Series`] variant of this [`ScalarColumn`]
25    materialized: OnceLock<Series>,
26}
27
28impl ScalarColumn {
29    #[inline]
30    pub fn new(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
31        Self {
32            name,
33            scalar,
34            length,
35
36            materialized: OnceLock::new(),
37        }
38    }
39
40    #[inline]
41    pub fn new_empty(name: PlSmallStr, dtype: DataType) -> Self {
42        Self {
43            name,
44            scalar: Scalar::new(dtype, AnyValue::Null),
45            length: 0,
46
47            materialized: OnceLock::new(),
48        }
49    }
50
51    pub fn full_null(name: PlSmallStr, length: usize, dtype: DataType) -> Self {
52        Self::new(name, Scalar::null(dtype), length)
53    }
54
55    pub fn name(&self) -> &PlSmallStr {
56        &self.name
57    }
58
59    pub fn scalar(&self) -> &Scalar {
60        &self.scalar
61    }
62
63    pub fn dtype(&self) -> &DataType {
64        self.scalar.dtype()
65    }
66
67    pub fn len(&self) -> usize {
68        self.length
69    }
70
71    pub fn is_empty(&self) -> bool {
72        self.length == 0
73    }
74
75    pub fn is_full_null(&self) -> bool {
76        self.scalar.is_null()
77    }
78
79    fn _to_series(name: PlSmallStr, value: Scalar, length: usize) -> Series {
80        let series = if length == 0 {
81            Series::new_empty(name, value.dtype())
82        } else {
83            value.into_series(name).new_from_index(0, length)
84        };
85
86        debug_assert_eq!(series.len(), length);
87
88        series
89    }
90
91    /// Materialize the [`ScalarColumn`] into a [`Series`].
92    pub fn to_series(&self) -> Series {
93        Self::_to_series(self.name.clone(), self.scalar.clone(), self.length)
94    }
95
96    /// Get the [`ScalarColumn`] as [`Series`] if it was already materialized.
97    pub fn lazy_as_materialized_series(&self) -> Option<&Series> {
98        self.materialized.get()
99    }
100
101    /// Get the [`ScalarColumn`] as [`Series`]
102    ///
103    /// This needs to materialize upon the first call. Afterwards, this is cached.
104    pub fn as_materialized_series(&self) -> &Series {
105        self.materialized.get_or_init(|| self.to_series())
106    }
107
108    /// Take the [`ScalarColumn`] and materialize as a [`Series`] if not already done.
109    pub fn take_materialized_series(self) -> Series {
110        self.materialized
111            .into_inner()
112            .unwrap_or_else(|| Self::_to_series(self.name, self.scalar, self.length))
113    }
114
115    /// Take the [`ScalarColumn`] as a series with a single value.
116    ///
117    /// If the [`ScalarColumn`] has `length=0` the resulting `Series` will also have `length=0`.
118    pub fn as_single_value_series(&self) -> Series {
119        self.as_n_values_series(1)
120    }
121
122    /// Take the [`ScalarColumn`] as a series with a `n` values.
123    ///
124    /// If the [`ScalarColumn`] has `length=0` the resulting `Series` will also have `length=0`.
125    pub fn as_n_values_series(&self, n: usize) -> Series {
126        let length = usize::min(n, self.length);
127
128        match self.materialized.get() {
129            // Don't take a refcount if we only want length-1 (or empty) - the materialized series
130            // could be extremely large.
131            Some(s) if length == self.length || length > 1 => s.head(Some(length)),
132            _ => Self::_to_series(self.name.clone(), self.scalar.clone(), length),
133        }
134    }
135
136    /// Create a new [`ScalarColumn`] from a `length=1` Series and expand it `length`.
137    ///
138    /// This will panic if the value cannot be made static or if the series has length `0`.
139    #[inline]
140    pub fn unit_scalar_from_series(series: Series) -> Self {
141        assert_eq!(series.len(), 1);
142        // SAFETY: We just did the bounds check
143        let value = unsafe { series.get_unchecked(0) };
144        let value = value.into_static();
145        let value = Scalar::new(series.dtype().clone(), value);
146        let mut sc = ScalarColumn::new(series.name().clone(), value, 1);
147        sc.materialized = OnceLock::from(series);
148        sc
149    }
150
151    /// Create a new [`ScalarColumn`] from a `length<=1` Series and expand it `length`.
152    ///
153    /// If `series` is empty and `length` is non-zero, a full-NULL column of `length` will be returned.
154    ///
155    /// This will panic if the value cannot be made static.
156    pub fn from_single_value_series(series: Series, length: usize) -> Self {
157        debug_assert!(series.len() <= 1);
158
159        let value = if series.is_empty() {
160            AnyValue::Null
161        } else {
162            unsafe { series.get_unchecked(0) }.into_static()
163        };
164        let value = Scalar::new(series.dtype().clone(), value);
165        ScalarColumn::new(series.name().clone(), value, length)
166    }
167
168    /// Resize the [`ScalarColumn`] to new `length`.
169    ///
170    /// This reuses the materialized [`Series`], if `length <= self.length`.
171    pub fn resize(&self, length: usize) -> ScalarColumn {
172        if self.length == length {
173            return self.clone();
174        }
175
176        // This is violates an invariant if this triggers, the scalar value is undefined if the
177        // self.length == 0 so therefore we should never resize using that value.
178        debug_assert!(length == 0 || self.length > 0);
179
180        let mut resized = Self {
181            name: self.name.clone(),
182            scalar: self.scalar.clone(),
183            length,
184            materialized: OnceLock::new(),
185        };
186
187        if length == self.length || (length < self.length && length > 1) {
188            if let Some(materialized) = self.materialized.get() {
189                resized.materialized = OnceLock::from(materialized.head(Some(length)));
190                debug_assert_eq!(resized.materialized.get().unwrap().len(), length);
191            }
192        }
193
194        resized
195    }
196
197    pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {
198        // @NOTE: We expect that when casting the materialized series mostly does not need change
199        // the physical array. Therefore, we try to cast the entire materialized array if it is
200        // available.
201
202        match self.materialized.get() {
203            Some(s) => {
204                let materialized = s.cast_with_options(dtype, options)?;
205                assert_eq!(self.length, materialized.len());
206
207                let mut casted = if materialized.is_empty() {
208                    Self::new_empty(materialized.name().clone(), materialized.dtype().clone())
209                } else {
210                    // SAFETY: Just did bounds check
211                    let scalar = unsafe { materialized.get_unchecked(0) }.into_static();
212                    Self::new(
213                        materialized.name().clone(),
214                        Scalar::new(materialized.dtype().clone(), scalar),
215                        self.length,
216                    )
217                };
218                casted.materialized = OnceLock::from(materialized);
219                Ok(casted)
220            },
221            None => {
222                let s = self
223                    .as_single_value_series()
224                    .cast_with_options(dtype, options)?;
225
226                if self.length == 0 {
227                    Ok(Self::new_empty(s.name().clone(), s.dtype().clone()))
228                } else {
229                    assert_eq!(1, s.len());
230                    Ok(Self::from_single_value_series(s, self.length))
231                }
232            },
233        }
234    }
235
236    pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Self> {
237        self.cast_with_options(dtype, CastOptions::Strict)
238    }
239    pub fn cast(&self, dtype: &DataType) -> PolarsResult<Self> {
240        self.cast_with_options(dtype, CastOptions::NonStrict)
241    }
242    /// # Safety
243    ///
244    /// This can lead to invalid memory access in downstream code.
245    pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Self> {
246        // @NOTE: We expect that when casting the materialized series mostly does not need change
247        // the physical array. Therefore, we try to cast the entire materialized array if it is
248        // available.
249
250        match self.materialized.get() {
251            Some(s) => {
252                let materialized = s.cast_unchecked(dtype)?;
253                assert_eq!(self.length, materialized.len());
254
255                let mut casted = if materialized.is_empty() {
256                    Self::new_empty(materialized.name().clone(), materialized.dtype().clone())
257                } else {
258                    // SAFETY: Just did bounds check
259                    let scalar = unsafe { materialized.get_unchecked(0) }.into_static();
260                    Self::new(
261                        materialized.name().clone(),
262                        Scalar::new(materialized.dtype().clone(), scalar),
263                        self.length,
264                    )
265                };
266                casted.materialized = OnceLock::from(materialized);
267                Ok(casted)
268            },
269            None => {
270                let s = self.as_single_value_series().cast_unchecked(dtype)?;
271                assert_eq!(1, s.len());
272
273                if self.length == 0 {
274                    Ok(Self::new_empty(s.name().clone(), s.dtype().clone()))
275                } else {
276                    Ok(Self::from_single_value_series(s, self.length))
277                }
278            },
279        }
280    }
281
282    pub fn rename(&mut self, name: PlSmallStr) -> &mut Self {
283        if let Some(series) = self.materialized.get_mut() {
284            series.rename(name.clone());
285        }
286
287        self.name = name;
288        self
289    }
290
291    pub fn has_nulls(&self) -> bool {
292        self.length != 0 && self.scalar.is_null()
293    }
294
295    pub fn drop_nulls(&self) -> Self {
296        if self.scalar.is_null() {
297            self.resize(0)
298        } else {
299            self.clone()
300        }
301    }
302
303    pub fn into_nulls(mut self) -> Self {
304        self.scalar.update(AnyValue::Null);
305        self
306    }
307
308    pub fn map_scalar(&mut self, map_scalar: impl Fn(Scalar) -> Scalar) {
309        self.scalar = map_scalar(std::mem::take(&mut self.scalar));
310        self.materialized.take();
311    }
312    pub fn with_value(&mut self, value: AnyValue<'static>) -> &mut Self {
313        self.scalar.update(value);
314        self.materialized.take();
315        self
316    }
317}
318
319impl IntoColumn for ScalarColumn {
320    #[inline(always)]
321    fn into_column(self) -> Column {
322        self.into()
323    }
324}
325
326impl From<ScalarColumn> for Column {
327    #[inline]
328    fn from(value: ScalarColumn) -> Self {
329        Self::Scalar(value)
330    }
331}
332
#[cfg(feature = "dsl-schema")]
impl schemars::JsonSchema for ScalarColumn {
    /// Short schema name of the type.
    fn schema_name() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed("ScalarColumn")
    }

    /// Schema identifier: the module path followed by the type name.
    fn schema_id() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed(concat!(module_path!(), "::ScalarColumn"))
    }

    /// The schema is that of the serialization proxy `SerializeWrap`.
    fn json_schema(generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
        <serde_impl::SerializeWrap as schemars::JsonSchema>::json_schema(generator)
    }
}
347
#[cfg(feature = "serde")]
mod serde_impl {
    use std::sync::OnceLock;

    use polars_error::PolarsError;
    use polars_utils::pl_str::PlSmallStr;

    use super::ScalarColumn;
    use crate::frame::{Scalar, Series};

    /// Serialization proxy for [`ScalarColumn`].
    ///
    /// The scalar is stored as a series built from it, next to the column name and the
    /// logical length.
    #[derive(serde::Serialize, serde::Deserialize)]
    #[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
    pub struct SerializeWrap {
        name: PlSmallStr,
        /// Unit-length series for dispatching to IPC serialize
        unit_series: Series,
        length: usize,
    }

    impl From<&ScalarColumn> for SerializeWrap {
        /// Capture name, scalar (as an unnamed series) and length of `column`.
        fn from(column: &ScalarColumn) -> Self {
            let unit_series = column.scalar.clone().into_series(PlSmallStr::EMPTY);

            Self {
                name: column.name.clone(),
                unit_series,
                length: column.length,
            }
        }
    }

    impl TryFrom<SerializeWrap> for ScalarColumn {
        type Error = PolarsError;

        /// Rebuild an unmaterialized [`ScalarColumn`] from its proxy.
        ///
        /// Fails if the first element of the stored series cannot be read.
        fn try_from(wrap: SerializeWrap) -> Result<Self, Self::Error> {
            let SerializeWrap {
                name,
                unit_series,
                length,
            } = wrap;

            let first = unit_series.get(0)?.into_static();
            let scalar = Scalar::new(unit_series.dtype().clone(), first);

            Ok(Self {
                name,
                scalar,
                length,
                materialized: OnceLock::new(),
            })
        }
    }

    impl serde::ser::Serialize for ScalarColumn {
        /// Serialize through the [`SerializeWrap`] proxy.
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: serde::Serializer,
        {
            let wrap = SerializeWrap::from(self);
            wrap.serialize(serializer)
        }
    }

    impl<'de> serde::de::Deserialize<'de> for ScalarColumn {
        /// Deserialize a [`SerializeWrap`] and convert it, mapping conversion failures to
        /// a custom deserializer error.
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            use serde::de::Error;

            let wrap = SerializeWrap::deserialize(deserializer)?;
            ScalarColumn::try_from(wrap).map_err(D::Error::custom)
        }
    }
}