polars_core/frame/column/
scalar.rs

1use std::sync::OnceLock;
2
3use polars_error::PolarsResult;
4use polars_utils::pl_str::PlSmallStr;
5
6use super::{AnyValue, Column, DataType, IntoColumn, Scalar, Series};
7use crate::chunked_array::cast::CastOptions;
8
9/// A [`Column`] that consists of a repeated [`Scalar`]
10///
11/// This is lazily materialized into a [`Series`].
12#[derive(Debug, Clone)]
13pub struct ScalarColumn {
14    name: PlSmallStr,
15    // The value of this scalar may be incoherent when `length == 0`.
16    scalar: Scalar,
17    length: usize,
18
19    // invariants:
20    // materialized.name() == name
21    // materialized.len() == length
22    // materialized.dtype() == value.dtype
23    // materialized[i] == value, for all 0 <= i < length
24    /// A lazily materialized [`Series`] variant of this [`ScalarColumn`]
25    materialized: OnceLock<Series>,
26}
27
28impl ScalarColumn {
29    #[inline]
30    pub fn new(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
31        Self {
32            name,
33            scalar,
34            length,
35
36            materialized: OnceLock::new(),
37        }
38    }
39
40    #[inline]
41    pub fn new_empty(name: PlSmallStr, dtype: DataType) -> Self {
42        Self {
43            name,
44            scalar: Scalar::new(dtype, AnyValue::Null),
45            length: 0,
46
47            materialized: OnceLock::new(),
48        }
49    }
50
51    pub fn name(&self) -> &PlSmallStr {
52        &self.name
53    }
54
55    pub fn scalar(&self) -> &Scalar {
56        &self.scalar
57    }
58
59    pub fn dtype(&self) -> &DataType {
60        self.scalar.dtype()
61    }
62
63    pub fn len(&self) -> usize {
64        self.length
65    }
66
67    pub fn is_empty(&self) -> bool {
68        self.length == 0
69    }
70
71    fn _to_series(name: PlSmallStr, value: Scalar, length: usize) -> Series {
72        let series = if length == 0 {
73            Series::new_empty(name, value.dtype())
74        } else {
75            value.into_series(name).new_from_index(0, length)
76        };
77
78        debug_assert_eq!(series.len(), length);
79
80        series
81    }
82
83    /// Materialize the [`ScalarColumn`] into a [`Series`].
84    pub fn to_series(&self) -> Series {
85        Self::_to_series(self.name.clone(), self.scalar.clone(), self.length)
86    }
87
88    /// Get the [`ScalarColumn`] as [`Series`] if it was already materialized.
89    pub fn lazy_as_materialized_series(&self) -> Option<&Series> {
90        self.materialized.get()
91    }
92
93    /// Get the [`ScalarColumn`] as [`Series`]
94    ///
95    /// This needs to materialize upon the first call. Afterwards, this is cached.
96    pub fn as_materialized_series(&self) -> &Series {
97        self.materialized.get_or_init(|| self.to_series())
98    }
99
100    /// Take the [`ScalarColumn`] and materialize as a [`Series`] if not already done.
101    pub fn take_materialized_series(self) -> Series {
102        self.materialized
103            .into_inner()
104            .unwrap_or_else(|| Self::_to_series(self.name, self.scalar, self.length))
105    }
106
107    /// Take the [`ScalarColumn`] as a series with a single value.
108    ///
109    /// If the [`ScalarColumn`] has `length=0` the resulting `Series` will also have `length=0`.
110    pub fn as_single_value_series(&self) -> Series {
111        self.as_n_values_series(1)
112    }
113
114    /// Take the [`ScalarColumn`] as a series with a `n` values.
115    ///
116    /// If the [`ScalarColumn`] has `length=0` the resulting `Series` will also have `length=0`.
117    pub fn as_n_values_series(&self, n: usize) -> Series {
118        let length = usize::min(n, self.length);
119
120        match self.materialized.get() {
121            // Don't take a refcount if we only want length-1 (or empty) - the materialized series
122            // could be extremely large.
123            Some(s) if length == self.length || length > 1 => s.head(Some(length)),
124            _ => Self::_to_series(self.name.clone(), self.scalar.clone(), length),
125        }
126    }
127
128    /// Create a new [`ScalarColumn`] from a `length=1` Series and expand it `length`.
129    ///
130    /// This will panic if the value cannot be made static or if the series has length `0`.
131    #[inline]
132    pub fn unit_scalar_from_series(series: Series) -> Self {
133        assert_eq!(series.len(), 1);
134        // SAFETY: We just did the bounds check
135        let value = unsafe { series.get_unchecked(0) };
136        let value = value.into_static();
137        let value = Scalar::new(series.dtype().clone(), value);
138        let mut sc = ScalarColumn::new(series.name().clone(), value, 1);
139        sc.materialized = OnceLock::from(series);
140        sc
141    }
142
143    /// Create a new [`ScalarColumn`] from a `length=1` Series and expand it `length`.
144    ///
145    /// This will panic if the value cannot be made static.
146    pub fn from_single_value_series(series: Series, length: usize) -> Self {
147        debug_assert!(series.len() <= 1);
148
149        let value = if series.is_empty() {
150            AnyValue::Null
151        } else {
152            unsafe { series.get_unchecked(0) }.into_static()
153        };
154        let value = Scalar::new(series.dtype().clone(), value);
155        ScalarColumn::new(series.name().clone(), value, length)
156    }
157
158    /// Resize the [`ScalarColumn`] to new `length`.
159    ///
160    /// This reuses the materialized [`Series`], if `length <= self.length`.
161    pub fn resize(&self, length: usize) -> ScalarColumn {
162        if self.length == length {
163            return self.clone();
164        }
165
166        // This is violates an invariant if this triggers, the scalar value is undefined if the
167        // self.length == 0 so therefore we should never resize using that value.
168        debug_assert!(length == 0 || self.length > 0);
169
170        let mut resized = Self {
171            name: self.name.clone(),
172            scalar: self.scalar.clone(),
173            length,
174            materialized: OnceLock::new(),
175        };
176
177        if length == self.length || (length < self.length && length > 1) {
178            if let Some(materialized) = self.materialized.get() {
179                resized.materialized = OnceLock::from(materialized.head(Some(length)));
180                debug_assert_eq!(resized.materialized.get().unwrap().len(), length);
181            }
182        }
183
184        resized
185    }
186
187    pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {
188        // @NOTE: We expect that when casting the materialized series mostly does not need change
189        // the physical array. Therefore, we try to cast the entire materialized array if it is
190        // available.
191
192        match self.materialized.get() {
193            Some(s) => {
194                let materialized = s.cast_with_options(dtype, options)?;
195                assert_eq!(self.length, materialized.len());
196
197                let mut casted = if materialized.is_empty() {
198                    Self::new_empty(materialized.name().clone(), materialized.dtype().clone())
199                } else {
200                    // SAFETY: Just did bounds check
201                    let scalar = unsafe { materialized.get_unchecked(0) }.into_static();
202                    Self::new(
203                        materialized.name().clone(),
204                        Scalar::new(materialized.dtype().clone(), scalar),
205                        self.length,
206                    )
207                };
208                casted.materialized = OnceLock::from(materialized);
209                Ok(casted)
210            },
211            None => {
212                let s = self
213                    .as_single_value_series()
214                    .cast_with_options(dtype, options)?;
215
216                if self.length == 0 {
217                    Ok(Self::new_empty(s.name().clone(), s.dtype().clone()))
218                } else {
219                    assert_eq!(1, s.len());
220                    Ok(Self::from_single_value_series(s, self.length))
221                }
222            },
223        }
224    }
225
226    pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Self> {
227        self.cast_with_options(dtype, CastOptions::Strict)
228    }
229    pub fn cast(&self, dtype: &DataType) -> PolarsResult<Self> {
230        self.cast_with_options(dtype, CastOptions::NonStrict)
231    }
232    /// # Safety
233    ///
234    /// This can lead to invalid memory access in downstream code.
235    pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Self> {
236        // @NOTE: We expect that when casting the materialized series mostly does not need change
237        // the physical array. Therefore, we try to cast the entire materialized array if it is
238        // available.
239
240        match self.materialized.get() {
241            Some(s) => {
242                let materialized = s.cast_unchecked(dtype)?;
243                assert_eq!(self.length, materialized.len());
244
245                let mut casted = if materialized.is_empty() {
246                    Self::new_empty(materialized.name().clone(), materialized.dtype().clone())
247                } else {
248                    // SAFETY: Just did bounds check
249                    let scalar = unsafe { materialized.get_unchecked(0) }.into_static();
250                    Self::new(
251                        materialized.name().clone(),
252                        Scalar::new(materialized.dtype().clone(), scalar),
253                        self.length,
254                    )
255                };
256                casted.materialized = OnceLock::from(materialized);
257                Ok(casted)
258            },
259            None => {
260                let s = self.as_single_value_series().cast_unchecked(dtype)?;
261                assert_eq!(1, s.len());
262
263                if self.length == 0 {
264                    Ok(Self::new_empty(s.name().clone(), s.dtype().clone()))
265                } else {
266                    Ok(Self::from_single_value_series(s, self.length))
267                }
268            },
269        }
270    }
271
272    pub fn rename(&mut self, name: PlSmallStr) -> &mut Self {
273        if let Some(series) = self.materialized.get_mut() {
274            series.rename(name.clone());
275        }
276
277        self.name = name;
278        self
279    }
280
281    pub fn has_nulls(&self) -> bool {
282        self.length != 0 && self.scalar.is_null()
283    }
284
285    pub fn drop_nulls(&self) -> Self {
286        if self.scalar.is_null() {
287            self.resize(0)
288        } else {
289            self.clone()
290        }
291    }
292
293    pub fn into_nulls(mut self) -> Self {
294        self.scalar.update(AnyValue::Null);
295        self
296    }
297
298    pub fn map_scalar(&mut self, map_scalar: impl Fn(Scalar) -> Scalar) {
299        self.scalar = map_scalar(std::mem::take(&mut self.scalar));
300        self.materialized.take();
301    }
302    pub fn with_value(&mut self, value: AnyValue<'static>) -> &mut Self {
303        self.scalar.update(value);
304        self.materialized.take();
305        self
306    }
307}
308
309impl IntoColumn for ScalarColumn {
310    #[inline(always)]
311    fn into_column(self) -> Column {
312        self.into()
313    }
314}
315
316impl From<ScalarColumn> for Column {
317    #[inline]
318    fn from(value: ScalarColumn) -> Self {
319        Self::Scalar(value)
320    }
321}
322
#[cfg(feature = "serde")]
mod serde_impl {
    use std::sync::OnceLock;

    use polars_error::PolarsError;
    use polars_utils::pl_str::PlSmallStr;

    use super::ScalarColumn;
    use crate::frame::{Scalar, Series};

    /// Serialization proxy for [`ScalarColumn`].
    ///
    /// The scalar travels as a unit-length [`Series`]; the cached materialization is not part
    /// of the serialized form.
    #[derive(serde::Serialize, serde::Deserialize)]
    struct SerializeWrap {
        name: PlSmallStr,
        /// Unit-length series for dispatching to IPC serialize
        unit_series: Series,
        length: usize,
    }

    impl From<&ScalarColumn> for SerializeWrap {
        /// Capture name, length and the scalar (as an unnamed unit-length series).
        fn from(value: &ScalarColumn) -> Self {
            let unit_series = value.scalar.clone().into_series(PlSmallStr::EMPTY);

            Self {
                name: value.name.clone(),
                unit_series,
                length: value.length,
            }
        }
    }

    impl TryFrom<SerializeWrap> for ScalarColumn {
        type Error = PolarsError;

        /// Rebuild the column from the proxy; the result has no cached materialization.
        ///
        /// Fails if the first value of the unit series cannot be read.
        fn try_from(value: SerializeWrap) -> Result<Self, Self::Error> {
            let SerializeWrap {
                name,
                unit_series,
                length,
            } = value;

            let dtype = unit_series.dtype().clone();
            let first = unit_series.get(0)?.into_static();

            Ok(Self {
                name,
                scalar: Scalar::new(dtype, first),
                length,
                materialized: OnceLock::new(),
            })
        }
    }

    impl serde::ser::Serialize for ScalarColumn {
        /// Serialize via the [`SerializeWrap`] proxy.
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: serde::Serializer,
        {
            let wrap = SerializeWrap::from(self);
            wrap.serialize(serializer)
        }
    }

    impl<'de> serde::de::Deserialize<'de> for ScalarColumn {
        /// Deserialize the [`SerializeWrap`] proxy and convert it back into a column.
        ///
        /// A failed conversion is reported as a custom deserializer error.
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            use serde::de::Error;

            let wrap = SerializeWrap::deserialize(deserializer)?;
            ScalarColumn::try_from(wrap).map_err(D::Error::custom)
        }
    }
}