polars_core/series/implementations/null.rs

use std::any::Any;

use polars_error::constants::LENGTH_LIMIT_MSG;

use self::compare_inner::TotalOrdInner;
use super::*;
use crate::chunked_array::ops::compare_inner::{IntoTotalEqInner, NonNull, TotalEqInner};
use crate::chunked_array::ops::sort::arg_sort_multiple::arg_sort_multiple_impl;
use crate::prelude::*;
use crate::series::private::{PrivateSeries, PrivateSeriesNumeric};
use crate::series::*;

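// Convenience constructor: an all-null `Series` of the `Null` dtype, backed by `NullChunked`.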
impl Series {
    pub fn new_null(name: PlSmallStr, len: usize) -> Series {
        NullChunked::new(name, len).into_series()
    }
}

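/// An all-null Series implementation for the `Null` dtype: it tracks a name, a length and
/// the backing Arrow `NullArray` chunks, but carries no values.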
#[derive(Clone)]
pub struct NullChunked {
    pub(crate) name: PlSmallStr,
    length: IdxSize,
    // we still need chunks as many series consumers expect
    // chunks to be there
    chunks: Vec<ArrayRef>,
}

impl NullChunked {
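    /// Create a `NullChunked` of length `len`, backed by a single Arrow `NullArray` chunk.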
    pub(crate) fn new(name: PlSmallStr, len: usize) -> Self {
        Self {
            name,
            length: len as IdxSize,
            chunks: vec![Box::new(arrow::array::NullArray::new(
                ArrowDataType::Null,
                len,
            ))],
        }
    }

    pub fn len(&self) -> usize {
        self.length as usize
    }

    pub fn is_empty(&self) -> bool {
        self.length == 0
    }
}

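// There is no numeric data to expose; report an all-null UInt32 bit representation.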
impl PrivateSeriesNumeric for NullChunked {
    fn bit_repr(&self) -> Option<BitRepr> {
        Some(BitRepr::U32(UInt32Chunked::full_null(
            self.name.clone(),
            self.len(),
        )))
    }
}

impl PrivateSeries for NullChunked {
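    // Recompute the cached `length` by summing the chunk lengths.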
    fn compute_len(&mut self) {
        fn inner(chunks: &[ArrayRef]) -> usize {
            match chunks.len() {
                // fast path
                1 => chunks[0].len(),
                _ => chunks.iter().fold(0, |acc, arr| acc + arr.len()),
            }
        }
        self.length = IdxSize::try_from(inner(&self.chunks)).expect(LENGTH_LIMIT_MSG);
    }
    fn _field(&self) -> Cow<'_, Field> {
        Cow::Owned(Field::new(self.name().clone(), DataType::Null))
    }

    #[allow(unused)]
    fn _set_flags(&mut self, flags: StatisticsFlags) {}

    fn _dtype(&self) -> &DataType {
        &DataType::Null
    }

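    // The mask values are irrelevant for an all-null Series: `zip_with` only has to
    // produce a null Series of the correct (broadcast) length.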
    #[cfg(feature = "zip_with")]
    fn zip_with_same_type(&self, mask: &BooleanChunked, other: &Series) -> PolarsResult<Series> {
        let len = match (self.len(), mask.len(), other.len()) {
            (a, b, c) if a == b && b == c => a,
            (1, a, b) | (a, 1, b) | (a, b, 1) if a == b => a,
            (a, 1, 1) | (1, a, 1) | (1, 1, a) => a,
            (_, 0, _) => 0,
            _ => {
                polars_bail!(ShapeMismatch: "shapes of `self`, `mask` and `other` are not suitable for `zip_with` operation")
            },
        };

        Ok(Self::new(self.name().clone(), len).into_series())
    }

    fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
        IntoTotalEqInner::into_total_eq_inner(self)
    }
    fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
        IntoTotalOrdInner::into_total_ord_inner(self)
    }

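    // Arithmetic involving a null Series always yields nulls; only the result length matters.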
    fn subtract(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "subtract")
    }

    fn add_to(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "add_to")
    }
    fn multiply(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "multiply")
    }
    fn divide(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "divide")
    }
    fn remainder(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "remainder")
    }

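    // All rows are null and thus identical, so they form a single group (or none when empty).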
    #[cfg(feature = "algorithm_group_by")]
    fn group_tuples(&self, _multithreaded: bool, _sorted: bool) -> PolarsResult<GroupsType> {
        Ok(if self.is_empty() {
            GroupsType::default()
        } else {
            GroupsType::Slice {
                groups: vec![[0, self.length]],
                overlapping: false,
            }
        })
    }

    #[cfg(feature = "algorithm_group_by")]
    unsafe fn agg_list(&self, groups: &GroupsType) -> Series {
        AggList::agg_list(self, groups)
    }

    fn _get_flags(&self) -> StatisticsFlags {
        StatisticsFlags::empty()
    }

    fn vec_hash(
        &self,
        random_state: PlSeedableRandomStateQuality,
        buf: &mut Vec<u64>,
    ) -> PolarsResult<()> {
        VecHash::vec_hash(self, random_state, buf)?;
        Ok(())
    }

    fn vec_hash_combine(
        &self,
        build_hasher: PlSeedableRandomStateQuality,
        hashes: &mut [u64],
    ) -> PolarsResult<()> {
        VecHash::vec_hash_combine(self, build_hasher, hashes)?;
        Ok(())
    }

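    // Every value in this column compares equal, so the ordering is decided entirely by `by`;
    // `NonNull(())` serves as a placeholder key for each row.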
    fn arg_sort_multiple(
        &self,
        by: &[Column],
        options: &SortMultipleOptions,
    ) -> PolarsResult<IdxCa> {
        let vals = (0..self.len())
            .map(|i| (i as IdxSize, NonNull(())))
            .collect();
        arg_sort_multiple_impl(vals, by, options)
    }
}

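// Shared helper for the arithmetic ops above: validate/broadcast the operand lengths and
// return a null Series of the resulting length.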
fn null_arithmetic(lhs: &NullChunked, rhs: &Series, op: &str) -> PolarsResult<Series> {
    let output_len = match (lhs.len(), rhs.len()) {
        (1, len_r) => len_r,
        (len_l, 1) => len_l,
        (len_l, len_r) if len_l == len_r => len_l,
        _ => polars_bail!(ComputeError: "Cannot {:?} two series of different lengths.", op),
    };
    Ok(NullChunked::new(lhs.name().clone(), output_len).into_series())
}

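// Most `SeriesTrait` methods below reduce to "return a null Series of the appropriate length".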
impl SeriesTrait for NullChunked {
    fn name(&self) -> &PlSmallStr {
        &self.name
    }

    fn rename(&mut self, name: PlSmallStr) {
        self.name = name
    }

    fn chunks(&self) -> &Vec<ArrayRef> {
        &self.chunks
    }
    unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
        &mut self.chunks
    }

    fn chunk_lengths(&self) -> ChunkLenIter<'_> {
        self.chunks.iter().map(|chunk| chunk.len())
    }

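    // Gathering from an all-null Series only needs the number of indices; the index
    // values themselves are never inspected.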
    fn take(&self, indices: &IdxCa) -> PolarsResult<Series> {
        Ok(NullChunked::new(self.name.clone(), indices.len()).into_series())
    }

    unsafe fn take_unchecked(&self, indices: &IdxCa) -> Series {
        NullChunked::new(self.name.clone(), indices.len()).into_series()
    }

    fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Series> {
        Ok(NullChunked::new(self.name.clone(), indices.len()).into_series())
    }

    unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Series {
        NullChunked::new(self.name.clone(), indices.len()).into_series()
    }

    fn deposit(&self, validity: &Bitmap) -> Series {
        assert_eq!(validity.set_bits(), 0);
        self.clone().into_series()
    }

    fn len(&self) -> usize {
        self.length as usize
    }

    fn has_nulls(&self) -> bool {
        !self.is_empty()
    }

    fn rechunk(&self) -> Series {
        NullChunked::new(self.name.clone(), self.len()).into_series()
    }

    fn drop_nulls(&self) -> Series {
        NullChunked::new(self.name.clone(), 0).into_series()
    }

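    // Casting keeps only the length: the result is a full-null Series of the target dtype.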
    fn cast(&self, dtype: &DataType, _cast_options: CastOptions) -> PolarsResult<Series> {
        Ok(Series::full_null(self.name.clone(), self.len(), dtype))
    }

    fn null_count(&self) -> usize {
        self.len()
    }

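    // A null Series has at most one distinct value (null), hence `n_unique` is 0 or 1.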
    #[cfg(feature = "algorithm_group_by")]
    fn unique(&self) -> PolarsResult<Series> {
        let ca = NullChunked::new(self.name.clone(), self.n_unique().unwrap());
        Ok(ca.into_series())
    }

    #[cfg(feature = "algorithm_group_by")]
    fn n_unique(&self) -> PolarsResult<usize> {
        let n = if self.is_empty() { 0 } else { 1 };
        Ok(n)
    }

    #[cfg(feature = "algorithm_group_by")]
    fn arg_unique(&self) -> PolarsResult<IdxCa> {
        let idxs: Vec<IdxSize> = (0..self.n_unique().unwrap() as IdxSize).collect();
        Ok(IdxCa::new(self.name().clone(), idxs))
    }

    fn unique_id(&self) -> PolarsResult<(IdxSize, Vec<IdxSize>)> {
        if self.is_empty() {
            Ok((0, Vec::new()))
        } else {
            Ok((1, vec![0; self.len()]))
        }
    }

    fn new_from_index(&self, _index: usize, length: usize) -> Series {
        NullChunked::new(self.name.clone(), length).into_series()
    }

    unsafe fn get_unchecked(&self, _index: usize) -> AnyValue<'_> {
        AnyValue::Null
    }

    fn slice(&self, offset: i64, length: usize) -> Series {
        let (chunks, len) = chunkops::slice(&self.chunks, offset, length, self.len());
        NullChunked {
            name: self.name.clone(),
            length: len as IdxSize,
            chunks,
        }
        .into_series()
    }

    fn split_at(&self, offset: i64) -> (Series, Series) {
        let (l, r) = chunkops::split_at(self.chunks(), offset, self.len());
        (
            NullChunked {
                name: self.name.clone(),
                length: l.iter().map(|arr| arr.len() as IdxSize).sum(),
                chunks: l,
            }
            .into_series(),
            NullChunked {
                name: self.name.clone(),
                length: r.iter().map(|arr| arr.len() as IdxSize).sum(),
                chunks: r,
            }
            .into_series(),
        )
    }

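    // Sorting all-null data is a no-op; `arg_sort` simply returns the identity permutation.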
    fn sort_with(&self, _options: SortOptions) -> PolarsResult<Series> {
        Ok(self.clone().into_series())
    }

    fn arg_sort(&self, _options: SortOptions) -> IdxCa {
        IdxCa::from_vec(self.name().clone(), (0..self.len() as IdxSize).collect())
    }

    fn is_null(&self) -> BooleanChunked {
        BooleanChunked::full(self.name().clone(), true, self.len())
    }

    fn is_not_null(&self) -> BooleanChunked {
        BooleanChunked::full(self.name().clone(), false, self.len())
    }

    fn reverse(&self) -> Series {
        self.clone().into_series()
    }

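    // Only the number of kept rows matters: a length-1 mask broadcasts, otherwise the
    // result length is the number of `true` values in the mask.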
    fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Series> {
        let len = if self.is_empty() {
            // We still allow a length of `1` because it could be `lit(true)`.
            polars_ensure!(filter.len() <= 1, ShapeMismatch: "filter's length: {} differs from that of the series: 0", filter.len());
            0
        } else if filter.len() == 1 {
            return match filter.get(0) {
                Some(true) => Ok(self.clone().into_series()),
                None | Some(false) => Ok(NullChunked::new(self.name.clone(), 0).into_series()),
            };
        } else {
            polars_ensure!(filter.len() == self.len(), ShapeMismatch: "filter's length: {} differs from that of the series: {}", filter.len(), self.len());
            filter.sum().unwrap_or(0) as usize
        };
        Ok(NullChunked::new(self.name.clone(), len).into_series())
    }

    fn shift(&self, _periods: i64) -> Series {
        self.clone().into_series()
    }

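    // Every reduction over an all-null Series is the null scalar.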
    fn sum_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn min_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn max_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn mean_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn median_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn std_reduce(&self, _ddof: u8) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn var_reduce(&self, _ddof: u8) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn append(&mut self, other: &Series) -> PolarsResult<()> {
        polars_ensure!(other.dtype() == &DataType::Null, ComputeError: "expected null dtype");
        // we don't create a new null array to keep probability of aligned chunks higher
        self.length += other.len() as IdxSize;
        self.chunks.extend(other.chunks().iter().cloned());
        Ok(())
    }
    fn append_owned(&mut self, mut other: Series) -> PolarsResult<()> {
        polars_ensure!(other.dtype() == &DataType::Null, ComputeError: "expected null dtype");
        // we don't create a new null array to keep probability of aligned chunks higher
        let other: &mut NullChunked = other._get_inner_mut().as_any_mut().downcast_mut().unwrap();
        self.length += other.len() as IdxSize;
        self.chunks.extend(std::mem::take(&mut other.chunks));
        Ok(())
    }

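    // Unlike `append`, `extend` rebuilds a single-chunk NullChunked of the combined length.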
    fn extend(&mut self, other: &Series) -> PolarsResult<()> {
        *self = NullChunked::new(self.name.clone(), self.len() + other.len());
        Ok(())
    }

    #[cfg(feature = "approx_unique")]
    fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
        Ok(if self.is_empty() { 0 } else { 1 })
    }

    fn clone_inner(&self) -> Arc<dyn SeriesTrait> {
        Arc::new(self.clone())
    }

    fn find_validity_mismatch(&self, other: &Series, idxs: &mut Vec<IdxSize>) {
        ChunkNestingUtils::find_validity_mismatch(self, other, idxs)
    }

    fn as_any(&self) -> &dyn Any {
        self
    }

    fn as_any_mut(&mut self) -> &mut dyn Any {
        self
    }

    fn as_phys_any(&self) -> &dyn Any {
        self
    }

    fn as_arc_any(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
        self as _
    }
}

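// Wrap the NullChunked behind an `Arc` to obtain a `Series`.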
unsafe impl IntoSeries for NullChunked {
    fn into_series(self) -> Series
    where
        Self: Sized,
    {
        Series(Arc::new(self))
    }
}