// polars_core/series/implementations/null.rs
// Series implementation for the all-null `DataType::Null` series.
1use std::any::Any;
2
3use polars_error::constants::LENGTH_LIMIT_MSG;
4
5use self::compare_inner::TotalOrdInner;
6use super::*;
7use crate::chunked_array::ops::compare_inner::{IntoTotalEqInner, NonNull, TotalEqInner};
8use crate::chunked_array::ops::sort::arg_sort_multiple::arg_sort_multiple_impl;
9use crate::prelude::*;
10use crate::series::private::{PrivateSeries, PrivateSeriesNumeric};
11use crate::series::*;
12
13impl Series {
14    pub fn new_null(name: PlSmallStr, len: usize) -> Series {
15        NullChunked::new(name, len).into_series()
16    }
17}
18
/// Backing storage for an all-null series of `DataType::Null`.
#[derive(Clone)]
pub struct NullChunked {
    /// Name of the series.
    pub(crate) name: PlSmallStr,
    /// Total element count across all chunks (every element is null).
    length: usize,
    // we still need chunks as many series consumers expect
    // chunks to be there
    chunks: Vec<ArrayRef>,
}
27
28impl NullChunked {
29    pub(crate) fn new(name: PlSmallStr, len: usize) -> Self {
30        if len >= (IdxSize::MAX as usize) && chunkops::CHECK_LENGTH.get() {
31            panic!("{}", LENGTH_LIMIT_MSG);
32        }
33
34        Self {
35            name,
36            length: len,
37            chunks: vec![Box::new(arrow::array::NullArray::new(
38                ArrowDataType::Null,
39                len,
40            ))],
41        }
42    }
43
44    pub fn len(&self) -> usize {
45        self.length
46    }
47
48    pub fn is_empty(&self) -> bool {
49        self.length == 0
50    }
51}
52impl PrivateSeriesNumeric for NullChunked {
53    fn bit_repr(&self) -> Option<BitRepr> {
54        Some(BitRepr::U32(UInt32Chunked::full_null(
55            self.name.clone(),
56            self.len(),
57        )))
58    }
59}
60
impl PrivateSeries for NullChunked {
    /// Recompute `self.length` from the backing chunks, enforcing the
    /// `IdxSize` length limit when length checking is enabled.
    fn compute_len(&mut self) {
        // Sum of chunk lengths; a single chunk is the common case.
        fn inner(chunks: &[ArrayRef]) -> usize {
            match chunks.len() {
                // fast path
                1 => chunks[0].len(),
                _ => chunks.iter().fold(0, |acc, arr| acc + arr.len()),
            }
        }
        let len = inner(&self.chunks);
        if len >= (IdxSize::MAX as usize) && chunkops::CHECK_LENGTH.get() {
            panic!("{}", LENGTH_LIMIT_MSG);
        }
        self.length = len;
    }
    /// Schema field: the series name with dtype `Null`.
    fn _field(&self) -> Cow<'_, Field> {
        Cow::Owned(Field::new(self.name().clone(), DataType::Null))
    }

    // Statistics flags are meaningless for an all-null series; ignore them.
    #[allow(unused)]
    fn _set_flags(&mut self, flags: StatisticsFlags) {}

    fn _dtype(&self) -> &DataType {
        &DataType::Null
    }

    /// `zip_with` on nulls only needs to compute the broadcast output
    /// length; the result is always a fresh all-null series.
    #[cfg(feature = "zip_with")]
    fn zip_with_same_type(&self, mask: &BooleanChunked, other: &Series) -> PolarsResult<Series> {
        let len = match (self.len(), mask.len(), other.len()) {
            // All equal lengths.
            (a, b, c) if a == b && b == c => a,
            // One unit-length input broadcasts against two equal lengths.
            (1, a, b) | (a, 1, b) | (a, b, 1) if a == b => a,
            // Two unit-length inputs broadcast against the third.
            (a, 1, 1) | (1, a, 1) | (1, 1, a) => a,
            // An empty mask yields an empty result.
            (_, 0, _) => 0,
            _ => {
                polars_bail!(ShapeMismatch: "shapes of `self`, `mask` and `other` are not suitable for `zip_with` operation")
            },
        };

        Ok(Self::new(self.name().clone(), len).into_series())
    }

    fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
        IntoTotalEqInner::into_total_eq_inner(self)
    }
    fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
        IntoTotalOrdInner::into_total_ord_inner(self)
    }

    // Arithmetic with nulls is null; only the broadcast length matters.
    // The op name is forwarded purely for the error message.
    fn subtract(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "subtract")
    }

    fn add_to(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "add_to")
    }
    fn multiply(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "multiply")
    }
    fn divide(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "divide")
    }
    fn remainder(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "remainder")
    }

    /// All rows are null and therefore identical: the whole series forms
    /// one group (empty input produces no groups).
    #[cfg(feature = "algorithm_group_by")]
    fn group_tuples(&self, _multithreaded: bool, _sorted: bool) -> PolarsResult<GroupsType> {
        Ok(if self.is_empty() {
            GroupsType::default()
        } else {
            GroupsType::new_slice(vec![[0, self.length as IdxSize]], false, true)
        })
    }

    #[cfg(feature = "algorithm_group_by")]
    unsafe fn agg_list(&self, groups: &GroupsType) -> Series {
        AggList::agg_list(self, groups)
    }

    fn _get_flags(&self) -> StatisticsFlags {
        StatisticsFlags::empty()
    }

    fn vec_hash(
        &self,
        random_state: PlSeedableRandomStateQuality,
        buf: &mut Vec<u64>,
    ) -> PolarsResult<()> {
        VecHash::vec_hash(self, random_state, buf)?;
        Ok(())
    }

    fn vec_hash_combine(
        &self,
        build_hasher: PlSeedableRandomStateQuality,
        hashes: &mut [u64],
    ) -> PolarsResult<()> {
        VecHash::vec_hash_combine(self, build_hasher, hashes)?;
        Ok(())
    }

    /// Multi-column arg-sort: every null compares equal, so each row
    /// contributes the placeholder key `NonNull(())` and ordering is
    /// decided entirely by the `by` columns.
    fn arg_sort_multiple(
        &self,
        by: &[Column],
        options: &SortMultipleOptions,
    ) -> PolarsResult<IdxCa> {
        let vals = (0..self.len())
            .map(|i| (i as IdxSize, NonNull(())))
            .collect();
        arg_sort_multiple_impl(vals, by, options)
    }
}
173
174fn null_arithmetic(lhs: &NullChunked, rhs: &Series, op: &str) -> PolarsResult<Series> {
175    let output_len = match (lhs.len(), rhs.len()) {
176        (1, len_r) => len_r,
177        (len_l, 1) => len_l,
178        (len_l, len_r) if len_l == len_r => len_l,
179        _ => polars_bail!(ComputeError: "Cannot {:?} two series of different lengths.", op),
180    };
181    Ok(NullChunked::new(lhs.name().clone(), output_len).into_series())
182}
183
impl SeriesTrait for NullChunked {
    /// The series name.
    fn name(&self) -> &PlSmallStr {
        &self.name
    }

    fn rename(&mut self, name: PlSmallStr) {
        self.name = name
    }

    fn chunks(&self) -> &Vec<ArrayRef> {
        &self.chunks
    }
    // NOTE(review): `self.length` is tracked separately from the chunks;
    // callers mutating chunks presumably must keep it consistent (e.g. via
    // `compute_len`) — confirm against the trait's safety contract.
    unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
        &mut self.chunks
    }

    fn chunk_lengths(&self) -> ChunkLenIter<'_> {
        self.chunks.iter().map(|chunk| chunk.len())
    }

    // Any gather of nulls is nulls: only the index count matters, so the
    // indices themselves never need validating.
    fn take(&self, indices: &IdxCa) -> PolarsResult<Series> {
        Ok(NullChunked::new(self.name.clone(), indices.len()).into_series())
    }

    unsafe fn take_unchecked(&self, indices: &IdxCa) -> Series {
        NullChunked::new(self.name.clone(), indices.len()).into_series()
    }

    fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Series> {
        Ok(NullChunked::new(self.name.clone(), indices.len()).into_series())
    }

    unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Series {
        NullChunked::new(self.name.clone(), indices.len()).into_series()
    }

    // A null series cannot gain valid rows, so the mask must mark
    // everything invalid (zero set bits); the series is then unchanged.
    fn deposit(&self, validity: &Bitmap) -> Series {
        assert_eq!(validity.set_bits(), 0);
        self.clone().into_series()
    }

    fn len(&self) -> usize {
        self.length
    }

    // Every element is null, so any non-empty series has nulls.
    fn has_nulls(&self) -> bool {
        !self.is_empty()
    }

    // Rechunking just rebuilds a single-chunk null series of equal length.
    fn rechunk(&self) -> Series {
        NullChunked::new(self.name.clone(), self.len()).into_series()
    }

    // A validity mask cannot change an all-null series.
    fn with_validity(&self, _validity: Option<Bitmap>) -> Series {
        self.clone().into_series()
    }

    // Dropping nulls from an all-null series leaves nothing.
    fn drop_nulls(&self) -> Series {
        NullChunked::new(self.name.clone(), 0).into_series()
    }

    // Nulls cast to any dtype as an all-null series of that dtype.
    fn cast(&self, dtype: &DataType, _cast_options: CastOptions) -> PolarsResult<Series> {
        Ok(Series::full_null(self.name.clone(), self.len(), dtype))
    }

    fn null_count(&self) -> usize {
        self.len()
    }

    /// The single unique value is null; `n_unique` sizes the result (0 or 1).
    #[cfg(feature = "algorithm_group_by")]
    fn unique(&self) -> PolarsResult<Series> {
        let ca = NullChunked::new(self.name.clone(), self.n_unique().unwrap());
        Ok(ca.into_series())
    }

    #[cfg(feature = "algorithm_group_by")]
    fn n_unique(&self) -> PolarsResult<usize> {
        let n = if self.is_empty() { 0 } else { 1 };
        Ok(n)
    }

    // First occurrence of the single unique value: index 0 (or nothing).
    #[cfg(feature = "algorithm_group_by")]
    fn arg_unique(&self) -> PolarsResult<IdxCa> {
        let idxs: Vec<IdxSize> = (0..self.n_unique().unwrap() as IdxSize).collect();
        Ok(IdxCa::new(self.name().clone(), idxs))
    }

    /// One unique id (0) shared by every row; `(0, [])` when empty.
    fn unique_id(&self) -> PolarsResult<(IdxSize, Vec<IdxSize>)> {
        if self.is_empty() {
            Ok((0, Vec::new()))
        } else {
            Ok((1, vec![0; self.len()]))
        }
    }

    // Repeating a null element `length` times is just a null series.
    fn new_from_index(&self, _index: usize, length: usize) -> Series {
        NullChunked::new(self.name.clone(), length).into_series()
    }

    unsafe fn get_unchecked(&self, _index: usize) -> AnyValue<'_> {
        AnyValue::Null
    }

    fn slice(&self, offset: i64, length: usize) -> Series {
        let (chunks, len) = chunkops::slice(&self.chunks, offset, length, self.len());
        NullChunked {
            name: self.name.clone(),
            length: len,
            chunks,
        }
        .into_series()
    }

    /// Split into `[0, offset)` and `[offset, len)`; each half's length is
    /// recomputed from its resulting chunks.
    fn split_at(&self, offset: i64) -> (Series, Series) {
        let (l, r) = chunkops::split_at(self.chunks(), offset, self.len());
        (
            NullChunked {
                name: self.name.clone(),
                length: l.iter().map(|arr| arr.len()).sum(),
                chunks: l,
            }
            .into_series(),
            NullChunked {
                name: self.name.clone(),
                length: r.iter().map(|arr| arr.len()).sum(),
                chunks: r,
            }
            .into_series(),
        )
    }

    // All elements compare equal, so any sort order is already satisfied.
    fn sort_with(&self, _options: SortOptions) -> PolarsResult<Series> {
        Ok(self.clone().into_series())
    }

    // Identity permutation: equal elements keep their original order.
    fn arg_sort(&self, _options: SortOptions) -> IdxCa {
        IdxCa::from_vec(self.name().clone(), (0..self.len() as IdxSize).collect())
    }

    fn is_null(&self) -> BooleanChunked {
        BooleanChunked::full(self.name().clone(), true, self.len())
    }

    fn is_not_null(&self) -> BooleanChunked {
        BooleanChunked::full(self.name().clone(), false, self.len())
    }

    // Reversing indistinguishable elements is a no-op.
    fn reverse(&self) -> Series {
        self.clone().into_series()
    }

    /// Filtering nulls only requires the count of kept rows; the mask's
    /// length must match, except for the unit-length broadcast cases.
    fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Series> {
        let len = if self.is_empty() {
            // We still allow a length of `1` because it could be `lit(true)`.
            polars_ensure!(filter.len() <= 1, ShapeMismatch: "filter's length: {} differs from that of the series: 0", filter.len());
            0
        } else if filter.len() == 1 {
            // Unit-length mask broadcasts: keep all rows or none.
            return match filter.get(0) {
                Some(true) => Ok(self.clone().into_series()),
                None | Some(false) => Ok(NullChunked::new(self.name.clone(), 0).into_series()),
            };
        } else {
            polars_ensure!(filter.len() == self.len(), ShapeMismatch: "filter's length: {} differs from that of the series: {}", filter.len(), self.len());
            // Number of `true` bits = number of surviving rows.
            filter.sum().unwrap_or(0) as usize
        };
        Ok(NullChunked::new(self.name.clone(), len).into_series())
    }

    // Shifting fills vacated slots with null — indistinguishable here.
    fn shift(&self, _periods: i64) -> Series {
        self.clone().into_series()
    }

    // Every reduction over nulls is null.
    fn sum_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn min_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn max_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn mean_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn median_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn std_reduce(&self, _ddof: u8) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn var_reduce(&self, _ddof: u8) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    /// Append `other`'s chunks (must also be dtype `Null`) in place.
    fn append(&mut self, other: &Series) -> PolarsResult<()> {
        polars_ensure!(other.dtype() == &DataType::Null, ComputeError: "expected null dtype");
        // we don't create a new null array to keep probability of aligned chunks higher
        self.length += other.len();
        self.chunks.extend(other.chunks().iter().cloned());
        Ok(())
    }
    /// Like `append` but takes ownership, moving `other`'s chunks out
    /// instead of cloning them.
    fn append_owned(&mut self, mut other: Series) -> PolarsResult<()> {
        polars_ensure!(other.dtype() == &DataType::Null, ComputeError: "expected null dtype");
        // we don't create a new null array to keep probability of aligned chunks higher
        let other: &mut NullChunked = other._get_inner_mut().as_any_mut().downcast_mut().unwrap();
        self.length += other.len();
        self.chunks.extend(std::mem::take(&mut other.chunks));
        Ok(())
    }

    // NOTE(review): unlike `append`, this rebuilds a single chunk of the
    // combined length and does not verify `other`'s dtype — presumably
    // callers guarantee it is Null; confirm.
    fn extend(&mut self, other: &Series) -> PolarsResult<()> {
        *self = NullChunked::new(self.name.clone(), self.len() + other.len());
        Ok(())
    }

    #[cfg(feature = "approx_unique")]
    fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
        Ok(if self.is_empty() { 0 } else { 1 })
    }

    fn clone_inner(&self) -> Arc<dyn SeriesTrait> {
        Arc::new(self.clone())
    }

    fn find_validity_mismatch(&self, other: &Series, idxs: &mut Vec<IdxSize>) {
        ChunkNestingUtils::find_validity_mismatch(self, other, idxs)
    }

    fn as_any(&self) -> &dyn Any {
        self
    }

    fn as_any_mut(&mut self) -> &mut dyn Any {
        self
    }

    fn as_phys_any(&self) -> &dyn Any {
        self
    }

    fn as_arc_any(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
        self as _
    }
}
434
// SAFETY: presumably the `IntoSeries` contract only requires wrapping a
// valid `SeriesTrait` implementor; `NullChunked` implements `SeriesTrait`
// above — confirm against the trait's documented safety requirements.
unsafe impl IntoSeries for NullChunked {
    /// Wrap this `NullChunked` in the public `Series` handle.
    fn into_series(self) -> Series
    where
        Self: Sized,
    {
        Series(Arc::new(self))
    }
}