polars_core/chunked_array/ops/
fill_null.rs

1use arrow::bitmap::{Bitmap, BitmapBuilder};
2use arrow::legacy::kernels::set::set_at_nulls;
3use bytemuck::Zeroable;
4use num_traits::{NumCast, One, Zero};
5use polars_utils::itertools::Itertools;
6
7use crate::prelude::*;
8
9fn err_fill_null() -> PolarsError {
10    polars_err!(ComputeError: "could not determine the fill value")
11}
12
13impl Series {
14    /// Replace None values with one of the following strategies:
15    /// * Forward fill (replace None with the previous value)
16    /// * Backward fill (replace None with the next value)
17    /// * Mean fill (replace None with the mean of the whole array)
18    /// * Min fill (replace None with the minimum of the whole array)
19    /// * Max fill (replace None with the maximum of the whole array)
20    /// * Zero fill (replace None with the value zero)
21    /// * One fill (replace None with the value one)
22    ///
23    /// *NOTE: If you want to fill the Nones with a value use the
24    /// [`fill_null` operation on `ChunkedArray<T>`](crate::chunked_array::ops::ChunkFillNullValue)*.
25    ///
26    /// # Example
27    ///
28    /// ```rust
29    /// # use polars_core::prelude::*;
30    /// fn example() -> PolarsResult<()> {
31    ///     let s = Column::new("some_missing".into(), &[Some(1), None, Some(2)]);
32    ///
33    ///     let filled = s.fill_null(FillNullStrategy::Forward(None))?;
34    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(1), Some(2)]);
35    ///
36    ///     let filled = s.fill_null(FillNullStrategy::Backward(None))?;
37    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(2), Some(2)]);
38    ///
39    ///     let filled = s.fill_null(FillNullStrategy::Min)?;
40    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(1), Some(2)]);
41    ///
42    ///     let filled = s.fill_null(FillNullStrategy::Max)?;
43    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(2), Some(2)]);
44    ///
45    ///     let filled = s.fill_null(FillNullStrategy::Mean)?;
46    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(1), Some(2)]);
47    ///
48    ///     let filled = s.fill_null(FillNullStrategy::Zero)?;
49    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(0), Some(2)]);
50    ///
51    ///     let filled = s.fill_null(FillNullStrategy::One)?;
52    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(1), Some(2)]);
53    ///
54    ///     Ok(())
55    /// }
56    /// example();
57    /// ```
58    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Series> {
59        // Nothing to fill.
60        let nc = self.null_count();
61        if nc == 0
62            || (nc == self.len()
63                && matches!(
64                    strategy,
65                    FillNullStrategy::Forward(_)
66                        | FillNullStrategy::Backward(_)
67                        | FillNullStrategy::Max
68                        | FillNullStrategy::Min
69                        | FillNullStrategy::Mean
70                ))
71        {
72            return Ok(self.clone());
73        }
74
75        let physical_type = self.dtype().to_physical();
76
77        match strategy {
78            FillNullStrategy::Forward(None) if !physical_type.is_primitive_numeric() => {
79                fill_forward_gather(self)
80            },
81            FillNullStrategy::Forward(Some(limit)) => fill_forward_gather_limit(self, limit),
82            FillNullStrategy::Backward(None) if !physical_type.is_primitive_numeric() => {
83                fill_backward_gather(self)
84            },
85            FillNullStrategy::Backward(Some(limit)) => fill_backward_gather_limit(self, limit),
86            #[cfg(feature = "dtype-decimal")]
87            FillNullStrategy::One if self.dtype().is_decimal() => {
88                let ca = self.decimal().unwrap();
89                let precision = ca.precision();
90                let scale = ca.scale();
91                let fill_value = 10i128.pow(scale as u32);
92                let phys = ca.physical().fill_null_with_values(fill_value)?;
93                Ok(phys.into_decimal_unchecked(precision, scale).into_series())
94            },
95            _ => {
96                let logical_type = self.dtype();
97                let s = self.to_physical_repr();
98                use DataType::*;
99                let out = match s.dtype() {
100                    Boolean => fill_null_bool(s.bool().unwrap(), strategy),
101                    String => {
102                        let s = unsafe { s.cast_unchecked(&Binary)? };
103                        let out = s.fill_null(strategy)?;
104                        return unsafe { out.cast_unchecked(&String) };
105                    },
106                    Binary => {
107                        let ca = s.binary().unwrap();
108                        fill_null_binary(ca, strategy).map(|ca| ca.into_series())
109                    },
110                    dt if dt.is_primitive_numeric() => {
111                        with_match_physical_numeric_polars_type!(dt, |$T| {
112                            let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
113                                fill_null_numeric(ca, strategy).map(|ca| ca.into_series())
114                        })
115                    },
116                    dt => {
117                        polars_bail!(InvalidOperation: "fill null strategy not yet supported for dtype: {}", dt)
118                    },
119                }?;
120                unsafe { out.from_physical_unchecked(logical_type) }
121            },
122        }
123    }
124}
125
126fn fill_forward_numeric<'a, T, I>(ca: &'a ChunkedArray<T>) -> ChunkedArray<T>
127where
128    T: PolarsDataType,
129    &'a ChunkedArray<T>: IntoIterator<IntoIter = I>,
130    I: TrustedLen + Iterator<Item = Option<T::Physical<'a>>>,
131    T::ZeroablePhysical<'a>: Copy,
132{
133    // Compute values.
134    let values: Vec<T::ZeroablePhysical<'a>> = ca
135        .into_iter()
136        .scan(T::ZeroablePhysical::zeroed(), |prev, v| {
137            *prev = v.map(|v| v.into()).unwrap_or(*prev);
138            Some(*prev)
139        })
140        .collect_trusted();
141
142    // Compute bitmask.
143    let num_start_nulls = ca.first_non_null().unwrap_or(ca.len());
144    let mut bm = BitmapBuilder::with_capacity(ca.len());
145    bm.extend_constant(num_start_nulls, false);
146    bm.extend_constant(ca.len() - num_start_nulls, true);
147    ChunkedArray::from_chunk_iter_like(
148        ca,
149        [
150            T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(CompatLevel::newest()))
151                .with_validity_typed(bm.into_opt_validity()),
152        ],
153    )
154}
155
156fn fill_backward_numeric<'a, T, I>(ca: &'a ChunkedArray<T>) -> ChunkedArray<T>
157where
158    T: PolarsDataType,
159    &'a ChunkedArray<T>: IntoIterator<IntoIter = I>,
160    I: TrustedLen + Iterator<Item = Option<T::Physical<'a>>> + DoubleEndedIterator,
161    T::ZeroablePhysical<'a>: Copy,
162{
163    // Compute values.
164    let values: Vec<T::ZeroablePhysical<'a>> = ca
165        .into_iter()
166        .rev()
167        .scan(T::ZeroablePhysical::zeroed(), |prev, v| {
168            *prev = v.map(|v| v.into()).unwrap_or(*prev);
169            Some(*prev)
170        })
171        .collect_reversed();
172
173    // Compute bitmask.
174    let num_end_nulls = ca
175        .last_non_null()
176        .map(|i| ca.len() - 1 - i)
177        .unwrap_or(ca.len());
178    let mut bm = BitmapBuilder::with_capacity(ca.len());
179    bm.extend_constant(ca.len() - num_end_nulls, true);
180    bm.extend_constant(num_end_nulls, false);
181    ChunkedArray::from_chunk_iter_like(
182        ca,
183        [
184            T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(CompatLevel::newest()))
185                .with_validity_typed(bm.into_opt_validity()),
186        ],
187    )
188}
189
190fn fill_null_numeric<T>(
191    ca: &ChunkedArray<T>,
192    strategy: FillNullStrategy,
193) -> PolarsResult<ChunkedArray<T>>
194where
195    T: PolarsNumericType,
196    ChunkedArray<T>: ChunkAgg<T::Native>,
197{
198    // Nothing to fill.
199    let mut out = match strategy {
200        FillNullStrategy::Min => {
201            ca.fill_null_with_values(ChunkAgg::min(ca).ok_or_else(err_fill_null)?)?
202        },
203        FillNullStrategy::Max => {
204            ca.fill_null_with_values(ChunkAgg::max(ca).ok_or_else(err_fill_null)?)?
205        },
206        FillNullStrategy::Mean => ca.fill_null_with_values(
207            ca.mean()
208                .map(|v| NumCast::from(v).unwrap())
209                .ok_or_else(err_fill_null)?,
210        )?,
211        FillNullStrategy::One => return ca.fill_null_with_values(One::one()),
212        FillNullStrategy::Zero => return ca.fill_null_with_values(Zero::zero()),
213        FillNullStrategy::Forward(None) => fill_forward_numeric(ca),
214        FillNullStrategy::Backward(None) => fill_backward_numeric(ca),
215        // Handled earlier
216        FillNullStrategy::Forward(_) => unreachable!(),
217        FillNullStrategy::Backward(_) => unreachable!(),
218    };
219    out.rename(ca.name().clone());
220    Ok(out)
221}
222
223fn fill_with_gather<F: Fn(&Bitmap) -> Vec<IdxSize>>(
224    s: &Series,
225    bits_to_idx: F,
226) -> PolarsResult<Series> {
227    let s = s.rechunk();
228    let arr = s.chunks()[0].clone();
229    let validity = arr.validity().expect("nulls");
230
231    let idx = bits_to_idx(validity);
232
233    Ok(unsafe { s.take_slice_unchecked(&idx) })
234}
235
236fn fill_forward_gather(s: &Series) -> PolarsResult<Series> {
237    fill_with_gather(s, |validity| {
238        let mut last_valid = 0;
239        validity
240            .iter()
241            .enumerate_idx()
242            .map(|(i, v)| {
243                if v {
244                    last_valid = i;
245                    i
246                } else {
247                    last_valid
248                }
249            })
250            .collect::<Vec<_>>()
251    })
252}
253
254fn fill_forward_gather_limit(s: &Series, limit: IdxSize) -> PolarsResult<Series> {
255    fill_with_gather(s, |validity| {
256        let mut last_valid = 0;
257        let mut conseq_invalid_count = 0;
258        validity
259            .iter()
260            .enumerate_idx()
261            .map(|(i, v)| {
262                if v {
263                    last_valid = i;
264                    conseq_invalid_count = 0;
265                    i
266                } else if conseq_invalid_count < limit {
267                    conseq_invalid_count += 1;
268                    last_valid
269                } else {
270                    i
271                }
272            })
273            .collect::<Vec<_>>()
274    })
275}
276
277fn fill_backward_gather(s: &Series) -> PolarsResult<Series> {
278    fill_with_gather(s, |validity| {
279        let last = validity.len() as IdxSize - 1;
280        let mut last_valid = last;
281        unsafe {
282            validity
283                .iter()
284                .rev()
285                .enumerate_idx()
286                .map(|(i, v)| {
287                    if v {
288                        last_valid = last - i;
289                        last - i
290                    } else {
291                        last_valid
292                    }
293                })
294                .trust_my_length((last + 1) as usize)
295                .collect_reversed::<Vec<_>>()
296        }
297    })
298}
299
300fn fill_backward_gather_limit(s: &Series, limit: IdxSize) -> PolarsResult<Series> {
301    fill_with_gather(s, |validity| {
302        let last = validity.len() as IdxSize - 1;
303        let mut last_valid = last;
304        let mut conseq_invalid_count = 0;
305        unsafe {
306            validity
307                .iter()
308                .rev()
309                .enumerate_idx()
310                .map(|(i, v)| {
311                    if v {
312                        last_valid = last - i;
313                        conseq_invalid_count = 0;
314                        last - i
315                    } else if conseq_invalid_count < limit {
316                        conseq_invalid_count += 1;
317                        last_valid
318                    } else {
319                        last - i
320                    }
321                })
322                .trust_my_length((last + 1) as usize)
323                .collect_reversed()
324        }
325    })
326}
327
328fn fill_null_bool(ca: &BooleanChunked, strategy: FillNullStrategy) -> PolarsResult<Series> {
329    match strategy {
330        FillNullStrategy::Min => ca
331            .fill_null_with_values(ca.min().ok_or_else(err_fill_null)?)
332            .map(|ca| ca.into_series()),
333        FillNullStrategy::Max => ca
334            .fill_null_with_values(ca.max().ok_or_else(err_fill_null)?)
335            .map(|ca| ca.into_series()),
336        FillNullStrategy::Mean => polars_bail!(opq = mean, "Boolean"),
337        FillNullStrategy::One => ca.fill_null_with_values(true).map(|ca| ca.into_series()),
338        FillNullStrategy::Zero => ca.fill_null_with_values(false).map(|ca| ca.into_series()),
339        FillNullStrategy::Forward(_) => unreachable!(),
340        FillNullStrategy::Backward(_) => unreachable!(),
341    }
342}
343
344fn fill_null_binary(ca: &BinaryChunked, strategy: FillNullStrategy) -> PolarsResult<BinaryChunked> {
345    match strategy {
346        FillNullStrategy::Min => {
347            ca.fill_null_with_values(ca.min_binary().ok_or_else(err_fill_null)?)
348        },
349        FillNullStrategy::Max => {
350            ca.fill_null_with_values(ca.max_binary().ok_or_else(err_fill_null)?)
351        },
352        FillNullStrategy::Zero => ca.fill_null_with_values(&[]),
353        FillNullStrategy::Forward(_) => unreachable!(),
354        FillNullStrategy::Backward(_) => unreachable!(),
355        strat => polars_bail!(InvalidOperation: "fill-null strategy {:?} is not supported", strat),
356    }
357}
358
359impl<T> ChunkFillNullValue<T::Native> for ChunkedArray<T>
360where
361    T: PolarsNumericType,
362{
363    fn fill_null_with_values(&self, value: T::Native) -> PolarsResult<Self> {
364        Ok(self.apply_kernel(&|arr| Box::new(set_at_nulls(arr, value))))
365    }
366}
367
368impl ChunkFillNullValue<bool> for BooleanChunked {
369    fn fill_null_with_values(&self, value: bool) -> PolarsResult<Self> {
370        self.set(&self.is_null(), Some(value))
371    }
372}
373
374impl ChunkFillNullValue<&[u8]> for BinaryChunked {
375    fn fill_null_with_values(&self, value: &[u8]) -> PolarsResult<Self> {
376        self.set(&self.is_null(), Some(value))
377    }
378}