polars_core/chunked_array/ops/
fill_null.rs

1use arrow::bitmap::{Bitmap, BitmapBuilder};
2use arrow::legacy::kernels::set::set_at_nulls;
3use bytemuck::Zeroable;
4use num_traits::{Bounded, NumCast, One, Zero};
5use polars_utils::itertools::Itertools;
6
7use crate::prelude::*;
8
9fn err_fill_null() -> PolarsError {
10    polars_err!(ComputeError: "could not determine the fill value")
11}
12
13impl Series {
14    /// Replace None values with one of the following strategies:
15    /// * Forward fill (replace None with the previous value)
16    /// * Backward fill (replace None with the next value)
17    /// * Mean fill (replace None with the mean of the whole array)
18    /// * Min fill (replace None with the minimum of the whole array)
19    /// * Max fill (replace None with the maximum of the whole array)
20    /// * Zero fill (replace None with the value zero)
21    /// * One fill (replace None with the value one)
22    /// * MinBound fill (replace with the minimum of that data type)
23    /// * MaxBound fill (replace with the maximum of that data type)
24    ///
25    /// *NOTE: If you want to fill the Nones with a value use the
26    /// [`fill_null` operation on `ChunkedArray<T>`](crate::chunked_array::ops::ChunkFillNullValue)*.
27    ///
28    /// # Example
29    ///
30    /// ```rust
31    /// # use polars_core::prelude::*;
32    /// fn example() -> PolarsResult<()> {
33    ///     let s = Column::new("some_missing".into(), &[Some(1), None, Some(2)]);
34    ///
35    ///     let filled = s.fill_null(FillNullStrategy::Forward(None))?;
36    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(1), Some(2)]);
37    ///
38    ///     let filled = s.fill_null(FillNullStrategy::Backward(None))?;
39    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(2), Some(2)]);
40    ///
41    ///     let filled = s.fill_null(FillNullStrategy::Min)?;
42    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(1), Some(2)]);
43    ///
44    ///     let filled = s.fill_null(FillNullStrategy::Max)?;
45    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(2), Some(2)]);
46    ///
47    ///     let filled = s.fill_null(FillNullStrategy::Mean)?;
48    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(1), Some(2)]);
49    ///
50    ///     let filled = s.fill_null(FillNullStrategy::Zero)?;
51    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(0), Some(2)]);
52    ///
53    ///     let filled = s.fill_null(FillNullStrategy::One)?;
54    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(1), Some(2)]);
55    ///
56    ///     let filled = s.fill_null(FillNullStrategy::MinBound)?;
57    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(-2147483648), Some(2)]);
58    ///
59    ///     let filled = s.fill_null(FillNullStrategy::MaxBound)?;
60    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(2147483647), Some(2)]);
61    ///
62    ///     Ok(())
63    /// }
64    /// example();
65    /// ```
66    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Series> {
67        // Nothing to fill.
68        let nc = self.null_count();
69        if nc == 0
70            || (nc == self.len()
71                && matches!(
72                    strategy,
73                    FillNullStrategy::Forward(_)
74                        | FillNullStrategy::Backward(_)
75                        | FillNullStrategy::Max
76                        | FillNullStrategy::Min
77                        | FillNullStrategy::MaxBound
78                        | FillNullStrategy::MinBound
79                        | FillNullStrategy::Mean
80                ))
81        {
82            return Ok(self.clone());
83        }
84
85        let physical_type = self.dtype().to_physical();
86
87        match strategy {
88            FillNullStrategy::Forward(None) if !physical_type.is_primitive_numeric() => {
89                fill_forward_gather(self)
90            },
91            FillNullStrategy::Forward(Some(limit)) => fill_forward_gather_limit(self, limit),
92            FillNullStrategy::Backward(None) if !physical_type.is_primitive_numeric() => {
93                fill_backward_gather(self)
94            },
95            FillNullStrategy::Backward(Some(limit)) => fill_backward_gather_limit(self, limit),
96            #[cfg(feature = "dtype-decimal")]
97            FillNullStrategy::One if self.dtype().is_decimal() => {
98                let ca = self.decimal().unwrap();
99                let precision = ca.precision();
100                let scale = ca.scale();
101                let fill_value = 10i128.pow(scale as u32);
102                let phys = ca.as_ref().fill_null_with_values(fill_value)?;
103                Ok(phys.into_decimal_unchecked(precision, scale).into_series())
104            },
105            _ => {
106                let logical_type = self.dtype();
107                let s = self.to_physical_repr();
108                use DataType::*;
109                let out = match s.dtype() {
110                    Boolean => fill_null_bool(s.bool().unwrap(), strategy),
111                    String => {
112                        let s = unsafe { s.cast_unchecked(&Binary)? };
113                        let out = s.fill_null(strategy)?;
114                        return unsafe { out.cast_unchecked(&String) };
115                    },
116                    Binary => {
117                        let ca = s.binary().unwrap();
118                        fill_null_binary(ca, strategy).map(|ca| ca.into_series())
119                    },
120                    dt if dt.is_primitive_numeric() => {
121                        with_match_physical_numeric_polars_type!(dt, |$T| {
122                            let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
123                                fill_null_numeric(ca, strategy).map(|ca| ca.into_series())
124                        })
125                    },
126                    dt => {
127                        polars_bail!(InvalidOperation: "fill null strategy not yet supported for dtype: {}", dt)
128                    },
129                }?;
130                unsafe { out.from_physical_unchecked(logical_type) }
131            },
132        }
133    }
134}
135
136fn fill_forward_numeric<'a, T, I>(ca: &'a ChunkedArray<T>) -> ChunkedArray<T>
137where
138    T: PolarsDataType,
139    &'a ChunkedArray<T>: IntoIterator<IntoIter = I>,
140    I: TrustedLen + Iterator<Item = Option<T::Physical<'a>>>,
141    T::ZeroablePhysical<'a>: Copy,
142{
143    // Compute values.
144    let values: Vec<T::ZeroablePhysical<'a>> = ca
145        .into_iter()
146        .scan(T::ZeroablePhysical::zeroed(), |prev, v| {
147            *prev = v.map(|v| v.into()).unwrap_or(*prev);
148            Some(*prev)
149        })
150        .collect_trusted();
151
152    // Compute bitmask.
153    let num_start_nulls = ca.first_non_null().unwrap_or(ca.len());
154    let mut bm = BitmapBuilder::with_capacity(ca.len());
155    bm.extend_constant(num_start_nulls, false);
156    bm.extend_constant(ca.len() - num_start_nulls, true);
157    ChunkedArray::from_chunk_iter_like(
158        ca,
159        [
160            T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(CompatLevel::newest()))
161                .with_validity_typed(bm.into_opt_validity()),
162        ],
163    )
164}
165
166fn fill_backward_numeric<'a, T, I>(ca: &'a ChunkedArray<T>) -> ChunkedArray<T>
167where
168    T: PolarsDataType,
169    &'a ChunkedArray<T>: IntoIterator<IntoIter = I>,
170    I: TrustedLen + Iterator<Item = Option<T::Physical<'a>>> + DoubleEndedIterator,
171    T::ZeroablePhysical<'a>: Copy,
172{
173    // Compute values.
174    let values: Vec<T::ZeroablePhysical<'a>> = ca
175        .into_iter()
176        .rev()
177        .scan(T::ZeroablePhysical::zeroed(), |prev, v| {
178            *prev = v.map(|v| v.into()).unwrap_or(*prev);
179            Some(*prev)
180        })
181        .collect_reversed();
182
183    // Compute bitmask.
184    let num_end_nulls = ca
185        .last_non_null()
186        .map(|i| ca.len() - 1 - i)
187        .unwrap_or(ca.len());
188    let mut bm = BitmapBuilder::with_capacity(ca.len());
189    bm.extend_constant(ca.len() - num_end_nulls, true);
190    bm.extend_constant(num_end_nulls, false);
191    ChunkedArray::from_chunk_iter_like(
192        ca,
193        [
194            T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(CompatLevel::newest()))
195                .with_validity_typed(bm.into_opt_validity()),
196        ],
197    )
198}
199
200fn fill_null_numeric<T>(
201    ca: &ChunkedArray<T>,
202    strategy: FillNullStrategy,
203) -> PolarsResult<ChunkedArray<T>>
204where
205    T: PolarsNumericType,
206    ChunkedArray<T>: ChunkAgg<T::Native>,
207{
208    // Nothing to fill.
209    let mut out = match strategy {
210        FillNullStrategy::Min => {
211            ca.fill_null_with_values(ChunkAgg::min(ca).ok_or_else(err_fill_null)?)?
212        },
213        FillNullStrategy::Max => {
214            ca.fill_null_with_values(ChunkAgg::max(ca).ok_or_else(err_fill_null)?)?
215        },
216        FillNullStrategy::Mean => ca.fill_null_with_values(
217            ca.mean()
218                .map(|v| NumCast::from(v).unwrap())
219                .ok_or_else(err_fill_null)?,
220        )?,
221        FillNullStrategy::One => return ca.fill_null_with_values(One::one()),
222        FillNullStrategy::Zero => return ca.fill_null_with_values(Zero::zero()),
223        FillNullStrategy::MinBound => return ca.fill_null_with_values(Bounded::min_value()),
224        FillNullStrategy::MaxBound => return ca.fill_null_with_values(Bounded::max_value()),
225        FillNullStrategy::Forward(None) => fill_forward_numeric(ca),
226        FillNullStrategy::Backward(None) => fill_backward_numeric(ca),
227        // Handled earlier
228        FillNullStrategy::Forward(_) => unreachable!(),
229        FillNullStrategy::Backward(_) => unreachable!(),
230    };
231    out.rename(ca.name().clone());
232    Ok(out)
233}
234
235fn fill_with_gather<F: Fn(&Bitmap) -> Vec<IdxSize>>(
236    s: &Series,
237    bits_to_idx: F,
238) -> PolarsResult<Series> {
239    let s = s.rechunk();
240    let arr = s.chunks()[0].clone();
241    let validity = arr.validity().expect("nulls");
242
243    let idx = bits_to_idx(validity);
244
245    Ok(unsafe { s.take_slice_unchecked(&idx) })
246}
247
248fn fill_forward_gather(s: &Series) -> PolarsResult<Series> {
249    fill_with_gather(s, |validity| {
250        let mut last_valid = 0;
251        validity
252            .iter()
253            .enumerate_idx()
254            .map(|(i, v)| {
255                if v {
256                    last_valid = i;
257                    i
258                } else {
259                    last_valid
260                }
261            })
262            .collect::<Vec<_>>()
263    })
264}
265
266fn fill_forward_gather_limit(s: &Series, limit: IdxSize) -> PolarsResult<Series> {
267    fill_with_gather(s, |validity| {
268        let mut last_valid = 0;
269        let mut conseq_invalid_count = 0;
270        validity
271            .iter()
272            .enumerate_idx()
273            .map(|(i, v)| {
274                if v {
275                    last_valid = i;
276                    conseq_invalid_count = 0;
277                    i
278                } else if conseq_invalid_count < limit {
279                    conseq_invalid_count += 1;
280                    last_valid
281                } else {
282                    i
283                }
284            })
285            .collect::<Vec<_>>()
286    })
287}
288
289fn fill_backward_gather(s: &Series) -> PolarsResult<Series> {
290    fill_with_gather(s, |validity| {
291        let last = validity.len() as IdxSize - 1;
292        let mut last_valid = last;
293        unsafe {
294            validity
295                .iter()
296                .rev()
297                .enumerate_idx()
298                .map(|(i, v)| {
299                    if v {
300                        last_valid = last - i;
301                        last - i
302                    } else {
303                        last_valid
304                    }
305                })
306                .trust_my_length((last + 1) as usize)
307                .collect_reversed::<Vec<_>>()
308        }
309    })
310}
311
312fn fill_backward_gather_limit(s: &Series, limit: IdxSize) -> PolarsResult<Series> {
313    fill_with_gather(s, |validity| {
314        let last = validity.len() as IdxSize - 1;
315        let mut last_valid = last;
316        let mut conseq_invalid_count = 0;
317        unsafe {
318            validity
319                .iter()
320                .rev()
321                .enumerate_idx()
322                .map(|(i, v)| {
323                    if v {
324                        last_valid = last - i;
325                        conseq_invalid_count = 0;
326                        last - i
327                    } else if conseq_invalid_count < limit {
328                        conseq_invalid_count += 1;
329                        last_valid
330                    } else {
331                        last - i
332                    }
333                })
334                .trust_my_length((last + 1) as usize)
335                .collect_reversed()
336        }
337    })
338}
339
340fn fill_null_bool(ca: &BooleanChunked, strategy: FillNullStrategy) -> PolarsResult<Series> {
341    match strategy {
342        FillNullStrategy::Min => ca
343            .fill_null_with_values(ca.min().ok_or_else(err_fill_null)?)
344            .map(|ca| ca.into_series()),
345        FillNullStrategy::Max => ca
346            .fill_null_with_values(ca.max().ok_or_else(err_fill_null)?)
347            .map(|ca| ca.into_series()),
348        FillNullStrategy::Mean => polars_bail!(opq = mean, "Boolean"),
349        FillNullStrategy::One | FillNullStrategy::MaxBound => {
350            ca.fill_null_with_values(true).map(|ca| ca.into_series())
351        },
352        FillNullStrategy::Zero | FillNullStrategy::MinBound => {
353            ca.fill_null_with_values(false).map(|ca| ca.into_series())
354        },
355        FillNullStrategy::Forward(_) => unreachable!(),
356        FillNullStrategy::Backward(_) => unreachable!(),
357    }
358}
359
360fn fill_null_binary(ca: &BinaryChunked, strategy: FillNullStrategy) -> PolarsResult<BinaryChunked> {
361    match strategy {
362        FillNullStrategy::Min => {
363            ca.fill_null_with_values(ca.min_binary().ok_or_else(err_fill_null)?)
364        },
365        FillNullStrategy::Max => {
366            ca.fill_null_with_values(ca.max_binary().ok_or_else(err_fill_null)?)
367        },
368        FillNullStrategy::Zero => ca.fill_null_with_values(&[]),
369        FillNullStrategy::Forward(_) => unreachable!(),
370        FillNullStrategy::Backward(_) => unreachable!(),
371        strat => polars_bail!(InvalidOperation: "fill-null strategy {:?} is not supported", strat),
372    }
373}
374
375impl<T> ChunkFillNullValue<T::Native> for ChunkedArray<T>
376where
377    T: PolarsNumericType,
378{
379    fn fill_null_with_values(&self, value: T::Native) -> PolarsResult<Self> {
380        Ok(self.apply_kernel(&|arr| Box::new(set_at_nulls(arr, value))))
381    }
382}
383
384impl ChunkFillNullValue<bool> for BooleanChunked {
385    fn fill_null_with_values(&self, value: bool) -> PolarsResult<Self> {
386        self.set(&self.is_null(), Some(value))
387    }
388}
389
390impl ChunkFillNullValue<&[u8]> for BinaryChunked {
391    fn fill_null_with_values(&self, value: &[u8]) -> PolarsResult<Self> {
392        self.set(&self.is_null(), Some(value))
393    }
394}