Skip to main content

polars_core/chunked_array/ops/
fill_null.rs

1use arrow::bitmap::{Bitmap, BitmapBuilder};
2use arrow::legacy::kernels::set::set_at_nulls;
3use bytemuck::Zeroable;
4use num_traits::{NumCast, One, Zero};
5use polars_utils::itertools::Itertools;
6
7use crate::prelude::*;
8
9fn err_fill_null() -> PolarsError {
10    polars_err!(ComputeError: "could not determine the fill value")
11}
12
13impl Series {
14    /// Replace None values with one of the following strategies:
15    /// * Forward fill (replace None with the previous value)
16    /// * Backward fill (replace None with the next value)
17    /// * Mean fill (replace None with the mean of the whole array)
18    /// * Min fill (replace None with the minimum of the whole array)
19    /// * Max fill (replace None with the maximum of the whole array)
20    /// * Zero fill (replace None with the value zero)
21    /// * One fill (replace None with the value one)
22    ///
23    /// *NOTE: If you want to fill the Nones with a value use the
24    /// [`fill_null` operation on `ChunkedArray<T>`](crate::chunked_array::ops::ChunkFillNullValue)*.
25    ///
26    /// # Example
27    ///
28    /// ```rust
29    /// # use polars_core::prelude::*;
30    /// fn example() -> PolarsResult<()> {
31    ///     let s = Column::new("some_missing".into(), &[Some(1), None, Some(2)]);
32    ///
33    ///     let filled = s.fill_null(FillNullStrategy::Forward(None))?;
34    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(1), Some(2)]);
35    ///
36    ///     let filled = s.fill_null(FillNullStrategy::Backward(None))?;
37    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(2), Some(2)]);
38    ///
39    ///     let filled = s.fill_null(FillNullStrategy::Min)?;
40    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(1), Some(2)]);
41    ///
42    ///     let filled = s.fill_null(FillNullStrategy::Max)?;
43    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(2), Some(2)]);
44    ///
45    ///     let filled = s.fill_null(FillNullStrategy::Mean)?;
46    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(1), Some(2)]);
47    ///
48    ///     let filled = s.fill_null(FillNullStrategy::Zero)?;
49    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(0), Some(2)]);
50    ///
51    ///     let filled = s.fill_null(FillNullStrategy::One)?;
52    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(1), Some(2)]);
53    ///
54    ///     Ok(())
55    /// }
56    /// example();
57    /// ```
58    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Series> {
59        // Nothing to fill.
60        let nc = self.null_count();
61        if nc == 0
62            || (nc == self.len()
63                && matches!(
64                    strategy,
65                    FillNullStrategy::Forward(_)
66                        | FillNullStrategy::Backward(_)
67                        | FillNullStrategy::Max
68                        | FillNullStrategy::Min
69                        | FillNullStrategy::Mean
70                ))
71        {
72            return Ok(self.clone());
73        }
74
75        let physical_type = self.dtype().to_physical();
76
77        match strategy {
78            FillNullStrategy::Forward(None) if !physical_type.is_primitive_numeric() => {
79                fill_forward_gather(self)
80            },
81
82            // Fast path to remove limit.
83            FillNullStrategy::Forward(Some(limit)) if limit >= nc as IdxSize => {
84                self.fill_null(FillNullStrategy::Forward(None))
85            },
86            FillNullStrategy::Backward(Some(limit)) if limit >= nc as IdxSize => {
87                self.fill_null(FillNullStrategy::Backward(None))
88            },
89
90            FillNullStrategy::Forward(Some(limit)) => fill_forward_gather_limit(self, limit),
91            FillNullStrategy::Backward(None) if !physical_type.is_primitive_numeric() => {
92                fill_backward_gather(self)
93            },
94            FillNullStrategy::Backward(Some(limit)) => fill_backward_gather_limit(self, limit),
95            #[cfg(feature = "dtype-decimal")]
96            FillNullStrategy::One if self.dtype().is_decimal() => {
97                use polars_compute::decimal::i128_to_dec128;
98
99                let ca = self.decimal().unwrap();
100                let precision = ca.precision();
101                let scale = ca.scale();
102                let fill_value = i128_to_dec128(1, precision, scale).ok_or_else(|| {
103                    polars_err!(ComputeError: "value '1' is out of range for Decimal({precision}, {scale})")
104                })?;
105                let phys = ca.physical().fill_null_with_values(fill_value)?;
106                Ok(phys.into_decimal_unchecked(precision, scale).into_series())
107            },
108            _ => {
109                let logical_type = self.dtype();
110                let s = self.to_physical_repr();
111                use DataType::*;
112                let out = match s.dtype() {
113                    Boolean => fill_null_bool(s.bool().unwrap(), strategy),
114                    String => {
115                        let s = unsafe { s.cast_unchecked(&Binary)? };
116                        let out = s.fill_null(strategy)?;
117                        return unsafe { out.cast_unchecked(&String) };
118                    },
119                    Binary => {
120                        let ca = s.binary().unwrap();
121                        fill_null_binary(ca, strategy).map(|ca| ca.into_series())
122                    },
123                    dt if dt.is_primitive_numeric() => {
124                        with_match_physical_numeric_polars_type!(dt, |$T| {
125                            let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
126                                fill_null_numeric(ca, strategy).map(|ca| ca.into_series())
127                        })
128                    },
129                    dt => {
130                        polars_bail!(InvalidOperation: "fill null strategy not yet supported for dtype: {}", dt)
131                    },
132                }?;
133                unsafe { out.from_physical_unchecked(logical_type) }
134            },
135        }
136    }
137}
138
139fn fill_forward_numeric<'a, T>(ca: &'a ChunkedArray<T>) -> ChunkedArray<T>
140where
141    T: PolarsDataType,
142    T::ZeroablePhysical<'a>: Copy,
143{
144    // Compute values.
145    let mut last = T::ZeroablePhysical::zeroed();
146    let values: Vec<T::ZeroablePhysical<'a>> = ca
147        .iter()
148        .map(|v| {
149            last = v.map(|v| v.into()).unwrap_or(last);
150            last
151        })
152        .collect_trusted();
153
154    // Compute bitmask.
155    let num_start_nulls = ca.first_non_null().unwrap_or(ca.len());
156    let mut bm = BitmapBuilder::with_capacity(ca.len());
157    bm.extend_constant(num_start_nulls, false);
158    bm.extend_constant(ca.len() - num_start_nulls, true);
159    ChunkedArray::from_chunk_iter_like(
160        ca,
161        [
162            T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(CompatLevel::newest()))
163                .with_validity_typed(bm.into_opt_validity()),
164        ],
165    )
166}
167
168fn fill_backward_numeric<'a, T>(ca: &'a ChunkedArray<T>) -> ChunkedArray<T>
169where
170    T: PolarsDataType,
171    T::ZeroablePhysical<'a>: Copy,
172{
173    // Compute values.
174    let mut last = T::ZeroablePhysical::zeroed();
175    let values: Vec<T::ZeroablePhysical<'a>> = ca
176        .iter()
177        .rev()
178        .map(|v| {
179            last = v.map(|v| v.into()).unwrap_or(last);
180            last
181        })
182        .collect_reversed();
183
184    // Compute bitmask.
185    let num_end_nulls = ca
186        .last_non_null()
187        .map(|i| ca.len() - 1 - i)
188        .unwrap_or(ca.len());
189    let mut bm = BitmapBuilder::with_capacity(ca.len());
190    bm.extend_constant(ca.len() - num_end_nulls, true);
191    bm.extend_constant(num_end_nulls, false);
192    ChunkedArray::from_chunk_iter_like(
193        ca,
194        [
195            T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(CompatLevel::newest()))
196                .with_validity_typed(bm.into_opt_validity()),
197        ],
198    )
199}
200
201fn fill_null_numeric<T>(
202    ca: &ChunkedArray<T>,
203    strategy: FillNullStrategy,
204) -> PolarsResult<ChunkedArray<T>>
205where
206    T: PolarsNumericType,
207    ChunkedArray<T>: ChunkAgg<T::Native>,
208{
209    // Nothing to fill.
210    let mut out = match strategy {
211        FillNullStrategy::Min => {
212            ca.fill_null_with_values(ChunkAgg::min(ca).ok_or_else(err_fill_null)?)?
213        },
214        FillNullStrategy::Max => {
215            ca.fill_null_with_values(ChunkAgg::max(ca).ok_or_else(err_fill_null)?)?
216        },
217        FillNullStrategy::Mean => ca.fill_null_with_values(
218            ca.mean()
219                .map(|v| NumCast::from(v).unwrap())
220                .ok_or_else(err_fill_null)?,
221        )?,
222        FillNullStrategy::One => return ca.fill_null_with_values(One::one()),
223        FillNullStrategy::Zero => return ca.fill_null_with_values(Zero::zero()),
224        FillNullStrategy::Forward(None) => fill_forward_numeric(ca),
225        FillNullStrategy::Backward(None) => fill_backward_numeric(ca),
226        // Handled earlier
227        FillNullStrategy::Forward(_) => unreachable!(),
228        FillNullStrategy::Backward(_) => unreachable!(),
229    };
230    out.rename(ca.name().clone());
231    Ok(out)
232}
233
234fn fill_with_gather<F: Fn(&Bitmap) -> Vec<IdxSize>>(
235    s: &Series,
236    bits_to_idx: F,
237) -> PolarsResult<Series> {
238    let s = s.rechunk();
239    let arr = s.chunks()[0].clone();
240    let validity = arr.validity().expect("nulls");
241
242    let idx = bits_to_idx(validity);
243
244    Ok(unsafe { s.take_slice_unchecked(&idx) })
245}
246
247fn fill_forward_gather(s: &Series) -> PolarsResult<Series> {
248    fill_with_gather(s, |validity| {
249        let mut last_valid = 0;
250        validity
251            .iter()
252            .enumerate_idx()
253            .map(|(i, v)| {
254                if v {
255                    last_valid = i;
256                    i
257                } else {
258                    last_valid
259                }
260            })
261            .collect::<Vec<_>>()
262    })
263}
264
265fn fill_forward_gather_limit(s: &Series, limit: IdxSize) -> PolarsResult<Series> {
266    fill_with_gather(s, |validity| {
267        let mut last_valid = 0;
268        let mut conseq_invalid_count = 0;
269        validity
270            .iter()
271            .enumerate_idx()
272            .map(|(i, v)| {
273                if v {
274                    last_valid = i;
275                    conseq_invalid_count = 0;
276                    i
277                } else if conseq_invalid_count < limit {
278                    conseq_invalid_count += 1;
279                    last_valid
280                } else {
281                    i
282                }
283            })
284            .collect::<Vec<_>>()
285    })
286}
287
288fn fill_backward_gather(s: &Series) -> PolarsResult<Series> {
289    fill_with_gather(s, |validity| {
290        let last = validity.len() as IdxSize - 1;
291        let mut last_valid = last;
292        unsafe {
293            validity
294                .iter()
295                .rev()
296                .enumerate_idx()
297                .map(|(i, v)| {
298                    if v {
299                        last_valid = last - i;
300                        last - i
301                    } else {
302                        last_valid
303                    }
304                })
305                .trust_my_length((last + 1) as usize)
306                .collect_reversed::<Vec<_>>()
307        }
308    })
309}
310
311fn fill_backward_gather_limit(s: &Series, limit: IdxSize) -> PolarsResult<Series> {
312    fill_with_gather(s, |validity| {
313        let last = validity.len() as IdxSize - 1;
314        let mut last_valid = last;
315        let mut conseq_invalid_count = 0;
316        unsafe {
317            validity
318                .iter()
319                .rev()
320                .enumerate_idx()
321                .map(|(i, v)| {
322                    if v {
323                        last_valid = last - i;
324                        conseq_invalid_count = 0;
325                        last - i
326                    } else if conseq_invalid_count < limit {
327                        conseq_invalid_count += 1;
328                        last_valid
329                    } else {
330                        last - i
331                    }
332                })
333                .trust_my_length((last + 1) as usize)
334                .collect_reversed()
335        }
336    })
337}
338
339fn fill_null_bool(ca: &BooleanChunked, strategy: FillNullStrategy) -> PolarsResult<Series> {
340    match strategy {
341        FillNullStrategy::Min => ca
342            .fill_null_with_values(ca.min().ok_or_else(err_fill_null)?)
343            .map(|ca| ca.into_series()),
344        FillNullStrategy::Max => ca
345            .fill_null_with_values(ca.max().ok_or_else(err_fill_null)?)
346            .map(|ca| ca.into_series()),
347        FillNullStrategy::Mean => polars_bail!(opq = mean, "Boolean"),
348        FillNullStrategy::One => ca.fill_null_with_values(true).map(|ca| ca.into_series()),
349        FillNullStrategy::Zero => ca.fill_null_with_values(false).map(|ca| ca.into_series()),
350        FillNullStrategy::Forward(_) => unreachable!(),
351        FillNullStrategy::Backward(_) => unreachable!(),
352    }
353}
354
355fn fill_null_binary(ca: &BinaryChunked, strategy: FillNullStrategy) -> PolarsResult<BinaryChunked> {
356    match strategy {
357        FillNullStrategy::Min => {
358            ca.fill_null_with_values(ca.min_binary().ok_or_else(err_fill_null)?)
359        },
360        FillNullStrategy::Max => {
361            ca.fill_null_with_values(ca.max_binary().ok_or_else(err_fill_null)?)
362        },
363        FillNullStrategy::Zero => ca.fill_null_with_values(&[]),
364        FillNullStrategy::Forward(_) => unreachable!(),
365        FillNullStrategy::Backward(_) => unreachable!(),
366        strat => polars_bail!(InvalidOperation: "fill-null strategy {:?} is not supported", strat),
367    }
368}
369
370impl<T> ChunkFillNullValue<T::Native> for ChunkedArray<T>
371where
372    T: PolarsNumericType,
373{
374    fn fill_null_with_values(&self, value: T::Native) -> PolarsResult<Self> {
375        Ok(self.apply_kernel(&|arr| Box::new(set_at_nulls(arr, value))))
376    }
377}
378
379impl ChunkFillNullValue<bool> for BooleanChunked {
380    fn fill_null_with_values(&self, value: bool) -> PolarsResult<Self> {
381        self.set(&self.is_null(), Some(value))
382    }
383}
384
385impl ChunkFillNullValue<&[u8]> for BinaryChunked {
386    fn fill_null_with_values(&self, value: &[u8]) -> PolarsResult<Self> {
387        self.set(&self.is_null(), Some(value))
388    }
389}