polars_core/chunked_array/ops/
fill_null.rs1use arrow::bitmap::{Bitmap, BitmapBuilder};
2use arrow::legacy::kernels::set::set_at_nulls;
3use bytemuck::Zeroable;
4use num_traits::{NumCast, One, Zero};
5use polars_utils::itertools::Itertools;
6
7use crate::prelude::*;
8
9fn err_fill_null() -> PolarsError {
10 polars_err!(ComputeError: "could not determine the fill value")
11}
12
13impl Series {
14 pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Series> {
59 let nc = self.null_count();
61 if nc == 0
62 || (nc == self.len()
63 && matches!(
64 strategy,
65 FillNullStrategy::Forward(_)
66 | FillNullStrategy::Backward(_)
67 | FillNullStrategy::Max
68 | FillNullStrategy::Min
69 | FillNullStrategy::Mean
70 ))
71 {
72 return Ok(self.clone());
73 }
74
75 let physical_type = self.dtype().to_physical();
76
77 match strategy {
78 FillNullStrategy::Forward(None) if !physical_type.is_primitive_numeric() => {
79 fill_forward_gather(self)
80 },
81
82 FillNullStrategy::Forward(Some(limit)) if limit >= nc as IdxSize => {
84 self.fill_null(FillNullStrategy::Forward(None))
85 },
86 FillNullStrategy::Backward(Some(limit)) if limit >= nc as IdxSize => {
87 self.fill_null(FillNullStrategy::Backward(None))
88 },
89
90 FillNullStrategy::Forward(Some(limit)) => fill_forward_gather_limit(self, limit),
91 FillNullStrategy::Backward(None) if !physical_type.is_primitive_numeric() => {
92 fill_backward_gather(self)
93 },
94 FillNullStrategy::Backward(Some(limit)) => fill_backward_gather_limit(self, limit),
95 #[cfg(feature = "dtype-decimal")]
96 FillNullStrategy::One if self.dtype().is_decimal() => {
97 use polars_compute::decimal::i128_to_dec128;
98
99 let ca = self.decimal().unwrap();
100 let precision = ca.precision();
101 let scale = ca.scale();
102 let fill_value = i128_to_dec128(1, precision, scale).ok_or_else(|| {
103 polars_err!(ComputeError: "value '1' is out of range for Decimal({precision}, {scale})")
104 })?;
105 let phys = ca.physical().fill_null_with_values(fill_value)?;
106 Ok(phys.into_decimal_unchecked(precision, scale).into_series())
107 },
108 _ => {
109 let logical_type = self.dtype();
110 let s = self.to_physical_repr();
111 use DataType::*;
112 let out = match s.dtype() {
113 Boolean => fill_null_bool(s.bool().unwrap(), strategy),
114 String => {
115 let s = unsafe { s.cast_unchecked(&Binary)? };
116 let out = s.fill_null(strategy)?;
117 return unsafe { out.cast_unchecked(&String) };
118 },
119 Binary => {
120 let ca = s.binary().unwrap();
121 fill_null_binary(ca, strategy).map(|ca| ca.into_series())
122 },
123 dt if dt.is_primitive_numeric() => {
124 with_match_physical_numeric_polars_type!(dt, |$T| {
125 let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
126 fill_null_numeric(ca, strategy).map(|ca| ca.into_series())
127 })
128 },
129 dt => {
130 polars_bail!(InvalidOperation: "fill null strategy not yet supported for dtype: {}", dt)
131 },
132 }?;
133 unsafe { out.from_physical_unchecked(logical_type) }
134 },
135 }
136 }
137}
138
139fn fill_forward_numeric<'a, T, I>(ca: &'a ChunkedArray<T>) -> ChunkedArray<T>
140where
141 T: PolarsDataType,
142 &'a ChunkedArray<T>: IntoIterator<IntoIter = I>,
143 I: TrustedLen + Iterator<Item = Option<T::Physical<'a>>>,
144 T::ZeroablePhysical<'a>: Copy,
145{
146 let values: Vec<T::ZeroablePhysical<'a>> = ca
148 .into_iter()
149 .scan(T::ZeroablePhysical::zeroed(), |prev, v| {
150 *prev = v.map(|v| v.into()).unwrap_or(*prev);
151 Some(*prev)
152 })
153 .collect_trusted();
154
155 let num_start_nulls = ca.first_non_null().unwrap_or(ca.len());
157 let mut bm = BitmapBuilder::with_capacity(ca.len());
158 bm.extend_constant(num_start_nulls, false);
159 bm.extend_constant(ca.len() - num_start_nulls, true);
160 ChunkedArray::from_chunk_iter_like(
161 ca,
162 [
163 T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(CompatLevel::newest()))
164 .with_validity_typed(bm.into_opt_validity()),
165 ],
166 )
167}
168
169fn fill_backward_numeric<'a, T, I>(ca: &'a ChunkedArray<T>) -> ChunkedArray<T>
170where
171 T: PolarsDataType,
172 &'a ChunkedArray<T>: IntoIterator<IntoIter = I>,
173 I: TrustedLen + Iterator<Item = Option<T::Physical<'a>>> + DoubleEndedIterator,
174 T::ZeroablePhysical<'a>: Copy,
175{
176 let values: Vec<T::ZeroablePhysical<'a>> = ca
178 .into_iter()
179 .rev()
180 .scan(T::ZeroablePhysical::zeroed(), |prev, v| {
181 *prev = v.map(|v| v.into()).unwrap_or(*prev);
182 Some(*prev)
183 })
184 .collect_reversed();
185
186 let num_end_nulls = ca
188 .last_non_null()
189 .map(|i| ca.len() - 1 - i)
190 .unwrap_or(ca.len());
191 let mut bm = BitmapBuilder::with_capacity(ca.len());
192 bm.extend_constant(ca.len() - num_end_nulls, true);
193 bm.extend_constant(num_end_nulls, false);
194 ChunkedArray::from_chunk_iter_like(
195 ca,
196 [
197 T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(CompatLevel::newest()))
198 .with_validity_typed(bm.into_opt_validity()),
199 ],
200 )
201}
202
203fn fill_null_numeric<T>(
204 ca: &ChunkedArray<T>,
205 strategy: FillNullStrategy,
206) -> PolarsResult<ChunkedArray<T>>
207where
208 T: PolarsNumericType,
209 ChunkedArray<T>: ChunkAgg<T::Native>,
210{
211 let mut out = match strategy {
213 FillNullStrategy::Min => {
214 ca.fill_null_with_values(ChunkAgg::min(ca).ok_or_else(err_fill_null)?)?
215 },
216 FillNullStrategy::Max => {
217 ca.fill_null_with_values(ChunkAgg::max(ca).ok_or_else(err_fill_null)?)?
218 },
219 FillNullStrategy::Mean => ca.fill_null_with_values(
220 ca.mean()
221 .map(|v| NumCast::from(v).unwrap())
222 .ok_or_else(err_fill_null)?,
223 )?,
224 FillNullStrategy::One => return ca.fill_null_with_values(One::one()),
225 FillNullStrategy::Zero => return ca.fill_null_with_values(Zero::zero()),
226 FillNullStrategy::Forward(None) => fill_forward_numeric(ca),
227 FillNullStrategy::Backward(None) => fill_backward_numeric(ca),
228 FillNullStrategy::Forward(_) => unreachable!(),
230 FillNullStrategy::Backward(_) => unreachable!(),
231 };
232 out.rename(ca.name().clone());
233 Ok(out)
234}
235
236fn fill_with_gather<F: Fn(&Bitmap) -> Vec<IdxSize>>(
237 s: &Series,
238 bits_to_idx: F,
239) -> PolarsResult<Series> {
240 let s = s.rechunk();
241 let arr = s.chunks()[0].clone();
242 let validity = arr.validity().expect("nulls");
243
244 let idx = bits_to_idx(validity);
245
246 Ok(unsafe { s.take_slice_unchecked(&idx) })
247}
248
249fn fill_forward_gather(s: &Series) -> PolarsResult<Series> {
250 fill_with_gather(s, |validity| {
251 let mut last_valid = 0;
252 validity
253 .iter()
254 .enumerate_idx()
255 .map(|(i, v)| {
256 if v {
257 last_valid = i;
258 i
259 } else {
260 last_valid
261 }
262 })
263 .collect::<Vec<_>>()
264 })
265}
266
267fn fill_forward_gather_limit(s: &Series, limit: IdxSize) -> PolarsResult<Series> {
268 fill_with_gather(s, |validity| {
269 let mut last_valid = 0;
270 let mut conseq_invalid_count = 0;
271 validity
272 .iter()
273 .enumerate_idx()
274 .map(|(i, v)| {
275 if v {
276 last_valid = i;
277 conseq_invalid_count = 0;
278 i
279 } else if conseq_invalid_count < limit {
280 conseq_invalid_count += 1;
281 last_valid
282 } else {
283 i
284 }
285 })
286 .collect::<Vec<_>>()
287 })
288}
289
290fn fill_backward_gather(s: &Series) -> PolarsResult<Series> {
291 fill_with_gather(s, |validity| {
292 let last = validity.len() as IdxSize - 1;
293 let mut last_valid = last;
294 unsafe {
295 validity
296 .iter()
297 .rev()
298 .enumerate_idx()
299 .map(|(i, v)| {
300 if v {
301 last_valid = last - i;
302 last - i
303 } else {
304 last_valid
305 }
306 })
307 .trust_my_length((last + 1) as usize)
308 .collect_reversed::<Vec<_>>()
309 }
310 })
311}
312
313fn fill_backward_gather_limit(s: &Series, limit: IdxSize) -> PolarsResult<Series> {
314 fill_with_gather(s, |validity| {
315 let last = validity.len() as IdxSize - 1;
316 let mut last_valid = last;
317 let mut conseq_invalid_count = 0;
318 unsafe {
319 validity
320 .iter()
321 .rev()
322 .enumerate_idx()
323 .map(|(i, v)| {
324 if v {
325 last_valid = last - i;
326 conseq_invalid_count = 0;
327 last - i
328 } else if conseq_invalid_count < limit {
329 conseq_invalid_count += 1;
330 last_valid
331 } else {
332 last - i
333 }
334 })
335 .trust_my_length((last + 1) as usize)
336 .collect_reversed()
337 }
338 })
339}
340
341fn fill_null_bool(ca: &BooleanChunked, strategy: FillNullStrategy) -> PolarsResult<Series> {
342 match strategy {
343 FillNullStrategy::Min => ca
344 .fill_null_with_values(ca.min().ok_or_else(err_fill_null)?)
345 .map(|ca| ca.into_series()),
346 FillNullStrategy::Max => ca
347 .fill_null_with_values(ca.max().ok_or_else(err_fill_null)?)
348 .map(|ca| ca.into_series()),
349 FillNullStrategy::Mean => polars_bail!(opq = mean, "Boolean"),
350 FillNullStrategy::One => ca.fill_null_with_values(true).map(|ca| ca.into_series()),
351 FillNullStrategy::Zero => ca.fill_null_with_values(false).map(|ca| ca.into_series()),
352 FillNullStrategy::Forward(_) => unreachable!(),
353 FillNullStrategy::Backward(_) => unreachable!(),
354 }
355}
356
357fn fill_null_binary(ca: &BinaryChunked, strategy: FillNullStrategy) -> PolarsResult<BinaryChunked> {
358 match strategy {
359 FillNullStrategy::Min => {
360 ca.fill_null_with_values(ca.min_binary().ok_or_else(err_fill_null)?)
361 },
362 FillNullStrategy::Max => {
363 ca.fill_null_with_values(ca.max_binary().ok_or_else(err_fill_null)?)
364 },
365 FillNullStrategy::Zero => ca.fill_null_with_values(&[]),
366 FillNullStrategy::Forward(_) => unreachable!(),
367 FillNullStrategy::Backward(_) => unreachable!(),
368 strat => polars_bail!(InvalidOperation: "fill-null strategy {:?} is not supported", strat),
369 }
370}
371
372impl<T> ChunkFillNullValue<T::Native> for ChunkedArray<T>
373where
374 T: PolarsNumericType,
375{
376 fn fill_null_with_values(&self, value: T::Native) -> PolarsResult<Self> {
377 Ok(self.apply_kernel(&|arr| Box::new(set_at_nulls(arr, value))))
378 }
379}
380
381impl ChunkFillNullValue<bool> for BooleanChunked {
382 fn fill_null_with_values(&self, value: bool) -> PolarsResult<Self> {
383 self.set(&self.is_null(), Some(value))
384 }
385}
386
387impl ChunkFillNullValue<&[u8]> for BinaryChunked {
388 fn fill_null_with_values(&self, value: &[u8]) -> PolarsResult<Self> {
389 self.set(&self.is_null(), Some(value))
390 }
391}