polars_core/chunked_array/ops/
fill_null.rs1use arrow::bitmap::{Bitmap, BitmapBuilder};
2use arrow::legacy::kernels::set::set_at_nulls;
3use bytemuck::Zeroable;
4use num_traits::{Bounded, NumCast, One, Zero};
5use polars_utils::itertools::Itertools;
6
7use crate::prelude::*;
8
9fn err_fill_null() -> PolarsError {
10 polars_err!(ComputeError: "could not determine the fill value")
11}
12
13impl Series {
14 pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Series> {
67 let nc = self.null_count();
69 if nc == 0
70 || (nc == self.len()
71 && matches!(
72 strategy,
73 FillNullStrategy::Forward(_)
74 | FillNullStrategy::Backward(_)
75 | FillNullStrategy::Max
76 | FillNullStrategy::Min
77 | FillNullStrategy::MaxBound
78 | FillNullStrategy::MinBound
79 | FillNullStrategy::Mean
80 ))
81 {
82 return Ok(self.clone());
83 }
84
85 let physical_type = self.dtype().to_physical();
86
87 match strategy {
88 FillNullStrategy::Forward(None) if !physical_type.is_primitive_numeric() => {
89 fill_forward_gather(self)
90 },
91 FillNullStrategy::Forward(Some(limit)) => fill_forward_gather_limit(self, limit),
92 FillNullStrategy::Backward(None) if !physical_type.is_primitive_numeric() => {
93 fill_backward_gather(self)
94 },
95 FillNullStrategy::Backward(Some(limit)) => fill_backward_gather_limit(self, limit),
96 #[cfg(feature = "dtype-decimal")]
97 FillNullStrategy::One if self.dtype().is_decimal() => {
98 let ca = self.decimal().unwrap();
99 let precision = ca.precision();
100 let scale = ca.scale();
101 let fill_value = 10i128.pow(scale as u32);
102 let phys = ca.as_ref().fill_null_with_values(fill_value)?;
103 Ok(phys.into_decimal_unchecked(precision, scale).into_series())
104 },
105 _ => {
106 let logical_type = self.dtype();
107 let s = self.to_physical_repr();
108 use DataType::*;
109 let out = match s.dtype() {
110 Boolean => fill_null_bool(s.bool().unwrap(), strategy),
111 String => {
112 let s = unsafe { s.cast_unchecked(&Binary)? };
113 let out = s.fill_null(strategy)?;
114 return unsafe { out.cast_unchecked(&String) };
115 },
116 Binary => {
117 let ca = s.binary().unwrap();
118 fill_null_binary(ca, strategy).map(|ca| ca.into_series())
119 },
120 dt if dt.is_primitive_numeric() => {
121 with_match_physical_numeric_polars_type!(dt, |$T| {
122 let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
123 fill_null_numeric(ca, strategy).map(|ca| ca.into_series())
124 })
125 },
126 dt => {
127 polars_bail!(InvalidOperation: "fill null strategy not yet supported for dtype: {}", dt)
128 },
129 }?;
130 unsafe { out.from_physical_unchecked(logical_type) }
131 },
132 }
133 }
134}
135
136fn fill_forward_numeric<'a, T, I>(ca: &'a ChunkedArray<T>) -> ChunkedArray<T>
137where
138 T: PolarsDataType,
139 &'a ChunkedArray<T>: IntoIterator<IntoIter = I>,
140 I: TrustedLen + Iterator<Item = Option<T::Physical<'a>>>,
141 T::ZeroablePhysical<'a>: Copy,
142{
143 let values: Vec<T::ZeroablePhysical<'a>> = ca
145 .into_iter()
146 .scan(T::ZeroablePhysical::zeroed(), |prev, v| {
147 *prev = v.map(|v| v.into()).unwrap_or(*prev);
148 Some(*prev)
149 })
150 .collect_trusted();
151
152 let num_start_nulls = ca.first_non_null().unwrap_or(ca.len());
154 let mut bm = BitmapBuilder::with_capacity(ca.len());
155 bm.extend_constant(num_start_nulls, false);
156 bm.extend_constant(ca.len() - num_start_nulls, true);
157 ChunkedArray::from_chunk_iter_like(
158 ca,
159 [
160 T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(CompatLevel::newest()))
161 .with_validity_typed(bm.into_opt_validity()),
162 ],
163 )
164}
165
166fn fill_backward_numeric<'a, T, I>(ca: &'a ChunkedArray<T>) -> ChunkedArray<T>
167where
168 T: PolarsDataType,
169 &'a ChunkedArray<T>: IntoIterator<IntoIter = I>,
170 I: TrustedLen + Iterator<Item = Option<T::Physical<'a>>> + DoubleEndedIterator,
171 T::ZeroablePhysical<'a>: Copy,
172{
173 let values: Vec<T::ZeroablePhysical<'a>> = ca
175 .into_iter()
176 .rev()
177 .scan(T::ZeroablePhysical::zeroed(), |prev, v| {
178 *prev = v.map(|v| v.into()).unwrap_or(*prev);
179 Some(*prev)
180 })
181 .collect_reversed();
182
183 let num_end_nulls = ca
185 .last_non_null()
186 .map(|i| ca.len() - 1 - i)
187 .unwrap_or(ca.len());
188 let mut bm = BitmapBuilder::with_capacity(ca.len());
189 bm.extend_constant(ca.len() - num_end_nulls, true);
190 bm.extend_constant(num_end_nulls, false);
191 ChunkedArray::from_chunk_iter_like(
192 ca,
193 [
194 T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(CompatLevel::newest()))
195 .with_validity_typed(bm.into_opt_validity()),
196 ],
197 )
198}
199
200fn fill_null_numeric<T>(
201 ca: &ChunkedArray<T>,
202 strategy: FillNullStrategy,
203) -> PolarsResult<ChunkedArray<T>>
204where
205 T: PolarsNumericType,
206 ChunkedArray<T>: ChunkAgg<T::Native>,
207{
208 let mut out = match strategy {
210 FillNullStrategy::Min => {
211 ca.fill_null_with_values(ChunkAgg::min(ca).ok_or_else(err_fill_null)?)?
212 },
213 FillNullStrategy::Max => {
214 ca.fill_null_with_values(ChunkAgg::max(ca).ok_or_else(err_fill_null)?)?
215 },
216 FillNullStrategy::Mean => ca.fill_null_with_values(
217 ca.mean()
218 .map(|v| NumCast::from(v).unwrap())
219 .ok_or_else(err_fill_null)?,
220 )?,
221 FillNullStrategy::One => return ca.fill_null_with_values(One::one()),
222 FillNullStrategy::Zero => return ca.fill_null_with_values(Zero::zero()),
223 FillNullStrategy::MinBound => return ca.fill_null_with_values(Bounded::min_value()),
224 FillNullStrategy::MaxBound => return ca.fill_null_with_values(Bounded::max_value()),
225 FillNullStrategy::Forward(None) => fill_forward_numeric(ca),
226 FillNullStrategy::Backward(None) => fill_backward_numeric(ca),
227 FillNullStrategy::Forward(_) => unreachable!(),
229 FillNullStrategy::Backward(_) => unreachable!(),
230 };
231 out.rename(ca.name().clone());
232 Ok(out)
233}
234
235fn fill_with_gather<F: Fn(&Bitmap) -> Vec<IdxSize>>(
236 s: &Series,
237 bits_to_idx: F,
238) -> PolarsResult<Series> {
239 let s = s.rechunk();
240 let arr = s.chunks()[0].clone();
241 let validity = arr.validity().expect("nulls");
242
243 let idx = bits_to_idx(validity);
244
245 Ok(unsafe { s.take_slice_unchecked(&idx) })
246}
247
248fn fill_forward_gather(s: &Series) -> PolarsResult<Series> {
249 fill_with_gather(s, |validity| {
250 let mut last_valid = 0;
251 validity
252 .iter()
253 .enumerate_idx()
254 .map(|(i, v)| {
255 if v {
256 last_valid = i;
257 i
258 } else {
259 last_valid
260 }
261 })
262 .collect::<Vec<_>>()
263 })
264}
265
266fn fill_forward_gather_limit(s: &Series, limit: IdxSize) -> PolarsResult<Series> {
267 fill_with_gather(s, |validity| {
268 let mut last_valid = 0;
269 let mut conseq_invalid_count = 0;
270 validity
271 .iter()
272 .enumerate_idx()
273 .map(|(i, v)| {
274 if v {
275 last_valid = i;
276 conseq_invalid_count = 0;
277 i
278 } else if conseq_invalid_count < limit {
279 conseq_invalid_count += 1;
280 last_valid
281 } else {
282 i
283 }
284 })
285 .collect::<Vec<_>>()
286 })
287}
288
289fn fill_backward_gather(s: &Series) -> PolarsResult<Series> {
290 fill_with_gather(s, |validity| {
291 let last = validity.len() as IdxSize - 1;
292 let mut last_valid = last;
293 unsafe {
294 validity
295 .iter()
296 .rev()
297 .enumerate_idx()
298 .map(|(i, v)| {
299 if v {
300 last_valid = last - i;
301 last - i
302 } else {
303 last_valid
304 }
305 })
306 .trust_my_length((last + 1) as usize)
307 .collect_reversed::<Vec<_>>()
308 }
309 })
310}
311
312fn fill_backward_gather_limit(s: &Series, limit: IdxSize) -> PolarsResult<Series> {
313 fill_with_gather(s, |validity| {
314 let last = validity.len() as IdxSize - 1;
315 let mut last_valid = last;
316 let mut conseq_invalid_count = 0;
317 unsafe {
318 validity
319 .iter()
320 .rev()
321 .enumerate_idx()
322 .map(|(i, v)| {
323 if v {
324 last_valid = last - i;
325 conseq_invalid_count = 0;
326 last - i
327 } else if conseq_invalid_count < limit {
328 conseq_invalid_count += 1;
329 last_valid
330 } else {
331 last - i
332 }
333 })
334 .trust_my_length((last + 1) as usize)
335 .collect_reversed()
336 }
337 })
338}
339
340fn fill_null_bool(ca: &BooleanChunked, strategy: FillNullStrategy) -> PolarsResult<Series> {
341 match strategy {
342 FillNullStrategy::Min => ca
343 .fill_null_with_values(ca.min().ok_or_else(err_fill_null)?)
344 .map(|ca| ca.into_series()),
345 FillNullStrategy::Max => ca
346 .fill_null_with_values(ca.max().ok_or_else(err_fill_null)?)
347 .map(|ca| ca.into_series()),
348 FillNullStrategy::Mean => polars_bail!(opq = mean, "Boolean"),
349 FillNullStrategy::One | FillNullStrategy::MaxBound => {
350 ca.fill_null_with_values(true).map(|ca| ca.into_series())
351 },
352 FillNullStrategy::Zero | FillNullStrategy::MinBound => {
353 ca.fill_null_with_values(false).map(|ca| ca.into_series())
354 },
355 FillNullStrategy::Forward(_) => unreachable!(),
356 FillNullStrategy::Backward(_) => unreachable!(),
357 }
358}
359
360fn fill_null_binary(ca: &BinaryChunked, strategy: FillNullStrategy) -> PolarsResult<BinaryChunked> {
361 match strategy {
362 FillNullStrategy::Min => {
363 ca.fill_null_with_values(ca.min_binary().ok_or_else(err_fill_null)?)
364 },
365 FillNullStrategy::Max => {
366 ca.fill_null_with_values(ca.max_binary().ok_or_else(err_fill_null)?)
367 },
368 FillNullStrategy::Zero => ca.fill_null_with_values(&[]),
369 FillNullStrategy::Forward(_) => unreachable!(),
370 FillNullStrategy::Backward(_) => unreachable!(),
371 strat => polars_bail!(InvalidOperation: "fill-null strategy {:?} is not supported", strat),
372 }
373}
374
375impl<T> ChunkFillNullValue<T::Native> for ChunkedArray<T>
376where
377 T: PolarsNumericType,
378{
379 fn fill_null_with_values(&self, value: T::Native) -> PolarsResult<Self> {
380 Ok(self.apply_kernel(&|arr| Box::new(set_at_nulls(arr, value))))
381 }
382}
383
384impl ChunkFillNullValue<bool> for BooleanChunked {
385 fn fill_null_with_values(&self, value: bool) -> PolarsResult<Self> {
386 self.set(&self.is_null(), Some(value))
387 }
388}
389
390impl ChunkFillNullValue<&[u8]> for BinaryChunked {
391 fn fill_null_with_values(&self, value: &[u8]) -> PolarsResult<Self> {
392 self.set(&self.is_null(), Some(value))
393 }
394}