polars_core/chunked_array/ops/
fill_null.rs1use arrow::bitmap::{Bitmap, BitmapBuilder};
2use arrow::legacy::kernels::set::set_at_nulls;
3use bytemuck::Zeroable;
4use num_traits::{NumCast, One, Zero};
5use polars_utils::itertools::Itertools;
6
7use crate::prelude::*;
8
9fn err_fill_null() -> PolarsError {
10 polars_err!(ComputeError: "could not determine the fill value")
11}
12
13impl Series {
14 pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Series> {
59 let nc = self.null_count();
61 if nc == 0
62 || (nc == self.len()
63 && matches!(
64 strategy,
65 FillNullStrategy::Forward(_)
66 | FillNullStrategy::Backward(_)
67 | FillNullStrategy::Max
68 | FillNullStrategy::Min
69 | FillNullStrategy::Mean
70 ))
71 {
72 return Ok(self.clone());
73 }
74
75 let physical_type = self.dtype().to_physical();
76
77 match strategy {
78 FillNullStrategy::Forward(None) if !physical_type.is_primitive_numeric() => {
79 fill_forward_gather(self)
80 },
81 FillNullStrategy::Forward(Some(limit)) => fill_forward_gather_limit(self, limit),
82 FillNullStrategy::Backward(None) if !physical_type.is_primitive_numeric() => {
83 fill_backward_gather(self)
84 },
85 FillNullStrategy::Backward(Some(limit)) => fill_backward_gather_limit(self, limit),
86 #[cfg(feature = "dtype-decimal")]
87 FillNullStrategy::One if self.dtype().is_decimal() => {
88 use polars_compute::decimal::i128_to_dec128;
89
90 let ca = self.decimal().unwrap();
91 let precision = ca.precision();
92 let scale = ca.scale();
93 let fill_value = i128_to_dec128(1, precision, scale).ok_or_else(|| {
94 polars_err!(ComputeError: "value '1' is out of range for Decimal({precision}, {scale})")
95 })?;
96 let phys = ca.physical().fill_null_with_values(fill_value)?;
97 Ok(phys.into_decimal_unchecked(precision, scale).into_series())
98 },
99 _ => {
100 let logical_type = self.dtype();
101 let s = self.to_physical_repr();
102 use DataType::*;
103 let out = match s.dtype() {
104 Boolean => fill_null_bool(s.bool().unwrap(), strategy),
105 String => {
106 let s = unsafe { s.cast_unchecked(&Binary)? };
107 let out = s.fill_null(strategy)?;
108 return unsafe { out.cast_unchecked(&String) };
109 },
110 Binary => {
111 let ca = s.binary().unwrap();
112 fill_null_binary(ca, strategy).map(|ca| ca.into_series())
113 },
114 dt if dt.is_primitive_numeric() => {
115 with_match_physical_numeric_polars_type!(dt, |$T| {
116 let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
117 fill_null_numeric(ca, strategy).map(|ca| ca.into_series())
118 })
119 },
120 dt => {
121 polars_bail!(InvalidOperation: "fill null strategy not yet supported for dtype: {}", dt)
122 },
123 }?;
124 unsafe { out.from_physical_unchecked(logical_type) }
125 },
126 }
127 }
128}
129
130fn fill_forward_numeric<'a, T, I>(ca: &'a ChunkedArray<T>) -> ChunkedArray<T>
131where
132 T: PolarsDataType,
133 &'a ChunkedArray<T>: IntoIterator<IntoIter = I>,
134 I: TrustedLen + Iterator<Item = Option<T::Physical<'a>>>,
135 T::ZeroablePhysical<'a>: Copy,
136{
137 let values: Vec<T::ZeroablePhysical<'a>> = ca
139 .into_iter()
140 .scan(T::ZeroablePhysical::zeroed(), |prev, v| {
141 *prev = v.map(|v| v.into()).unwrap_or(*prev);
142 Some(*prev)
143 })
144 .collect_trusted();
145
146 let num_start_nulls = ca.first_non_null().unwrap_or(ca.len());
148 let mut bm = BitmapBuilder::with_capacity(ca.len());
149 bm.extend_constant(num_start_nulls, false);
150 bm.extend_constant(ca.len() - num_start_nulls, true);
151 ChunkedArray::from_chunk_iter_like(
152 ca,
153 [
154 T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(CompatLevel::newest()))
155 .with_validity_typed(bm.into_opt_validity()),
156 ],
157 )
158}
159
160fn fill_backward_numeric<'a, T, I>(ca: &'a ChunkedArray<T>) -> ChunkedArray<T>
161where
162 T: PolarsDataType,
163 &'a ChunkedArray<T>: IntoIterator<IntoIter = I>,
164 I: TrustedLen + Iterator<Item = Option<T::Physical<'a>>> + DoubleEndedIterator,
165 T::ZeroablePhysical<'a>: Copy,
166{
167 let values: Vec<T::ZeroablePhysical<'a>> = ca
169 .into_iter()
170 .rev()
171 .scan(T::ZeroablePhysical::zeroed(), |prev, v| {
172 *prev = v.map(|v| v.into()).unwrap_or(*prev);
173 Some(*prev)
174 })
175 .collect_reversed();
176
177 let num_end_nulls = ca
179 .last_non_null()
180 .map(|i| ca.len() - 1 - i)
181 .unwrap_or(ca.len());
182 let mut bm = BitmapBuilder::with_capacity(ca.len());
183 bm.extend_constant(ca.len() - num_end_nulls, true);
184 bm.extend_constant(num_end_nulls, false);
185 ChunkedArray::from_chunk_iter_like(
186 ca,
187 [
188 T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(CompatLevel::newest()))
189 .with_validity_typed(bm.into_opt_validity()),
190 ],
191 )
192}
193
194fn fill_null_numeric<T>(
195 ca: &ChunkedArray<T>,
196 strategy: FillNullStrategy,
197) -> PolarsResult<ChunkedArray<T>>
198where
199 T: PolarsNumericType,
200 ChunkedArray<T>: ChunkAgg<T::Native>,
201{
202 let mut out = match strategy {
204 FillNullStrategy::Min => {
205 ca.fill_null_with_values(ChunkAgg::min(ca).ok_or_else(err_fill_null)?)?
206 },
207 FillNullStrategy::Max => {
208 ca.fill_null_with_values(ChunkAgg::max(ca).ok_or_else(err_fill_null)?)?
209 },
210 FillNullStrategy::Mean => ca.fill_null_with_values(
211 ca.mean()
212 .map(|v| NumCast::from(v).unwrap())
213 .ok_or_else(err_fill_null)?,
214 )?,
215 FillNullStrategy::One => return ca.fill_null_with_values(One::one()),
216 FillNullStrategy::Zero => return ca.fill_null_with_values(Zero::zero()),
217 FillNullStrategy::Forward(None) => fill_forward_numeric(ca),
218 FillNullStrategy::Backward(None) => fill_backward_numeric(ca),
219 FillNullStrategy::Forward(_) => unreachable!(),
221 FillNullStrategy::Backward(_) => unreachable!(),
222 };
223 out.rename(ca.name().clone());
224 Ok(out)
225}
226
227fn fill_with_gather<F: Fn(&Bitmap) -> Vec<IdxSize>>(
228 s: &Series,
229 bits_to_idx: F,
230) -> PolarsResult<Series> {
231 let s = s.rechunk();
232 let arr = s.chunks()[0].clone();
233 let validity = arr.validity().expect("nulls");
234
235 let idx = bits_to_idx(validity);
236
237 Ok(unsafe { s.take_slice_unchecked(&idx) })
238}
239
240fn fill_forward_gather(s: &Series) -> PolarsResult<Series> {
241 fill_with_gather(s, |validity| {
242 let mut last_valid = 0;
243 validity
244 .iter()
245 .enumerate_idx()
246 .map(|(i, v)| {
247 if v {
248 last_valid = i;
249 i
250 } else {
251 last_valid
252 }
253 })
254 .collect::<Vec<_>>()
255 })
256}
257
258fn fill_forward_gather_limit(s: &Series, limit: IdxSize) -> PolarsResult<Series> {
259 fill_with_gather(s, |validity| {
260 let mut last_valid = 0;
261 let mut conseq_invalid_count = 0;
262 validity
263 .iter()
264 .enumerate_idx()
265 .map(|(i, v)| {
266 if v {
267 last_valid = i;
268 conseq_invalid_count = 0;
269 i
270 } else if conseq_invalid_count < limit {
271 conseq_invalid_count += 1;
272 last_valid
273 } else {
274 i
275 }
276 })
277 .collect::<Vec<_>>()
278 })
279}
280
281fn fill_backward_gather(s: &Series) -> PolarsResult<Series> {
282 fill_with_gather(s, |validity| {
283 let last = validity.len() as IdxSize - 1;
284 let mut last_valid = last;
285 unsafe {
286 validity
287 .iter()
288 .rev()
289 .enumerate_idx()
290 .map(|(i, v)| {
291 if v {
292 last_valid = last - i;
293 last - i
294 } else {
295 last_valid
296 }
297 })
298 .trust_my_length((last + 1) as usize)
299 .collect_reversed::<Vec<_>>()
300 }
301 })
302}
303
304fn fill_backward_gather_limit(s: &Series, limit: IdxSize) -> PolarsResult<Series> {
305 fill_with_gather(s, |validity| {
306 let last = validity.len() as IdxSize - 1;
307 let mut last_valid = last;
308 let mut conseq_invalid_count = 0;
309 unsafe {
310 validity
311 .iter()
312 .rev()
313 .enumerate_idx()
314 .map(|(i, v)| {
315 if v {
316 last_valid = last - i;
317 conseq_invalid_count = 0;
318 last - i
319 } else if conseq_invalid_count < limit {
320 conseq_invalid_count += 1;
321 last_valid
322 } else {
323 last - i
324 }
325 })
326 .trust_my_length((last + 1) as usize)
327 .collect_reversed()
328 }
329 })
330}
331
332fn fill_null_bool(ca: &BooleanChunked, strategy: FillNullStrategy) -> PolarsResult<Series> {
333 match strategy {
334 FillNullStrategy::Min => ca
335 .fill_null_with_values(ca.min().ok_or_else(err_fill_null)?)
336 .map(|ca| ca.into_series()),
337 FillNullStrategy::Max => ca
338 .fill_null_with_values(ca.max().ok_or_else(err_fill_null)?)
339 .map(|ca| ca.into_series()),
340 FillNullStrategy::Mean => polars_bail!(opq = mean, "Boolean"),
341 FillNullStrategy::One => ca.fill_null_with_values(true).map(|ca| ca.into_series()),
342 FillNullStrategy::Zero => ca.fill_null_with_values(false).map(|ca| ca.into_series()),
343 FillNullStrategy::Forward(_) => unreachable!(),
344 FillNullStrategy::Backward(_) => unreachable!(),
345 }
346}
347
348fn fill_null_binary(ca: &BinaryChunked, strategy: FillNullStrategy) -> PolarsResult<BinaryChunked> {
349 match strategy {
350 FillNullStrategy::Min => {
351 ca.fill_null_with_values(ca.min_binary().ok_or_else(err_fill_null)?)
352 },
353 FillNullStrategy::Max => {
354 ca.fill_null_with_values(ca.max_binary().ok_or_else(err_fill_null)?)
355 },
356 FillNullStrategy::Zero => ca.fill_null_with_values(&[]),
357 FillNullStrategy::Forward(_) => unreachable!(),
358 FillNullStrategy::Backward(_) => unreachable!(),
359 strat => polars_bail!(InvalidOperation: "fill-null strategy {:?} is not supported", strat),
360 }
361}
362
363impl<T> ChunkFillNullValue<T::Native> for ChunkedArray<T>
364where
365 T: PolarsNumericType,
366{
367 fn fill_null_with_values(&self, value: T::Native) -> PolarsResult<Self> {
368 Ok(self.apply_kernel(&|arr| Box::new(set_at_nulls(arr, value))))
369 }
370}
371
372impl ChunkFillNullValue<bool> for BooleanChunked {
373 fn fill_null_with_values(&self, value: bool) -> PolarsResult<Self> {
374 self.set(&self.is_null(), Some(value))
375 }
376}
377
378impl ChunkFillNullValue<&[u8]> for BinaryChunked {
379 fn fill_null_with_values(&self, value: &[u8]) -> PolarsResult<Self> {
380 self.set(&self.is_null(), Some(value))
381 }
382}