polars_core/chunked_array/ops/
fill_null.rs1use arrow::bitmap::{Bitmap, BitmapBuilder};
2use arrow::legacy::kernels::set::set_at_nulls;
3use bytemuck::Zeroable;
4use num_traits::{NumCast, One, Zero};
5use polars_utils::itertools::Itertools;
6
7use crate::prelude::*;
8
9fn err_fill_null() -> PolarsError {
10 polars_err!(ComputeError: "could not determine the fill value")
11}
12
13impl Series {
14 pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Series> {
59 let nc = self.null_count();
61 if nc == 0
62 || (nc == self.len()
63 && matches!(
64 strategy,
65 FillNullStrategy::Forward(_)
66 | FillNullStrategy::Backward(_)
67 | FillNullStrategy::Max
68 | FillNullStrategy::Min
69 | FillNullStrategy::Mean
70 ))
71 {
72 return Ok(self.clone());
73 }
74
75 let physical_type = self.dtype().to_physical();
76
77 match strategy {
78 FillNullStrategy::Forward(None) if !physical_type.is_primitive_numeric() => {
79 fill_forward_gather(self)
80 },
81 FillNullStrategy::Forward(Some(limit)) => fill_forward_gather_limit(self, limit),
82 FillNullStrategy::Backward(None) if !physical_type.is_primitive_numeric() => {
83 fill_backward_gather(self)
84 },
85 FillNullStrategy::Backward(Some(limit)) => fill_backward_gather_limit(self, limit),
86 #[cfg(feature = "dtype-decimal")]
87 FillNullStrategy::One if self.dtype().is_decimal() => {
88 let ca = self.decimal().unwrap();
89 let precision = ca.precision();
90 let scale = ca.scale();
91 let fill_value = 10i128.pow(scale as u32);
92 let phys = ca.physical().fill_null_with_values(fill_value)?;
93 Ok(phys.into_decimal_unchecked(precision, scale).into_series())
94 },
95 _ => {
96 let logical_type = self.dtype();
97 let s = self.to_physical_repr();
98 use DataType::*;
99 let out = match s.dtype() {
100 Boolean => fill_null_bool(s.bool().unwrap(), strategy),
101 String => {
102 let s = unsafe { s.cast_unchecked(&Binary)? };
103 let out = s.fill_null(strategy)?;
104 return unsafe { out.cast_unchecked(&String) };
105 },
106 Binary => {
107 let ca = s.binary().unwrap();
108 fill_null_binary(ca, strategy).map(|ca| ca.into_series())
109 },
110 dt if dt.is_primitive_numeric() => {
111 with_match_physical_numeric_polars_type!(dt, |$T| {
112 let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
113 fill_null_numeric(ca, strategy).map(|ca| ca.into_series())
114 })
115 },
116 dt => {
117 polars_bail!(InvalidOperation: "fill null strategy not yet supported for dtype: {}", dt)
118 },
119 }?;
120 unsafe { out.from_physical_unchecked(logical_type) }
121 },
122 }
123 }
124}
125
126fn fill_forward_numeric<'a, T, I>(ca: &'a ChunkedArray<T>) -> ChunkedArray<T>
127where
128 T: PolarsDataType,
129 &'a ChunkedArray<T>: IntoIterator<IntoIter = I>,
130 I: TrustedLen + Iterator<Item = Option<T::Physical<'a>>>,
131 T::ZeroablePhysical<'a>: Copy,
132{
133 let values: Vec<T::ZeroablePhysical<'a>> = ca
135 .into_iter()
136 .scan(T::ZeroablePhysical::zeroed(), |prev, v| {
137 *prev = v.map(|v| v.into()).unwrap_or(*prev);
138 Some(*prev)
139 })
140 .collect_trusted();
141
142 let num_start_nulls = ca.first_non_null().unwrap_or(ca.len());
144 let mut bm = BitmapBuilder::with_capacity(ca.len());
145 bm.extend_constant(num_start_nulls, false);
146 bm.extend_constant(ca.len() - num_start_nulls, true);
147 ChunkedArray::from_chunk_iter_like(
148 ca,
149 [
150 T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(CompatLevel::newest()))
151 .with_validity_typed(bm.into_opt_validity()),
152 ],
153 )
154}
155
156fn fill_backward_numeric<'a, T, I>(ca: &'a ChunkedArray<T>) -> ChunkedArray<T>
157where
158 T: PolarsDataType,
159 &'a ChunkedArray<T>: IntoIterator<IntoIter = I>,
160 I: TrustedLen + Iterator<Item = Option<T::Physical<'a>>> + DoubleEndedIterator,
161 T::ZeroablePhysical<'a>: Copy,
162{
163 let values: Vec<T::ZeroablePhysical<'a>> = ca
165 .into_iter()
166 .rev()
167 .scan(T::ZeroablePhysical::zeroed(), |prev, v| {
168 *prev = v.map(|v| v.into()).unwrap_or(*prev);
169 Some(*prev)
170 })
171 .collect_reversed();
172
173 let num_end_nulls = ca
175 .last_non_null()
176 .map(|i| ca.len() - 1 - i)
177 .unwrap_or(ca.len());
178 let mut bm = BitmapBuilder::with_capacity(ca.len());
179 bm.extend_constant(ca.len() - num_end_nulls, true);
180 bm.extend_constant(num_end_nulls, false);
181 ChunkedArray::from_chunk_iter_like(
182 ca,
183 [
184 T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(CompatLevel::newest()))
185 .with_validity_typed(bm.into_opt_validity()),
186 ],
187 )
188}
189
190fn fill_null_numeric<T>(
191 ca: &ChunkedArray<T>,
192 strategy: FillNullStrategy,
193) -> PolarsResult<ChunkedArray<T>>
194where
195 T: PolarsNumericType,
196 ChunkedArray<T>: ChunkAgg<T::Native>,
197{
198 let mut out = match strategy {
200 FillNullStrategy::Min => {
201 ca.fill_null_with_values(ChunkAgg::min(ca).ok_or_else(err_fill_null)?)?
202 },
203 FillNullStrategy::Max => {
204 ca.fill_null_with_values(ChunkAgg::max(ca).ok_or_else(err_fill_null)?)?
205 },
206 FillNullStrategy::Mean => ca.fill_null_with_values(
207 ca.mean()
208 .map(|v| NumCast::from(v).unwrap())
209 .ok_or_else(err_fill_null)?,
210 )?,
211 FillNullStrategy::One => return ca.fill_null_with_values(One::one()),
212 FillNullStrategy::Zero => return ca.fill_null_with_values(Zero::zero()),
213 FillNullStrategy::Forward(None) => fill_forward_numeric(ca),
214 FillNullStrategy::Backward(None) => fill_backward_numeric(ca),
215 FillNullStrategy::Forward(_) => unreachable!(),
217 FillNullStrategy::Backward(_) => unreachable!(),
218 };
219 out.rename(ca.name().clone());
220 Ok(out)
221}
222
223fn fill_with_gather<F: Fn(&Bitmap) -> Vec<IdxSize>>(
224 s: &Series,
225 bits_to_idx: F,
226) -> PolarsResult<Series> {
227 let s = s.rechunk();
228 let arr = s.chunks()[0].clone();
229 let validity = arr.validity().expect("nulls");
230
231 let idx = bits_to_idx(validity);
232
233 Ok(unsafe { s.take_slice_unchecked(&idx) })
234}
235
236fn fill_forward_gather(s: &Series) -> PolarsResult<Series> {
237 fill_with_gather(s, |validity| {
238 let mut last_valid = 0;
239 validity
240 .iter()
241 .enumerate_idx()
242 .map(|(i, v)| {
243 if v {
244 last_valid = i;
245 i
246 } else {
247 last_valid
248 }
249 })
250 .collect::<Vec<_>>()
251 })
252}
253
254fn fill_forward_gather_limit(s: &Series, limit: IdxSize) -> PolarsResult<Series> {
255 fill_with_gather(s, |validity| {
256 let mut last_valid = 0;
257 let mut conseq_invalid_count = 0;
258 validity
259 .iter()
260 .enumerate_idx()
261 .map(|(i, v)| {
262 if v {
263 last_valid = i;
264 conseq_invalid_count = 0;
265 i
266 } else if conseq_invalid_count < limit {
267 conseq_invalid_count += 1;
268 last_valid
269 } else {
270 i
271 }
272 })
273 .collect::<Vec<_>>()
274 })
275}
276
277fn fill_backward_gather(s: &Series) -> PolarsResult<Series> {
278 fill_with_gather(s, |validity| {
279 let last = validity.len() as IdxSize - 1;
280 let mut last_valid = last;
281 unsafe {
282 validity
283 .iter()
284 .rev()
285 .enumerate_idx()
286 .map(|(i, v)| {
287 if v {
288 last_valid = last - i;
289 last - i
290 } else {
291 last_valid
292 }
293 })
294 .trust_my_length((last + 1) as usize)
295 .collect_reversed::<Vec<_>>()
296 }
297 })
298}
299
300fn fill_backward_gather_limit(s: &Series, limit: IdxSize) -> PolarsResult<Series> {
301 fill_with_gather(s, |validity| {
302 let last = validity.len() as IdxSize - 1;
303 let mut last_valid = last;
304 let mut conseq_invalid_count = 0;
305 unsafe {
306 validity
307 .iter()
308 .rev()
309 .enumerate_idx()
310 .map(|(i, v)| {
311 if v {
312 last_valid = last - i;
313 conseq_invalid_count = 0;
314 last - i
315 } else if conseq_invalid_count < limit {
316 conseq_invalid_count += 1;
317 last_valid
318 } else {
319 last - i
320 }
321 })
322 .trust_my_length((last + 1) as usize)
323 .collect_reversed()
324 }
325 })
326}
327
328fn fill_null_bool(ca: &BooleanChunked, strategy: FillNullStrategy) -> PolarsResult<Series> {
329 match strategy {
330 FillNullStrategy::Min => ca
331 .fill_null_with_values(ca.min().ok_or_else(err_fill_null)?)
332 .map(|ca| ca.into_series()),
333 FillNullStrategy::Max => ca
334 .fill_null_with_values(ca.max().ok_or_else(err_fill_null)?)
335 .map(|ca| ca.into_series()),
336 FillNullStrategy::Mean => polars_bail!(opq = mean, "Boolean"),
337 FillNullStrategy::One => ca.fill_null_with_values(true).map(|ca| ca.into_series()),
338 FillNullStrategy::Zero => ca.fill_null_with_values(false).map(|ca| ca.into_series()),
339 FillNullStrategy::Forward(_) => unreachable!(),
340 FillNullStrategy::Backward(_) => unreachable!(),
341 }
342}
343
344fn fill_null_binary(ca: &BinaryChunked, strategy: FillNullStrategy) -> PolarsResult<BinaryChunked> {
345 match strategy {
346 FillNullStrategy::Min => {
347 ca.fill_null_with_values(ca.min_binary().ok_or_else(err_fill_null)?)
348 },
349 FillNullStrategy::Max => {
350 ca.fill_null_with_values(ca.max_binary().ok_or_else(err_fill_null)?)
351 },
352 FillNullStrategy::Zero => ca.fill_null_with_values(&[]),
353 FillNullStrategy::Forward(_) => unreachable!(),
354 FillNullStrategy::Backward(_) => unreachable!(),
355 strat => polars_bail!(InvalidOperation: "fill-null strategy {:?} is not supported", strat),
356 }
357}
358
359impl<T> ChunkFillNullValue<T::Native> for ChunkedArray<T>
360where
361 T: PolarsNumericType,
362{
363 fn fill_null_with_values(&self, value: T::Native) -> PolarsResult<Self> {
364 Ok(self.apply_kernel(&|arr| Box::new(set_at_nulls(arr, value))))
365 }
366}
367
368impl ChunkFillNullValue<bool> for BooleanChunked {
369 fn fill_null_with_values(&self, value: bool) -> PolarsResult<Self> {
370 self.set(&self.is_null(), Some(value))
371 }
372}
373
374impl ChunkFillNullValue<&[u8]> for BinaryChunked {
375 fn fill_null_with_values(&self, value: &[u8]) -> PolarsResult<Self> {
376 self.set(&self.is_null(), Some(value))
377 }
378}