1#![allow(unsafe_op_in_unsafe_fn)]
2use arrow::array::*;
3use arrow::bitmap::utils::set_bit_unchecked;
4use arrow::bitmap::{Bitmap, MutableBitmap};
5use arrow::legacy::prelude::*;
6
7use crate::prelude::*;
8use crate::series::implementations::null::NullChunked;
9
10pub(crate) trait ExplodeByOffsets {
11 fn explode_by_offsets(&self, offsets: &[i64], options: ExplodeOptions) -> Series;
12}
13
14unsafe fn unset_nulls(
15 start: usize,
16 last: usize,
17 validity_values: &Bitmap,
18 nulls: &mut Vec<usize>,
19 empty_row_idx: &[usize],
20 base_offset: usize,
21) {
22 for i in start..last {
23 if !validity_values.get_bit_unchecked(i) {
24 nulls.push(i + empty_row_idx.len() - base_offset);
25 }
26 }
27}
28
29fn get_capacity(offsets: &[i64]) -> usize {
30 (offsets[offsets.len() - 1] - offsets[0] + 1) as usize
31}
32
33impl<T> ExplodeByOffsets for ChunkedArray<T>
34where
35 T: PolarsIntegerType,
36{
37 fn explode_by_offsets(&self, offsets: &[i64], options: ExplodeOptions) -> Series {
38 debug_assert_eq!(self.chunks.len(), 1);
39 let arr = self.downcast_iter().next().unwrap();
40
41 let values = &arr.values().as_slice()[..offsets[offsets.len() - 1] as usize];
43
44 let mut empty_row_idx = vec![];
45 let mut nulls = vec![];
46
47 let mut start = offsets[0] as usize;
48 let base_offset = start;
49 let mut last = start;
50
51 let mut new_values = Vec::with_capacity(offsets[offsets.len() - 1] as usize - start + 1);
52
53 if arr.null_count() > 0 {
66 let validity_values = arr.validity().unwrap();
67
68 for &o in &offsets[1..] {
69 let o = o as usize;
70 if options.empty_as_null && o == last {
71 if start != last {
72 #[cfg(debug_assertions)]
73 new_values.extend_from_slice(&values[start..last]);
74
75 #[cfg(not(debug_assertions))]
76 unsafe {
77 new_values.extend_from_slice(values.get_unchecked(start..last))
78 };
79
80 unsafe {
83 unset_nulls(
84 start,
85 last,
86 validity_values,
87 &mut nulls,
88 &empty_row_idx,
89 base_offset,
90 )
91 }
92 }
93
94 empty_row_idx.push(o + empty_row_idx.len() - base_offset);
95 new_values.push(T::Native::default());
96 start = o;
97 }
98 last = o;
99 }
100
101 unsafe {
105 unset_nulls(
106 start,
107 last,
108 validity_values,
109 &mut nulls,
110 &empty_row_idx,
111 base_offset,
112 )
113 }
114 } else {
115 for &o in &offsets[1..] {
116 let o = o as usize;
117 if options.empty_as_null && o == last {
118 if start != last {
119 unsafe { new_values.extend_from_slice(values.get_unchecked(start..last)) };
120 }
121
122 empty_row_idx.push(o + empty_row_idx.len() - base_offset);
123 new_values.push(T::Native::default());
124 start = o;
125 }
126 last = o;
127 }
128 }
129
130 new_values.extend_from_slice(&values[start..]);
132
133 let mut validity = MutableBitmap::with_capacity(new_values.len());
134 validity.extend_constant(new_values.len(), true);
135 let validity_slice = validity.as_mut_slice();
136
137 for i in empty_row_idx {
138 unsafe { set_bit_unchecked(validity_slice, i, false) }
139 }
140 for i in nulls {
141 unsafe { set_bit_unchecked(validity_slice, i, false) }
142 }
143 let arr = PrimitiveArray::new(
144 T::get_static_dtype().to_arrow(CompatLevel::newest()),
145 new_values.into(),
146 Some(validity.into()),
147 );
148 Series::try_from((self.name().clone(), Box::new(arr) as ArrayRef)).unwrap()
149 }
150}
151
152#[cfg(feature = "dtype-f16")]
153impl ExplodeByOffsets for Float16Chunked {
154 fn explode_by_offsets(&self, offsets: &[i64], options: ExplodeOptions) -> Series {
155 self.apply_as_ints(|s| {
156 let ca = s.u16().unwrap();
157 ca.explode_by_offsets(offsets, options)
158 })
159 }
160}
161
162impl ExplodeByOffsets for Float32Chunked {
163 fn explode_by_offsets(&self, offsets: &[i64], options: ExplodeOptions) -> Series {
164 self.apply_as_ints(|s| {
165 let ca = s.u32().unwrap();
166 ca.explode_by_offsets(offsets, options)
167 })
168 }
169}
170impl ExplodeByOffsets for Float64Chunked {
171 fn explode_by_offsets(&self, offsets: &[i64], options: ExplodeOptions) -> Series {
172 self.apply_as_ints(|s| {
173 let ca = s.u64().unwrap();
174 ca.explode_by_offsets(offsets, options)
175 })
176 }
177}
178
179impl ExplodeByOffsets for NullChunked {
180 fn explode_by_offsets(&self, offsets: &[i64], options: ExplodeOptions) -> Series {
181 let mut last_offset = offsets[0];
182
183 let mut len = 0;
184 for &offset in &offsets[1..] {
185 len += std::cmp::max(offset - last_offset, i64::from(options.empty_as_null)) as usize;
188 last_offset = offset;
189 }
190 NullChunked::new(self.name.clone(), len).into_series()
191 }
192}
193
194impl ExplodeByOffsets for BooleanChunked {
195 fn explode_by_offsets(&self, offsets: &[i64], options: ExplodeOptions) -> Series {
196 debug_assert_eq!(self.chunks.len(), 1);
197 let arr = self.downcast_iter().next().unwrap();
198
199 let cap = get_capacity(offsets);
200 let mut builder = BooleanChunkedBuilder::new(self.name().clone(), cap);
201
202 let mut start = offsets[0] as usize;
203 let mut last = start;
204 for &o in &offsets[1..] {
205 let o = o as usize;
206 if options.empty_as_null && o == last {
207 if start != last {
208 let vals = arr.slice_typed(start, last - start);
209
210 if vals.null_count() == 0 {
211 builder
212 .array_builder
213 .extend_trusted_len_values(vals.values_iter())
214 } else {
215 builder.array_builder.extend_trusted_len(vals.into_iter());
216 }
217 }
218 builder.append_null();
219 start = o;
220 }
221 last = o;
222 }
223 let vals = arr.slice_typed(start, last - start);
224 if vals.null_count() == 0 {
225 builder
226 .array_builder
227 .extend_trusted_len_values(vals.values_iter())
228 } else {
229 builder.array_builder.extend_trusted_len(vals.into_iter());
230 }
231 builder.finish().into()
232 }
233}
234
235pub(crate) fn offsets_to_indexes(
237 offsets: &[i64],
238 capacity: usize,
239 options: ExplodeOptions,
240 validity: Option<&Bitmap>,
241) -> Vec<IdxSize> {
242 if offsets.is_empty() {
243 return vec![];
244 }
245
246 let mut idx = Vec::with_capacity(capacity);
247
248 let mut last_idx = 0;
249 match validity {
250 None => {
251 for (offset_start, offset_end) in offsets.iter().zip(offsets[1..].iter()) {
252 if idx.len() >= capacity {
253 break;
256 }
257
258 if offset_start == offset_end {
259 if options.empty_as_null {
260 idx.push(last_idx);
263 }
264 } else {
265 let width = (offset_end - offset_start) as usize;
266 for _ in 0..width {
267 idx.push(last_idx);
268 }
269 }
270
271 last_idx += 1;
272 }
273 },
274 Some(validity) => {
275 for ((offset_start, offset_end), is_valid) in
276 offsets.iter().zip(offsets[1..].iter()).zip(validity.iter())
277 {
278 if idx.len() >= capacity {
279 break;
282 }
283
284 if offset_start == offset_end {
285 if (is_valid && options.empty_as_null) || (!is_valid && options.keep_nulls) {
286 idx.push(last_idx);
289 }
290 } else {
291 let width = (offset_end - offset_start) as usize;
292 for _ in 0..width {
293 idx.push(last_idx);
294 }
295 }
296
297 last_idx += 1;
298 }
299 },
300 }
301
302 for _ in 0..capacity.saturating_sub(idx.len()) {
304 idx.push(last_idx);
305 }
306 idx.truncate(capacity);
307 idx
308}
309
310#[cfg(test)]
311mod test {
312 use super::*;
313 use crate::chunked_array::builder::get_list_builder;
314
315 #[test]
316 fn test_explode_list() -> PolarsResult<()> {
317 let mut builder = get_list_builder(&DataType::Int32, 5, 5, PlSmallStr::from_static("a"));
318
319 builder
320 .append_series(&Series::new(PlSmallStr::EMPTY, &[1, 2, 3, 3]))
321 .unwrap();
322 builder
323 .append_series(&Series::new(PlSmallStr::EMPTY, &[1]))
324 .unwrap();
325 builder
326 .append_series(&Series::new(PlSmallStr::EMPTY, &[2]))
327 .unwrap();
328
329 let ca = builder.finish();
330 assert!(ca._can_fast_explode());
331
332 let exploded = ca.explode(ExplodeOptions {
334 empty_as_null: true,
335 keep_nulls: true,
336 })?;
337 let out: Vec<_> = exploded.i32()?.into_no_null_iter().collect();
338 assert_eq!(out, &[1, 2, 3, 3, 1, 2]);
339
340 let exploded = ca.slice(0, 1).explode(ExplodeOptions {
342 empty_as_null: true,
343 keep_nulls: true,
344 })?;
345 let out: Vec<_> = exploded.i32()?.into_no_null_iter().collect();
346 assert_eq!(out, &[1, 2, 3, 3]);
347
348 Ok(())
349 }
350
351 #[test]
352 fn test_explode_empty_list_slot() -> PolarsResult<()> {
353 let mut builder = get_list_builder(&DataType::Int32, 5, 5, PlSmallStr::from_static("a"));
355 builder
356 .append_series(&Series::new(PlSmallStr::EMPTY, &[1i32, 2]))
357 .unwrap();
358 builder
359 .append_series(&Int32Chunked::from_slice(PlSmallStr::EMPTY, &[]).into_series())
360 .unwrap();
361 builder
362 .append_series(&Series::new(PlSmallStr::EMPTY, &[3i32]))
363 .unwrap();
364
365 let ca = builder.finish();
366 let exploded = ca.explode(ExplodeOptions {
367 empty_as_null: true,
368 keep_nulls: true,
369 })?;
370 assert_eq!(
371 Vec::from(exploded.i32()?),
372 &[Some(1), Some(2), None, Some(3)]
373 );
374
375 let mut builder = get_list_builder(&DataType::Int32, 5, 5, PlSmallStr::from_static("a"));
377 builder
378 .append_series(&Series::new(PlSmallStr::EMPTY, &[1i32]))
379 .unwrap();
380 builder
381 .append_series(&Int32Chunked::from_slice(PlSmallStr::EMPTY, &[]).into_series())
382 .unwrap();
383 builder
384 .append_series(&Series::new(PlSmallStr::EMPTY, &[2i32]))
385 .unwrap();
386 builder
387 .append_series(&Int32Chunked::from_slice(PlSmallStr::EMPTY, &[]).into_series())
388 .unwrap();
389 builder
390 .append_series(&Series::new(PlSmallStr::EMPTY, &[3, 4i32]))
391 .unwrap();
392
393 let ca = builder.finish();
394 let exploded = ca.explode(ExplodeOptions {
395 empty_as_null: true,
396 keep_nulls: true,
397 })?;
398 assert_eq!(
399 Vec::from(exploded.i32()?),
400 &[Some(1), None, Some(2), None, Some(3), Some(4)]
401 );
402
403 let mut builder = get_list_builder(&DataType::String, 5, 5, PlSmallStr::from_static("a"));
405 builder
406 .append_series(&Series::new(PlSmallStr::EMPTY, &["abc"]))
407 .unwrap();
408 builder
409 .append_series(
410 &<StringChunked as NewChunkedArray<StringType, &str>>::from_slice(
411 PlSmallStr::EMPTY,
412 &[],
413 )
414 .into_series(),
415 )
416 .unwrap();
417 builder
418 .append_series(&Series::new(PlSmallStr::EMPTY, &["de"]))
419 .unwrap();
420 builder
421 .append_series(
422 &<StringChunked as NewChunkedArray<StringType, &str>>::from_slice(
423 PlSmallStr::EMPTY,
424 &[],
425 )
426 .into_series(),
427 )
428 .unwrap();
429 builder
430 .append_series(&Series::new(PlSmallStr::EMPTY, &["fg"]))
431 .unwrap();
432 builder
433 .append_series(
434 &<StringChunked as NewChunkedArray<StringType, &str>>::from_slice(
435 PlSmallStr::EMPTY,
436 &[],
437 )
438 .into_series(),
439 )
440 .unwrap();
441
442 let ca = builder.finish();
443 let exploded = ca.explode(ExplodeOptions {
444 empty_as_null: true,
445 keep_nulls: true,
446 })?;
447 assert_eq!(
448 Vec::from(exploded.str()?),
449 &[Some("abc"), None, Some("de"), None, Some("fg"), None]
450 );
451
452 let mut builder = get_list_builder(&DataType::Boolean, 5, 5, PlSmallStr::from_static("a"));
454 builder
455 .append_series(&Series::new(PlSmallStr::EMPTY, &[true]))
456 .unwrap();
457 builder
458 .append_series(&BooleanChunked::from_slice(PlSmallStr::EMPTY, &[]).into_series())
459 .unwrap();
460 builder
461 .append_series(&Series::new(PlSmallStr::EMPTY, &[false]))
462 .unwrap();
463 builder
464 .append_series(&BooleanChunked::from_slice(PlSmallStr::EMPTY, &[]).into_series())
465 .unwrap();
466 builder
467 .append_series(&Series::new(PlSmallStr::EMPTY, &[true, true]))
468 .unwrap();
469
470 let ca = builder.finish();
471 let exploded = ca.explode(ExplodeOptions {
472 empty_as_null: true,
473 keep_nulls: true,
474 })?;
475 assert_eq!(
476 Vec::from(exploded.bool()?),
477 &[Some(true), None, Some(false), None, Some(true), Some(true)]
478 );
479
480 Ok(())
481 }
482
483 #[test]
484 fn test_row_offsets() {
485 let offsets = &[0, 1, 2, 2, 3, 4, 4];
486 let out = offsets_to_indexes(
487 offsets,
488 6,
489 ExplodeOptions {
490 empty_as_null: true,
491 keep_nulls: true,
492 },
493 None,
494 );
495 assert_eq!(out, &[0, 1, 2, 3, 4, 5]);
496 }
497
498 #[test]
499 fn test_empty_row_offsets() {
500 let offsets = &[0, 0];
501 let out = offsets_to_indexes(
502 offsets,
503 0,
504 ExplodeOptions {
505 empty_as_null: true,
506 keep_nulls: true,
507 },
508 None,
509 );
510 let expected: Vec<IdxSize> = Vec::new();
511 assert_eq!(out, expected);
512 }
513
514 #[test]
515 fn test_row_offsets_over_capacity() {
516 let offsets = &[0, 1, 1, 2, 2];
517 let out = offsets_to_indexes(
518 offsets,
519 2,
520 ExplodeOptions {
521 empty_as_null: true,
522 keep_nulls: true,
523 },
524 None,
525 );
526 assert_eq!(out, &[0, 1]);
527 }
528
529 #[test]
530 fn test_row_offsets_nonzero_first_offset() {
531 let offsets = &[3, 6, 8];
532 let out = offsets_to_indexes(
533 offsets,
534 10,
535 ExplodeOptions {
536 empty_as_null: true,
537 keep_nulls: true,
538 },
539 None,
540 );
541 assert_eq!(out, &[0, 0, 0, 1, 1, 2, 2, 2, 2, 2]);
542 }
543}