1#![allow(unsafe_op_in_unsafe_fn)]
2use arrow::array::*;
3use arrow::bitmap::utils::set_bit_unchecked;
4use arrow::bitmap::{Bitmap, MutableBitmap};
5use arrow::legacy::prelude::*;
6
7use crate::prelude::*;
8use crate::series::implementations::null::NullChunked;
9
10pub(crate) trait ExplodeByOffsets {
11 fn explode_by_offsets(&self, offsets: &[i64]) -> Series;
12}
13
14unsafe fn unset_nulls(
15 start: usize,
16 last: usize,
17 validity_values: &Bitmap,
18 nulls: &mut Vec<usize>,
19 empty_row_idx: &[usize],
20 base_offset: usize,
21) {
22 for i in start..last {
23 if !validity_values.get_bit_unchecked(i) {
24 nulls.push(i + empty_row_idx.len() - base_offset);
25 }
26 }
27}
28
29fn get_capacity(offsets: &[i64]) -> usize {
30 (offsets[offsets.len() - 1] - offsets[0] + 1) as usize
31}
32
33impl<T> ExplodeByOffsets for ChunkedArray<T>
34where
35 T: PolarsIntegerType,
36{
37 fn explode_by_offsets(&self, offsets: &[i64]) -> Series {
38 debug_assert_eq!(self.chunks.len(), 1);
39 let arr = self.downcast_iter().next().unwrap();
40
41 let values = &arr.values().as_slice()[..offsets[offsets.len() - 1] as usize];
43
44 let mut empty_row_idx = vec![];
45 let mut nulls = vec![];
46
47 let mut start = offsets[0] as usize;
48 let base_offset = start;
49 let mut last = start;
50
51 let mut new_values = Vec::with_capacity(offsets[offsets.len() - 1] as usize - start + 1);
52
53 if arr.null_count() > 0 {
66 let validity_values = arr.validity().unwrap();
67
68 for &o in &offsets[1..] {
69 let o = o as usize;
70 if o == last {
71 if start != last {
72 #[cfg(debug_assertions)]
73 new_values.extend_from_slice(&values[start..last]);
74
75 #[cfg(not(debug_assertions))]
76 unsafe {
77 new_values.extend_from_slice(values.get_unchecked(start..last))
78 };
79
80 unsafe {
83 unset_nulls(
84 start,
85 last,
86 validity_values,
87 &mut nulls,
88 &empty_row_idx,
89 base_offset,
90 )
91 }
92 }
93
94 empty_row_idx.push(o + empty_row_idx.len() - base_offset);
95 new_values.push(T::Native::default());
96 start = o;
97 }
98 last = o;
99 }
100
101 unsafe {
105 unset_nulls(
106 start,
107 last,
108 validity_values,
109 &mut nulls,
110 &empty_row_idx,
111 base_offset,
112 )
113 }
114 } else {
115 for &o in &offsets[1..] {
116 let o = o as usize;
117 if o == last {
118 if start != last {
119 unsafe { new_values.extend_from_slice(values.get_unchecked(start..last)) };
120 }
121
122 empty_row_idx.push(o + empty_row_idx.len() - base_offset);
123 new_values.push(T::Native::default());
124 start = o;
125 }
126 last = o;
127 }
128 }
129
130 new_values.extend_from_slice(&values[start..]);
132
133 let mut validity = MutableBitmap::with_capacity(new_values.len());
134 validity.extend_constant(new_values.len(), true);
135 let validity_slice = validity.as_mut_slice();
136
137 for i in empty_row_idx {
138 unsafe { set_bit_unchecked(validity_slice, i, false) }
139 }
140 for i in nulls {
141 unsafe { set_bit_unchecked(validity_slice, i, false) }
142 }
143 let arr = PrimitiveArray::new(
144 T::get_dtype().to_arrow(CompatLevel::newest()),
145 new_values.into(),
146 Some(validity.into()),
147 );
148 Series::try_from((self.name().clone(), Box::new(arr) as ArrayRef)).unwrap()
149 }
150}
151
152impl ExplodeByOffsets for Float32Chunked {
153 fn explode_by_offsets(&self, offsets: &[i64]) -> Series {
154 self.apply_as_ints(|s| {
155 let ca = s.u32().unwrap();
156 ca.explode_by_offsets(offsets)
157 })
158 }
159}
160impl ExplodeByOffsets for Float64Chunked {
161 fn explode_by_offsets(&self, offsets: &[i64]) -> Series {
162 self.apply_as_ints(|s| {
163 let ca = s.u64().unwrap();
164 ca.explode_by_offsets(offsets)
165 })
166 }
167}
168
169impl ExplodeByOffsets for NullChunked {
170 fn explode_by_offsets(&self, offsets: &[i64]) -> Series {
171 let mut last_offset = offsets[0];
172
173 let mut len = 0;
174 for &offset in &offsets[1..] {
175 len += std::cmp::max(offset - last_offset, 1) as usize;
178 last_offset = offset;
179 }
180 NullChunked::new(self.name.clone(), len).into_series()
181 }
182}
183
184impl ExplodeByOffsets for BooleanChunked {
185 fn explode_by_offsets(&self, offsets: &[i64]) -> Series {
186 debug_assert_eq!(self.chunks.len(), 1);
187 let arr = self.downcast_iter().next().unwrap();
188
189 let cap = get_capacity(offsets);
190 let mut builder = BooleanChunkedBuilder::new(self.name().clone(), cap);
191
192 let mut start = offsets[0] as usize;
193 let mut last = start;
194 for &o in &offsets[1..] {
195 let o = o as usize;
196 if o == last {
197 if start != last {
198 let vals = arr.slice_typed(start, last - start);
199
200 if vals.null_count() == 0 {
201 builder
202 .array_builder
203 .extend_trusted_len_values(vals.values_iter())
204 } else {
205 builder.array_builder.extend_trusted_len(vals.into_iter());
206 }
207 }
208 builder.append_null();
209 start = o;
210 }
211 last = o;
212 }
213 let vals = arr.slice_typed(start, last - start);
214 if vals.null_count() == 0 {
215 builder
216 .array_builder
217 .extend_trusted_len_values(vals.values_iter())
218 } else {
219 builder.array_builder.extend_trusted_len(vals.into_iter());
220 }
221 builder.finish().into()
222 }
223}
224
225pub(crate) fn offsets_to_indexes(offsets: &[i64], capacity: usize) -> Vec<IdxSize> {
227 if offsets.is_empty() {
228 return vec![];
229 }
230
231 let mut idx = Vec::with_capacity(capacity);
232
233 let mut last_idx = 0;
234 for (offset_start, offset_end) in offsets.iter().zip(offsets[1..].iter()) {
235 if idx.len() >= capacity {
236 break;
239 }
240
241 if offset_start == offset_end {
242 idx.push(last_idx);
245 } else {
246 let width = (offset_end - offset_start) as usize;
247 for _ in 0..width {
248 idx.push(last_idx);
249 }
250 }
251
252 last_idx += 1;
253 }
254
255 for _ in 0..capacity.saturating_sub(idx.len()) {
257 idx.push(last_idx);
258 }
259 idx.truncate(capacity);
260 idx
261}
262
263#[cfg(test)]
264mod test {
265 use super::*;
266 use crate::chunked_array::builder::get_list_builder;
267
268 #[test]
269 fn test_explode_list() -> PolarsResult<()> {
270 let mut builder = get_list_builder(&DataType::Int32, 5, 5, PlSmallStr::from_static("a"));
271
272 builder
273 .append_series(&Series::new(PlSmallStr::EMPTY, &[1, 2, 3, 3]))
274 .unwrap();
275 builder
276 .append_series(&Series::new(PlSmallStr::EMPTY, &[1]))
277 .unwrap();
278 builder
279 .append_series(&Series::new(PlSmallStr::EMPTY, &[2]))
280 .unwrap();
281
282 let ca = builder.finish();
283 assert!(ca._can_fast_explode());
284
285 let exploded = ca.explode()?;
287 let out: Vec<_> = exploded.i32()?.into_no_null_iter().collect();
288 assert_eq!(out, &[1, 2, 3, 3, 1, 2]);
289
290 let exploded = ca.slice(0, 1).explode()?;
292 let out: Vec<_> = exploded.i32()?.into_no_null_iter().collect();
293 assert_eq!(out, &[1, 2, 3, 3]);
294
295 Ok(())
296 }
297
298 #[test]
299 fn test_explode_empty_list_slot() -> PolarsResult<()> {
300 let mut builder = get_list_builder(&DataType::Int32, 5, 5, PlSmallStr::from_static("a"));
302 builder
303 .append_series(&Series::new(PlSmallStr::EMPTY, &[1i32, 2]))
304 .unwrap();
305 builder
306 .append_series(&Int32Chunked::from_slice(PlSmallStr::EMPTY, &[]).into_series())
307 .unwrap();
308 builder
309 .append_series(&Series::new(PlSmallStr::EMPTY, &[3i32]))
310 .unwrap();
311
312 let ca = builder.finish();
313 let exploded = ca.explode()?;
314 assert_eq!(
315 Vec::from(exploded.i32()?),
316 &[Some(1), Some(2), None, Some(3)]
317 );
318
319 let mut builder = get_list_builder(&DataType::Int32, 5, 5, PlSmallStr::from_static("a"));
321 builder
322 .append_series(&Series::new(PlSmallStr::EMPTY, &[1i32]))
323 .unwrap();
324 builder
325 .append_series(&Int32Chunked::from_slice(PlSmallStr::EMPTY, &[]).into_series())
326 .unwrap();
327 builder
328 .append_series(&Series::new(PlSmallStr::EMPTY, &[2i32]))
329 .unwrap();
330 builder
331 .append_series(&Int32Chunked::from_slice(PlSmallStr::EMPTY, &[]).into_series())
332 .unwrap();
333 builder
334 .append_series(&Series::new(PlSmallStr::EMPTY, &[3, 4i32]))
335 .unwrap();
336
337 let ca = builder.finish();
338 let exploded = ca.explode()?;
339 assert_eq!(
340 Vec::from(exploded.i32()?),
341 &[Some(1), None, Some(2), None, Some(3), Some(4)]
342 );
343
344 let mut builder = get_list_builder(&DataType::String, 5, 5, PlSmallStr::from_static("a"));
346 builder
347 .append_series(&Series::new(PlSmallStr::EMPTY, &["abc"]))
348 .unwrap();
349 builder
350 .append_series(
351 &<StringChunked as NewChunkedArray<StringType, &str>>::from_slice(
352 PlSmallStr::EMPTY,
353 &[],
354 )
355 .into_series(),
356 )
357 .unwrap();
358 builder
359 .append_series(&Series::new(PlSmallStr::EMPTY, &["de"]))
360 .unwrap();
361 builder
362 .append_series(
363 &<StringChunked as NewChunkedArray<StringType, &str>>::from_slice(
364 PlSmallStr::EMPTY,
365 &[],
366 )
367 .into_series(),
368 )
369 .unwrap();
370 builder
371 .append_series(&Series::new(PlSmallStr::EMPTY, &["fg"]))
372 .unwrap();
373 builder
374 .append_series(
375 &<StringChunked as NewChunkedArray<StringType, &str>>::from_slice(
376 PlSmallStr::EMPTY,
377 &[],
378 )
379 .into_series(),
380 )
381 .unwrap();
382
383 let ca = builder.finish();
384 let exploded = ca.explode()?;
385 assert_eq!(
386 Vec::from(exploded.str()?),
387 &[Some("abc"), None, Some("de"), None, Some("fg"), None]
388 );
389
390 let mut builder = get_list_builder(&DataType::Boolean, 5, 5, PlSmallStr::from_static("a"));
392 builder
393 .append_series(&Series::new(PlSmallStr::EMPTY, &[true]))
394 .unwrap();
395 builder
396 .append_series(&BooleanChunked::from_slice(PlSmallStr::EMPTY, &[]).into_series())
397 .unwrap();
398 builder
399 .append_series(&Series::new(PlSmallStr::EMPTY, &[false]))
400 .unwrap();
401 builder
402 .append_series(&BooleanChunked::from_slice(PlSmallStr::EMPTY, &[]).into_series())
403 .unwrap();
404 builder
405 .append_series(&Series::new(PlSmallStr::EMPTY, &[true, true]))
406 .unwrap();
407
408 let ca = builder.finish();
409 let exploded = ca.explode()?;
410 assert_eq!(
411 Vec::from(exploded.bool()?),
412 &[Some(true), None, Some(false), None, Some(true), Some(true)]
413 );
414
415 Ok(())
416 }
417
418 #[test]
419 fn test_row_offsets() {
420 let offsets = &[0, 1, 2, 2, 3, 4, 4];
421 let out = offsets_to_indexes(offsets, 6);
422 assert_eq!(out, &[0, 1, 2, 3, 4, 5]);
423 }
424
425 #[test]
426 fn test_empty_row_offsets() {
427 let offsets = &[0, 0];
428 let out = offsets_to_indexes(offsets, 0);
429 let expected: Vec<IdxSize> = Vec::new();
430 assert_eq!(out, expected);
431 }
432
433 #[test]
434 fn test_row_offsets_over_capacity() {
435 let offsets = &[0, 1, 1, 2, 2];
436 let out = offsets_to_indexes(offsets, 2);
437 assert_eq!(out, &[0, 1]);
438 }
439
440 #[test]
441 fn test_row_offsets_nonzero_first_offset() {
442 let offsets = &[3, 6, 8];
443 let out = offsets_to_indexes(offsets, 10);
444 assert_eq!(out, &[0, 0, 0, 1, 1, 2, 2, 2, 2, 2]);
445 }
446}