polars_ops/chunked_array/list/
count.rs

1use arrow::array::{Array, BooleanArray};
2use arrow::bitmap::Bitmap;
3use arrow::bitmap::utils::count_zeros;
4use arrow::legacy::utils::CustomIterTools;
5
6use super::*;
7
8fn count_bits_set_by_offsets(values: &Bitmap, offset: &[i64]) -> Vec<IdxSize> {
9    // Fast path where all bits are either set or unset.
10    if values.unset_bits() == values.len() {
11        return vec![0 as IdxSize; offset.len() - 1];
12    } else if values.unset_bits() == 0 {
13        let mut start = offset[0];
14        let v = (offset[1..])
15            .iter()
16            .map(|end| {
17                let current_offset = start;
18                start = *end;
19                (end - current_offset) as IdxSize
20            })
21            .collect_trusted();
22        return v;
23    }
24
25    let (bits, bitmap_offset, _) = values.as_slice();
26
27    let mut running_offset = offset[0];
28
29    (offset[1..])
30        .iter()
31        .map(|end| {
32            let current_offset = running_offset;
33            running_offset = *end;
34
35            let len = (end - current_offset) as usize;
36
37            let set_ones = len - count_zeros(bits, bitmap_offset + current_offset as usize, len);
38            set_ones as IdxSize
39        })
40        .collect_trusted()
41}
42
/// Counts, for every list in `ca`, how many inner elements match `value`.
///
/// The inner values are compared against `value` with `equal_missing`, and the
/// resulting boolean mask is tallied per list by `count_boolean_bits`.
///
/// # Errors
///
/// Propagates any error returned by the comparison or by `apply_to_inner`.
#[cfg(feature = "list_count")]
pub fn list_count_matches(ca: &ListChunked, value: AnyValue) -> PolarsResult<Series> {
    // Wrap the scalar in a single-element series so it can be compared.
    let needle = Series::new(PlSmallStr::EMPTY, [value]);

    let to_match_mask = |inner: Series| {
        let compared = ChunkCompareEq::<&Series>::equal_missing(&inner, &needle);
        compared.map(|mask| mask.into_series())
    };
    let matches = ca.apply_to_inner(&to_match_mask)?;

    let counts = count_boolean_bits(&matches);
    Ok(counts.into_series())
}
53
54pub(super) fn count_boolean_bits(ca: &ListChunked) -> IdxCa {
55    let chunks = ca.downcast_iter().map(|arr| {
56        let inner_arr = arr.values();
57        let mask = inner_arr.as_any().downcast_ref::<BooleanArray>().unwrap();
58        assert_eq!(mask.null_count(), 0);
59        let out = count_bits_set_by_offsets(mask.values(), arr.offsets().as_slice());
60        IdxArr::from_data_default(out.into(), arr.validity().cloned())
61    });
62    IdxCa::from_chunk_iter(ca.name().clone(), chunks)
63}