polars_core/chunked_array/builder/
categorical.rs

1use arrow::bitmap::BitmapBuilder;
2
3use crate::prelude::*;
4
5pub struct CategoricalChunkedBuilder<T: PolarsCategoricalType> {
6    name: PlSmallStr,
7    dtype: DataType,
8    mapping: Arc<CategoricalMapping>,
9    is_enum: bool,
10    cats: Vec<T::Native>,
11    validity: BitmapBuilder,
12}
13
14impl<T: PolarsCategoricalType> CategoricalChunkedBuilder<T> {
15    pub fn new(name: PlSmallStr, dtype: DataType) -> Self {
16        let (DataType::Categorical(_, mapping) | DataType::Enum(_, mapping)) = &dtype else {
17            panic!("non-Categorical/Enum dtype in CategoricalChunkedbuilder")
18        };
19        Self {
20            name,
21            mapping: mapping.clone(),
22            is_enum: matches!(dtype, DataType::Enum(_, _)),
23            dtype,
24            cats: Vec::new(),
25            validity: BitmapBuilder::new(),
26        }
27    }
28
29    pub fn dtype(&self) -> &DataType {
30        &self.dtype
31    }
32
33    pub fn reserve(&mut self, len: usize) {
34        self.cats.reserve(len);
35        self.validity.reserve(len);
36    }
37
38    pub fn append_cat(
39        &mut self,
40        cat: CatSize,
41        mapping: &Arc<CategoricalMapping>,
42    ) -> PolarsResult<()> {
43        if Arc::ptr_eq(&self.mapping, mapping) {
44            self.cats.push(T::Native::from_cat(cat));
45            self.validity.push(true);
46        } else if let Some(s) = mapping.cat_to_str(cat) {
47            self.append_str(s)?;
48        } else {
49            self.append_null();
50        }
51        Ok(())
52    }
53
54    pub fn append_str(&mut self, val: &str) -> PolarsResult<()> {
55        let cat = if self.is_enum {
56            self.mapping.get_cat(val).ok_or_else(|| {
57                polars_err!(ComputeError: "attempted to insert '{val}' into Enum which does not contain this string")
58            })?
59        } else {
60            self.mapping.insert_cat(val)?
61        };
62        self.cats.push(T::Native::from_cat(cat));
63        self.validity.push(true);
64        Ok(())
65    }
66
67    pub fn append_null(&mut self) {
68        self.cats.push(T::Native::default());
69        self.validity.push(false);
70    }
71
72    pub fn finish(self) -> CategoricalChunked<T> {
73        unsafe {
74            let phys = ChunkedArray::from_vec_validity(
75                self.name,
76                self.cats,
77                self.validity.into_opt_validity(),
78            );
79            CategoricalChunked::from_cats_and_dtype_unchecked(phys, self.dtype)
80        }
81    }
82}