polars_core/chunked_array/builder/
categorical.rs1use arrow::bitmap::BitmapBuilder;
2
3use crate::prelude::*;
4
5pub struct CategoricalChunkedBuilder<T: PolarsCategoricalType> {
6 name: PlSmallStr,
7 dtype: DataType,
8 mapping: Arc<CategoricalMapping>,
9 is_enum: bool,
10 cats: Vec<T::Native>,
11 validity: BitmapBuilder,
12}
13
14impl<T: PolarsCategoricalType> CategoricalChunkedBuilder<T> {
15 pub fn new(name: PlSmallStr, dtype: DataType) -> Self {
16 let (DataType::Categorical(_, mapping) | DataType::Enum(_, mapping)) = &dtype else {
17 panic!("non-Categorical/Enum dtype in CategoricalChunkedbuilder")
18 };
19 Self {
20 name,
21 mapping: mapping.clone(),
22 is_enum: matches!(dtype, DataType::Enum(_, _)),
23 dtype,
24 cats: Vec::new(),
25 validity: BitmapBuilder::new(),
26 }
27 }
28
29 pub fn dtype(&self) -> &DataType {
30 &self.dtype
31 }
32
33 pub fn reserve(&mut self, len: usize) {
34 self.cats.reserve(len);
35 self.validity.reserve(len);
36 }
37
38 pub fn append_cat(
39 &mut self,
40 cat: CatSize,
41 mapping: &Arc<CategoricalMapping>,
42 ) -> PolarsResult<()> {
43 if Arc::ptr_eq(&self.mapping, mapping) {
44 self.cats.push(T::Native::from_cat(cat));
45 self.validity.push(true);
46 } else if let Some(s) = mapping.cat_to_str(cat) {
47 self.append_str(s)?;
48 } else {
49 self.append_null();
50 }
51 Ok(())
52 }
53
54 pub fn append_str(&mut self, val: &str) -> PolarsResult<()> {
55 let cat = if self.is_enum {
56 self.mapping.get_cat(val).ok_or_else(|| {
57 polars_err!(ComputeError: "attempted to insert '{val}' into Enum which does not contain this string")
58 })?
59 } else {
60 self.mapping.insert_cat(val)?
61 };
62 self.cats.push(T::Native::from_cat(cat));
63 self.validity.push(true);
64 Ok(())
65 }
66
67 pub fn append_null(&mut self) {
68 self.cats.push(T::Native::default());
69 self.validity.push(false);
70 }
71
72 pub fn finish(self) -> CategoricalChunked<T> {
73 unsafe {
74 let phys = ChunkedArray::from_vec_validity(
75 self.name,
76 self.cats,
77 self.validity.into_opt_validity(),
78 );
79 CategoricalChunked::from_cats_and_dtype_unchecked(phys, self.dtype)
80 }
81 }
82}