// polars_core/chunked_array/logical/enum_/mod.rs
use std::sync::Arc;

use arrow::array::UInt32Vec;
use arrow::bitmap::MutableBitmap;
use polars_error::{PolarsResult, polars_bail, polars_err};
use polars_utils::aliases::{InitHashMaps, PlHashMap};
use polars_utils::pl_str::PlSmallStr;

use super::{CategoricalChunked, CategoricalOrdering, DataType, Field, RevMapping, UInt32Chunked};
10
11pub struct EnumChunkedBuilder {
12 name: PlSmallStr,
13 enum_builder: UInt32Vec,
14
15 rev: Arc<RevMapping>,
16 ordering: CategoricalOrdering,
17 seen: MutableBitmap,
18
19 mapping: PlHashMap<PlSmallStr, u32>,
21 strict: bool,
22}
23
24impl EnumChunkedBuilder {
25 pub fn new(
26 name: PlSmallStr,
27 capacity: usize,
28 rev: Arc<RevMapping>,
29 ordering: CategoricalOrdering,
30 strict: bool,
31 ) -> Self {
32 let seen = MutableBitmap::from_len_zeroed(rev.len());
33
34 Self {
35 name,
36 enum_builder: UInt32Vec::with_capacity(capacity),
37
38 rev,
39 ordering,
40 seen,
41
42 mapping: PlHashMap::new(),
43 strict,
44 }
45 }
46
47 pub fn append_str(&mut self, v: &str) -> PolarsResult<&mut Self> {
48 match self.mapping.get(v) {
49 Some(v) => self.enum_builder.push(Some(*v)),
50 None => {
51 let Some(iv) = self.rev.find(v) else {
52 if self.strict {
53 polars_bail!(InvalidOperation: "cannot append '{v}' to enum without that variant");
54 } else {
55 self.enum_builder.push(None);
56 return Ok(self);
57 }
58 };
59 self.seen.set(iv as usize, true);
60 self.mapping.insert(v.into(), iv);
61 self.enum_builder.push(Some(iv));
62 },
63 }
64
65 Ok(self)
66 }
67
68 pub fn append_null(&mut self) -> &mut Self {
69 self.enum_builder.push(None);
70 self
71 }
72
73 pub fn append_enum(&mut self, v: u32, rev: &RevMapping) -> PolarsResult<&mut Self> {
74 if !self.rev.same_src(rev) {
75 if self.strict {
76 return Err(polars_err!(ComputeError: "incompatible enum types"));
77 } else {
78 self.enum_builder.push(None);
79 }
80 } else {
81 self.seen.set(v as usize, true);
82 self.enum_builder.push(Some(v));
83 }
84
85 Ok(self)
86 }
87
88 pub fn finish(self) -> CategoricalChunked {
89 let arr = self.enum_builder.freeze();
90 let null_count = arr.validity().map_or(0, |a| a.unset_bits());
91 let length = arr.len();
92 let ca = unsafe {
93 UInt32Chunked::new_with_dims(
94 Arc::new(Field::new(self.name, DataType::UInt32)),
95 vec![Box::new(arr)],
96 length,
97 null_count,
98 )
99 };
100 let fast_unique = !ca.has_nulls() && self.seen.unset_bits() == 0;
102
103 unsafe {
105 CategoricalChunked::from_cats_and_rev_map_unchecked(ca, self.rev, true, self.ordering)
106 .with_fast_unique(fast_unique)
107 }
108 }
109}