polars_core/chunked_array/logical/categorical/
revmap.rs1#![allow(unsafe_op_in_unsafe_fn)]
2use std::fmt::{Debug, Formatter};
3use std::hash::{BuildHasher, Hash, Hasher};
4
5use arrow::array::*;
6use polars_utils::aliases::PlFixedStateQuality;
7
8use crate::datatypes::PlHashMap;
9use crate::{StringCache, using_string_cache};
10
/// Reverse mapping from categorical `u32` codes back to their string values.
#[derive(Clone)]
pub enum RevMapping {
    /// Mapping associated with a string cache.
    ///
    /// Fields, in order:
    /// - a map from category code to the position of its string in the array,
    /// - the array holding the category strings,
    /// - the id (`uuid`) of the string cache this mapping belongs to.
    Global(PlHashMap<u32, u32>, Utf8ViewArray, u32),
    /// Self-contained mapping in which the category code is used directly as the
    /// position in the array.
    ///
    /// Fields, in order:
    /// - the array holding the category strings,
    /// - a 128-bit hash of those strings, compared by `same_src` to decide whether
    ///   two local mappings come from the same source.
    Local(Utf8ViewArray, u128),
}
19
20impl Debug for RevMapping {
21 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
22 match self {
23 RevMapping::Global(_, _, _) => {
24 write!(f, "global")
25 },
26 RevMapping::Local(_, _) => {
27 write!(f, "local")
28 },
29 }
30 }
31}
32
33impl Default for RevMapping {
34 fn default() -> Self {
35 let slice: &[Option<&str>] = &[];
36 let cats = Utf8ViewArray::from_slice(slice);
37 if using_string_cache() {
38 let cache = &mut crate::STRING_CACHE.lock_map();
39 let id = cache.uuid;
40 RevMapping::Global(Default::default(), cats, id)
41 } else {
42 RevMapping::build_local(cats)
43 }
44 }
45}
46
47#[allow(clippy::len_without_is_empty)]
48impl RevMapping {
49 pub fn is_active_global(&self) -> bool {
50 match self {
51 Self::Global(_, _, id) => *id == StringCache::active_cache_id(),
52 _ => false,
53 }
54 }
55
56 pub fn is_global(&self) -> bool {
57 matches!(self, Self::Global(_, _, _))
58 }
59
60 pub fn is_local(&self) -> bool {
61 matches!(self, Self::Local(_, _))
62 }
63
64 pub fn get_categories(&self) -> &Utf8ViewArray {
66 match self {
67 Self::Global(_, a, _) => a,
68 Self::Local(a, _) => a,
69 }
70 }
71
72 fn build_hash(categories: &Utf8ViewArray) -> u128 {
73 let mut hb = PlFixedStateQuality::with_seed(0).build_hasher();
75 categories.values_iter().for_each(|val| {
76 val.hash(&mut hb);
77 });
78 let hash = hb.finish();
79 ((hash as u128) << 64) | (categories.total_buffer_len() as u128)
80 }
81
82 pub fn build_local(categories: Utf8ViewArray) -> Self {
83 debug_assert_eq!(categories.null_count(), 0);
84 let hash = Self::build_hash(&categories);
85 Self::Local(categories, hash)
86 }
87
88 pub fn len(&self) -> usize {
90 self.get_categories().len()
91 }
92
93 pub fn get(&self, idx: u32) -> &str {
97 match self {
98 Self::Global(map, a, _) => {
99 let idx = *map.get(&idx).unwrap();
100 a.value(idx as usize)
101 },
102 Self::Local(a, _) => a.value(idx as usize),
103 }
104 }
105
106 pub fn get_optional(&self, idx: u32) -> Option<&str> {
107 match self {
108 Self::Global(map, a, _) => {
109 let idx = *map.get(&idx)?;
110 a.get(idx as usize)
111 },
112 Self::Local(a, _) => a.get(idx as usize),
113 }
114 }
115
116 pub(crate) unsafe fn get_unchecked(&self, idx: u32) -> &str {
123 match self {
124 Self::Global(map, a, _) => {
125 let idx = *map.get(&idx).unwrap();
126 a.value_unchecked(idx as usize)
127 },
128 Self::Local(a, _) => a.value_unchecked(idx as usize),
129 }
130 }
131 #[inline]
133 pub fn same_src(&self, other: &Self) -> bool {
134 match (self, other) {
135 (RevMapping::Global(_, _, l), RevMapping::Global(_, _, r)) => *l == *r,
136 (RevMapping::Local(_, l_hash), RevMapping::Local(_, r_hash)) => l_hash == r_hash,
137 _ => false,
138 }
139 }
140
141 pub fn find(&self, value: &str) -> Option<u32> {
146 match self {
147 Self::Global(rev_map, a, id) => {
148 if using_string_cache() {
150 let map = crate::STRING_CACHE.read_map();
151 if map.uuid == *id {
152 return map.get_cat(value);
153 }
154 }
155 rev_map
156 .iter()
157 .find(|&(_k, &v)| (unsafe { a.value_unchecked(v as usize) } == value))
160 .map(|(k, _v)| *k)
161 },
162
163 Self::Local(a, _) => {
164 unsafe { (0..a.len()).find(|idx| a.value_unchecked(*idx) == value) }
166 .map(|idx| idx as u32)
167 },
168 }
169 }
170}