// polars_core/chunked_array/logical/categorical/ops/unique.rs
use super::*;

3impl CategoricalChunked {
4 pub fn unique(&self) -> PolarsResult<Self> {
5 let cat_map = self.get_rev_map();
6 if self.is_empty() {
7 unsafe {
9 return Ok(CategoricalChunked::from_cats_and_rev_map_unchecked(
10 UInt32Chunked::full_null(self.name().clone(), 0),
11 cat_map.clone(),
12 self.is_enum(),
13 self.get_ordering(),
14 ));
15 }
16 };
17
18 if self._can_fast_unique() {
19 let ca = match &**cat_map {
20 RevMapping::Local(a, _) => UInt32Chunked::from_iter_values(
21 self.physical().name().clone(),
22 0..(a.len() as u32),
23 ),
24 RevMapping::Global(map, _, _) => UInt32Chunked::from_iter_values(
25 self.physical().name().clone(),
26 map.keys().copied(),
27 ),
28 };
29 unsafe {
32 let mut out = CategoricalChunked::from_cats_and_rev_map_unchecked(
33 ca,
34 cat_map.clone(),
35 self.is_enum(),
36 self.get_ordering(),
37 );
38 out.set_fast_unique(true);
39 Ok(out)
40 }
41 } else {
42 let ca = self.physical().unique()?;
43 unsafe {
46 Ok(CategoricalChunked::from_cats_and_rev_map_unchecked(
47 ca,
48 cat_map.clone(),
49 self.is_enum(),
50 self.get_ordering(),
51 ))
52 }
53 }
54 }
55
56 pub fn n_unique(&self) -> PolarsResult<usize> {
57 if self._can_fast_unique() {
58 Ok(self.get_rev_map().len())
59 } else {
60 self.physical().n_unique()
61 }
62 }
63
64 pub fn value_counts(&self) -> PolarsResult<DataFrame> {
65 let groups = self.physical().group_tuples(true, false).unwrap();
66 let physical_values = unsafe {
67 self.physical()
68 .clone()
69 .into_series()
70 .agg_first(&groups)
71 .u32()
72 .unwrap()
73 .clone()
74 };
75
76 let mut values = self.clone();
77 *values.physical_mut() = physical_values;
78
79 let mut counts = groups.group_count();
80 counts.rename(PlSmallStr::from_static("counts"));
81 let height = counts.len();
82 let cols = vec![values.into_series().into(), counts.into_series().into()];
83 let df = unsafe { DataFrame::new_no_checks(height, cols) };
84 df.sort(
85 ["counts"],
86 SortMultipleOptions::default().with_order_descending(true),
87 )
88 }
89}