polars_core/chunked_array/logical/categorical/ops/
unique.rs

1use super::*;
2
3impl CategoricalChunked {
4    pub fn unique(&self) -> PolarsResult<Self> {
5        let cat_map = self.get_rev_map();
6        if self.is_empty() {
7            // SAFETY: rev map is valid.
8            unsafe {
9                return Ok(CategoricalChunked::from_cats_and_rev_map_unchecked(
10                    UInt32Chunked::full_null(self.name().clone(), 0),
11                    cat_map.clone(),
12                    self.is_enum(),
13                    self.get_ordering(),
14                ));
15            }
16        };
17
18        if self._can_fast_unique() {
19            let ca = match &**cat_map {
20                RevMapping::Local(a, _) => UInt32Chunked::from_iter_values(
21                    self.physical().name().clone(),
22                    0..(a.len() as u32),
23                ),
24                RevMapping::Global(map, _, _) => UInt32Chunked::from_iter_values(
25                    self.physical().name().clone(),
26                    map.keys().copied(),
27                ),
28            };
29            // SAFETY:
30            // we only removed some indexes so we are still in bounds
31            unsafe {
32                let mut out = CategoricalChunked::from_cats_and_rev_map_unchecked(
33                    ca,
34                    cat_map.clone(),
35                    self.is_enum(),
36                    self.get_ordering(),
37                );
38                out.set_fast_unique(true);
39                Ok(out)
40            }
41        } else {
42            let ca = self.physical().unique()?;
43            // SAFETY:
44            // we only removed some indexes so we are still in bounds
45            unsafe {
46                Ok(CategoricalChunked::from_cats_and_rev_map_unchecked(
47                    ca,
48                    cat_map.clone(),
49                    self.is_enum(),
50                    self.get_ordering(),
51                ))
52            }
53        }
54    }
55
56    pub fn n_unique(&self) -> PolarsResult<usize> {
57        if self._can_fast_unique() {
58            Ok(self.get_rev_map().len())
59        } else {
60            self.physical().n_unique()
61        }
62    }
63
64    pub fn value_counts(&self) -> PolarsResult<DataFrame> {
65        let groups = self.physical().group_tuples(true, false).unwrap();
66        let physical_values = unsafe {
67            self.physical()
68                .clone()
69                .into_series()
70                .agg_first(&groups)
71                .u32()
72                .unwrap()
73                .clone()
74        };
75
76        let mut values = self.clone();
77        *values.physical_mut() = physical_values;
78
79        let mut counts = groups.group_count();
80        counts.rename(PlSmallStr::from_static("counts"));
81        let height = counts.len();
82        let cols = vec![values.into_series().into(), counts.into_series().into()];
83        let df = unsafe { DataFrame::new_no_checks(height, cols) };
84        df.sort(
85            ["counts"],
86            SortMultipleOptions::default().with_order_descending(true),
87        )
88    }
89}