1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
use super::*;

impl CategoricalChunked {
    /// Returns a categorical array containing each distinct value of `self` once.
    ///
    /// The result reuses this array's reverse mapping, enum flag and ordering.
    /// When the fast path is taken the output is additionally flagged as
    /// fast-unique.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while computing the unique physical values.
    pub fn unique(&self) -> PolarsResult<Self> {
        let cat_map = self.get_rev_map();
        // The fast path reports *every* category of the reverse mapping. An empty
        // array holds none of them, so it has to go through the general path.
        if self._can_fast_unique() && !self.physical().is_empty() {
            let ca = match &**cat_map {
                RevMapping::Local(a, _) => {
                    // One physical index per entry of the local mapping.
                    UInt32Chunked::from_iter_values(self.physical().name(), 0..(a.len() as u32))
                },
                RevMapping::Global(map, _, _) => {
                    // The physical values are exactly the keys of the global mapping.
                    UInt32Chunked::from_iter_values(self.physical().name(), map.keys().copied())
                },
            };
            // SAFETY:
            // every physical value was generated from `cat_map` itself (positions
            // `0..len` for a local mapping, the map's keys for a global one), so
            // all of them are in bounds for that mapping.
            unsafe {
                let mut out = CategoricalChunked::from_cats_and_rev_map_unchecked(
                    ca,
                    cat_map.clone(),
                    self.is_enum(),
                    self.get_ordering(),
                );
                out.set_fast_unique(true);
                Ok(out)
            }
        } else {
            let ca = self.physical().unique()?;
            // SAFETY:
            // we only removed some indexes so we are still in bounds
            unsafe {
                Ok(CategoricalChunked::from_cats_and_rev_map_unchecked(
                    ca,
                    cat_map.clone(),
                    self.is_enum(),
                    self.get_ordering(),
                ))
            }
        }
    }

    /// Returns the number of distinct values in this array.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while counting the unique physical values.
    pub fn n_unique(&self) -> PolarsResult<usize> {
        // Same guard as in `unique`: the size of the reverse mapping is only the
        // answer when the array actually holds values.
        if self._can_fast_unique() && !self.physical().is_empty() {
            Ok(self.get_rev_map().len())
        } else {
            self.physical().n_unique()
        }
    }

    /// Counts how often each value occurs.
    ///
    /// Returns a two-column [`DataFrame`]: the first column holds one
    /// representative per group of equal values (same name and categorical
    /// metadata as `self`), the second column is named `"counts"` and holds the
    /// group sizes. Rows are sorted by `"counts"` in descending order.
    ///
    /// # Errors
    ///
    /// Propagates errors from grouping the physical values and from the final sort.
    pub fn value_counts(&self) -> PolarsResult<DataFrame> {
        // Grouping is fallible; propagate the error instead of panicking.
        let groups = self.physical().group_tuples(true, false)?;

        // SAFETY:
        // `groups` was computed from this very physical array, so every group
        // index is in bounds for it.
        let firsts = unsafe { self.physical().clone().into_series().agg_first(&groups) };
        // Taking the first element of each group of a `u32` series keeps the
        // `u32` dtype, so this downcast is an invariant.
        let physical_values = firsts.u32().unwrap().clone();

        // Reuse the categorical metadata of `self` and swap in the per-group values.
        let mut values = self.clone();
        *values.physical_mut() = physical_values;

        let mut counts = groups.group_count();
        counts.rename("counts");
        let cols = vec![values.into_series(), counts.into_series()];
        // SAFETY:
        // both columns have exactly one entry per group, so their lengths match;
        // NOTE(review): assumes `self` is not itself named "counts" — confirm
        // whether duplicate column names matter to callers.
        let df = unsafe { DataFrame::new_no_checks(cols) };
        df.sort(
            ["counts"],
            SortMultipleOptions::default().with_order_descending(true),
        )
    }
}