// polars_core/series/implementations/categorical.rs

1use super::*;
2use crate::chunked_array::comparison::*;
3use crate::prelude::*;
4
/// Wraps a generic `CategoricalChunked<T>` into a type-erased `Series`.
unsafe impl<T: PolarsCategoricalType> IntoSeries for CategoricalChunked<T> {
    fn into_series(self) -> Series {
        // We do this hack to go from generic T to concrete T to avoid adding bounds on IntoSeries.
        with_match_categorical_physical_type!(T::physical(), |$C| {
            unsafe {
                // SAFETY: `$C` is selected by matching on `T::physical()`, so
                // `CategoricalChunked<$C>` is the same concrete type as `Self`
                // and the transmute is an identity conversion.
                Series(Arc::new(SeriesWrap(core::mem::transmute::<Self, CategoricalChunked<$C>>(self))))
            }
        })
    }
}
15
impl<T: PolarsCategoricalType> SeriesWrap<CategoricalChunked<T>> {
    /// Applies `apply` to the physical (integer code) array and rewraps the
    /// result as a categorical with the same dtype.
    ///
    /// # Safety
    /// The caller must guarantee that `apply` only moves/filters/duplicates
    /// existing codes (and nulls) so every code in the result is still valid
    /// for this array's categorical dtype.
    unsafe fn apply_on_phys<F>(&self, apply: F) -> CategoricalChunked<T>
    where
        F: Fn(&ChunkedArray<T::PolarsPhysical>) -> ChunkedArray<T::PolarsPhysical>,
    {
        let cats = apply(self.0.physical());
        // SAFETY: upheld by the caller — `cats` contains only codes valid for this dtype.
        unsafe { CategoricalChunked::from_cats_and_dtype_unchecked(cats, self.0.dtype().clone()) }
    }

    /// Fallible variant of [`Self::apply_on_phys`]; propagates the error from `apply`.
    ///
    /// # Safety
    /// Same contract as [`Self::apply_on_phys`]: on success, `apply` must only
    /// produce codes that are valid for this array's categorical dtype.
    unsafe fn try_apply_on_phys<F>(&self, apply: F) -> PolarsResult<CategoricalChunked<T>>
    where
        F: Fn(&ChunkedArray<T::PolarsPhysical>) -> PolarsResult<ChunkedArray<T::PolarsPhysical>>,
    {
        let cats = apply(self.0.physical())?;
        // SAFETY: upheld by the caller — `cats` contains only codes valid for this dtype.
        unsafe {
            Ok(CategoricalChunked::from_cats_and_dtype_unchecked(
                cats,
                self.0.dtype().clone(),
            ))
        }
    }
}
38
/// Implements `PrivateSeries`, `SeriesTrait` and `PrivateSeriesNumeric` for a
/// concrete categorical chunked-array type.
///
/// * `$ca` — the concrete `CategoricalChunked` alias (e.g. `Categorical8Chunked`).
/// * `$pdt` — the matching `PolarsCategoricalType` (e.g. `Categorical8Type`).
/// * `$ca_fn` — the accessor on `Series` that returns the physical integer
///   chunked array (e.g. `u8`).
///
/// Most operations dispatch to the physical (integer code) representation and
/// rewrap the result with the original categorical dtype.
macro_rules! impl_cat_series {
    ($ca: ident, $pdt:ty, $ca_fn:ident) => {
        impl private::PrivateSeries for SeriesWrap<$ca> {
            fn compute_len(&mut self) {
                self.0.physical_mut().compute_len()
            }
            fn _field(&self) -> Cow<'_, Field> {
                Cow::Owned(self.0.field())
            }
            fn _dtype(&self) -> &DataType {
                self.0.dtype()
            }
            fn _get_flags(&self) -> StatisticsFlags {
                self.0.get_flags()
            }
            fn _set_flags(&mut self, flags: StatisticsFlags) {
                self.0.set_flags(flags)
            }

            unsafe fn equal_element(&self, idx_self: usize, idx_other: usize, other: &Series) -> bool {
                self.0.physical().equal_element(idx_self, idx_other, other)
            }

            #[cfg(feature = "zip_with")]
            fn zip_with_same_type(&self, mask: &BooleanChunked, other: &Series) -> PolarsResult<Series> {
                polars_ensure!(self.dtype() == other.dtype(), SchemaMismatch: "expected '{}' found '{}'", self.dtype(), other.dtype());
                let other = other.to_physical_repr().into_owned();
                // SAFETY: zipping two arrays of the same categorical dtype only
                // selects existing codes, which remain valid for that dtype.
                unsafe {
                    Ok(self.try_apply_on_phys(|ca| {
                        ca.zip_with(mask, other.as_ref().as_ref())
                    })?.into_series())
                }
            }

            fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
                if self.0.uses_lexical_ordering() {
                    // Lexical ordering compares by category string, not by code.
                    (&self.0).into_total_ord_inner()
                } else {
                    self.0.physical().into_total_ord_inner()
                }
            }
            fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
                // Equality of two values with the same categorical dtype is
                // equality of their physical codes. This must stay consistent
                // with `vec_hash`/`vec_hash_combine`, which hash the physical
                // representation, and with `group_tuples`, which groups on it.
                self.0.physical().into_total_eq_inner()
            }

            fn vec_hash(
                &self,
                random_state: PlSeedableRandomStateQuality,
                buf: &mut Vec<u64>,
            ) -> PolarsResult<()> {
                self.0.physical().vec_hash(random_state, buf)?;
                Ok(())
            }

            fn vec_hash_combine(
                &self,
                build_hasher: PlSeedableRandomStateQuality,
                hashes: &mut [u64],
            ) -> PolarsResult<()> {
                self.0.physical().vec_hash_combine(build_hasher, hashes)?;
                Ok(())
            }

            #[cfg(feature = "algorithm_group_by")]
            unsafe fn agg_min(&self, groups: &GroupsType) -> Series {
                if self.0.uses_lexical_ordering() {
                    // Min by category string is not implemented; only the
                    // physical (code) ordering is supported here.
                    unimplemented!()
                } else {
                    self.apply_on_phys(|phys| phys.agg_min(groups).$ca_fn().unwrap().clone())
                        .into_series()
                }
            }

            #[cfg(feature = "algorithm_group_by")]
            unsafe fn agg_max(&self, groups: &GroupsType) -> Series {
                if self.0.uses_lexical_ordering() {
                    // See `agg_min`: lexical ordering is not supported.
                    unimplemented!()
                } else {
                    self.apply_on_phys(|phys| phys.agg_max(groups).$ca_fn().unwrap().clone())
                        .into_series()
                }
            }

            #[cfg(feature = "algorithm_group_by")]
            unsafe fn agg_list(&self, groups: &GroupsType) -> Series {
                // we cannot cast and dispatch as the inner type of the list would be incorrect
                let list = self.0.physical().agg_list(groups);
                let mut list = list.list().unwrap().clone();
                unsafe { list.to_logical(self.dtype().clone()) };
                list.into_series()
            }

            #[cfg(feature = "algorithm_group_by")]
            fn group_tuples(&self, multithreaded: bool, sorted: bool) -> PolarsResult<GroupsType> {
                self.0.physical().group_tuples(multithreaded, sorted)
            }

            fn arg_sort_multiple(
                &self,
                by: &[Column],
                options: &SortMultipleOptions,
            ) -> PolarsResult<IdxCa> {
                self.0.arg_sort_multiple(by, options)
            }
        }

        impl SeriesTrait for SeriesWrap<$ca> {
            fn rename(&mut self, name: PlSmallStr) {
                self.0.physical_mut().rename(name);
            }

            fn chunk_lengths(&self) -> ChunkLenIter<'_> {
                self.0.physical().chunk_lengths()
            }

            fn name(&self) -> &PlSmallStr {
                self.0.physical().name()
            }

            fn chunks(&self) -> &Vec<ArrayRef> {
                self.0.physical().chunks()
            }

            unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
                self.0.physical_mut().chunks_mut()
            }

            fn shrink_to_fit(&mut self) {
                self.0.physical_mut().shrink_to_fit()
            }

            fn slice(&self, offset: i64, length: usize) -> Series {
                // SAFETY: slicing only narrows the code array; codes stay valid.
                unsafe { self.apply_on_phys(|cats| cats.slice(offset, length)).into_series() }
            }

            fn split_at(&self, offset: i64) -> (Series, Series) {
                // SAFETY: both halves contain only codes from the original array.
                unsafe {
                    let (a, b) = self.0.physical().split_at(offset);
                    let a = <$ca>::from_cats_and_dtype_unchecked(a, self.0.dtype().clone()).into_series();
                    let b = <$ca>::from_cats_and_dtype_unchecked(b, self.0.dtype().clone()).into_series();
                    (a, b)
                }
            }

            fn append(&mut self, other: &Series) -> PolarsResult<()> {
                polars_ensure!(self.0.dtype() == other.dtype(), append);
                self.0.append(other.cat::<$pdt>().unwrap())
            }

            fn append_owned(&mut self, mut other: Series) -> PolarsResult<()> {
                polars_ensure!(self.0.dtype() == other.dtype(), append);
                // Dtypes match, so appending the physical codes directly is sound.
                self.0.physical_mut().append_owned(std::mem::take(
                    other
                        ._get_inner_mut()
                        .as_any_mut()
                        .downcast_mut::<$ca>()
                        .unwrap()
                        .physical_mut(),
                ))
            }

            fn extend(&mut self, other: &Series) -> PolarsResult<()> {
                polars_ensure!(self.0.dtype() == other.dtype(), extend);
                self.0.extend(other.cat::<$pdt>().unwrap())
            }

            fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Series> {
                // SAFETY: filtering only keeps existing codes.
                unsafe { Ok(self.try_apply_on_phys(|cats| cats.filter(filter))?.into_series()) }
            }

            fn take(&self, indices: &IdxCa) -> PolarsResult<Series> {
                // SAFETY: gathering only duplicates/reorders existing codes.
                unsafe { Ok(self.try_apply_on_phys(|cats| cats.take(indices))?.into_series() ) }
            }

            unsafe fn take_unchecked(&self, indices: &IdxCa) -> Series {
                unsafe { self.apply_on_phys(|cats| cats.take_unchecked(indices)).into_series() }
            }

            fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Series> {
                unsafe { Ok(self.try_apply_on_phys(|cats| cats.take(indices))?.into_series()) }
            }

            unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Series {
                unsafe { self.apply_on_phys(|cats| cats.take_unchecked(indices)).into_series() }
            }

            fn deposit(&self, validity: &Bitmap) -> Series {
                // SAFETY: applying a validity mask does not introduce new codes.
                unsafe { self.apply_on_phys(|cats| cats.deposit(validity)) }
                    .into_series()
            }

            fn len(&self) -> usize {
                self.0.len()
            }

            fn rechunk(&self) -> Series {
                // SAFETY: rechunking only re-lays-out the same codes.
                unsafe { self.apply_on_phys(|cats| cats.rechunk().into_owned()).into_series() }
            }

            fn new_from_index(&self, index: usize, length: usize) -> Series {
                // SAFETY: repeats a single existing code `length` times.
                unsafe { self.apply_on_phys(|cats| cats.new_from_index(index, length)).into_series() }
            }

            fn cast(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series> {
                self.0.cast_with_options(dtype, options)
            }

            #[inline]
            unsafe fn get_unchecked(&self, index: usize) -> AnyValue<'_> {
                self.0.get_any_value_unchecked(index)
            }

            fn sort_with(&self, options: SortOptions) -> PolarsResult<Series> {
                Ok(self.0.sort_with(options).into_series())
            }

            fn arg_sort(&self, options: SortOptions) -> IdxCa {
                self.0.arg_sort(options)
            }

            fn null_count(&self) -> usize {
                self.0.physical().null_count()
            }

            fn has_nulls(&self) -> bool {
                self.0.physical().has_nulls()
            }

            #[cfg(feature = "algorithm_group_by")]
            fn unique(&self) -> PolarsResult<Series> {
                // SAFETY: unique codes are a subset of the existing codes.
                unsafe { Ok(self.try_apply_on_phys(|cats| cats.unique())?.into_series()) }
            }

            #[cfg(feature = "algorithm_group_by")]
            fn n_unique(&self) -> PolarsResult<usize> {
                self.0.physical().n_unique()
            }

            #[cfg(feature = "approx_unique")]
            fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
                Ok(self.0.physical().approx_n_unique())
            }

            #[cfg(feature = "algorithm_group_by")]
            fn arg_unique(&self) -> PolarsResult<IdxCa> {
                self.0.physical().arg_unique()
            }

            fn is_null(&self) -> BooleanChunked {
                self.0.physical().is_null()
            }

            fn is_not_null(&self) -> BooleanChunked {
                self.0.physical().is_not_null()
            }

            fn reverse(&self) -> Series {
                // SAFETY: reversing only reorders existing codes.
                unsafe { self.apply_on_phys(|cats| cats.reverse()).into_series() }
            }

            fn as_single_ptr(&mut self) -> PolarsResult<usize> {
                self.0.physical_mut().as_single_ptr()
            }

            fn shift(&self, periods: i64) -> Series {
                // SAFETY: shifting moves existing codes and inserts nulls.
                unsafe { self.apply_on_phys(|ca| ca.shift(periods)).into_series() }
            }

            fn clone_inner(&self) -> Arc<dyn SeriesTrait> {
                Arc::new(SeriesWrap(Clone::clone(&self.0)))
            }

            fn min_reduce(&self) -> PolarsResult<Scalar> {
                Ok(ChunkAggSeries::min_reduce(&self.0))
            }

            fn max_reduce(&self) -> PolarsResult<Scalar> {
                Ok(ChunkAggSeries::max_reduce(&self.0))
            }

            fn find_validity_mismatch(&self, other: &Series, idxs: &mut Vec<IdxSize>) {
                self.0.physical().find_validity_mismatch(other, idxs)
            }

            fn as_any(&self) -> &dyn Any {
                &self.0
            }

            fn as_any_mut(&mut self) -> &mut dyn Any {
                &mut self.0
            }

            fn as_phys_any(&self) -> &dyn Any {
                self.0.physical()
            }

            fn as_arc_any(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
                self as _
            }
        }

        impl private::PrivateSeriesNumeric for SeriesWrap<$ca> {
            fn bit_repr(&self) -> Option<BitRepr> {
                Some(self.0.physical().to_bit_repr())
            }
        }
    }
}
347
// Instantiate the Series impls for each categorical physical width.
impl_cat_series!(Categorical8Chunked, Categorical8Type, u8);
impl_cat_series!(Categorical16Chunked, Categorical16Type, u16);
impl_cat_series!(Categorical32Chunked, Categorical32Type, u32);