// polars_core/series/implementations/categorical.rs

use super::*;
use crate::chunked_array::comparison::*;
use crate::prelude::*;

5unsafe impl<T: PolarsCategoricalType> IntoSeries for CategoricalChunked<T> {
6    fn into_series(self) -> Series {
7        // We do this hack to go from generic T to concrete T to avoid adding bounds on IntoSeries.
8        with_match_categorical_physical_type!(T::physical(), |$C| {
9            unsafe {
10                Series(Arc::new(SeriesWrap(core::mem::transmute::<Self, CategoricalChunked<$C>>(self))))
11            }
12        })
13    }
14}
15
16impl<T: PolarsCategoricalType> SeriesWrap<CategoricalChunked<T>> {
17    unsafe fn apply_on_phys<F>(&self, apply: F) -> CategoricalChunked<T>
18    where
19        F: Fn(&ChunkedArray<T::PolarsPhysical>) -> ChunkedArray<T::PolarsPhysical>,
20    {
21        let cats = apply(self.0.physical());
22        unsafe { CategoricalChunked::from_cats_and_dtype_unchecked(cats, self.0.dtype().clone()) }
23    }
24
25    unsafe fn try_apply_on_phys<F>(&self, apply: F) -> PolarsResult<CategoricalChunked<T>>
26    where
27        F: Fn(&ChunkedArray<T::PolarsPhysical>) -> PolarsResult<ChunkedArray<T::PolarsPhysical>>,
28    {
29        let cats = apply(self.0.physical())?;
30        unsafe {
31            Ok(CategoricalChunked::from_cats_and_dtype_unchecked(
32                cats,
33                self.0.dtype().clone(),
34            ))
35        }
36    }
37}
38
/// Implements the series traits for one concrete categorical chunked-array type.
///
/// The expansion contains `private::PrivateSeries`, `SeriesTrait` and
/// `private::PrivateSeriesNumeric` impls for `SeriesWrap<$ca>`. Most methods forward to the
/// physical category array (`self.0.physical()`); methods that produce a new series re-wrap
/// the physical result with the dtype of `self`.
macro_rules! impl_cat_series {
    // `$ca`:    concrete chunked type held inside `SeriesWrap`.
    // `$pdt`:   categorical type parameter passed to `Series::cat::<$pdt>()`.
    // `$ca_fn`: name of the `Series` accessor used to downcast an aggregated physical series
    //           back to its chunked array.
    ($ca: ident, $pdt:ty, $ca_fn:ident) => {
        impl private::PrivateSeries for SeriesWrap<$ca> {
            /// Recomputes the cached length on the physical array.
            fn compute_len(&mut self) {
                self.0.physical_mut().compute_len()
            }
            /// Returns an owned copy of the field of the wrapped array.
            fn _field(&self) -> Cow<'_, Field> {
                Cow::Owned(self.0.field())
            }
            /// Returns the dtype of the wrapped categorical array.
            fn _dtype(&self) -> &DataType {
                self.0.dtype()
            }
            /// Returns the statistics flags of the wrapped array.
            fn _get_flags(&self) -> StatisticsFlags {
                self.0.get_flags()
            }
            /// Overwrites the statistics flags of the wrapped array.
            fn _set_flags(&mut self, flags: StatisticsFlags) {
                self.0.set_flags(flags)
            }

            /// Compares one element of `self` with one element of `other` on the physical
            /// values.
            ///
            /// # Safety
            ///
            /// Forwards directly to the physical array's `equal_element`; the caller must
            /// uphold that method's contract (not visible in this file).
            unsafe fn equal_element(&self, idx_self: usize, idx_other: usize, other: &Series) -> bool {
                self.0.physical().equal_element(idx_self, idx_other, other)
            }

            /// Selects between `self` and `other` according to `mask`, on physical values.
            ///
            /// # Errors
            ///
            /// Returns a `SchemaMismatch` error when the dtypes of `self` and `other` differ,
            /// and propagates any error from the physical `zip_with`.
            #[cfg(feature = "zip_with")]
            fn zip_with_same_type(&self, mask: &BooleanChunked, other: &Series) -> PolarsResult<Series> {
                polars_ensure!(self.dtype() == other.dtype(), SchemaMismatch: "expected '{}' found '{}'", self.dtype(), other.dtype());
                let other = other.to_physical_repr().into_owned();
                // SAFETY: both inputs were just checked to share one dtype, so the zipped
                // physical values presumably stay valid for it — confirm with
                // `try_apply_on_phys`'s contract.
                unsafe {
                    Ok(self.try_apply_on_phys(|ca| {
                        ca.zip_with(mask, other.as_ref().as_ref())
                    })?.into_series())
                }
            }

            /// Builds the total-order comparator: from the categorical array itself when it
            /// uses lexical ordering, otherwise from the physical array.
            fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
                if self.0.uses_lexical_ordering() {
                    (&self.0).into_total_ord_inner()
                } else {
                    self.0.physical().into_total_ord_inner()
                }
            }
            /// Not supported for categoricals: always goes through
            /// `invalid_operation_panic!`.
            fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
                invalid_operation_panic!(into_total_eq_inner, self)
            }

            /// Hashes the values into `buf` by delegating to the wrapped array.
            fn vec_hash(
                &self,
                random_state: PlSeedableRandomStateQuality,
                buf: &mut Vec<u64>,
            ) -> PolarsResult<()> {
                self.0.vec_hash(random_state, buf)
            }

            /// Combines the value hashes into `hashes` by delegating to the wrapped array.
            fn vec_hash_combine(
                &self,
                build_hasher: PlSeedableRandomStateQuality,
                hashes: &mut [u64],
            ) -> PolarsResult<()> {
                self.0.vec_hash_combine(build_hasher, hashes)
            }

            /// Per-group minimum, computed on the physical values and re-wrapped.
            ///
            /// # Panics
            ///
            /// Panics via `unimplemented!()` when the array uses lexical ordering, and via
            /// `unwrap` if the aggregated series cannot be accessed through `$ca_fn`.
            ///
            /// # Safety
            ///
            /// Forwards `groups` to the physical `agg_min`; the caller must uphold that
            /// method's contract (not visible in this file).
            #[cfg(feature = "algorithm_group_by")]
            unsafe fn agg_min(&self, groups: &GroupsType) -> Series {
                if self.0.uses_lexical_ordering() {
                    unimplemented!()
                } else {
                    self.apply_on_phys(|phys| phys.agg_min(groups).$ca_fn().unwrap().clone())
                        .into_series()
                }
            }

            /// Per-group maximum, computed on the physical values and re-wrapped.
            ///
            /// # Panics
            ///
            /// Panics via `unimplemented!()` when the array uses lexical ordering, and via
            /// `unwrap` if the aggregated series cannot be accessed through `$ca_fn`.
            ///
            /// # Safety
            ///
            /// Forwards `groups` to the physical `agg_max`; the caller must uphold that
            /// method's contract (not visible in this file).
            #[cfg(feature = "algorithm_group_by")]
            unsafe fn agg_max(&self, groups: &GroupsType) -> Series {
                if self.0.uses_lexical_ordering() {
                    unimplemented!()
                } else {
                    self.apply_on_phys(|phys| phys.agg_max(groups).$ca_fn().unwrap().clone())
                        .into_series()
                }
            }

            /// Per-group arg-min, computed on the physical values.
            ///
            /// # Panics
            ///
            /// Panics via `unimplemented!()` when the array uses lexical ordering.
            ///
            /// # Safety
            ///
            /// Forwards `groups` to the physical `agg_arg_min`; the caller must uphold that
            /// method's contract (not visible in this file).
            #[cfg(feature = "algorithm_group_by")]
            unsafe fn agg_arg_min(&self, groups: &GroupsType) -> Series {
                if self.0.uses_lexical_ordering() {
                    unimplemented!()
                } else {
                    self.0.physical().agg_arg_min(groups)
                }
            }

            /// Per-group arg-max, computed on the physical values.
            ///
            /// # Panics
            ///
            /// Panics via `unimplemented!()` when the array uses lexical ordering.
            ///
            /// # Safety
            ///
            /// Forwards `groups` to the physical `agg_arg_max`; the caller must uphold that
            /// method's contract (not visible in this file).
            #[cfg(feature = "algorithm_group_by")]
            unsafe fn agg_arg_max(&self, groups: &GroupsType) -> Series {
                if self.0.uses_lexical_ordering() {
                    unimplemented!()
                } else {
                    self.0.physical().agg_arg_max(groups)
                }
            }

            /// Collects each group into a list whose inner dtype is the categorical dtype.
            ///
            /// # Panics
            ///
            /// Panics via `unwrap` if the physical aggregation does not yield a list series.
            ///
            /// # Safety
            ///
            /// Forwards `groups` to the physical `agg_list`; the caller must uphold that
            /// method's contract (not visible in this file).
            #[cfg(feature = "algorithm_group_by")]
            unsafe fn agg_list(&self, groups: &GroupsType) -> Series {
                // we cannot cast and dispatch as the inner type of the list would be incorrect
                let list = self.0.physical().agg_list(groups);
                let mut list = list.list().unwrap().clone();
                // SAFETY: the list was built from this series' own physical values, so
                // presumably they are valid for `self.dtype()` — TODO confirm against
                // `to_logical`'s contract.
                unsafe { list.to_logical(self.dtype().clone()) };
                list.into_series()
            }

            /// Computes the group tuples on the physical values.
            #[cfg(feature = "algorithm_group_by")]
            fn group_tuples(&self, multithreaded: bool, sorted: bool) -> PolarsResult<GroupsType> {
                self.0.physical().group_tuples(multithreaded, sorted)
            }

            /// Multi-column arg-sort, delegated to the wrapped categorical array.
            fn arg_sort_multiple(
                &self,
                by: &[Column],
                options: &SortMultipleOptions,
            ) -> PolarsResult<IdxCa> {
                self.0.arg_sort_multiple(by, options)
            }
        }

        // NOTE(review): the `unsafe` blocks below that call `apply_on_phys` /
        // `try_apply_on_phys` pass closures that slice, gather, filter, reverse, shift or
        // otherwise rearrange the existing physical values. Presumably none of them can
        // introduce a category that is invalid for the dtype, which is what makes re-wrapping
        // with the unchanged dtype sound — confirm per operation.
        impl SeriesTrait for SeriesWrap<$ca> {
            /// Renames the series; the name is stored on the physical array.
            fn rename(&mut self, name: PlSmallStr) {
                self.0.physical_mut().rename(name);
            }

            /// Iterates over the chunk lengths of the physical array.
            fn chunk_lengths(&self) -> ChunkLenIter<'_> {
                self.0.physical().chunk_lengths()
            }

            /// Returns the name stored on the physical array.
            fn name(&self) -> &PlSmallStr {
                self.0.physical().name()
            }

            /// Returns the chunks of the physical array.
            fn chunks(&self) -> &Vec<ArrayRef> {
                self.0.physical().chunks()
            }

            /// Returns the chunks of the physical array mutably.
            ///
            /// # Safety
            ///
            /// Forwards to the physical array's `chunks_mut`; the caller must uphold that
            /// method's contract (not visible in this file).
            unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
                self.0.physical_mut().chunks_mut()
            }

            /// Shrinks the physical array's allocations.
            fn shrink_to_fit(&mut self) {
                self.0.physical_mut().shrink_to_fit()
            }

            /// Slices the physical array and re-wraps the result.
            fn slice(&self, offset: i64, length: usize) -> Series {
                // SAFETY: see the note at the top of this impl.
                unsafe { self.apply_on_phys(|cats| cats.slice(offset, length)).into_series() }
            }

            /// Splits the physical array at `offset` and re-wraps both halves.
            fn split_at(&self, offset: i64) -> (Series, Series) {
                // SAFETY: see the note at the top of this impl; both halves come from this
                // series' own physical values and keep its dtype.
                unsafe {
                    let (a, b) = self.0.physical().split_at(offset);
                    let a = <$ca>::from_cats_and_dtype_unchecked(a, self.0.dtype().clone()).into_series();
                    let b = <$ca>::from_cats_and_dtype_unchecked(b, self.0.dtype().clone()).into_series();
                    (a, b)
                }
            }

            /// Appends `other` to this series.
            ///
            /// # Errors
            ///
            /// Fails when the dtypes differ, and propagates any error from the wrapped
            /// array's `append`.
            fn append(&mut self, other: &Series) -> PolarsResult<()> {
                polars_ensure!(self.0.dtype() == other.dtype(), append);
                // The dtype check above is presumably what makes this `unwrap` infallible.
                self.0.append(other.cat::<$pdt>().unwrap())
            }

            /// Appends `other` by value, moving its physical array out instead of copying.
            ///
            /// Unlike [`Self::append`], this appends directly to the physical array.
            ///
            /// # Errors
            ///
            /// Fails when the dtypes differ, and propagates any error from the physical
            /// `append_owned`.
            fn append_owned(&mut self, mut other: Series) -> PolarsResult<()> {
                polars_ensure!(self.0.dtype() == other.dtype(), append);
                // `as_any_mut` on this wrapper exposes the inner `$ca`, hence the downcast
                // target; `mem::take` leaves a default physical array behind in `other`.
                self.0.physical_mut().append_owned(std::mem::take(
                    other
                        ._get_inner_mut()
                        .as_any_mut()
                        .downcast_mut::<$ca>()
                        .unwrap()
                        .physical_mut(),
                ))
            }

            /// Extends this series with `other`.
            ///
            /// # Errors
            ///
            /// Fails when the dtypes differ, and propagates any error from the wrapped
            /// array's `extend`.
            fn extend(&mut self, other: &Series) -> PolarsResult<()> {
                polars_ensure!(self.0.dtype() == other.dtype(), extend);
                // The dtype check above is presumably what makes this `unwrap` infallible.
                self.0.extend(other.cat::<$pdt>().unwrap())
            }

            /// Filters the physical array with `filter` and re-wraps the result.
            fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Series> {
                // SAFETY: see the note at the top of this impl.
                unsafe { Ok(self.try_apply_on_phys(|cats| cats.filter(filter))?.into_series()) }
            }

            /// Gathers by `indices` on the physical array and re-wraps the result.
            fn take(&self, indices: &IdxCa) -> PolarsResult<Series> {
                // SAFETY: see the note at the top of this impl.
                unsafe { Ok(self.try_apply_on_phys(|cats| cats.take(indices))?.into_series()) }
            }

            /// Gathers by `indices` on the physical array through `take_unchecked`.
            ///
            /// # Safety
            ///
            /// Forwards `indices` to the physical `take_unchecked`; the caller must uphold
            /// that method's contract (not visible in this file).
            unsafe fn take_unchecked(&self, indices: &IdxCa) -> Series {
                // SAFETY: index validity is the caller's obligation; for the re-wrap see the
                // note at the top of this impl.
                unsafe { self.apply_on_phys(|cats| cats.take_unchecked(indices)).into_series() }
            }

            /// Gathers by a slice of indices on the physical array and re-wraps the result.
            fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Series> {
                // SAFETY: see the note at the top of this impl.
                unsafe { Ok(self.try_apply_on_phys(|cats| cats.take(indices))?.into_series()) }
            }

            /// Gathers by a slice of indices through the physical `take_unchecked`.
            ///
            /// # Safety
            ///
            /// Forwards `indices` to the physical `take_unchecked`; the caller must uphold
            /// that method's contract (not visible in this file).
            unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Series {
                // SAFETY: index validity is the caller's obligation; for the re-wrap see the
                // note at the top of this impl.
                unsafe { self.apply_on_phys(|cats| cats.take_unchecked(indices)).into_series() }
            }

            /// Applies the physical array's `deposit` with `validity` and re-wraps the result.
            fn deposit(&self, validity: &Bitmap) -> Series {
                // SAFETY: see the note at the top of this impl.
                unsafe { self.apply_on_phys(|cats| cats.deposit(validity)) }
                    .into_series()
            }

            /// Returns the length reported by the wrapped array.
            fn len(&self) -> usize {
                self.0.len()
            }

            /// Rechunks the physical array and re-wraps the result.
            fn rechunk(&self) -> Series {
                // SAFETY: see the note at the top of this impl.
                unsafe { self.apply_on_phys(|cats| cats.rechunk().into_owned()).into_series() }
            }

            /// Calls the physical `new_from_index(index, length)` and re-wraps the result.
            fn new_from_index(&self, index: usize, length: usize) -> Series {
                // SAFETY: see the note at the top of this impl.
                unsafe { self.apply_on_phys(|cats| cats.new_from_index(index, length)).into_series() }
            }

            /// Casts through the wrapped categorical array's `cast_with_options`.
            fn cast(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series> {
                self.0.cast_with_options(dtype, options)
            }

            /// Returns the value at `index` via `get_any_value_unchecked`.
            ///
            /// # Safety
            ///
            /// Forwards `index` to `get_any_value_unchecked`; the caller must uphold that
            /// method's contract (not visible in this file).
            #[inline]
            unsafe fn get_unchecked(&self, index: usize) -> AnyValue<'_> {
                self.0.get_any_value_unchecked(index)
            }

            /// Sorts through the wrapped categorical array; this impl always returns `Ok`.
            fn sort_with(&self, options: SortOptions) -> PolarsResult<Series> {
                Ok(self.0.sort_with(options).into_series())
            }

            /// Arg-sorts through the wrapped categorical array.
            fn arg_sort(&self, options: SortOptions) -> IdxCa {
                self.0.arg_sort(options)
            }

            /// Null count of the physical array.
            fn null_count(&self) -> usize {
                self.0.physical().null_count()
            }

            /// Whether the physical array has nulls.
            fn has_nulls(&self) -> bool {
                self.0.physical().has_nulls()
            }

            /// Unique values, computed on the physical array and re-wrapped.
            #[cfg(feature = "algorithm_group_by")]
            fn unique(&self) -> PolarsResult<Series> {
                // SAFETY: see the note at the top of this impl.
                unsafe { Ok(self.try_apply_on_phys(|cats| cats.unique())?.into_series()) }
            }

            /// Number of unique physical values.
            #[cfg(feature = "algorithm_group_by")]
            fn n_unique(&self) -> PolarsResult<usize> {
                self.0.physical().n_unique()
            }

            /// Approximate number of unique physical values; this impl always returns `Ok`.
            #[cfg(feature = "approx_unique")]
            fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
                Ok(self.0.physical().approx_n_unique())
            }

            /// Result of `arg_unique` on the physical array.
            #[cfg(feature = "algorithm_group_by")]
            fn arg_unique(&self) -> PolarsResult<IdxCa> {
                self.0.physical().arg_unique()
            }

            /// Result of `ChunkUnique::unique_id` on the physical array.
            fn unique_id(&self) -> PolarsResult<(IdxSize, Vec<IdxSize>)> {
                ChunkUnique::unique_id(self.0.physical())
            }

            /// Null mask of the physical array.
            fn is_null(&self) -> BooleanChunked {
                self.0.physical().is_null()
            }

            /// Non-null mask of the physical array.
            fn is_not_null(&self) -> BooleanChunked {
                self.0.physical().is_not_null()
            }

            /// Reverses the physical array and re-wraps the result.
            fn reverse(&self) -> Series {
                // SAFETY: see the note at the top of this impl.
                unsafe { self.apply_on_phys(|cats| cats.reverse()).into_series() }
            }

            /// Forwards to `as_single_ptr` on the physical array.
            fn as_single_ptr(&mut self) -> PolarsResult<usize> {
                self.0.physical_mut().as_single_ptr()
            }

            /// Shifts the physical array by `periods` and re-wraps the result.
            fn shift(&self, periods: i64) -> Series {
                // SAFETY: see the note at the top of this impl.
                unsafe { self.apply_on_phys(|ca| ca.shift(periods)).into_series() }
            }

            /// Clones the wrapped array into a new `Arc`-ed wrapper.
            fn clone_inner(&self) -> Arc<dyn SeriesTrait> {
                Arc::new(SeriesWrap(Clone::clone(&self.0)))
            }

            /// Minimum as a scalar via `ChunkAggSeries`; this impl always returns `Ok`.
            fn min_reduce(&self) -> PolarsResult<Scalar> {
                Ok(ChunkAggSeries::min_reduce(&self.0))
            }

            /// Maximum as a scalar via `ChunkAggSeries`; this impl always returns `Ok`.
            fn max_reduce(&self) -> PolarsResult<Scalar> {
                Ok(ChunkAggSeries::max_reduce(&self.0))
            }

            /// Forwards to `find_validity_mismatch` on the physical array.
            fn find_validity_mismatch(&self, other: &Series, idxs: &mut Vec<IdxSize>) {
                self.0.physical().find_validity_mismatch(other, idxs)
            }

            /// Exposes the inner `$ca` (not the `SeriesWrap`) as `Any`.
            fn as_any(&self) -> &dyn Any {
                &self.0
            }

            /// Exposes the inner `$ca` (not the `SeriesWrap`) as mutable `Any`.
            fn as_any_mut(&mut self) -> &mut dyn Any {
                &mut self.0
            }

            /// Exposes the physical array as `Any`.
            fn as_phys_any(&self) -> &dyn Any {
                self.0.physical()
            }

            /// Upcasts the `Arc`-ed wrapper itself to `Arc<dyn Any + Send + Sync>`.
            fn as_arc_any(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
                self as _
            }
        }

        impl private::PrivateSeriesNumeric for SeriesWrap<$ca> {
            /// Bit representation of the physical array; this impl always returns `Some`.
            fn bit_repr(&self) -> Option<BitRepr> {
                Some(self.0.physical().to_bit_repr())
            }
        }
    }
}

369impl_cat_series!(Categorical8Chunked, Categorical8Type, u8);
370impl_cat_series!(Categorical16Chunked, Categorical16Type, u16);
371impl_cat_series!(Categorical32Chunked, Categorical32Type, u32);