1use num_traits::Bounded;
2#[cfg(feature = "dtype-struct")]
3use polars_core::chunked_array::ops::row_encode::_get_rows_encoded_ca;
4use polars_core::prelude::arity::unary_elementwise_values;
5use polars_core::prelude::*;
6use polars_core::series::IsSorted;
7use polars_core::with_match_physical_numeric_polars_type;
8#[cfg(feature = "hash")]
9use polars_utils::aliases::PlSeedableRandomStateQuality;
10use polars_utils::total_ord::TotalOrd;
11
12use crate::series::ops::SeriesSealed;
13
14pub trait SeriesMethods: SeriesSealed {
15 fn value_counts(
18 &self,
19 sort: bool,
20 parallel: bool,
21 name: PlSmallStr,
22 normalize: bool,
23 ) -> PolarsResult<DataFrame> {
24 let s = self.as_series();
25 polars_ensure!(
26 s.name() != &name,
27 Duplicate: "using `value_counts` on a column/series named '{}' would lead to duplicate \
28 column names; change `name` to fix", name,
29 );
30 let groups = s.group_tuples(parallel, sort)?;
32 let values = unsafe { s.agg_first(&groups) }
33 .with_name(s.name().clone())
34 .into();
35 let counts = groups.group_count().with_name(name.clone());
36
37 let counts = if normalize {
38 let len = s.len() as f64;
39 let counts: Float64Chunked =
40 unary_elementwise_values(&counts, |count| count as f64 / len);
41 counts.into_column()
42 } else {
43 counts.into_column()
44 };
45
46 let height = counts.len();
47 let cols = vec![values, counts];
48 let df = unsafe { DataFrame::new_unchecked(height, cols) };
49 if sort {
50 df.sort(
51 [name],
52 SortMultipleOptions::default()
53 .with_order_descending(true)
54 .with_multithreaded(parallel),
55 )
56 } else {
57 Ok(df)
58 }
59 }
60
61 #[cfg(feature = "hash")]
62 fn hash(&self, build_hasher: PlSeedableRandomStateQuality) -> UInt64Chunked {
63 let s = self.as_series();
64 let mut h = vec![];
65 s.0.vec_hash(build_hasher, &mut h).unwrap();
66 UInt64Chunked::from_vec(s.name().clone(), h)
67 }
68
69 fn ensure_sorted_arg(&self, operation: &str) -> PolarsResult<()> {
70 polars_ensure!(self.is_sorted(Default::default())?, InvalidOperation: "argument in operation '{}' is not sorted, please sort the 'expr/series/column' first", operation);
71 Ok(())
72 }
73
74 fn is_sorted(&self, options: SortOptions) -> PolarsResult<bool> {
76 let s = self.as_series();
77 let null_count = s.null_count();
78
79 if (options.descending
81 && (options.nulls_last || null_count == 0)
82 && matches!(s.is_sorted_flag(), IsSorted::Descending))
83 || (!options.descending
84 && (!options.nulls_last || null_count == 0)
85 && matches!(s.is_sorted_flag(), IsSorted::Ascending))
86 {
87 return Ok(true);
88 }
89
90 #[cfg(feature = "dtype-struct")]
92 if matches!(s.dtype(), DataType::Struct(_)) {
93 let encoded = _get_rows_encoded_ca(
94 PlSmallStr::EMPTY,
95 &[s.clone().into()],
96 &[options.descending],
97 &[options.nulls_last],
98 false,
99 )?;
100 let options = SortOptions {
101 descending: false,
102 nulls_last: false,
103 ..options
104 };
105 return encoded.into_series().is_sorted(options);
106 }
107
108 let s_len = s.len();
109 if null_count == s_len {
110 return Ok(true);
112 }
113 if null_count > 0 {
115 if options.nulls_last {
117 if s.slice((s_len - null_count) as i64, null_count)
118 .null_count()
119 != null_count
120 {
121 return Ok(false);
122 }
123 } else if s.slice(0, null_count).null_count() != null_count {
124 return Ok(false);
125 }
126 }
127
128 if s.dtype().is_primitive_numeric() {
129 with_match_physical_numeric_polars_type!(s.dtype(), |$T| {
130 let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
131 return Ok(is_sorted_ca_num::<$T>(ca, options))
132 })
133 }
134
135 let non_null_len = s_len - null_count;
145 if non_null_len <= 1 {
146 return Ok(true);
147 }
148
149 let offset = (!options.nulls_last as i64) * (null_count as i64);
150 let non_null = s.slice(offset, non_null_len);
151 debug_assert_eq!(
152 non_null.null_count(),
153 0,
154 "internal error: `is_sorted` non-null slice contains nulls"
155 );
156
157 #[cfg(feature = "dtype-categorical")]
158 if matches!(non_null.dtype(), DataType::Categorical(_, _)) {
159 return is_sorted_categorical_lexical_adjacent(&non_null, options);
160 }
161
162 let phys = non_null.to_physical_repr();
163 let s_phys = phys.as_ref();
164 if s_phys.dtype().is_primitive_numeric() {
165 with_match_physical_numeric_polars_type!(s_phys.dtype(), |$T| {
166 let ca: &ChunkedArray<$T> = s_phys.as_ref().as_ref().as_ref();
167 return Ok(is_sorted_ca_num::<$T>(ca, options))
168 })
169 }
170
171 match s_phys.dtype() {
172 DataType::Boolean => {
173 let ca = s_phys.bool()?;
174 Ok(is_sorted_ca_bool(ca, options.descending))
175 },
176 DataType::String => {
177 let ca = s_phys.str()?;
178 Ok(is_sorted_adjacent_total_ord(
179 ca.no_null_iter(),
180 options.descending,
181 ))
182 },
183 DataType::Binary => {
184 let ca = s_phys.binary()?;
185 Ok(is_sorted_adjacent_total_ord(
186 ca.no_null_iter(),
187 options.descending,
188 ))
189 },
190 DataType::BinaryOffset => {
191 let ca = s_phys.binary_offset()?;
192 Ok(is_sorted_adjacent_total_ord(
193 ca.no_null_iter(),
194 options.descending,
195 ))
196 },
197 _ => {
198 let cmp_len = non_null_len - 1;
200 let s1 = non_null.slice(0, cmp_len);
201 let s2 = non_null.slice(1, cmp_len);
202 let cmp_op = if options.descending {
203 Series::gt_eq
204 } else {
205 Series::lt_eq
206 };
207 Ok(cmp_op(&s1, &s2)?.all())
208 },
209 }
210 }
211}
212
213fn is_sorted_adjacent_total_ord<T: TotalOrd>(
219 it: impl Iterator<Item = T>,
220 descending: bool,
221) -> bool {
222 let mut it = it;
223 let Some(mut prev) = it.next() else {
225 return true;
226 };
227 if descending {
228 for v in it {
229 if !prev.tot_ge(&v) {
230 return false;
231 }
232 prev = v;
233 }
234 } else {
235 for v in it {
236 if !prev.tot_le(&v) {
237 return false;
238 }
239 prev = v;
240 }
241 }
242 true
243}
244
245#[cfg(feature = "dtype-categorical")]
249fn is_sorted_categorical_lexical_adjacent(s: &Series, options: SortOptions) -> PolarsResult<bool> {
250 polars_ensure!(
251 matches!(s.dtype(), DataType::Categorical(_, _)),
252 ComputeError: "internal error: expected Categorical in lexical `is_sorted` path",
253 );
254
255 with_match_categorical_physical_type!(s.dtype().cat_physical().unwrap(), |$C| {
256 let ca = s.cat::<$C>()?;
257
258 Ok(is_sorted_adjacent_total_ord(
260 ca.iter_str().map(|opt| {
261 opt.expect(
262 "`iter_str` produced None while categorical null_count reported 0 (`is_sorted`)"
263 )
264 }),
265 options.descending,
266 ))
267 })
268}
269
270fn is_sorted_ca_bool(ca: &BooleanChunked, descending: bool) -> bool {
278 let len = ca.len();
279 if len <= 1 {
280 return true;
281 }
282 debug_assert_eq!(
283 ca.null_count(),
284 0,
285 "internal error: `is_sorted_ca_bool` expects a non-null boolean slice"
286 );
287 if descending {
288 let Some(idx) = ca.first_false_idx() else {
289 return true;
290 };
291 !ca.slice(idx as i64, ca.len() - idx).any()
292 } else {
293 let Some(idx) = ca.first_true_idx() else {
294 return true;
295 };
296 ca.slice(idx as i64, ca.len() - idx).all()
297 }
298}
299
300fn check_cmp<T: NumericNative, Cmp: Fn(&T, &T) -> bool>(
301 vals: &[T],
302 f: Cmp,
303 previous: &mut T,
304) -> bool {
305 let mut sorted = true;
306
307 for c in vals.chunks(1024) {
310 for v in c {
313 sorted &= f(previous, v);
314 *previous = *v;
315 }
316 if !sorted {
317 return false;
318 }
319 }
320 sorted
321}
322
323fn is_sorted_ca_num<T: PolarsNumericType>(ca: &ChunkedArray<T>, options: SortOptions) -> bool {
325 if let Ok(vals) = ca.cont_slice() {
326 let mut previous = vals[0];
327 return if options.descending {
328 check_cmp(vals, |prev, c| prev.tot_ge(c), &mut previous)
329 } else {
330 check_cmp(vals, |prev, c| prev.tot_le(c), &mut previous)
331 };
332 };
333
334 if ca.null_count() == 0 {
335 let mut previous = if options.descending {
336 T::Native::max_value()
337 } else {
338 T::Native::min_value()
339 };
340 for arr in ca.downcast_iter() {
341 let vals = arr.values();
342
343 let sorted = if options.descending {
344 check_cmp(vals, |prev, c| prev.tot_ge(c), &mut previous)
345 } else {
346 check_cmp(vals, |prev, c| prev.tot_le(c), &mut previous)
347 };
348 if !sorted {
349 return false;
350 }
351 }
352 return true;
353 };
354
355 let null_count = ca.null_count();
357 if options.nulls_last {
358 let ca = ca.slice(0, ca.len() - null_count);
359 is_sorted_ca_num(&ca, options)
360 } else {
361 let ca = ca.slice(null_count as i64, ca.len() - null_count);
362 is_sorted_ca_num(&ca, options)
363 }
364}
365
366impl SeriesMethods for Series {}