1use num_traits::Bounded;
2#[cfg(feature = "dtype-struct")]
3use polars_core::chunked_array::ops::row_encode::_get_rows_encoded_ca;
4use polars_core::prelude::arity::unary_elementwise_values;
5use polars_core::prelude::*;
6use polars_core::series::IsSorted;
7use polars_core::with_match_physical_numeric_polars_type;
8#[cfg(feature = "hash")]
9use polars_utils::aliases::PlSeedableRandomStateQuality;
10use polars_utils::total_ord::TotalOrd;
11
12use crate::series::ops::SeriesSealed;
13
14pub trait SeriesMethods: SeriesSealed {
15 fn value_counts(
18 &self,
19 sort: bool,
20 parallel: bool,
21 name: PlSmallStr,
22 normalize: bool,
23 ) -> PolarsResult<DataFrame> {
24 let s = self.as_series();
25 polars_ensure!(
26 s.name() != &name,
27 Duplicate: "using `value_counts` on a column/series named '{}' would lead to duplicate \
28 column names; change `name` to fix", name,
29 );
30 let groups = s.group_tuples(parallel, sort)?;
31 let values = unsafe { s.agg_first(&groups) }
32 .with_name(s.name().clone())
33 .into();
34 let counts = groups.group_count().with_name(name.clone());
35
36 let counts = if normalize {
37 let len = s.len() as f64;
38 let counts: Float64Chunked =
39 unary_elementwise_values(&counts, |count| count as f64 / len);
40 counts.into_column()
41 } else {
42 counts.into_column()
43 };
44
45 let height = counts.len();
46 let cols = vec![values, counts];
47 let df = unsafe { DataFrame::new_unchecked(height, cols) };
48 if sort {
49 df.sort(
50 [name],
51 SortMultipleOptions::default()
52 .with_order_descending(true)
53 .with_multithreaded(parallel),
54 )
55 } else {
56 Ok(df)
57 }
58 }
59
60 #[cfg(feature = "hash")]
61 fn hash(&self, build_hasher: PlSeedableRandomStateQuality) -> UInt64Chunked {
62 let s = self.as_series();
63 let mut h = vec![];
64 s.0.vec_hash(build_hasher, &mut h).unwrap();
65 UInt64Chunked::from_vec(s.name().clone(), h)
66 }
67
68 fn ensure_sorted_arg(&self, operation: &str) -> PolarsResult<()> {
69 polars_ensure!(
70 self.is_sorted(SortOptions::default())?,
71 InvalidOperation: "argument in operation '{}' is not sorted, please sort the 'expr/series/column' first",
72 operation
73 );
74 Ok(())
75 }
76
77 fn is_sorted(&self, options: SortOptions) -> PolarsResult<bool> {
81 is_sorted_impl(self.as_series(), options)
82 }
83
84 fn is_sorted_any(
85 &self,
86 descending: Option<bool>,
87 nulls_last: Option<bool>,
88 ) -> PolarsResult<bool> {
89 let s = self.as_series();
90 let (descending, nulls_last) = resolve_sort_options(s, descending, nulls_last)?;
91 let options = SortOptions {
94 descending: descending.unwrap_or(false),
95 nulls_last: nulls_last.unwrap_or(false),
96 ..Default::default()
97 };
98 is_sorted_impl(s, options)
99 }
100}
101
102fn is_sorted_impl(s: &Series, options: SortOptions) -> PolarsResult<bool> {
103 let null_count = s.null_count();
104
105 if (options.descending
106 && (options.nulls_last || null_count == 0)
107 && matches!(s.is_sorted_flag(), IsSorted::Descending))
108 || (!options.descending
109 && (!options.nulls_last || null_count == 0)
110 && matches!(s.is_sorted_flag(), IsSorted::Ascending))
111 {
112 return Ok(true);
113 }
114
115 #[cfg(feature = "dtype-struct")]
116 if matches!(s.dtype(), DataType::Struct(_)) {
117 let encoded = _get_rows_encoded_ca(
118 PlSmallStr::EMPTY,
119 &[s.clone().into()],
120 &[options.descending],
121 &[options.nulls_last],
122 false,
123 )?;
124 let options = SortOptions {
125 descending: false,
126 nulls_last: false,
127 ..options
128 };
129 return is_sorted_impl(&encoded.into_series(), options);
130 }
131
132 let s_len = s.len();
133 if null_count == s_len {
134 return Ok(true);
136 }
137 if null_count > 0 {
139 if options.nulls_last {
140 if s.slice((s_len - null_count) as i64, null_count)
141 .null_count()
142 != null_count
143 {
144 return Ok(false);
145 }
146 } else if s.slice(0, null_count).null_count() != null_count {
147 return Ok(false);
148 }
149 }
150
151 if s.dtype().is_primitive_numeric() {
152 with_match_physical_numeric_polars_type!(s.dtype(), |$T| {
153 let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
154 return Ok(is_sorted_ca_num::<$T>(ca, options))
155 })
156 }
157
158 let non_null_len = s_len - null_count;
168 if non_null_len <= 1 {
169 return Ok(true);
170 }
171
172 let offset = (!options.nulls_last as i64) * (null_count as i64);
173 let non_null = s.slice(offset, non_null_len);
174 debug_assert_eq!(
175 non_null.null_count(),
176 0,
177 "internal error: `is_sorted` non-null slice contains nulls"
178 );
179
180 #[cfg(feature = "dtype-categorical")]
181 if matches!(non_null.dtype(), DataType::Categorical(_, _)) {
182 return is_sorted_categorical_lexical_adjacent(&non_null, options);
183 }
184
185 let phys = non_null.to_physical_repr();
186 let s_phys = phys.as_ref();
187 if s_phys.dtype().is_primitive_numeric() {
188 with_match_physical_numeric_polars_type!(s_phys.dtype(), |$T| {
189 let ca: &ChunkedArray<$T> = s_phys.as_ref().as_ref().as_ref();
190 return Ok(is_sorted_ca_num::<$T>(ca, options))
191 })
192 }
193
194 match s_phys.dtype() {
195 DataType::Boolean => {
196 let ca = s_phys.bool()?;
197 Ok(is_sorted_ca_bool(ca, options.descending))
198 },
199 DataType::String => {
200 let ca = s_phys.str()?;
201 Ok(is_sorted_adjacent_total_ord(
202 ca.no_null_iter(),
203 options.descending,
204 ))
205 },
206 DataType::Binary => {
207 let ca = s_phys.binary()?;
208 Ok(is_sorted_adjacent_total_ord(
209 ca.no_null_iter(),
210 options.descending,
211 ))
212 },
213 DataType::BinaryOffset => {
214 let ca = s_phys.binary_offset()?;
215 Ok(is_sorted_adjacent_total_ord(
216 ca.no_null_iter(),
217 options.descending,
218 ))
219 },
220 _ => {
221 let cmp_len = non_null_len - 1;
223 let s1 = non_null.slice(0, cmp_len);
224 let s2 = non_null.slice(1, cmp_len);
225 let cmp_op = if options.descending {
226 Series::gt_eq
227 } else {
228 Series::lt_eq
229 };
230 Ok(cmp_op(&s1, &s2)?.all())
231 },
232 }
233}
234
235fn is_sorted_adjacent_total_ord<T: TotalOrd>(
241 it: impl Iterator<Item = T>,
242 descending: bool,
243) -> bool {
244 let mut it = it;
245 let Some(mut prev) = it.next() else {
247 return true;
248 };
249 if descending {
250 for v in it {
251 if !prev.tot_ge(&v) {
252 return false;
253 }
254 prev = v;
255 }
256 } else {
257 for v in it {
258 if !prev.tot_le(&v) {
259 return false;
260 }
261 prev = v;
262 }
263 }
264 true
265}
266
267#[cfg(feature = "dtype-categorical")]
271fn is_sorted_categorical_lexical_adjacent(s: &Series, options: SortOptions) -> PolarsResult<bool> {
272 polars_ensure!(
273 matches!(s.dtype(), DataType::Categorical(_, _)),
274 ComputeError: "internal error: expected Categorical in lexical `is_sorted` path",
275 );
276
277 with_match_categorical_physical_type!(s.dtype().cat_physical().unwrap(), |$C| {
278 let ca = s.cat::<$C>()?;
279
280 Ok(is_sorted_adjacent_total_ord(
282 ca.iter_str().map(|opt| {
283 opt.expect(
284 "`iter_str` produced None while categorical null_count reported 0 (`is_sorted`)"
285 )
286 }),
287 options.descending,
288 ))
289 })
290}
291
292fn is_sorted_ca_bool(ca: &BooleanChunked, descending: bool) -> bool {
300 let len = ca.len();
301 if len <= 1 {
302 return true;
303 }
304 debug_assert_eq!(
305 ca.null_count(),
306 0,
307 "internal error: `is_sorted_ca_bool` expects a non-null boolean slice"
308 );
309 if descending {
310 let Some(idx) = ca.first_false_idx() else {
311 return true;
312 };
313 !ca.slice(idx as i64, ca.len() - idx).any()
314 } else {
315 let Some(idx) = ca.first_true_idx() else {
316 return true;
317 };
318 ca.slice(idx as i64, ca.len() - idx).all()
319 }
320}
321
322pub fn resolve_sort_options(
334 s: &Series,
335 descending: Option<bool>,
336 nulls_last: Option<bool>,
337) -> PolarsResult<(Option<bool>, Option<bool>)> {
338 let nulls_last = match nulls_last {
339 Some(n) => Some(n),
340 None => infer_nulls_last(s),
341 };
342
343 let descending = match descending {
344 Some(d) => Some(d),
345 None => infer_descending(s, nulls_last.unwrap_or(false))?,
346 };
347
348 Ok((descending, nulls_last))
349}
350
351fn infer_nulls_last(s: &Series) -> Option<bool> {
355 let null_count = s.null_count();
356 let s_len = s.len();
357
358 if null_count == 0 || null_count == s_len {
359 return None;
360 }
361
362 if s.slice((s_len - null_count) as i64, null_count)
363 .null_count()
364 == null_count
365 {
366 Some(true)
367 } else if s.slice(0, null_count).null_count() == null_count {
368 Some(false)
369 } else {
370 None
371 }
372}
373
374fn infer_descending(s: &Series, nulls_last: bool) -> PolarsResult<Option<bool>> {
375 let null_count = s.null_count();
376 let non_null_len = s.len() - null_count;
377 if non_null_len < 2 {
378 return Ok(None);
379 }
380
381 let non_null_start = if nulls_last { 0 } else { null_count };
382 let non_null = s.slice(non_null_start as i64, non_null_len);
383
384 let a = non_null.slice(0, non_null_len - 1);
385 let b = non_null.slice(1, non_null_len - 1);
386
387 let lt = a.lt(&b)?;
388 let gt = a.gt(&b)?;
389
390 let lt_first = lt.iter().position(|v| v == Some(true));
391 let gt_first = gt.iter().position(|v| v == Some(true));
392
393 Ok(match (lt_first, gt_first) {
394 (None, None) => None,
395 (Some(_), None) => Some(false),
396 (None, Some(_)) => Some(true),
397 (Some(l), Some(g)) => Some(g < l),
398 })
399}
400
401fn check_cmp<T: NumericNative, Cmp: Fn(&T, &T) -> bool>(
402 vals: &[T],
403 f: Cmp,
404 previous: &mut T,
405) -> bool {
406 let mut sorted = true;
407 for c in vals.chunks(1024) {
408 for v in c {
409 sorted &= f(previous, v);
410 *previous = *v;
411 }
412 if !sorted {
413 return false;
414 }
415 }
416 sorted
417}
418
419fn is_sorted_ca_num<T: PolarsNumericType>(ca: &ChunkedArray<T>, options: SortOptions) -> bool {
420 if let Ok(vals) = ca.cont_slice() {
421 let mut previous = vals[0];
422 return if options.descending {
423 check_cmp(vals, |prev, c| prev.tot_ge(c), &mut previous)
424 } else {
425 check_cmp(vals, |prev, c| prev.tot_le(c), &mut previous)
426 };
427 };
428
429 if ca.null_count() == 0 {
430 let mut previous = if options.descending {
431 T::Native::max_value()
432 } else {
433 T::Native::min_value()
434 };
435 for arr in ca.downcast_iter() {
436 let vals = arr.values();
437 let sorted = if options.descending {
438 check_cmp(vals, |prev, c| prev.tot_ge(c), &mut previous)
439 } else {
440 check_cmp(vals, |prev, c| prev.tot_le(c), &mut previous)
441 };
442 if !sorted {
443 return false;
444 }
445 }
446 return true;
447 };
448
449 let null_count = ca.null_count();
450 if options.nulls_last {
451 let ca = ca.slice(0, ca.len() - null_count);
452 is_sorted_ca_num(&ca, options)
453 } else {
454 let ca = ca.slice(null_count as i64, ca.len() - null_count);
455 is_sorted_ca_num(&ca, options)
456 }
457}
458
459impl SeriesMethods for Series {}