Skip to main content

polars_ops/series/ops/
various.rs

1use num_traits::Bounded;
2#[cfg(feature = "dtype-struct")]
3use polars_core::chunked_array::ops::row_encode::_get_rows_encoded_ca;
4use polars_core::prelude::arity::unary_elementwise_values;
5use polars_core::prelude::*;
6use polars_core::series::IsSorted;
7use polars_core::with_match_physical_numeric_polars_type;
8#[cfg(feature = "hash")]
9use polars_utils::aliases::PlSeedableRandomStateQuality;
10use polars_utils::total_ord::TotalOrd;
11
12use crate::series::ops::SeriesSealed;
13
14pub trait SeriesMethods: SeriesSealed {
15    /// Create a [`DataFrame`] with the unique `values` of this [`Series`] and a column `"counts"`
16    /// with dtype [`IdxType`]
17    fn value_counts(
18        &self,
19        sort: bool,
20        parallel: bool,
21        name: PlSmallStr,
22        normalize: bool,
23    ) -> PolarsResult<DataFrame> {
24        let s = self.as_series();
25        polars_ensure!(
26            s.name() != &name,
27            Duplicate: "using `value_counts` on a column/series named '{}' would lead to duplicate \
28            column names; change `name` to fix", name,
29        );
30        let groups = s.group_tuples(parallel, sort)?;
31        let values = unsafe { s.agg_first(&groups) }
32            .with_name(s.name().clone())
33            .into();
34        let counts = groups.group_count().with_name(name.clone());
35
36        let counts = if normalize {
37            let len = s.len() as f64;
38            let counts: Float64Chunked =
39                unary_elementwise_values(&counts, |count| count as f64 / len);
40            counts.into_column()
41        } else {
42            counts.into_column()
43        };
44
45        let height = counts.len();
46        let cols = vec![values, counts];
47        let df = unsafe { DataFrame::new_unchecked(height, cols) };
48        if sort {
49            df.sort(
50                [name],
51                SortMultipleOptions::default()
52                    .with_order_descending(true)
53                    .with_multithreaded(parallel),
54            )
55        } else {
56            Ok(df)
57        }
58    }
59
60    #[cfg(feature = "hash")]
61    fn hash(&self, build_hasher: PlSeedableRandomStateQuality) -> UInt64Chunked {
62        let s = self.as_series();
63        let mut h = vec![];
64        s.0.vec_hash(build_hasher, &mut h).unwrap();
65        UInt64Chunked::from_vec(s.name().clone(), h)
66    }
67
68    fn ensure_sorted_arg(&self, operation: &str) -> PolarsResult<()> {
69        polars_ensure!(
70            self.is_sorted(SortOptions::default())?,
71            InvalidOperation: "argument in operation '{}' is not sorted, please sort the 'expr/series/column' first",
72            operation
73        );
74        Ok(())
75    }
76
77    /// Checks if a [`Series`] is sorted with concrete options. Tries to fail fast.
78    ///
79    /// For inference of `descending` / `nulls_last`, see [`Self::is_sorted_any`].
80    fn is_sorted(&self, options: SortOptions) -> PolarsResult<bool> {
81        is_sorted_impl(self.as_series(), options)
82    }
83
84    fn is_sorted_any(
85        &self,
86        descending: Option<bool>,
87        nulls_last: Option<bool>,
88    ) -> PolarsResult<bool> {
89        let s = self.as_series();
90        let (descending, nulls_last) = resolve_sort_options(s, descending, nulls_last)?;
91        // When an option could not be inferred the series is trivially sorted along that axis
92        // (e.g. all non-null values equal, or no nulls), so any value works; default to `false`.
93        let options = SortOptions {
94            descending: descending.unwrap_or(false),
95            nulls_last: nulls_last.unwrap_or(false),
96            ..Default::default()
97        };
98        is_sorted_impl(s, options)
99    }
100}
101
102fn is_sorted_impl(s: &Series, options: SortOptions) -> PolarsResult<bool> {
103    let null_count = s.null_count();
104
105    if (options.descending
106        && (options.nulls_last || null_count == 0)
107        && matches!(s.is_sorted_flag(), IsSorted::Descending))
108        || (!options.descending
109            && (!options.nulls_last || null_count == 0)
110            && matches!(s.is_sorted_flag(), IsSorted::Ascending))
111    {
112        return Ok(true);
113    }
114
115    #[cfg(feature = "dtype-struct")]
116    if matches!(s.dtype(), DataType::Struct(_)) {
117        let encoded = _get_rows_encoded_ca(
118            PlSmallStr::EMPTY,
119            &[s.clone().into()],
120            &[options.descending],
121            &[options.nulls_last],
122            false,
123        )?;
124        let options = SortOptions {
125            descending: false,
126            nulls_last: false,
127            ..options
128        };
129        return is_sorted_impl(&encoded.into_series(), options);
130    }
131
132    let s_len = s.len();
133    if null_count == s_len {
134        // All nulls are equal.
135        return Ok(true);
136    }
137    // Check if nulls are in the right location.
138    if null_count > 0 {
139        if options.nulls_last {
140            if s.slice((s_len - null_count) as i64, null_count)
141                .null_count()
142                != null_count
143            {
144                return Ok(false);
145            }
146        } else if s.slice(0, null_count).null_count() != null_count {
147            return Ok(false);
148        }
149    }
150
151    if s.dtype().is_primitive_numeric() {
152        with_match_physical_numeric_polars_type!(s.dtype(), |$T| {
153            let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
154            return Ok(is_sorted_ca_num::<$T>(ca, options))
155        })
156    }
157
158    // Logical non-primitive types (e.g. String, Categorical, List, …): take only the contiguous
159    // non-null values (`non_null`). For ordinary `Categorical` use `iter_str` (below); otherwise
160    // `to_physical_repr`, then
161    // (1) for ordinary [`DataType::Categorical`], compare adjacent **decoded strings** (`iter_str`),
162    // (2) reuse `is_sorted_ca_num` when the physical type is primitive numeric (temporal /
163    //     Decimal, Enum-as-integer, …) after `to_physical_repr`;
164    // (3) uses a dedicated kernel for boolean values,
165    // (4) else scans string / binary values with `TotalOrd`,
166    // (5) else fall back to pairwise `Series::lt_eq` / `gt_eq` (nested types, etc.).
167    let non_null_len = s_len - null_count;
168    if non_null_len <= 1 {
169        return Ok(true);
170    }
171
172    let offset = (!options.nulls_last as i64) * (null_count as i64);
173    let non_null = s.slice(offset, non_null_len);
174    debug_assert_eq!(
175        non_null.null_count(),
176        0,
177        "internal error: `is_sorted` non-null slice contains nulls"
178    );
179
180    #[cfg(feature = "dtype-categorical")]
181    if matches!(non_null.dtype(), DataType::Categorical(_, _)) {
182        return is_sorted_categorical_lexical_adjacent(&non_null, options);
183    }
184
185    let phys = non_null.to_physical_repr();
186    let s_phys = phys.as_ref();
187    if s_phys.dtype().is_primitive_numeric() {
188        with_match_physical_numeric_polars_type!(s_phys.dtype(), |$T| {
189            let ca: &ChunkedArray<$T> = s_phys.as_ref().as_ref().as_ref();
190            return Ok(is_sorted_ca_num::<$T>(ca, options))
191        })
192    }
193
194    match s_phys.dtype() {
195        DataType::Boolean => {
196            let ca = s_phys.bool()?;
197            Ok(is_sorted_ca_bool(ca, options.descending))
198        },
199        DataType::String => {
200            let ca = s_phys.str()?;
201            Ok(is_sorted_adjacent_total_ord(
202                ca.no_null_iter(),
203                options.descending,
204            ))
205        },
206        DataType::Binary => {
207            let ca = s_phys.binary()?;
208            Ok(is_sorted_adjacent_total_ord(
209                ca.no_null_iter(),
210                options.descending,
211            ))
212        },
213        DataType::BinaryOffset => {
214            let ca = s_phys.binary_offset()?;
215            Ok(is_sorted_adjacent_total_ord(
216                ca.no_null_iter(),
217                options.descending,
218            ))
219        },
220        _ => {
221            // `non_null` excludes nulls already; compare `non_null[..-1]` with `non_null[1..]`.
222            let cmp_len = non_null_len - 1;
223            let s1 = non_null.slice(0, cmp_len);
224            let s2 = non_null.slice(1, cmp_len);
225            let cmp_op = if options.descending {
226                Series::gt_eq
227            } else {
228                Series::lt_eq
229            };
230            Ok(cmp_op(&s1, &s2)?.all())
231        },
232    }
233}
234
235/// Returns whether iterator elements are non-decreasing (`descending == false`) or non-increasing
236/// (`descending == true`) under [`TotalOrd`].
237///
238/// Assumes the iterator `it` yields **only** the non-null values in row order (one item per row). An empty
239/// iterator is considered sorted. Stops at the first pair that violates the ordering.
240fn is_sorted_adjacent_total_ord<T: TotalOrd>(
241    it: impl Iterator<Item = T>,
242    descending: bool,
243) -> bool {
244    let mut it = it;
245    // Sliding window: `prev` is always the previous element; seed with the first value.
246    let Some(mut prev) = it.next() else {
247        return true;
248    };
249    if descending {
250        for v in it {
251            if !prev.tot_ge(&v) {
252                return false;
253            }
254            prev = v;
255        }
256    } else {
257        for v in it {
258            if !prev.tot_le(&v) {
259                return false;
260            }
261            prev = v;
262        }
263    }
264    true
265}
266
267/// Ordinary [`DataType::Categorical`]: lexical order via adjacent decoded strings (`iter_str`), same as
268/// `Series::lt_eq` / `gt_eq`, but without a Boolean series. Caller must pass a contiguous **non-null**
269/// slice.
270#[cfg(feature = "dtype-categorical")]
271fn is_sorted_categorical_lexical_adjacent(s: &Series, options: SortOptions) -> PolarsResult<bool> {
272    polars_ensure!(
273        matches!(s.dtype(), DataType::Categorical(_, _)),
274        ComputeError: "internal error: expected Categorical in lexical `is_sorted` path",
275    );
276
277    with_match_categorical_physical_type!(s.dtype().cat_physical().unwrap(), |$C| {
278        let ca = s.cat::<$C>()?;
279
280        // `ca.null_count() == 0` implies each `phys` row decodes via `iter_str` to `Some(..)`
281        Ok(is_sorted_adjacent_total_ord(
282            ca.iter_str().map(|opt| {
283                opt.expect(
284                    "`iter_str` produced None while categorical null_count reported 0 (`is_sorted`)"
285                )
286            }),
287            options.descending,
288        ))
289    })
290}
291
292/// Booleans ordered as [`false`] < [`true`] (same as inequality comparisons on [`BooleanChunked`]).
293///
294/// Monotone order is equivalent to at most one plateau change: ascending is `F…FT…T`, descending is
295/// `T…TF…F`. Implemented with `first_true_idx` / `first_false_idx` plus a global false/true count
296/// check.
297///
298/// Caller must ensure **`ca` has no nulls** on the flattened series (see `non_null` slice above).
299fn is_sorted_ca_bool(ca: &BooleanChunked, descending: bool) -> bool {
300    let len = ca.len();
301    if len <= 1 {
302        return true;
303    }
304    debug_assert_eq!(
305        ca.null_count(),
306        0,
307        "internal error: `is_sorted_ca_bool` expects a non-null boolean slice"
308    );
309    if descending {
310        let Some(idx) = ca.first_false_idx() else {
311            return true;
312        };
313        !ca.slice(idx as i64, ca.len() - idx).any()
314    } else {
315        let Some(idx) = ca.first_true_idx() else {
316            return true;
317        };
318        ca.slice(idx as i64, ca.len() - idx).all()
319    }
320}
321
322/// Infers the `(descending, nulls_last)` sort options for `s`, honoring any provided hints.
323///
324/// Each returned value is `Some` when known — taken from the corresponding hint when given,
325/// otherwise inferred from the data — and `None` when it cannot be inferred from `s` alone:
326/// - `descending` is `None` when there are fewer than two distinct non-null values, so no direction
327///   is implied.
328/// - `nulls_last` is `None` when `s` has no nulls, is entirely null, or the nulls are interleaved
329///   (the last of which is not sorted under any placement and is rejected by the `is_sorted` check).
330///
331/// The two axes are independent, so callers can use whichever was determined even when the other
332/// could not be.
333pub fn resolve_sort_options(
334    s: &Series,
335    descending: Option<bool>,
336    nulls_last: Option<bool>,
337) -> PolarsResult<(Option<bool>, Option<bool>)> {
338    let nulls_last = match nulls_last {
339        Some(n) => Some(n),
340        None => infer_nulls_last(s),
341    };
342
343    let descending = match descending {
344        Some(d) => Some(d),
345        None => infer_descending(s, nulls_last.unwrap_or(false))?,
346    };
347
348    Ok((descending, nulls_last))
349}
350
351/// Infers null placement from `s`: `Some(true)` if all nulls sit at the tail, `Some(false)` if all
352/// sit at the head, and `None` if there are no nulls, `s` is entirely null, or the nulls are
353/// interleaved (the latter is not sorted under any placement; the `is_sorted` check rejects it).
354fn infer_nulls_last(s: &Series) -> Option<bool> {
355    let null_count = s.null_count();
356    let s_len = s.len();
357
358    if null_count == 0 || null_count == s_len {
359        return None;
360    }
361
362    if s.slice((s_len - null_count) as i64, null_count)
363        .null_count()
364        == null_count
365    {
366        Some(true)
367    } else if s.slice(0, null_count).null_count() == null_count {
368        Some(false)
369    } else {
370        None
371    }
372}
373
374fn infer_descending(s: &Series, nulls_last: bool) -> PolarsResult<Option<bool>> {
375    let null_count = s.null_count();
376    let non_null_len = s.len() - null_count;
377    if non_null_len < 2 {
378        return Ok(None);
379    }
380
381    let non_null_start = if nulls_last { 0 } else { null_count };
382    let non_null = s.slice(non_null_start as i64, non_null_len);
383
384    let a = non_null.slice(0, non_null_len - 1);
385    let b = non_null.slice(1, non_null_len - 1);
386
387    let lt = a.lt(&b)?;
388    let gt = a.gt(&b)?;
389
390    let lt_first = lt.iter().position(|v| v == Some(true));
391    let gt_first = gt.iter().position(|v| v == Some(true));
392
393    Ok(match (lt_first, gt_first) {
394        (None, None) => None,
395        (Some(_), None) => Some(false),
396        (None, Some(_)) => Some(true),
397        (Some(l), Some(g)) => Some(g < l),
398    })
399}
400
401fn check_cmp<T: NumericNative, Cmp: Fn(&T, &T) -> bool>(
402    vals: &[T],
403    f: Cmp,
404    previous: &mut T,
405) -> bool {
406    let mut sorted = true;
407    for c in vals.chunks(1024) {
408        for v in c {
409            sorted &= f(previous, v);
410            *previous = *v;
411        }
412        if !sorted {
413            return false;
414        }
415    }
416    sorted
417}
418
419fn is_sorted_ca_num<T: PolarsNumericType>(ca: &ChunkedArray<T>, options: SortOptions) -> bool {
420    if let Ok(vals) = ca.cont_slice() {
421        let mut previous = vals[0];
422        return if options.descending {
423            check_cmp(vals, |prev, c| prev.tot_ge(c), &mut previous)
424        } else {
425            check_cmp(vals, |prev, c| prev.tot_le(c), &mut previous)
426        };
427    };
428
429    if ca.null_count() == 0 {
430        let mut previous = if options.descending {
431            T::Native::max_value()
432        } else {
433            T::Native::min_value()
434        };
435        for arr in ca.downcast_iter() {
436            let vals = arr.values();
437            let sorted = if options.descending {
438                check_cmp(vals, |prev, c| prev.tot_ge(c), &mut previous)
439            } else {
440                check_cmp(vals, |prev, c| prev.tot_le(c), &mut previous)
441            };
442            if !sorted {
443                return false;
444            }
445        }
446        return true;
447    };
448
449    let null_count = ca.null_count();
450    if options.nulls_last {
451        let ca = ca.slice(0, ca.len() - null_count);
452        is_sorted_ca_num(&ca, options)
453    } else {
454        let ca = ca.slice(null_count as i64, ca.len() - null_count);
455        is_sorted_ca_num(&ca, options)
456    }
457}
458
459impl SeriesMethods for Series {}