polars_ops/series/ops/
various.rs1use num_traits::Bounded;
2#[cfg(feature = "dtype-struct")]
3use polars_core::chunked_array::ops::row_encode::_get_rows_encoded_ca;
4use polars_core::prelude::arity::unary_elementwise_values;
5use polars_core::prelude::*;
6use polars_core::series::IsSorted;
7use polars_core::with_match_physical_numeric_polars_type;
8#[cfg(feature = "hash")]
9use polars_utils::aliases::PlSeedableRandomStateQuality;
10use polars_utils::total_ord::TotalOrd;
11
12use crate::series::ops::SeriesSealed;
13
14pub trait SeriesMethods: SeriesSealed {
15 fn value_counts(
18 &self,
19 sort: bool,
20 parallel: bool,
21 name: PlSmallStr,
22 normalize: bool,
23 ) -> PolarsResult<DataFrame> {
24 let s = self.as_series();
25 polars_ensure!(
26 s.name() != &name,
27 Duplicate: "using `value_counts` on a column/series named '{}' would lead to duplicate \
28 column names; change `name` to fix", name,
29 );
30 let groups = s.group_tuples(parallel, sort)?;
32 let values = unsafe { s.agg_first(&groups) }
33 .with_name(s.name().clone())
34 .into();
35 let counts = groups.group_count().with_name(name.clone());
36
37 let counts = if normalize {
38 let len = s.len() as f64;
39 let counts: Float64Chunked =
40 unary_elementwise_values(&counts, |count| count as f64 / len);
41 counts.into_column()
42 } else {
43 counts.into_column()
44 };
45
46 let height = counts.len();
47 let cols = vec![values, counts];
48 let df = unsafe { DataFrame::new_no_checks(height, cols) };
49 if sort {
50 df.sort(
51 [name],
52 SortMultipleOptions::default()
53 .with_order_descending(true)
54 .with_multithreaded(parallel),
55 )
56 } else {
57 Ok(df)
58 }
59 }
60
61 #[cfg(feature = "hash")]
62 fn hash(&self, build_hasher: PlSeedableRandomStateQuality) -> UInt64Chunked {
63 let s = self.as_series().to_physical_repr();
64 match s.dtype() {
65 DataType::List(_) => {
66 let mut ca = s.list().unwrap().clone();
67 crate::chunked_array::hash::hash(&mut ca, build_hasher)
68 },
69 _ => {
70 let mut h = vec![];
71 s.0.vec_hash(build_hasher, &mut h).unwrap();
72 UInt64Chunked::from_vec(s.name().clone(), h)
73 },
74 }
75 }
76
77 fn ensure_sorted_arg(&self, operation: &str) -> PolarsResult<()> {
78 polars_ensure!(self.is_sorted(Default::default())?, InvalidOperation: "argument in operation '{}' is not sorted, please sort the 'expr/series/column' first", operation);
79 Ok(())
80 }
81
82 fn is_sorted(&self, options: SortOptions) -> PolarsResult<bool> {
84 let s = self.as_series();
85 let null_count = s.null_count();
86
87 if (options.descending
89 && (options.nulls_last || null_count == 0)
90 && matches!(s.is_sorted_flag(), IsSorted::Descending))
91 || (!options.descending
92 && (!options.nulls_last || null_count == 0)
93 && matches!(s.is_sorted_flag(), IsSorted::Ascending))
94 {
95 return Ok(true);
96 }
97
98 #[cfg(feature = "dtype-struct")]
100 if matches!(s.dtype(), DataType::Struct(_)) {
101 let encoded = _get_rows_encoded_ca(
102 PlSmallStr::EMPTY,
103 &[s.clone().into()],
104 &[options.descending],
105 &[options.nulls_last],
106 )?;
107 return encoded.into_series().is_sorted(options);
108 }
109
110 let s_len = s.len();
111 if null_count == s_len {
112 return Ok(true);
114 }
115 if null_count > 0 {
117 if options.nulls_last {
119 if s.slice((s_len - null_count) as i64, null_count)
120 .null_count()
121 != null_count
122 {
123 return Ok(false);
124 }
125 } else if s.slice(0, null_count).null_count() != null_count {
126 return Ok(false);
127 }
128 }
129
130 if s.dtype().is_primitive_numeric() {
131 with_match_physical_numeric_polars_type!(s.dtype(), |$T| {
132 let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
133 return Ok(is_sorted_ca_num::<$T>(ca, options))
134 })
135 }
136
137 let cmp_len = s_len - null_count - 1; let offset = !options.nulls_last as i64 * null_count as i64;
141 let (s1, s2) = (s.slice(offset, cmp_len), s.slice(offset + 1, cmp_len));
142 let cmp_op = if options.descending {
143 Series::gt_eq
144 } else {
145 Series::lt_eq
146 };
147 Ok(cmp_op(&s1, &s2)?.all())
148 }
149}
150
151fn check_cmp<T: NumericNative, Cmp: Fn(&T, &T) -> bool>(
152 vals: &[T],
153 f: Cmp,
154 previous: &mut T,
155) -> bool {
156 let mut sorted = true;
157
158 for c in vals.chunks(1024) {
161 for v in c {
164 sorted &= f(previous, v);
165 *previous = *v;
166 }
167 if !sorted {
168 return false;
169 }
170 }
171 sorted
172}
173
174fn is_sorted_ca_num<T: PolarsNumericType>(ca: &ChunkedArray<T>, options: SortOptions) -> bool {
176 if let Ok(vals) = ca.cont_slice() {
177 let mut previous = vals[0];
178 return if options.descending {
179 check_cmp(vals, |prev, c| prev.tot_ge(c), &mut previous)
180 } else {
181 check_cmp(vals, |prev, c| prev.tot_le(c), &mut previous)
182 };
183 };
184
185 if ca.null_count() == 0 {
186 let mut previous = if options.descending {
187 T::Native::max_value()
188 } else {
189 T::Native::min_value()
190 };
191 for arr in ca.downcast_iter() {
192 let vals = arr.values();
193
194 let sorted = if options.descending {
195 check_cmp(vals, |prev, c| prev.tot_ge(c), &mut previous)
196 } else {
197 check_cmp(vals, |prev, c| prev.tot_le(c), &mut previous)
198 };
199 if !sorted {
200 return false;
201 }
202 }
203 return true;
204 };
205
206 let null_count = ca.null_count();
208 if options.nulls_last {
209 let ca = ca.slice(0, ca.len() - null_count);
210 is_sorted_ca_num(&ca, options)
211 } else {
212 let ca = ca.slice(null_count as i64, ca.len() - null_count);
213 is_sorted_ca_num(&ca, options)
214 }
215}
216
217impl SeriesMethods for Series {}