polars_ops/series/ops/
various.rs1use num_traits::Bounded;
2#[cfg(feature = "dtype-struct")]
3use polars_core::chunked_array::ops::row_encode::_get_rows_encoded_ca;
4use polars_core::prelude::arity::unary_elementwise_values;
5use polars_core::prelude::*;
6use polars_core::series::IsSorted;
7use polars_core::with_match_physical_numeric_polars_type;
8#[cfg(feature = "hash")]
9use polars_utils::aliases::PlSeedableRandomStateQuality;
10use polars_utils::total_ord::TotalOrd;
11
12use crate::series::ops::SeriesSealed;
13
14pub trait SeriesMethods: SeriesSealed {
15 fn value_counts(
18 &self,
19 sort: bool,
20 parallel: bool,
21 name: PlSmallStr,
22 normalize: bool,
23 ) -> PolarsResult<DataFrame> {
24 let s = self.as_series();
25 polars_ensure!(
26 s.name() != &name,
27 Duplicate: "using `value_counts` on a column/series named '{}' would lead to duplicate \
28 column names; change `name` to fix", name,
29 );
30 let groups = s.group_tuples(parallel, sort)?;
32 let values = unsafe { s.agg_first(&groups) }
33 .with_name(s.name().clone())
34 .into();
35 let counts = groups.group_count().with_name(name.clone());
36
37 let counts = if normalize {
38 let len = s.len() as f64;
39 let counts: Float64Chunked =
40 unary_elementwise_values(&counts, |count| count as f64 / len);
41 counts.into_column()
42 } else {
43 counts.into_column()
44 };
45
46 let height = counts.len();
47 let cols = vec![values, counts];
48 let df = unsafe { DataFrame::new_no_checks(height, cols) };
49 if sort {
50 df.sort(
51 [name],
52 SortMultipleOptions::default()
53 .with_order_descending(true)
54 .with_multithreaded(parallel),
55 )
56 } else {
57 Ok(df)
58 }
59 }
60
61 #[cfg(feature = "hash")]
62 fn hash(&self, build_hasher: PlSeedableRandomStateQuality) -> UInt64Chunked {
63 let s = self.as_series().to_physical_repr();
64 let mut h = vec![];
65 s.0.vec_hash(build_hasher, &mut h).unwrap();
66 UInt64Chunked::from_vec(s.name().clone(), h)
67 }
68
69 fn ensure_sorted_arg(&self, operation: &str) -> PolarsResult<()> {
70 polars_ensure!(self.is_sorted(Default::default())?, InvalidOperation: "argument in operation '{}' is not sorted, please sort the 'expr/series/column' first", operation);
71 Ok(())
72 }
73
74 fn is_sorted(&self, options: SortOptions) -> PolarsResult<bool> {
76 let s = self.as_series();
77 let null_count = s.null_count();
78
79 if (options.descending
81 && (options.nulls_last || null_count == 0)
82 && matches!(s.is_sorted_flag(), IsSorted::Descending))
83 || (!options.descending
84 && (!options.nulls_last || null_count == 0)
85 && matches!(s.is_sorted_flag(), IsSorted::Ascending))
86 {
87 return Ok(true);
88 }
89
90 #[cfg(feature = "dtype-struct")]
92 if matches!(s.dtype(), DataType::Struct(_)) {
93 let encoded = _get_rows_encoded_ca(
94 PlSmallStr::EMPTY,
95 &[s.clone().into()],
96 &[options.descending],
97 &[options.nulls_last],
98 )?;
99 return encoded.into_series().is_sorted(options);
100 }
101
102 let s_len = s.len();
103 if null_count == s_len {
104 return Ok(true);
106 }
107 if null_count > 0 {
109 if options.nulls_last {
111 if s.slice((s_len - null_count) as i64, null_count)
112 .null_count()
113 != null_count
114 {
115 return Ok(false);
116 }
117 } else if s.slice(0, null_count).null_count() != null_count {
118 return Ok(false);
119 }
120 }
121
122 if s.dtype().is_primitive_numeric() {
123 with_match_physical_numeric_polars_type!(s.dtype(), |$T| {
124 let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
125 return Ok(is_sorted_ca_num::<$T>(ca, options))
126 })
127 }
128
129 let cmp_len = s_len - null_count - 1; let offset = !options.nulls_last as i64 * null_count as i64;
133 let (s1, s2) = (s.slice(offset, cmp_len), s.slice(offset + 1, cmp_len));
134 let cmp_op = if options.descending {
135 Series::gt_eq
136 } else {
137 Series::lt_eq
138 };
139 Ok(cmp_op(&s1, &s2)?.all())
140 }
141}
142
143fn check_cmp<T: NumericNative, Cmp: Fn(&T, &T) -> bool>(
144 vals: &[T],
145 f: Cmp,
146 previous: &mut T,
147) -> bool {
148 let mut sorted = true;
149
150 for c in vals.chunks(1024) {
153 for v in c {
156 sorted &= f(previous, v);
157 *previous = *v;
158 }
159 if !sorted {
160 return false;
161 }
162 }
163 sorted
164}
165
166fn is_sorted_ca_num<T: PolarsNumericType>(ca: &ChunkedArray<T>, options: SortOptions) -> bool {
168 if let Ok(vals) = ca.cont_slice() {
169 let mut previous = vals[0];
170 return if options.descending {
171 check_cmp(vals, |prev, c| prev.tot_ge(c), &mut previous)
172 } else {
173 check_cmp(vals, |prev, c| prev.tot_le(c), &mut previous)
174 };
175 };
176
177 if ca.null_count() == 0 {
178 let mut previous = if options.descending {
179 T::Native::max_value()
180 } else {
181 T::Native::min_value()
182 };
183 for arr in ca.downcast_iter() {
184 let vals = arr.values();
185
186 let sorted = if options.descending {
187 check_cmp(vals, |prev, c| prev.tot_ge(c), &mut previous)
188 } else {
189 check_cmp(vals, |prev, c| prev.tot_le(c), &mut previous)
190 };
191 if !sorted {
192 return false;
193 }
194 }
195 return true;
196 };
197
198 let null_count = ca.null_count();
200 if options.nulls_last {
201 let ca = ca.slice(0, ca.len() - null_count);
202 is_sorted_ca_num(&ca, options)
203 } else {
204 let ca = ca.slice(null_count as i64, ca.len() - null_count);
205 is_sorted_ca_num(&ca, options)
206 }
207}
208
209impl SeriesMethods for Series {}