polars_ops/series/ops/
various.rs1use num_traits::Bounded;
2#[cfg(feature = "dtype-struct")]
3use polars_core::chunked_array::ops::row_encode::_get_rows_encoded_ca;
4use polars_core::prelude::arity::unary_elementwise_values;
5use polars_core::prelude::*;
6use polars_core::series::IsSorted;
7use polars_core::with_match_physical_numeric_polars_type;
8use polars_utils::aliases::PlSeedableRandomStateQuality;
9use polars_utils::total_ord::TotalOrd;
10
11use crate::series::ops::SeriesSealed;
12
13pub trait SeriesMethods: SeriesSealed {
14 fn value_counts(
17 &self,
18 sort: bool,
19 parallel: bool,
20 name: PlSmallStr,
21 normalize: bool,
22 ) -> PolarsResult<DataFrame> {
23 let s = self.as_series();
24 polars_ensure!(
25 s.name() != &name,
26 Duplicate: "using `value_counts` on a column/series named '{}' would lead to duplicate \
27 column names; change `name` to fix", name,
28 );
29 let groups = s.group_tuples(parallel, sort)?;
31 let values = unsafe { s.agg_first(&groups) }.into();
32 let counts = groups.group_count().with_name(name.clone());
33
34 let counts = if normalize {
35 let len = s.len() as f64;
36 let counts: Float64Chunked =
37 unary_elementwise_values(&counts, |count| count as f64 / len);
38 counts.into_column()
39 } else {
40 counts.into_column()
41 };
42
43 let height = counts.len();
44 let cols = vec![values, counts];
45 let df = unsafe { DataFrame::new_no_checks(height, cols) };
46 if sort {
47 df.sort(
48 [name],
49 SortMultipleOptions::default()
50 .with_order_descending(true)
51 .with_multithreaded(parallel),
52 )
53 } else {
54 Ok(df)
55 }
56 }
57
58 #[cfg(feature = "hash")]
59 fn hash(&self, build_hasher: PlSeedableRandomStateQuality) -> UInt64Chunked {
60 let s = self.as_series().to_physical_repr();
61 match s.dtype() {
62 DataType::List(_) => {
63 let mut ca = s.list().unwrap().clone();
64 crate::chunked_array::hash::hash(&mut ca, build_hasher)
65 },
66 _ => {
67 let mut h = vec![];
68 s.0.vec_hash(build_hasher, &mut h).unwrap();
69 UInt64Chunked::from_vec(s.name().clone(), h)
70 },
71 }
72 }
73
74 fn ensure_sorted_arg(&self, operation: &str) -> PolarsResult<()> {
75 polars_ensure!(self.is_sorted(Default::default())?, InvalidOperation: "argument in operation '{}' is not sorted, please sort the 'expr/series/column' first", operation);
76 Ok(())
77 }
78
79 fn is_sorted(&self, options: SortOptions) -> PolarsResult<bool> {
81 let s = self.as_series();
82 let null_count = s.null_count();
83
84 if (options.descending
86 && (options.nulls_last || null_count == 0)
87 && matches!(s.is_sorted_flag(), IsSorted::Descending))
88 || (!options.descending
89 && (!options.nulls_last || null_count == 0)
90 && matches!(s.is_sorted_flag(), IsSorted::Ascending))
91 {
92 return Ok(true);
93 }
94
95 #[cfg(feature = "dtype-struct")]
97 if matches!(s.dtype(), DataType::Struct(_)) {
98 let encoded = _get_rows_encoded_ca(
99 PlSmallStr::EMPTY,
100 &[s.clone().into()],
101 &[options.descending],
102 &[options.nulls_last],
103 )?;
104 return encoded.into_series().is_sorted(options);
105 }
106
107 let s_len = s.len();
108 if null_count == s_len {
109 return Ok(true);
111 }
112 if null_count > 0 {
114 if options.nulls_last {
116 if s.slice((s_len - null_count) as i64, null_count)
117 .null_count()
118 != null_count
119 {
120 return Ok(false);
121 }
122 } else if s.slice(0, null_count).null_count() != null_count {
123 return Ok(false);
124 }
125 }
126
127 if s.dtype().is_primitive_numeric() {
128 with_match_physical_numeric_polars_type!(s.dtype(), |$T| {
129 let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
130 return Ok(is_sorted_ca_num::<$T>(ca, options))
131 })
132 }
133
134 let cmp_len = s_len - null_count - 1; let offset = !options.nulls_last as i64 * null_count as i64;
138 let (s1, s2) = (s.slice(offset, cmp_len), s.slice(offset + 1, cmp_len));
139 let cmp_op = if options.descending {
140 Series::gt_eq
141 } else {
142 Series::lt_eq
143 };
144 Ok(cmp_op(&s1, &s2)?.all())
145 }
146}
147
148fn check_cmp<T: NumericNative, Cmp: Fn(&T, &T) -> bool>(
149 vals: &[T],
150 f: Cmp,
151 previous: &mut T,
152) -> bool {
153 let mut sorted = true;
154
155 for c in vals.chunks(1024) {
158 for v in c {
161 sorted &= f(previous, v);
162 *previous = *v;
163 }
164 if !sorted {
165 return false;
166 }
167 }
168 sorted
169}
170
171fn is_sorted_ca_num<T: PolarsNumericType>(ca: &ChunkedArray<T>, options: SortOptions) -> bool {
173 if let Ok(vals) = ca.cont_slice() {
174 let mut previous = vals[0];
175 return if options.descending {
176 check_cmp(vals, |prev, c| prev.tot_ge(c), &mut previous)
177 } else {
178 check_cmp(vals, |prev, c| prev.tot_le(c), &mut previous)
179 };
180 };
181
182 if ca.null_count() == 0 {
183 let mut previous = if options.descending {
184 T::Native::max_value()
185 } else {
186 T::Native::min_value()
187 };
188 for arr in ca.downcast_iter() {
189 let vals = arr.values();
190
191 let sorted = if options.descending {
192 check_cmp(vals, |prev, c| prev.tot_ge(c), &mut previous)
193 } else {
194 check_cmp(vals, |prev, c| prev.tot_le(c), &mut previous)
195 };
196 if !sorted {
197 return false;
198 }
199 }
200 return true;
201 };
202
203 let null_count = ca.null_count();
205 if options.nulls_last {
206 let ca = ca.slice(0, ca.len() - null_count);
207 is_sorted_ca_num(&ca, options)
208 } else {
209 let ca = ca.slice(null_count as i64, ca.len() - null_count);
210 is_sorted_ca_num(&ca, options)
211 }
212}
213
214impl SeriesMethods for Series {}