polars_ops/series/ops/
various.rs1use num_traits::Bounded;
2#[cfg(feature = "dtype-struct")]
3use polars_core::chunked_array::ops::row_encode::_get_rows_encoded_ca;
4use polars_core::prelude::arity::unary_elementwise_values;
5use polars_core::prelude::*;
6use polars_core::series::IsSorted;
7use polars_core::with_match_physical_numeric_polars_type;
8use polars_utils::total_ord::TotalOrd;
9
10use crate::series::ops::SeriesSealed;
11
12pub trait SeriesMethods: SeriesSealed {
13 fn value_counts(
16 &self,
17 sort: bool,
18 parallel: bool,
19 name: PlSmallStr,
20 normalize: bool,
21 ) -> PolarsResult<DataFrame> {
22 let s = self.as_series();
23 polars_ensure!(
24 s.name() != &name,
25 Duplicate: "using `value_counts` on a column/series named '{}' would lead to duplicate \
26 column names; change `name` to fix", name,
27 );
28 let groups = s.group_tuples(parallel, sort)?;
30 let values = unsafe { s.agg_first(&groups) }.into();
31 let counts = groups.group_count().with_name(name.clone());
32
33 let counts = if normalize {
34 let len = s.len() as f64;
35 let counts: Float64Chunked =
36 unary_elementwise_values(&counts, |count| count as f64 / len);
37 counts.into_column()
38 } else {
39 counts.into_column()
40 };
41
42 let height = counts.len();
43 let cols = vec![values, counts];
44 let df = unsafe { DataFrame::new_no_checks(height, cols) };
45 if sort {
46 df.sort(
47 [name],
48 SortMultipleOptions::default()
49 .with_order_descending(true)
50 .with_multithreaded(parallel),
51 )
52 } else {
53 Ok(df)
54 }
55 }
56
57 #[cfg(feature = "hash")]
58 fn hash(&self, build_hasher: PlRandomState) -> UInt64Chunked {
59 let s = self.as_series().to_physical_repr();
60 match s.dtype() {
61 DataType::List(_) => {
62 let mut ca = s.list().unwrap().clone();
63 crate::chunked_array::hash::hash(&mut ca, build_hasher)
64 },
65 _ => {
66 let mut h = vec![];
67 s.0.vec_hash(build_hasher, &mut h).unwrap();
68 UInt64Chunked::from_vec(s.name().clone(), h)
69 },
70 }
71 }
72
73 fn ensure_sorted_arg(&self, operation: &str) -> PolarsResult<()> {
74 polars_ensure!(self.is_sorted(Default::default())?, InvalidOperation: "argument in operation '{}' is not sorted, please sort the 'expr/series/column' first", operation);
75 Ok(())
76 }
77
78 fn is_sorted(&self, options: SortOptions) -> PolarsResult<bool> {
80 let s = self.as_series();
81 let null_count = s.null_count();
82
83 if (options.descending
85 && (options.nulls_last || null_count == 0)
86 && matches!(s.is_sorted_flag(), IsSorted::Descending))
87 || (!options.descending
88 && (!options.nulls_last || null_count == 0)
89 && matches!(s.is_sorted_flag(), IsSorted::Ascending))
90 {
91 return Ok(true);
92 }
93
94 #[cfg(feature = "dtype-struct")]
96 if matches!(s.dtype(), DataType::Struct(_)) {
97 let encoded = _get_rows_encoded_ca(
98 PlSmallStr::EMPTY,
99 &[s.clone().into()],
100 &[options.descending],
101 &[options.nulls_last],
102 )?;
103 return encoded.into_series().is_sorted(options);
104 }
105
106 let s_len = s.len();
107 if null_count == s_len {
108 return Ok(true);
110 }
111 if null_count > 0 {
113 if options.nulls_last {
115 if s.slice((s_len - null_count) as i64, null_count)
116 .null_count()
117 != null_count
118 {
119 return Ok(false);
120 }
121 } else if s.slice(0, null_count).null_count() != null_count {
122 return Ok(false);
123 }
124 }
125
126 if s.dtype().is_primitive_numeric() {
127 with_match_physical_numeric_polars_type!(s.dtype(), |$T| {
128 let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
129 return Ok(is_sorted_ca_num::<$T>(ca, options))
130 })
131 }
132
133 let cmp_len = s_len - null_count - 1; let offset = !options.nulls_last as i64 * null_count as i64;
137 let (s1, s2) = (s.slice(offset, cmp_len), s.slice(offset + 1, cmp_len));
138 let cmp_op = if options.descending {
139 Series::gt_eq
140 } else {
141 Series::lt_eq
142 };
143 Ok(cmp_op(&s1, &s2)?.all())
144 }
145}
146
147fn check_cmp<T: NumericNative, Cmp: Fn(&T, &T) -> bool>(
148 vals: &[T],
149 f: Cmp,
150 previous: &mut T,
151) -> bool {
152 let mut sorted = true;
153
154 for c in vals.chunks(1024) {
157 for v in c {
160 sorted &= f(previous, v);
161 *previous = *v;
162 }
163 if !sorted {
164 return false;
165 }
166 }
167 sorted
168}
169
170fn is_sorted_ca_num<T: PolarsNumericType>(ca: &ChunkedArray<T>, options: SortOptions) -> bool {
172 if let Ok(vals) = ca.cont_slice() {
173 let mut previous = vals[0];
174 return if options.descending {
175 check_cmp(vals, |prev, c| prev.tot_ge(c), &mut previous)
176 } else {
177 check_cmp(vals, |prev, c| prev.tot_le(c), &mut previous)
178 };
179 };
180
181 if ca.null_count() == 0 {
182 let mut previous = if options.descending {
183 T::Native::max_value()
184 } else {
185 T::Native::min_value()
186 };
187 for arr in ca.downcast_iter() {
188 let vals = arr.values();
189
190 let sorted = if options.descending {
191 check_cmp(vals, |prev, c| prev.tot_ge(c), &mut previous)
192 } else {
193 check_cmp(vals, |prev, c| prev.tot_le(c), &mut previous)
194 };
195 if !sorted {
196 return false;
197 }
198 }
199 return true;
200 };
201
202 let null_count = ca.null_count();
204 if options.nulls_last {
205 let ca = ca.slice(0, ca.len() - null_count);
206 is_sorted_ca_num(&ca, options)
207 } else {
208 let ca = ca.slice(null_count as i64, ca.len() - null_count);
209 is_sorted_ca_num(&ca, options)
210 }
211}
212
213impl SeriesMethods for Series {}