polars_core/chunked_array/ops/aggregate/
quantile.rs

1use polars_compute::rolling::QuantileMethod;
2
3use super::*;
4
5pub trait QuantileAggSeries {
6    /// Get the median of the [`ChunkedArray`] as a new [`Series`] of length 1.
7    fn median_reduce(&self) -> Scalar;
8    /// Get the quantile of the [`ChunkedArray`] as a new [`Series`] of length 1.
9    fn quantile_reduce(&self, _quantile: f64, _method: QuantileMethod) -> PolarsResult<Scalar>;
10}
11
12/// helper
13fn quantile_idx(
14    quantile: f64,
15    length: usize,
16    null_count: usize,
17    method: QuantileMethod,
18) -> (usize, f64, usize) {
19    let nonnull_count = (length - null_count) as f64;
20    let float_idx = (nonnull_count - 1.0) * quantile + null_count as f64;
21    let mut base_idx = match method {
22        QuantileMethod::Nearest => {
23            let idx = float_idx.round() as usize;
24            return (idx, 0.0, idx);
25        },
26        QuantileMethod::Lower | QuantileMethod::Midpoint | QuantileMethod::Linear => {
27            float_idx as usize
28        },
29        QuantileMethod::Higher => float_idx.ceil() as usize,
30        QuantileMethod::Equiprobable => {
31            let idx = ((nonnull_count * quantile).ceil() - 1.0).max(0.0) as usize + null_count;
32            return (idx, 0.0, idx);
33        },
34    };
35
36    base_idx = base_idx.clamp(0, length - 1);
37    let top_idx = f64::ceil(float_idx) as usize;
38
39    (base_idx, float_idx, top_idx)
40}
41
42/// helper
43fn linear_interpol<T: Float>(lower: T, upper: T, idx: usize, float_idx: f64) -> T {
44    if lower == upper {
45        lower
46    } else {
47        let proportion: T = T::from(float_idx).unwrap() - T::from(idx).unwrap();
48        proportion * (upper - lower) + lower
49    }
50}
51fn midpoint_interpol<T: Float>(lower: T, upper: T) -> T {
52    if lower == upper {
53        lower
54    } else {
55        (lower + upper) / (T::one() + T::one())
56    }
57}
58
59// Uses quickselect instead of sorting all data
60fn quantile_slice<T: ToPrimitive + TotalOrd + Copy>(
61    vals: &mut [T],
62    quantile: f64,
63    method: QuantileMethod,
64) -> PolarsResult<Option<f64>> {
65    polars_ensure!((0.0..=1.0).contains(&quantile),
66        ComputeError: "quantile should be between 0.0 and 1.0",
67    );
68    if vals.is_empty() {
69        return Ok(None);
70    }
71    if vals.len() == 1 {
72        return Ok(vals[0].to_f64());
73    }
74    let (idx, float_idx, top_idx) = quantile_idx(quantile, vals.len(), 0, method);
75
76    let (_lhs, lower, rhs) = vals.select_nth_unstable_by(idx, TotalOrd::tot_cmp);
77    if idx == top_idx {
78        Ok(lower.to_f64())
79    } else {
80        match method {
81            QuantileMethod::Midpoint => {
82                let upper = rhs.iter().copied().min_by(TotalOrd::tot_cmp).unwrap();
83                Ok(Some(midpoint_interpol(
84                    lower.to_f64().unwrap(),
85                    upper.to_f64().unwrap(),
86                )))
87            },
88            QuantileMethod::Linear => {
89                let upper = rhs.iter().copied().min_by(TotalOrd::tot_cmp).unwrap();
90                Ok(linear_interpol(
91                    lower.to_f64().unwrap(),
92                    upper.to_f64().unwrap(),
93                    idx,
94                    float_idx,
95                )
96                .to_f64())
97            },
98            _ => Ok(lower.to_f64()),
99        }
100    }
101}
102
103fn generic_quantile<T>(
104    ca: ChunkedArray<T>,
105    quantile: f64,
106    method: QuantileMethod,
107) -> PolarsResult<Option<f64>>
108where
109    T: PolarsNumericType,
110{
111    polars_ensure!(
112        (0.0..=1.0).contains(&quantile),
113        ComputeError: "`quantile` should be between 0.0 and 1.0",
114    );
115
116    let null_count = ca.null_count();
117    let length = ca.len();
118
119    if null_count == length {
120        return Ok(None);
121    }
122
123    let (idx, float_idx, top_idx) = quantile_idx(quantile, length, null_count, method);
124    let sorted = ca.sort(false);
125    let lower = sorted.get(idx).map(|v| v.to_f64().unwrap());
126
127    let opt = match method {
128        QuantileMethod::Midpoint => {
129            if top_idx == idx {
130                lower
131            } else {
132                let upper = sorted.get(idx + 1).map(|v| v.to_f64().unwrap());
133                midpoint_interpol(lower.unwrap(), upper.unwrap()).to_f64()
134            }
135        },
136        QuantileMethod::Linear => {
137            if top_idx == idx {
138                lower
139            } else {
140                let upper = sorted.get(idx + 1).map(|v| v.to_f64().unwrap());
141
142                linear_interpol(lower.unwrap(), upper.unwrap(), idx, float_idx).to_f64()
143            }
144        },
145        _ => lower,
146    };
147    Ok(opt)
148}
149
150impl<T> ChunkQuantile<f64> for ChunkedArray<T>
151where
152    T: PolarsIntegerType,
153    T::Native: TotalOrd,
154{
155    fn quantile(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Option<f64>> {
156        // in case of sorted data, the sort is free, so don't take quickselect route
157        if let (Ok(slice), false) = (self.cont_slice(), self.is_sorted_ascending_flag()) {
158            let mut owned = slice.to_vec();
159            quantile_slice(&mut owned, quantile, method)
160        } else {
161            generic_quantile(self.clone(), quantile, method)
162        }
163    }
164
165    fn median(&self) -> Option<f64> {
166        self.quantile(0.5, QuantileMethod::Linear).unwrap() // unwrap fine since quantile in range
167    }
168}
169
170// Version of quantile/median that don't need a memcpy
171impl<T> ChunkedArray<T>
172where
173    T: PolarsIntegerType,
174    T::Native: TotalOrd,
175{
176    pub(crate) fn quantile_faster(
177        mut self,
178        quantile: f64,
179        method: QuantileMethod,
180    ) -> PolarsResult<Option<f64>> {
181        // in case of sorted data, the sort is free, so don't take quickselect route
182        let is_sorted = self.is_sorted_ascending_flag();
183        if let (Some(slice), false) = (self.cont_slice_mut(), is_sorted) {
184            quantile_slice(slice, quantile, method)
185        } else {
186            self.quantile(quantile, method)
187        }
188    }
189
190    pub(crate) fn median_faster(self) -> Option<f64> {
191        self.quantile_faster(0.5, QuantileMethod::Linear).unwrap()
192    }
193}
194
195impl ChunkQuantile<f32> for Float32Chunked {
196    fn quantile(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Option<f32>> {
197        // in case of sorted data, the sort is free, so don't take quickselect route
198        let out = if let (Ok(slice), false) = (self.cont_slice(), self.is_sorted_ascending_flag()) {
199            let mut owned = slice.to_vec();
200            quantile_slice(&mut owned, quantile, method)
201        } else {
202            generic_quantile(self.clone(), quantile, method)
203        };
204        out.map(|v| v.map(|v| v as f32))
205    }
206
207    fn median(&self) -> Option<f32> {
208        self.quantile(0.5, QuantileMethod::Linear).unwrap() // unwrap fine since quantile in range
209    }
210}
211
212impl ChunkQuantile<f64> for Float64Chunked {
213    fn quantile(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Option<f64>> {
214        // in case of sorted data, the sort is free, so don't take quickselect route
215        if let (Ok(slice), false) = (self.cont_slice(), self.is_sorted_ascending_flag()) {
216            let mut owned = slice.to_vec();
217            quantile_slice(&mut owned, quantile, method)
218        } else {
219            generic_quantile(self.clone(), quantile, method)
220        }
221    }
222
223    fn median(&self) -> Option<f64> {
224        self.quantile(0.5, QuantileMethod::Linear).unwrap() // unwrap fine since quantile in range
225    }
226}
227
228impl Float64Chunked {
229    pub(crate) fn quantile_faster(
230        mut self,
231        quantile: f64,
232        method: QuantileMethod,
233    ) -> PolarsResult<Option<f64>> {
234        // in case of sorted data, the sort is free, so don't take quickselect route
235        let is_sorted = self.is_sorted_ascending_flag();
236        if let (Some(slice), false) = (self.cont_slice_mut(), is_sorted) {
237            quantile_slice(slice, quantile, method)
238        } else {
239            self.quantile(quantile, method)
240        }
241    }
242
243    pub(crate) fn median_faster(self) -> Option<f64> {
244        self.quantile_faster(0.5, QuantileMethod::Linear).unwrap()
245    }
246}
247
248impl Float32Chunked {
249    pub(crate) fn quantile_faster(
250        mut self,
251        quantile: f64,
252        method: QuantileMethod,
253    ) -> PolarsResult<Option<f32>> {
254        // in case of sorted data, the sort is free, so don't take quickselect route
255        let is_sorted = self.is_sorted_ascending_flag();
256        if let (Some(slice), false) = (self.cont_slice_mut(), is_sorted) {
257            quantile_slice(slice, quantile, method).map(|v| v.map(|v| v as f32))
258        } else {
259            self.quantile(quantile, method)
260        }
261    }
262
263    pub(crate) fn median_faster(self) -> Option<f32> {
264        self.quantile_faster(0.5, QuantileMethod::Linear).unwrap()
265    }
266}
267
268impl ChunkQuantile<String> for StringChunked {}
269impl ChunkQuantile<Series> for ListChunked {}
270#[cfg(feature = "dtype-array")]
271impl ChunkQuantile<Series> for ArrayChunked {}
272#[cfg(feature = "object")]
273impl<T: PolarsObject> ChunkQuantile<Series> for ObjectChunked<T> {}
274impl ChunkQuantile<bool> for BooleanChunked {}