polars_core/chunked_array/ops/aggregate/
quantile.rs

1use num_traits::AsPrimitive;
2use polars_compute::rolling::QuantileMethod;
3#[cfg(feature = "dtype-f16")]
4use polars_utils::float16::pf16;
5
6use super::*;
7
8pub trait QuantileAggSeries {
9    /// Get the median of the [`ChunkedArray`] as a new [`Series`] of length 1.
10    fn median_reduce(&self) -> Scalar;
11    /// Get the quantile of the [`ChunkedArray`] as a new [`Series`] of length 1.
12    fn quantile_reduce(&self, _quantile: f64, _method: QuantileMethod) -> PolarsResult<Scalar>;
13}
14
15/// helper
16fn quantile_idx(
17    quantile: f64,
18    length: usize,
19    null_count: usize,
20    method: QuantileMethod,
21) -> (usize, f64, usize) {
22    let nonnull_count = (length - null_count) as f64;
23    let float_idx = (nonnull_count - 1.0) * quantile + null_count as f64;
24    let mut base_idx = match method {
25        QuantileMethod::Nearest => {
26            let idx = float_idx.round() as usize;
27            return (idx, 0.0, idx);
28        },
29        QuantileMethod::Lower | QuantileMethod::Midpoint | QuantileMethod::Linear => {
30            float_idx as usize
31        },
32        QuantileMethod::Higher => float_idx.ceil() as usize,
33        QuantileMethod::Equiprobable => {
34            let idx = ((nonnull_count * quantile).ceil() - 1.0).max(0.0) as usize + null_count;
35            return (idx, 0.0, idx);
36        },
37    };
38
39    base_idx = base_idx.clamp(0, length - 1);
40    let top_idx = f64::ceil(float_idx) as usize;
41
42    (base_idx, float_idx, top_idx)
43}
44
45/// helper
46fn linear_interpol<T: Float>(lower: T, upper: T, idx: usize, float_idx: f64) -> T {
47    if lower == upper {
48        lower
49    } else {
50        let proportion: T = T::from(float_idx).unwrap() - T::from(idx).unwrap();
51        proportion * (upper - lower) + lower
52    }
53}
54fn midpoint_interpol<T: Float>(lower: T, upper: T) -> T {
55    if lower == upper {
56        lower
57    } else {
58        (lower + upper) / (T::one() + T::one())
59    }
60}
61
62// Uses quickselect instead of sorting all data
63fn quantile_slice<T: ToPrimitive + TotalOrd + Copy>(
64    vals: &mut [T],
65    quantile: f64,
66    method: QuantileMethod,
67) -> PolarsResult<Option<f64>> {
68    polars_ensure!((0.0..=1.0).contains(&quantile),
69        ComputeError: "quantile should be between 0.0 and 1.0",
70    );
71    if vals.is_empty() {
72        return Ok(None);
73    }
74    if vals.len() == 1 {
75        return Ok(vals[0].to_f64());
76    }
77    let (idx, float_idx, top_idx) = quantile_idx(quantile, vals.len(), 0, method);
78
79    let (_lhs, lower, rhs) = vals.select_nth_unstable_by(idx, TotalOrd::tot_cmp);
80    if idx == top_idx {
81        Ok(lower.to_f64())
82    } else {
83        match method {
84            QuantileMethod::Midpoint => {
85                let upper = rhs.iter().copied().min_by(TotalOrd::tot_cmp).unwrap();
86                Ok(Some(midpoint_interpol(
87                    lower.to_f64().unwrap(),
88                    upper.to_f64().unwrap(),
89                )))
90            },
91            QuantileMethod::Linear => {
92                let upper = rhs.iter().copied().min_by(TotalOrd::tot_cmp).unwrap();
93                Ok(linear_interpol(
94                    lower.to_f64().unwrap(),
95                    upper.to_f64().unwrap(),
96                    idx,
97                    float_idx,
98                )
99                .to_f64())
100            },
101            _ => Ok(lower.to_f64()),
102        }
103    }
104}
105
106fn generic_quantile<T>(
107    ca: ChunkedArray<T>,
108    quantile: f64,
109    method: QuantileMethod,
110) -> PolarsResult<Option<f64>>
111where
112    T: PolarsNumericType,
113{
114    polars_ensure!(
115        (0.0..=1.0).contains(&quantile),
116        ComputeError: "`quantile` should be between 0.0 and 1.0",
117    );
118
119    let null_count = ca.null_count();
120    let length = ca.len();
121
122    if null_count == length {
123        return Ok(None);
124    }
125
126    let (idx, float_idx, top_idx) = quantile_idx(quantile, length, null_count, method);
127    let sorted = ca.sort(false);
128    let lower = sorted.get(idx).map(|v| v.to_f64().unwrap());
129
130    let opt = match method {
131        QuantileMethod::Midpoint => {
132            if top_idx == idx {
133                lower
134            } else {
135                let upper = sorted.get(idx + 1).map(|v| v.to_f64().unwrap());
136                midpoint_interpol(lower.unwrap(), upper.unwrap()).to_f64()
137            }
138        },
139        QuantileMethod::Linear => {
140            if top_idx == idx {
141                lower
142            } else {
143                let upper = sorted.get(idx + 1).map(|v| v.to_f64().unwrap());
144
145                linear_interpol(lower.unwrap(), upper.unwrap(), idx, float_idx).to_f64()
146            }
147        },
148        _ => lower,
149    };
150    Ok(opt)
151}
152
153impl<T> ChunkQuantile<f64> for ChunkedArray<T>
154where
155    T: PolarsIntegerType,
156    T::Native: TotalOrd,
157{
158    fn quantile(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Option<f64>> {
159        // in case of sorted data, the sort is free, so don't take quickselect route
160        if let (Ok(slice), false) = (self.cont_slice(), self.is_sorted_ascending_flag()) {
161            let mut owned = slice.to_vec();
162            quantile_slice(&mut owned, quantile, method)
163        } else {
164            generic_quantile(self.clone(), quantile, method)
165        }
166    }
167
168    fn median(&self) -> Option<f64> {
169        self.quantile(0.5, QuantileMethod::Linear).unwrap() // unwrap fine since quantile in range
170    }
171}
172
173// Version of quantile/median that don't need a memcpy
174impl<T> ChunkedArray<T>
175where
176    T: PolarsIntegerType,
177    T::Native: TotalOrd,
178{
179    pub(crate) fn quantile_faster(
180        mut self,
181        quantile: f64,
182        method: QuantileMethod,
183    ) -> PolarsResult<Option<f64>> {
184        // in case of sorted data, the sort is free, so don't take quickselect route
185        let is_sorted = self.is_sorted_ascending_flag();
186        if let (Some(slice), false) = (self.cont_slice_mut(), is_sorted) {
187            quantile_slice(slice, quantile, method)
188        } else {
189            self.quantile(quantile, method)
190        }
191    }
192
193    pub(crate) fn median_faster(self) -> Option<f64> {
194        self.quantile_faster(0.5, QuantileMethod::Linear).unwrap()
195    }
196}
197
198macro_rules! impl_chunk_quantile_for_float_chunked {
199    ($T:ty, $CA:ty) => {
200        impl ChunkQuantile<$T> for $CA {
201            fn quantile(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Option<$T>> {
202                // in case of sorted data, the sort is free, so don't take quickselect route
203                let out = if let (Ok(slice), false) =
204                    (self.cont_slice(), self.is_sorted_ascending_flag())
205                {
206                    let mut owned = slice.to_vec();
207                    quantile_slice(&mut owned, quantile, method)
208                } else {
209                    generic_quantile(self.clone(), quantile, method)
210                };
211                out.map(|v| v.map(|v| v.as_()))
212            }
213
214            fn median(&self) -> Option<$T> {
215                self.quantile(0.5, QuantileMethod::Linear).unwrap() // unwrap fine since quantile in range
216            }
217        }
218    };
219}
220
221#[cfg(feature = "dtype-f16")]
222impl_chunk_quantile_for_float_chunked!(pf16, Float16Chunked);
223impl_chunk_quantile_for_float_chunked!(f32, Float32Chunked);
224impl_chunk_quantile_for_float_chunked!(f64, Float64Chunked);
225
226macro_rules! impl_float_chunked {
227    ($T:ty, $CA:ty) => {
228        impl $CA {
229            pub(crate) fn quantile_faster(
230                mut self,
231                quantile: f64,
232                method: QuantileMethod,
233            ) -> PolarsResult<Option<$T>> {
234                // in case of sorted data, the sort is free, so don't take quickselect route
235                let is_sorted = self.is_sorted_ascending_flag();
236                if let (Some(slice), false) = (self.cont_slice_mut(), is_sorted) {
237                    Ok(quantile_slice(slice, quantile, method)?.map(AsPrimitive::as_))
238                } else {
239                    Ok(self.quantile(quantile, method)?.map(AsPrimitive::as_))
240                }
241            }
242
243            pub(crate) fn median_faster(self) -> Option<$T> {
244                self.quantile_faster(0.5.into(), QuantileMethod::Linear)
245                    .unwrap()
246            }
247        }
248    };
249}
250
251#[cfg(feature = "dtype-f16")]
252impl_float_chunked!(pf16, Float16Chunked);
253impl_float_chunked!(f32, Float32Chunked);
254impl_float_chunked!(f64, Float64Chunked);
255
256impl ChunkQuantile<String> for StringChunked {}
257impl ChunkQuantile<Series> for ListChunked {}
258#[cfg(feature = "dtype-array")]
259impl ChunkQuantile<Series> for ArrayChunked {}
260#[cfg(feature = "object")]
261impl<T: PolarsObject> ChunkQuantile<Series> for ObjectChunked<T> {}
262impl ChunkQuantile<bool> for BooleanChunked {}