polars_core/series/
comparison.rs

1//! Comparison operations on Series.
2
3use polars_error::feature_gated;
4
5use crate::prelude::*;
6use crate::series::arithmetic::coerce_lhs_rhs;
7use crate::series::nulls::replace_non_null;
8
/// Expands to the body of a `Series`-vs-`Series` equality-style comparison.
///
/// * `$self`, `$rhs` - the two operands.
/// * `$method` - the comparison method called on the typed arrays (the call
///   sites in this file pass `equal`, `equal_missing`, `not_equal` and
///   `not_equal_missing`).
///
/// The expansion uses `?` and `return`, so it has to be the tail expression of
/// a function that returns `PolarsResult<BooleanChunked>`.
///
/// Steps performed by the expansion:
/// 1. reject dtype pairs refused by `validate_types`;
/// 2. require equal lengths unless one operand has length 1 (broadcast);
/// 3. with the `dtype-categorical` feature, handle categorical/enum pairs and
///    categorical/enum-vs-string pairs directly and return early (no rename is
///    applied by this macro on those paths);
/// 4. otherwise coerce both operands with `coerce_lhs_rhs`, take their physical
///    representation and dispatch on the dtype of the left operand;
/// 5. rename the resulting mask after the (coerced) left operand.
macro_rules! impl_eq_compare {
    ($self:expr, $rhs:expr, $method:ident) => {{
        use DataType::*;
        let (lhs, rhs) = ($self, $rhs);
        validate_types(lhs.dtype(), rhs.dtype())?;

        polars_ensure!(
            lhs.len() == rhs.len() ||

            // Broadcast
            lhs.len() == 1 ||
            rhs.len() == 1,
            ShapeMismatch: "could not compare between two series of different length ({} != {})",
            lhs.len(),
            rhs.len()
        );

        // Categorical and enum operands are compared on their own typed arrays
        // instead of going through the generic coercion below.
        #[cfg(feature = "dtype-categorical")]
        match (lhs.dtype(), rhs.dtype()) {
            (Categorical(lcats, _), Categorical(rcats, _)) => {
                ensure_same_categories(lcats, rcats)?;
                return with_match_categorical_physical_type!(lcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())
                })
            },
            (Enum(lfcats, _), Enum(rfcats, _)) => {
                ensure_same_frozen_categories(lfcats, rfcats)?;
                return with_match_categorical_physical_type!(lfcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())
                })
            },
            (Categorical(_, _) | Enum(_, _), String) => {
                return with_match_categorical_physical_type!(lhs.dtype().cat_physical().unwrap(), |$C| {
                    Ok(lhs.cat::<$C>().unwrap().$method(rhs.str().unwrap()))
                })
            },
            (String, Categorical(_, _) | Enum(_, _)) => {
                // The categorical side is always the receiver, so the operands
                // are swapped here while the same method is called.
                return with_match_categorical_physical_type!(rhs.dtype().cat_physical().unwrap(), |$C| {
                    Ok(rhs.cat::<$C>().unwrap().$method(lhs.str().unwrap()))
                })
            },
            _ => (),
        };

        // Any coercion failure is reported as a schema mismatch that names both operands.
        let (lhs, rhs) = coerce_lhs_rhs(lhs, rhs)
            .map_err(|_| polars_err!(
                    SchemaMismatch: "could not evaluate comparison between series '{}' of dtype: {} and series '{}' of dtype: {}",
                    lhs.name(), lhs.dtype(), rhs.name(), rhs.dtype()
            ))?;
        // Dispatch happens on the physical representation of the coerced operands.
        let lhs = lhs.to_physical_repr();
        let rhs = rhs.to_physical_repr();
        let mut out = match lhs.dtype() {
            Null => lhs.null().unwrap().$method(rhs.null().unwrap()),
            Boolean => lhs.bool().unwrap().$method(rhs.bool().unwrap()),
            String => lhs.str().unwrap().$method(rhs.str().unwrap()),
            Binary => lhs.binary().unwrap().$method(rhs.binary().unwrap()),
            BinaryOffset => lhs.binary_offset().unwrap().$method(rhs.binary_offset().unwrap()),
            UInt8 => feature_gated!("dtype-u8", lhs.u8().unwrap().$method(rhs.u8().unwrap())),
            UInt16 => feature_gated!("dtype-u16", lhs.u16().unwrap().$method(rhs.u16().unwrap())),
            UInt32 => lhs.u32().unwrap().$method(rhs.u32().unwrap()),
            UInt64 => lhs.u64().unwrap().$method(rhs.u64().unwrap()),
            Int8 => feature_gated!("dtype-i8", lhs.i8().unwrap().$method(rhs.i8().unwrap())),
            Int16 => feature_gated!("dtype-i16", lhs.i16().unwrap().$method(rhs.i16().unwrap())),
            Int32 => lhs.i32().unwrap().$method(rhs.i32().unwrap()),
            Int64 => lhs.i64().unwrap().$method(rhs.i64().unwrap()),
            Int128 => feature_gated!("dtype-i128", lhs.i128().unwrap().$method(rhs.i128().unwrap())),
            Float32 => lhs.f32().unwrap().$method(rhs.f32().unwrap()),
            Float64 => lhs.f64().unwrap().$method(rhs.f64().unwrap()),
            List(_) => lhs.list().unwrap().$method(rhs.list().unwrap()),
            #[cfg(feature = "dtype-array")]
            Array(_, _) => lhs.array().unwrap().$method(rhs.array().unwrap()),
            #[cfg(feature = "dtype-struct")]
            Struct(_) => lhs.struct_().unwrap().$method(rhs.struct_().unwrap()),

            dt => polars_bail!(InvalidOperation: "could not apply comparison on series of dtype '{}'; operand names: '{}', '{}'", dt, lhs.name(), rhs.name()),
        };
        out.rename(lhs.name().clone());
        PolarsResult::Ok(out)
    }};
}
89
/// Bails out of the enclosing function with an `InvalidOperation` error
/// stating that the ordering comparison `$symbol` cannot be performed between
/// the two given series. The message carries the name and dtype of both
/// operands.
macro_rules! bail_invalid_ineq {
    ($left:expr, $right:expr, $symbol:literal) => {
        polars_bail!(
            InvalidOperation:
                "cannot perform '{}' comparison between series '{}' of dtype: {} and series '{}' of dtype: {}",
            $symbol,
            $left.name(),
            $left.dtype(),
            $right.name(),
            $right.dtype()
        )
    };
}
100
/// Expands to the body of a `Series`-vs-`Series` ordering comparison.
///
/// * `$self`, `$rhs` - the two operands.
/// * `$method` - the comparison method called on the typed arrays (`gt`,
///   `gt_eq`, `lt` or `lt_eq` at the call sites in this file).
/// * `$op` - the operator as a string literal; only used in error messages.
/// * `$rev_method` - the mirrored comparison, used when the operands have to
///   be swapped (string on the left, categorical/enum on the right).
///
/// The expansion uses `?` and `return`, so it has to be the tail expression of
/// a function that returns `PolarsResult<BooleanChunked>`.
///
/// Steps performed by the expansion:
/// 1. reject dtype pairs refused by `validate_types`;
/// 2. require equal lengths unless one operand has length 1 (broadcast);
/// 3. with the `dtype-categorical` feature, handle categorical/enum pairs and
///    categorical/enum-vs-string pairs directly and return early (no rename is
///    applied by this macro on those paths);
/// 4. otherwise coerce both operands with `coerce_lhs_rhs`, take their physical
///    representation and dispatch on the dtype of the left operand; list,
///    array and struct dtypes are rejected via `bail_invalid_ineq!`;
/// 5. rename the resulting mask after the (coerced) left operand.
macro_rules! impl_ineq_compare {
    ($self:expr, $rhs:expr, $method:ident, $op:literal, $rev_method:ident) => {{
        use DataType::*;
        let (lhs, rhs) = ($self, $rhs);
        validate_types(lhs.dtype(), rhs.dtype())?;

        polars_ensure!(
            lhs.len() == rhs.len() ||

            // Broadcast
            lhs.len() == 1 ||
            rhs.len() == 1,
            ShapeMismatch:
                "could not perform '{}' comparison between series '{}' of length: {} and series '{}' of length: {}, because they have different lengths",
            $op,
            lhs.name(), lhs.len(),
            rhs.name(), rhs.len()
        );

        // Categorical and enum operands are compared on their own typed arrays
        // instead of going through the generic coercion below.
        #[cfg(feature = "dtype-categorical")]
        match (lhs.dtype(), rhs.dtype()) {
            (Categorical(lcats, _), Categorical(rcats, _)) => {
                ensure_same_categories(lcats, rcats)?;
                return with_match_categorical_physical_type!(lcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())
                })
            },
            (Enum(lfcats, _), Enum(rfcats, _)) => {
                ensure_same_frozen_categories(lfcats, rfcats)?;
                return with_match_categorical_physical_type!(lfcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())
                })
            },
            (Categorical(_, _) | Enum(_, _), String) => {
                return with_match_categorical_physical_type!(lhs.dtype().cat_physical().unwrap(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.str().unwrap())
                })
            },
            (String, Categorical(_, _) | Enum(_, _)) => {
                return with_match_categorical_physical_type!(rhs.dtype().cat_physical().unwrap(), |$C| {
                    // We use the reverse method as string <-> enum comparisons are only implemented one-way.
                    rhs.cat::<$C>().unwrap().$rev_method(lhs.str().unwrap())
                })
            },
            _ => (),
        };

        // Any coercion failure is reported as a schema mismatch that names both operands.
        let (lhs, rhs) = coerce_lhs_rhs(lhs, rhs).map_err(|_|
            polars_err!(
                SchemaMismatch: "could not evaluate '{}' comparison between series '{}' of dtype: {} and series '{}' of dtype: {}",
                $op,
                lhs.name(), lhs.dtype(),
                rhs.name(), rhs.dtype()
            )
        )?;
        // Dispatch happens on the physical representation of the coerced operands.
        let lhs = lhs.to_physical_repr();
        let rhs = rhs.to_physical_repr();
        let mut out = match lhs.dtype() {
            Null => lhs.null().unwrap().$method(rhs.null().unwrap()),
            Boolean => lhs.bool().unwrap().$method(rhs.bool().unwrap()),
            String => lhs.str().unwrap().$method(rhs.str().unwrap()),
            Binary => lhs.binary().unwrap().$method(rhs.binary().unwrap()),
            BinaryOffset => lhs.binary_offset().unwrap().$method(rhs.binary_offset().unwrap()),
            UInt8 => feature_gated!("dtype-u8", lhs.u8().unwrap().$method(rhs.u8().unwrap())),
            UInt16 => feature_gated!("dtype-u16", lhs.u16().unwrap().$method(rhs.u16().unwrap())),
            UInt32 => lhs.u32().unwrap().$method(rhs.u32().unwrap()),
            UInt64 => lhs.u64().unwrap().$method(rhs.u64().unwrap()),
            Int8 => feature_gated!("dtype-i8", lhs.i8().unwrap().$method(rhs.i8().unwrap())),
            Int16 => feature_gated!("dtype-i16", lhs.i16().unwrap().$method(rhs.i16().unwrap())),
            Int32 => lhs.i32().unwrap().$method(rhs.i32().unwrap()),
            Int64 => lhs.i64().unwrap().$method(rhs.i64().unwrap()),
            Int128 => feature_gated!("dtype-i128", lhs.i128().unwrap().$method(rhs.i128().unwrap())),
            Float32 => lhs.f32().unwrap().$method(rhs.f32().unwrap()),
            Float64 => lhs.f64().unwrap().$method(rhs.f64().unwrap()),
            // Nested dtypes have no ordering comparison here.
            List(_) => bail_invalid_ineq!(lhs, rhs, $op),
            #[cfg(feature = "dtype-array")]
            Array(_, _) => bail_invalid_ineq!(lhs, rhs, $op),
            #[cfg(feature = "dtype-struct")]
            Struct(_) => bail_invalid_ineq!(lhs, rhs, $op),

            dt => polars_bail!(InvalidOperation: "could not apply comparison on series of dtype '{}'; operand names: '{}', '{}'", dt, lhs.name(), rhs.name()),
        };
        out.rename(lhs.name().clone());
        PolarsResult::Ok(out)
    }};
}
187
188fn validate_types(left: &DataType, right: &DataType) -> PolarsResult<()> {
189    use DataType::*;
190
191    match (left, right) {
192        (String, dt) | (dt, String) if dt.is_primitive_numeric() => {
193            polars_bail!(ComputeError: "cannot compare string with numeric type ({})", dt)
194        },
195        #[cfg(feature = "dtype-categorical")]
196        (Categorical(_, _) | Enum(_, _), dt) | (dt, Categorical(_, _) | Enum(_, _))
197            if !(dt.is_categorical() | dt.is_string() | dt.is_enum()) =>
198        {
199            polars_bail!(ComputeError: "cannot compare categorical with {}", dt);
200        },
201        _ => (),
202    };
203    Ok(())
204}
205
206impl ChunkCompareEq<&Series> for Series {
207    type Item = PolarsResult<BooleanChunked>;
208
209    /// Create a boolean mask by checking for equality.
210    fn equal(&self, rhs: &Series) -> Self::Item {
211        impl_eq_compare!(self, rhs, equal)
212    }
213
214    /// Create a boolean mask by checking for equality.
215    fn equal_missing(&self, rhs: &Series) -> Self::Item {
216        impl_eq_compare!(self, rhs, equal_missing)
217    }
218
219    /// Create a boolean mask by checking for inequality.
220    fn not_equal(&self, rhs: &Series) -> Self::Item {
221        impl_eq_compare!(self, rhs, not_equal)
222    }
223
224    /// Create a boolean mask by checking for inequality.
225    fn not_equal_missing(&self, rhs: &Series) -> Self::Item {
226        impl_eq_compare!(self, rhs, not_equal_missing)
227    }
228}
229
230impl ChunkCompareIneq<&Series> for Series {
231    type Item = PolarsResult<BooleanChunked>;
232
233    /// Create a boolean mask by checking if self > rhs.
234    fn gt(&self, rhs: &Series) -> Self::Item {
235        impl_ineq_compare!(self, rhs, gt, ">", lt)
236    }
237
238    /// Create a boolean mask by checking if self >= rhs.
239    fn gt_eq(&self, rhs: &Series) -> Self::Item {
240        impl_ineq_compare!(self, rhs, gt_eq, ">=", lt_eq)
241    }
242
243    /// Create a boolean mask by checking if self < rhs.
244    fn lt(&self, rhs: &Series) -> Self::Item {
245        impl_ineq_compare!(self, rhs, lt, "<", gt)
246    }
247
248    /// Create a boolean mask by checking if self <= rhs.
249    fn lt_eq(&self, rhs: &Series) -> Self::Item {
250        impl_ineq_compare!(self, rhs, lt_eq, "<=", gt_eq)
251    }
252}
253
254impl<Rhs> ChunkCompareEq<Rhs> for Series
255where
256    Rhs: NumericNative,
257{
258    type Item = PolarsResult<BooleanChunked>;
259
260    fn equal(&self, rhs: Rhs) -> Self::Item {
261        validate_types(self.dtype(), &DataType::Int8)?;
262        let s = self.to_physical_repr();
263        Ok(apply_method_physical_numeric!(&s, equal, rhs))
264    }
265
266    fn equal_missing(&self, rhs: Rhs) -> Self::Item {
267        validate_types(self.dtype(), &DataType::Int8)?;
268        let s = self.to_physical_repr();
269        Ok(apply_method_physical_numeric!(&s, equal_missing, rhs))
270    }
271
272    fn not_equal(&self, rhs: Rhs) -> Self::Item {
273        validate_types(self.dtype(), &DataType::Int8)?;
274        let s = self.to_physical_repr();
275        Ok(apply_method_physical_numeric!(&s, not_equal, rhs))
276    }
277
278    fn not_equal_missing(&self, rhs: Rhs) -> Self::Item {
279        validate_types(self.dtype(), &DataType::Int8)?;
280        let s = self.to_physical_repr();
281        Ok(apply_method_physical_numeric!(&s, not_equal_missing, rhs))
282    }
283}
284
285impl<Rhs> ChunkCompareIneq<Rhs> for Series
286where
287    Rhs: NumericNative,
288{
289    type Item = PolarsResult<BooleanChunked>;
290
291    fn gt(&self, rhs: Rhs) -> Self::Item {
292        validate_types(self.dtype(), &DataType::Int8)?;
293        let s = self.to_physical_repr();
294        Ok(apply_method_physical_numeric!(&s, gt, rhs))
295    }
296
297    fn gt_eq(&self, rhs: Rhs) -> Self::Item {
298        validate_types(self.dtype(), &DataType::Int8)?;
299        let s = self.to_physical_repr();
300        Ok(apply_method_physical_numeric!(&s, gt_eq, rhs))
301    }
302
303    fn lt(&self, rhs: Rhs) -> Self::Item {
304        validate_types(self.dtype(), &DataType::Int8)?;
305        let s = self.to_physical_repr();
306        Ok(apply_method_physical_numeric!(&s, lt, rhs))
307    }
308
309    fn lt_eq(&self, rhs: Rhs) -> Self::Item {
310        validate_types(self.dtype(), &DataType::Int8)?;
311        let s = self.to_physical_repr();
312        Ok(apply_method_physical_numeric!(&s, lt_eq, rhs))
313    }
314}
315
316impl ChunkCompareEq<&str> for Series {
317    type Item = PolarsResult<BooleanChunked>;
318
319    fn equal(&self, rhs: &str) -> PolarsResult<BooleanChunked> {
320        validate_types(self.dtype(), &DataType::String)?;
321        match self.dtype() {
322            DataType::String => Ok(self.str().unwrap().equal(rhs)),
323            #[cfg(feature = "dtype-categorical")]
324            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
325                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
326                    self.cat::<$C>().unwrap().equal(rhs)
327                }),
328            ),
329            _ => Ok(BooleanChunked::full(self.name().clone(), false, self.len())),
330        }
331    }
332
333    fn equal_missing(&self, rhs: &str) -> Self::Item {
334        validate_types(self.dtype(), &DataType::String)?;
335        match self.dtype() {
336            DataType::String => Ok(self.str().unwrap().equal_missing(rhs)),
337            #[cfg(feature = "dtype-categorical")]
338            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
339                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
340                    self.cat::<$C>().unwrap().equal_missing(rhs)
341                }),
342            ),
343            _ => Ok(replace_non_null(
344                self.name().clone(),
345                self.0.chunks(),
346                false,
347            )),
348        }
349    }
350
351    fn not_equal(&self, rhs: &str) -> PolarsResult<BooleanChunked> {
352        validate_types(self.dtype(), &DataType::String)?;
353        match self.dtype() {
354            DataType::String => Ok(self.str().unwrap().not_equal(rhs)),
355            #[cfg(feature = "dtype-categorical")]
356            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
357                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
358                    self.cat::<$C>().unwrap().not_equal(rhs)
359                }),
360            ),
361            _ => Ok(BooleanChunked::full(self.name().clone(), true, self.len())),
362        }
363    }
364
365    fn not_equal_missing(&self, rhs: &str) -> Self::Item {
366        validate_types(self.dtype(), &DataType::String)?;
367        match self.dtype() {
368            DataType::String => Ok(self.str().unwrap().not_equal_missing(rhs)),
369            #[cfg(feature = "dtype-categorical")]
370            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
371                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
372                    self.cat::<$C>().unwrap().not_equal_missing(rhs)
373                }),
374            ),
375            _ => Ok(replace_non_null(self.name().clone(), self.0.chunks(), true)),
376        }
377    }
378}
379
380impl ChunkCompareIneq<&str> for Series {
381    type Item = PolarsResult<BooleanChunked>;
382
383    fn gt(&self, rhs: &str) -> Self::Item {
384        validate_types(self.dtype(), &DataType::String)?;
385        match self.dtype() {
386            DataType::String => Ok(self.str().unwrap().gt(rhs)),
387            #[cfg(feature = "dtype-categorical")]
388            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
389                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
390                    self.cat::<$C>().unwrap().gt(rhs)
391                }),
392            ),
393            _ => polars_bail!(
394                ComputeError: "cannot compare str value to series of type {}", self.dtype(),
395            ),
396        }
397    }
398
399    fn gt_eq(&self, rhs: &str) -> Self::Item {
400        validate_types(self.dtype(), &DataType::String)?;
401        match self.dtype() {
402            DataType::String => Ok(self.str().unwrap().gt_eq(rhs)),
403            #[cfg(feature = "dtype-categorical")]
404            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
405                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
406                    self.cat::<$C>().unwrap().gt_eq(rhs)
407                }),
408            ),
409            _ => polars_bail!(
410                ComputeError: "cannot compare str value to series of type {}", self.dtype(),
411            ),
412        }
413    }
414
415    fn lt(&self, rhs: &str) -> Self::Item {
416        validate_types(self.dtype(), &DataType::String)?;
417        match self.dtype() {
418            DataType::String => Ok(self.str().unwrap().lt(rhs)),
419            #[cfg(feature = "dtype-categorical")]
420            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
421                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
422                    self.cat::<$C>().unwrap().lt(rhs)
423                }),
424            ),
425            _ => polars_bail!(
426                ComputeError: "cannot compare str value to series of type {}", self.dtype(),
427            ),
428        }
429    }
430
431    fn lt_eq(&self, rhs: &str) -> Self::Item {
432        validate_types(self.dtype(), &DataType::String)?;
433        match self.dtype() {
434            DataType::String => Ok(self.str().unwrap().lt_eq(rhs)),
435            #[cfg(feature = "dtype-categorical")]
436            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
437                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
438                    self.cat::<$C>().unwrap().lt_eq(rhs)
439                }),
440            ),
441            _ => polars_bail!(
442                ComputeError: "cannot compare str value to series of type {}", self.dtype(),
443            ),
444        }
445    }
446}