polars_core/series/
comparison.rs

//! Comparison operations on Series.
2
3use polars_error::feature_gated;
4
5use crate::prelude::*;
6use crate::series::arithmetic::coerce_lhs_rhs;
7use crate::series::nulls::replace_non_null;
8
// Shared body of the equality-style comparisons between two `Series`
// (`equal`, `equal_missing`, `not_equal`, `not_equal_missing`).
//
// Arguments:
// * `$self`, `$rhs` - left and right operands.
// * `$method`       - name of the comparison method invoked on the typed chunked arrays.
//
// Evaluates to `PolarsResult::Ok(mask)`. The expansion uses `?` and `return`, so it has to be
// expanded directly inside a function whose return type matches.
//
// Errors:
// * anything rejected by `validate_types`,
// * `ShapeMismatch` when the lengths differ and neither operand has length 1,
// * `SchemaMismatch` when `coerce_lhs_rhs` fails,
// * `InvalidOperation` when the coerced physical dtype has no arm below.
macro_rules! impl_eq_compare {
    ($self:expr, $rhs:expr, $method:ident) => {{
        use DataType::*;
        let (lhs, rhs) = ($self, $rhs);
        validate_types(lhs.dtype(), rhs.dtype())?;

        // Lengths must match unless one side has exactly one element.
        polars_ensure!(
            lhs.len() == rhs.len() ||

            // Broadcast
            lhs.len() == 1 ||
            rhs.len() == 1,
            ShapeMismatch: "could not compare between two series of different length ({} != {})",
            lhs.len(),
            rhs.len()
        );

        // Categorical / enum operands are handled up front. Every arm returns early, so the
        // coercion and the rename at the bottom of the macro are skipped for them.
        #[cfg(feature = "dtype-categorical")]
        match (lhs.dtype(), rhs.dtype()) {
            (Categorical(lcats, _), Categorical(rcats, _)) => {
                ensure_same_categories(lcats, rcats)?;
                return with_match_categorical_physical_type!(lcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())
                })
            },
            (Enum(lfcats, _), Enum(rfcats, _)) => {
                ensure_same_frozen_categories(lfcats, rfcats)?;
                return with_match_categorical_physical_type!(lfcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())
                })
            },
            (Categorical(_, _) | Enum(_, _), String) => {
                return with_match_categorical_physical_type!(lhs.dtype().cat_physical().unwrap(), |$C| {
                    Ok(lhs.cat::<$C>().unwrap().$method(rhs.str().unwrap()))
                })
            },
            (String, Categorical(_, _) | Enum(_, _)) => {
                // The operands are swapped here: the categorical side is the receiver.
                return with_match_categorical_physical_type!(rhs.dtype().cat_physical().unwrap(), |$C| {
                    Ok(rhs.cat::<$C>().unwrap().$method(lhs.str().unwrap()))
                })
            },
            _ => (),
        };

        // Bring both operands to a common dtype; the closure still sees the original
        // (un-coerced) operands, so the error reports the dtypes the caller passed in.
        let (lhs, rhs) = coerce_lhs_rhs(lhs, rhs)
            .map_err(|_| polars_err!(
                    SchemaMismatch: "could not evaluate comparison between series '{}' of dtype: {} and series '{}' of dtype: {}",
                    lhs.name(), lhs.dtype(), rhs.name(), rhs.dtype()
            ))?;
        let lhs = lhs.to_physical_repr();
        let rhs = rhs.to_physical_repr();
        // Dispatch on the physical dtype of the (coerced) left operand.
        let mut out = match lhs.dtype() {
            Null => lhs.null().unwrap().$method(rhs.null().unwrap()),
            Boolean => lhs.bool().unwrap().$method(rhs.bool().unwrap()),
            String => lhs.str().unwrap().$method(rhs.str().unwrap()),
            Binary => lhs.binary().unwrap().$method(rhs.binary().unwrap()),
            BinaryOffset => lhs.binary_offset().unwrap().$method(rhs.binary_offset().unwrap()),
            UInt8 => feature_gated!("dtype-u8", lhs.u8().unwrap().$method(rhs.u8().unwrap())),
            UInt16 => feature_gated!("dtype-u16", lhs.u16().unwrap().$method(rhs.u16().unwrap())),
            UInt32 => lhs.u32().unwrap().$method(rhs.u32().unwrap()),
            UInt64 => lhs.u64().unwrap().$method(rhs.u64().unwrap()),
            UInt128 => feature_gated!("dtype-u128", lhs.u128().unwrap().$method(rhs.u128().unwrap())),
            Int8 => feature_gated!("dtype-i8", lhs.i8().unwrap().$method(rhs.i8().unwrap())),
            Int16 => feature_gated!("dtype-i16", lhs.i16().unwrap().$method(rhs.i16().unwrap())),
            Int32 => lhs.i32().unwrap().$method(rhs.i32().unwrap()),
            Int64 => lhs.i64().unwrap().$method(rhs.i64().unwrap()),
            Int128 => feature_gated!("dtype-i128", lhs.i128().unwrap().$method(rhs.i128().unwrap())),
            Float32 => lhs.f32().unwrap().$method(rhs.f32().unwrap()),
            Float64 => lhs.f64().unwrap().$method(rhs.f64().unwrap()),
            List(_) => lhs.list().unwrap().$method(rhs.list().unwrap()),
            #[cfg(feature = "dtype-array")]
            Array(_, _) => lhs.array().unwrap().$method(rhs.array().unwrap()),
            #[cfg(feature = "dtype-struct")]
            Struct(_) => lhs.struct_().unwrap().$method(rhs.struct_().unwrap()),

            dt => polars_bail!(InvalidOperation: "could not apply comparison on series of dtype '{}'; operand names: '{}', '{}'", dt, lhs.name(), rhs.name()),
        };
        // The mask is named after the left operand.
        out.rename(lhs.name().clone());
        PolarsResult::Ok(out)
    }};
}
90
// Raises (through `polars_bail!`) an `InvalidOperation` error saying that the ordering
// comparison `$operator` cannot be performed between the two given series.
//
// Both operand expressions must provide `name()` and `dtype()`; they are only used to
// format the message.
macro_rules! bail_invalid_ineq {
    ($left:expr, $right:expr, $operator:literal) => {
        polars_bail!(
            InvalidOperation:
                "cannot perform '{}' comparison between series '{}' of dtype: {} and series '{}' of dtype: {}",
            $operator,
            $left.name(),
            $left.dtype(),
            $right.name(),
            $right.dtype(),
        )
    };
}
101
// Shared body of the ordering comparisons between two `Series` (`gt`, `gt_eq`, `lt`, `lt_eq`).
//
// Arguments:
// * `$self`, `$rhs` - left and right operands.
// * `$method`       - name of the comparison method invoked on the typed chunked arrays.
// * `$op`           - operator symbol as a literal; only used in error messages.
// * `$rev_method`   - the mirrored comparison (e.g. `lt` for `gt`), used when the operands
//                     have to be swapped for a string-vs-categorical comparison.
//
// Evaluates to `PolarsResult::Ok(mask)`. The expansion uses `?` and `return`, so it has to be
// expanded directly inside a function whose return type matches.
//
// Errors:
// * anything rejected by `validate_types`,
// * `ShapeMismatch` when the lengths differ and neither operand has length 1,
// * `SchemaMismatch` when `coerce_lhs_rhs` fails,
// * `InvalidOperation` for list, array and struct operands and for any other physical
//   dtype without an arm below.
macro_rules! impl_ineq_compare {
    ($self:expr, $rhs:expr, $method:ident, $op:literal, $rev_method:ident) => {{
        use DataType::*;
        let (lhs, rhs) = ($self, $rhs);
        validate_types(lhs.dtype(), rhs.dtype())?;

        // Lengths must match unless one side has exactly one element.
        polars_ensure!(
            lhs.len() == rhs.len() ||

            // Broadcast
            lhs.len() == 1 ||
            rhs.len() == 1,
            ShapeMismatch:
                "could not perform '{}' comparison between series '{}' of length: {} and series '{}' of length: {}, because they have different lengths",
            $op,
            lhs.name(), lhs.len(),
            rhs.name(), rhs.len()
        );

        // Categorical / enum operands are handled up front. Every arm returns early, so the
        // coercion and the rename at the bottom of the macro are skipped for them.
        #[cfg(feature = "dtype-categorical")]
        match (lhs.dtype(), rhs.dtype()) {
            (Categorical(lcats, _), Categorical(rcats, _)) => {
                ensure_same_categories(lcats, rcats)?;
                return with_match_categorical_physical_type!(lcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())
                })
            },
            (Enum(lfcats, _), Enum(rfcats, _)) => {
                ensure_same_frozen_categories(lfcats, rfcats)?;
                return with_match_categorical_physical_type!(lfcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())
                })
            },
            (Categorical(_, _) | Enum(_, _), String) => {
                return with_match_categorical_physical_type!(lhs.dtype().cat_physical().unwrap(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.str().unwrap())
                })
            },
            (String, Categorical(_, _) | Enum(_, _)) => {
                return with_match_categorical_physical_type!(rhs.dtype().cat_physical().unwrap(), |$C| {
                    // We use the reverse method as string <-> enum comparisons are only implemented one-way.
                    rhs.cat::<$C>().unwrap().$rev_method(lhs.str().unwrap())
                })
            },
            _ => (),
        };

        // Bring both operands to a common dtype; the closure still sees the original
        // (un-coerced) operands, so the error reports the dtypes the caller passed in.
        let (lhs, rhs) = coerce_lhs_rhs(lhs, rhs).map_err(|_|
            polars_err!(
                SchemaMismatch: "could not evaluate '{}' comparison between series '{}' of dtype: {} and series '{}' of dtype: {}",
                $op,
                lhs.name(), lhs.dtype(),
                rhs.name(), rhs.dtype()
            )
        )?;
        let lhs = lhs.to_physical_repr();
        let rhs = rhs.to_physical_repr();
        // Dispatch on the physical dtype of the (coerced) left operand.
        let mut out = match lhs.dtype() {
            Null => lhs.null().unwrap().$method(rhs.null().unwrap()),
            Boolean => lhs.bool().unwrap().$method(rhs.bool().unwrap()),
            String => lhs.str().unwrap().$method(rhs.str().unwrap()),
            Binary => lhs.binary().unwrap().$method(rhs.binary().unwrap()),
            BinaryOffset => lhs.binary_offset().unwrap().$method(rhs.binary_offset().unwrap()),
            UInt8 => feature_gated!("dtype-u8", lhs.u8().unwrap().$method(rhs.u8().unwrap())),
            UInt16 => feature_gated!("dtype-u16", lhs.u16().unwrap().$method(rhs.u16().unwrap())),
            UInt32 => lhs.u32().unwrap().$method(rhs.u32().unwrap()),
            UInt64 => lhs.u64().unwrap().$method(rhs.u64().unwrap()),
            UInt128 => feature_gated!("dtype-u128", lhs.u128().unwrap().$method(rhs.u128().unwrap())),
            Int8 => feature_gated!("dtype-i8", lhs.i8().unwrap().$method(rhs.i8().unwrap())),
            Int16 => feature_gated!("dtype-i16", lhs.i16().unwrap().$method(rhs.i16().unwrap())),
            Int32 => lhs.i32().unwrap().$method(rhs.i32().unwrap()),
            Int64 => lhs.i64().unwrap().$method(rhs.i64().unwrap()),
            Int128 => feature_gated!("dtype-i128", lhs.i128().unwrap().$method(rhs.i128().unwrap())),
            Float32 => lhs.f32().unwrap().$method(rhs.f32().unwrap()),
            Float64 => lhs.f64().unwrap().$method(rhs.f64().unwrap()),
            // Nested dtypes are explicitly rejected for ordering comparisons.
            List(_) => bail_invalid_ineq!(lhs, rhs, $op),
            #[cfg(feature = "dtype-array")]
            Array(_, _) => bail_invalid_ineq!(lhs, rhs, $op),
            #[cfg(feature = "dtype-struct")]
            Struct(_) => bail_invalid_ineq!(lhs, rhs, $op),

            dt => polars_bail!(InvalidOperation: "could not apply comparison on series of dtype '{}'; operand names: '{}', '{}'", dt, lhs.name(), rhs.name()),
        };
        // The mask is named after the left operand.
        out.rename(lhs.name().clone());
        PolarsResult::Ok(out)
    }};
}
189
190fn validate_types(left: &DataType, right: &DataType) -> PolarsResult<()> {
191    use DataType::*;
192
193    match (left, right) {
194        (String, dt) | (dt, String) if dt.is_primitive_numeric() => {
195            polars_bail!(ComputeError: "cannot compare string with numeric type ({})", dt)
196        },
197        #[cfg(feature = "dtype-categorical")]
198        (Categorical(_, _) | Enum(_, _), dt) | (dt, Categorical(_, _) | Enum(_, _))
199            if !(dt.is_categorical() | dt.is_string() | dt.is_enum()) =>
200        {
201            polars_bail!(ComputeError: "cannot compare categorical with {}", dt);
202        },
203        _ => (),
204    };
205    Ok(())
206}
207
208impl ChunkCompareEq<&Series> for Series {
209    type Item = PolarsResult<BooleanChunked>;
210
211    /// Create a boolean mask by checking for equality.
212    fn equal(&self, rhs: &Series) -> Self::Item {
213        impl_eq_compare!(self, rhs, equal)
214    }
215
216    /// Create a boolean mask by checking for equality.
217    fn equal_missing(&self, rhs: &Series) -> Self::Item {
218        impl_eq_compare!(self, rhs, equal_missing)
219    }
220
221    /// Create a boolean mask by checking for inequality.
222    fn not_equal(&self, rhs: &Series) -> Self::Item {
223        impl_eq_compare!(self, rhs, not_equal)
224    }
225
226    /// Create a boolean mask by checking for inequality.
227    fn not_equal_missing(&self, rhs: &Series) -> Self::Item {
228        impl_eq_compare!(self, rhs, not_equal_missing)
229    }
230}
231
232impl ChunkCompareIneq<&Series> for Series {
233    type Item = PolarsResult<BooleanChunked>;
234
235    /// Create a boolean mask by checking if self > rhs.
236    fn gt(&self, rhs: &Series) -> Self::Item {
237        impl_ineq_compare!(self, rhs, gt, ">", lt)
238    }
239
240    /// Create a boolean mask by checking if self >= rhs.
241    fn gt_eq(&self, rhs: &Series) -> Self::Item {
242        impl_ineq_compare!(self, rhs, gt_eq, ">=", lt_eq)
243    }
244
245    /// Create a boolean mask by checking if self < rhs.
246    fn lt(&self, rhs: &Series) -> Self::Item {
247        impl_ineq_compare!(self, rhs, lt, "<", gt)
248    }
249
250    /// Create a boolean mask by checking if self <= rhs.
251    fn lt_eq(&self, rhs: &Series) -> Self::Item {
252        impl_ineq_compare!(self, rhs, lt_eq, "<=", gt_eq)
253    }
254}
255
256impl<Rhs> ChunkCompareEq<Rhs> for Series
257where
258    Rhs: NumericNative,
259{
260    type Item = PolarsResult<BooleanChunked>;
261
262    fn equal(&self, rhs: Rhs) -> Self::Item {
263        validate_types(self.dtype(), &DataType::Int8)?;
264        let s = self.to_physical_repr();
265        Ok(apply_method_physical_numeric!(&s, equal, rhs))
266    }
267
268    fn equal_missing(&self, rhs: Rhs) -> Self::Item {
269        validate_types(self.dtype(), &DataType::Int8)?;
270        let s = self.to_physical_repr();
271        Ok(apply_method_physical_numeric!(&s, equal_missing, rhs))
272    }
273
274    fn not_equal(&self, rhs: Rhs) -> Self::Item {
275        validate_types(self.dtype(), &DataType::Int8)?;
276        let s = self.to_physical_repr();
277        Ok(apply_method_physical_numeric!(&s, not_equal, rhs))
278    }
279
280    fn not_equal_missing(&self, rhs: Rhs) -> Self::Item {
281        validate_types(self.dtype(), &DataType::Int8)?;
282        let s = self.to_physical_repr();
283        Ok(apply_method_physical_numeric!(&s, not_equal_missing, rhs))
284    }
285}
286
287impl<Rhs> ChunkCompareIneq<Rhs> for Series
288where
289    Rhs: NumericNative,
290{
291    type Item = PolarsResult<BooleanChunked>;
292
293    fn gt(&self, rhs: Rhs) -> Self::Item {
294        validate_types(self.dtype(), &DataType::Int8)?;
295        let s = self.to_physical_repr();
296        Ok(apply_method_physical_numeric!(&s, gt, rhs))
297    }
298
299    fn gt_eq(&self, rhs: Rhs) -> Self::Item {
300        validate_types(self.dtype(), &DataType::Int8)?;
301        let s = self.to_physical_repr();
302        Ok(apply_method_physical_numeric!(&s, gt_eq, rhs))
303    }
304
305    fn lt(&self, rhs: Rhs) -> Self::Item {
306        validate_types(self.dtype(), &DataType::Int8)?;
307        let s = self.to_physical_repr();
308        Ok(apply_method_physical_numeric!(&s, lt, rhs))
309    }
310
311    fn lt_eq(&self, rhs: Rhs) -> Self::Item {
312        validate_types(self.dtype(), &DataType::Int8)?;
313        let s = self.to_physical_repr();
314        Ok(apply_method_physical_numeric!(&s, lt_eq, rhs))
315    }
316}
317
318impl ChunkCompareEq<&str> for Series {
319    type Item = PolarsResult<BooleanChunked>;
320
321    fn equal(&self, rhs: &str) -> PolarsResult<BooleanChunked> {
322        validate_types(self.dtype(), &DataType::String)?;
323        match self.dtype() {
324            DataType::String => Ok(self.str().unwrap().equal(rhs)),
325            #[cfg(feature = "dtype-categorical")]
326            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
327                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
328                    self.cat::<$C>().unwrap().equal(rhs)
329                }),
330            ),
331            _ => Ok(BooleanChunked::full(self.name().clone(), false, self.len())),
332        }
333    }
334
335    fn equal_missing(&self, rhs: &str) -> Self::Item {
336        validate_types(self.dtype(), &DataType::String)?;
337        match self.dtype() {
338            DataType::String => Ok(self.str().unwrap().equal_missing(rhs)),
339            #[cfg(feature = "dtype-categorical")]
340            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
341                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
342                    self.cat::<$C>().unwrap().equal_missing(rhs)
343                }),
344            ),
345            _ => Ok(replace_non_null(
346                self.name().clone(),
347                self.0.chunks(),
348                false,
349            )),
350        }
351    }
352
353    fn not_equal(&self, rhs: &str) -> PolarsResult<BooleanChunked> {
354        validate_types(self.dtype(), &DataType::String)?;
355        match self.dtype() {
356            DataType::String => Ok(self.str().unwrap().not_equal(rhs)),
357            #[cfg(feature = "dtype-categorical")]
358            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
359                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
360                    self.cat::<$C>().unwrap().not_equal(rhs)
361                }),
362            ),
363            _ => Ok(BooleanChunked::full(self.name().clone(), true, self.len())),
364        }
365    }
366
367    fn not_equal_missing(&self, rhs: &str) -> Self::Item {
368        validate_types(self.dtype(), &DataType::String)?;
369        match self.dtype() {
370            DataType::String => Ok(self.str().unwrap().not_equal_missing(rhs)),
371            #[cfg(feature = "dtype-categorical")]
372            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
373                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
374                    self.cat::<$C>().unwrap().not_equal_missing(rhs)
375                }),
376            ),
377            _ => Ok(replace_non_null(self.name().clone(), self.0.chunks(), true)),
378        }
379    }
380}
381
382impl ChunkCompareIneq<&str> for Series {
383    type Item = PolarsResult<BooleanChunked>;
384
385    fn gt(&self, rhs: &str) -> Self::Item {
386        validate_types(self.dtype(), &DataType::String)?;
387        match self.dtype() {
388            DataType::String => Ok(self.str().unwrap().gt(rhs)),
389            #[cfg(feature = "dtype-categorical")]
390            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
391                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
392                    self.cat::<$C>().unwrap().gt(rhs)
393                }),
394            ),
395            _ => polars_bail!(
396                ComputeError: "cannot compare str value to series of type {}", self.dtype(),
397            ),
398        }
399    }
400
401    fn gt_eq(&self, rhs: &str) -> Self::Item {
402        validate_types(self.dtype(), &DataType::String)?;
403        match self.dtype() {
404            DataType::String => Ok(self.str().unwrap().gt_eq(rhs)),
405            #[cfg(feature = "dtype-categorical")]
406            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
407                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
408                    self.cat::<$C>().unwrap().gt_eq(rhs)
409                }),
410            ),
411            _ => polars_bail!(
412                ComputeError: "cannot compare str value to series of type {}", self.dtype(),
413            ),
414        }
415    }
416
417    fn lt(&self, rhs: &str) -> Self::Item {
418        validate_types(self.dtype(), &DataType::String)?;
419        match self.dtype() {
420            DataType::String => Ok(self.str().unwrap().lt(rhs)),
421            #[cfg(feature = "dtype-categorical")]
422            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
423                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
424                    self.cat::<$C>().unwrap().lt(rhs)
425                }),
426            ),
427            _ => polars_bail!(
428                ComputeError: "cannot compare str value to series of type {}", self.dtype(),
429            ),
430        }
431    }
432
433    fn lt_eq(&self, rhs: &str) -> Self::Item {
434        validate_types(self.dtype(), &DataType::String)?;
435        match self.dtype() {
436            DataType::String => Ok(self.str().unwrap().lt_eq(rhs)),
437            #[cfg(feature = "dtype-categorical")]
438            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
439                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
440                    self.cat::<$C>().unwrap().lt_eq(rhs)
441                }),
442            ),
443            _ => polars_bail!(
444                ComputeError: "cannot compare str value to series of type {}", self.dtype(),
445            ),
446        }
447    }
448}