polars_core/series/
comparison.rs

1//! Comparison operations on Series.
2
3use polars_error::feature_gated;
4
5use crate::prelude::*;
6use crate::series::arithmetic::coerce_lhs_rhs;
7use crate::series::nulls::replace_non_null;
8
// Implements the equality-style comparisons between two `Series`.
//
// Arguments:
// - `$self`, `$rhs`: the left and right operands.
// - `$method`: name of the comparison method forwarded to the typed arrays
//   (the visible callers pass `equal`, `equal_missing`, `not_equal` and
//   `not_equal_missing`).
//
// The expansion evaluates to a `PolarsResult<BooleanChunked>`. It can also leave
// the enclosing function early, through `?` and through the explicit `return`s
// in the special-cased dtype arms.
macro_rules! impl_eq_compare {
    ($self:expr, $rhs:expr, $method:ident) => {{
        use DataType::*;
        let (lhs, rhs) = ($self, $rhs);
        validate_types(lhs.dtype(), rhs.dtype())?;

        polars_ensure!(
            lhs.len() == rhs.len() ||

            // Broadcast
            lhs.len() == 1 ||
            rhs.len() == 1,
            ShapeMismatch: "could not compare between two series of different length ({} != {})",
            lhs.len(),
            rhs.len()
        );

        // Dtype combinations that are dispatched directly instead of going through
        // the coercion + physical-representation path further down.
        match (lhs.dtype(), rhs.dtype()) {
            #[cfg(feature = "dtype-categorical")]
            (Categorical(lcats, _), Categorical(rcats, _)) => {
                ensure_same_categories(lcats, rcats)?;
                return with_match_categorical_physical_type!(lcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())
                })
            },
            #[cfg(feature = "dtype-categorical")]
            (Enum(lfcats, _), Enum(rfcats, _)) => {
                ensure_same_frozen_categories(lfcats, rfcats)?;
                return with_match_categorical_physical_type!(lfcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())
                })
            },
            #[cfg(feature = "dtype-categorical")]
            (Categorical(_, _) | Enum(_, _), String) => {
                return with_match_categorical_physical_type!(lhs.dtype().cat_physical().unwrap(), |$C| {
                    Ok(lhs.cat::<$C>().unwrap().$method(rhs.str().unwrap()))
                })
            },
            #[cfg(feature = "dtype-categorical")]
            (String, Categorical(_, _) | Enum(_, _)) => {
                // The operands are swapped here: the method is invoked on the
                // categorical side with the string side as argument.
                // NOTE(review): presumably fine because these comparisons are
                // symmetric - confirm.
                return with_match_categorical_physical_type!(rhs.dtype().cat_physical().unwrap(), |$C| {
                    Ok(rhs.cat::<$C>().unwrap().$method(lhs.str().unwrap()))
                })
            },

            // Two extension series of the same extension dtype: compare their storage.
            #[cfg(feature = "dtype-extension")]
            (le @ Extension(_, _), re @ Extension(_, _)) if le == re => {
                let lhs = lhs.ext().unwrap();
                let rhs = rhs.ext().unwrap();
                return lhs.storage().$method(rhs.storage());
            },

            // Extension on the left whose storage dtype equals the right dtype.
            #[cfg(feature = "dtype-extension")]
            (Extension(_, storage), rdt) if **storage == *rdt => {
                let lhs = lhs.ext().unwrap();
                return lhs.storage().$method(rhs);
            },

            // Extension on the right whose storage dtype equals the left dtype.
            #[cfg(feature = "dtype-extension")]
            (ldt, Extension(_, storage)) if *ldt == **storage => {
                let rhs = rhs.ext().unwrap();
                return lhs.$method(rhs.storage());
            },
            _ => (),
        };

        // General path: coerce both sides, switch to the physical representation and
        // dispatch on the (left) physical dtype.
        let (lhs, rhs) = coerce_lhs_rhs(lhs, rhs)
            .map_err(|_| polars_err!(
                    SchemaMismatch: "could not evaluate comparison between series '{}' of dtype: {:?} and series '{}' of dtype: {:?}",
                    lhs.name(), lhs.dtype(), rhs.name(), rhs.dtype()
            ))?;
        let lhs = lhs.to_physical_repr();
        let rhs = rhs.to_physical_repr();
        let mut out = match lhs.dtype() {
            Null => lhs.null().unwrap().$method(rhs.null().unwrap()),
            Boolean => lhs.bool().unwrap().$method(rhs.bool().unwrap()),
            String => lhs.str().unwrap().$method(rhs.str().unwrap()),
            Binary => lhs.binary().unwrap().$method(rhs.binary().unwrap()),
            BinaryOffset => lhs.binary_offset().unwrap().$method(rhs.binary_offset().unwrap()),
            UInt8 => feature_gated!("dtype-u8", lhs.u8().unwrap().$method(rhs.u8().unwrap())),
            UInt16 => feature_gated!("dtype-u16", lhs.u16().unwrap().$method(rhs.u16().unwrap())),
            UInt32 => lhs.u32().unwrap().$method(rhs.u32().unwrap()),
            UInt64 => lhs.u64().unwrap().$method(rhs.u64().unwrap()),
            UInt128 => feature_gated!("dtype-u128", lhs.u128().unwrap().$method(rhs.u128().unwrap())),
            Int8 => feature_gated!("dtype-i8", lhs.i8().unwrap().$method(rhs.i8().unwrap())),
            Int16 => feature_gated!("dtype-i16", lhs.i16().unwrap().$method(rhs.i16().unwrap())),
            Int32 => lhs.i32().unwrap().$method(rhs.i32().unwrap()),
            Int64 => lhs.i64().unwrap().$method(rhs.i64().unwrap()),
            Int128 => feature_gated!("dtype-i128", lhs.i128().unwrap().$method(rhs.i128().unwrap())),
            Float16 => feature_gated!("dtype-f16", lhs.f16().unwrap().$method(rhs.f16().unwrap())),
            Float32 => lhs.f32().unwrap().$method(rhs.f32().unwrap()),
            Float64 => lhs.f64().unwrap().$method(rhs.f64().unwrap()),
            List(_) => lhs.list().unwrap().$method(rhs.list().unwrap()),
            #[cfg(feature = "dtype-array")]
            Array(_, _) => lhs.array().unwrap().$method(rhs.array().unwrap()),
            #[cfg(feature = "dtype-struct")]
            Struct(_) => lhs.struct_().unwrap().$method(rhs.struct_().unwrap()),

            dt => polars_bail!(InvalidOperation: "could not apply comparison on series of dtype '{}'; operand names: '{}', '{}'", dt, lhs.name(), rhs.name()),
        };
        // The resulting mask takes the name of the left operand.
        out.rename(lhs.name().clone());
        PolarsResult::Ok(out)
    }};
}
113
// Invokes `polars_bail!` with an `InvalidOperation` error that reports the
// comparison operator together with the name and dtype of both operands.
//
// Arguments:
// - `$left`, `$right`: the operands; `.name()` and `.dtype()` are called on each.
// - `$symbol`: string literal for the operator, inserted into the message.
macro_rules! bail_invalid_ineq {
    ($left:expr, $right:expr, $symbol:literal) => {
        polars_bail!(
            InvalidOperation:
                "cannot perform '{}' comparison between series '{}' of dtype: {} and series '{}' of dtype: {}",
            $symbol,
            $left.name(),
            $left.dtype(),
            $right.name(),
            $right.dtype(),
        )
    };
}
124
// Implements the ordering comparisons between two `Series`.
//
// Arguments:
// - `$left`, `$right`: the two operands.
// - `$cmp`: name of the comparison method forwarded to the typed arrays.
// - `$symbol`: string literal for the operator, used in error messages only.
// - `$cmp_swapped`: the method to call when the operands have to be swapped
//   (used for the string-vs-categorical/enum case).
//
// The expansion evaluates to a `PolarsResult<BooleanChunked>`. It can also leave
// the enclosing function early, through `?` and through the explicit `return`s
// in the special-cased dtype arms.
macro_rules! impl_ineq_compare {
    ($left:expr, $right:expr, $cmp:ident, $symbol:literal, $cmp_swapped:ident) => {{
        use DataType::*;
        let (lhs, rhs) = ($left, $right);
        validate_types(lhs.dtype(), rhs.dtype())?;

        // Lengths have to agree unless one of the sides holds exactly one element
        // (broadcast).
        let lengths_compatible = lhs.len() == rhs.len() || lhs.len() == 1 || rhs.len() == 1;
        polars_ensure!(
            lengths_compatible,
            ShapeMismatch:
                "could not perform '{}' comparison between series '{}' of length: {} and series '{}' of length: {}, because they have different lengths",
            $symbol,
            lhs.name(),
            lhs.len(),
            rhs.name(),
            rhs.len()
        );

        // Dtype pairs that are dispatched directly rather than via coercion.
        match (lhs.dtype(), rhs.dtype()) {
            #[cfg(feature = "dtype-categorical")]
            (Categorical(lcats, _), Categorical(rcats, _)) => {
                ensure_same_categories(lcats, rcats)?;
                return with_match_categorical_physical_type!(lcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$cmp(rhs.cat::<$C>().unwrap())
                })
            },
            #[cfg(feature = "dtype-categorical")]
            (Enum(lfcats, _), Enum(rfcats, _)) => {
                ensure_same_frozen_categories(lfcats, rfcats)?;
                return with_match_categorical_physical_type!(lfcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$cmp(rhs.cat::<$C>().unwrap())
                })
            },
            #[cfg(feature = "dtype-categorical")]
            (Categorical(_, _) | Enum(_, _), String) => {
                return with_match_categorical_physical_type!(lhs.dtype().cat_physical().unwrap(), |$C| {
                    lhs.cat::<$C>().unwrap().$cmp(rhs.str().unwrap())
                })
            },
            #[cfg(feature = "dtype-categorical")]
            (String, Categorical(_, _) | Enum(_, _)) => {
                return with_match_categorical_physical_type!(rhs.dtype().cat_physical().unwrap(), |$C| {
                    // String <-> enum comparisons are only implemented one-way, so the
                    // operands are swapped and the reverse method is used.
                    rhs.cat::<$C>().unwrap().$cmp_swapped(lhs.str().unwrap())
                })
            },
            // Same extension dtype on both sides: compare the storage series.
            #[cfg(feature = "dtype-extension")]
            (le @ Extension(_, _), re @ Extension(_, _)) if le == re => {
                let lhs = lhs.ext().unwrap();
                let rhs = rhs.ext().unwrap();
                return lhs.storage().$cmp(rhs.storage());
            },

            // Extension on the left whose storage dtype equals the right dtype.
            #[cfg(feature = "dtype-extension")]
            (Extension(_, storage), rdt) if **storage == *rdt => {
                let lhs = lhs.ext().unwrap();
                return lhs.storage().$cmp(rhs);
            },

            // Extension on the right whose storage dtype equals the left dtype.
            #[cfg(feature = "dtype-extension")]
            (ldt, Extension(_, storage)) if *ldt == **storage => {
                let rhs = rhs.ext().unwrap();
                return lhs.$cmp(rhs.storage());
            },
            _ => (),
        };

        // General path: coerce, move to the physical representation, then dispatch
        // on the physical dtype of the left side.
        let (lhs, rhs) = coerce_lhs_rhs(lhs, rhs).map_err(|_| {
            polars_err!(
                SchemaMismatch: "could not evaluate '{}' comparison between series '{}' of dtype: {:?} and series '{}' of dtype: {:?}",
                $symbol,
                lhs.name(),
                lhs.dtype(),
                rhs.name(),
                rhs.dtype()
            )
        })?;
        let lhs = lhs.to_physical_repr();
        let rhs = rhs.to_physical_repr();
        let mut mask = match lhs.dtype() {
            Null => lhs.null().unwrap().$cmp(rhs.null().unwrap()),
            Boolean => lhs.bool().unwrap().$cmp(rhs.bool().unwrap()),
            String => lhs.str().unwrap().$cmp(rhs.str().unwrap()),
            Binary => lhs.binary().unwrap().$cmp(rhs.binary().unwrap()),
            BinaryOffset => lhs.binary_offset().unwrap().$cmp(rhs.binary_offset().unwrap()),
            UInt8 => feature_gated!("dtype-u8", lhs.u8().unwrap().$cmp(rhs.u8().unwrap())),
            UInt16 => feature_gated!("dtype-u16", lhs.u16().unwrap().$cmp(rhs.u16().unwrap())),
            UInt32 => lhs.u32().unwrap().$cmp(rhs.u32().unwrap()),
            UInt64 => lhs.u64().unwrap().$cmp(rhs.u64().unwrap()),
            UInt128 => feature_gated!("dtype-u128", lhs.u128().unwrap().$cmp(rhs.u128().unwrap())),
            Int8 => feature_gated!("dtype-i8", lhs.i8().unwrap().$cmp(rhs.i8().unwrap())),
            Int16 => feature_gated!("dtype-i16", lhs.i16().unwrap().$cmp(rhs.i16().unwrap())),
            Int32 => lhs.i32().unwrap().$cmp(rhs.i32().unwrap()),
            Int64 => lhs.i64().unwrap().$cmp(rhs.i64().unwrap()),
            Int128 => feature_gated!("dtype-i128", lhs.i128().unwrap().$cmp(rhs.i128().unwrap())),
            Float16 => feature_gated!("dtype-f16", lhs.f16().unwrap().$cmp(rhs.f16().unwrap())),
            Float32 => lhs.f32().unwrap().$cmp(rhs.f32().unwrap()),
            Float64 => lhs.f64().unwrap().$cmp(rhs.f64().unwrap()),
            // Nested dtypes are rejected for ordering comparisons.
            List(_) => bail_invalid_ineq!(lhs, rhs, $symbol),
            #[cfg(feature = "dtype-array")]
            Array(_, _) => bail_invalid_ineq!(lhs, rhs, $symbol),
            #[cfg(feature = "dtype-struct")]
            Struct(_) => bail_invalid_ineq!(lhs, rhs, $symbol),

            dt => polars_bail!(InvalidOperation: "could not apply comparison on series of dtype '{}'; operand names: '{}', '{}'", dt, lhs.name(), rhs.name()),
        };
        // The resulting mask takes the name of the left operand.
        mask.rename(lhs.name().clone());
        PolarsResult::Ok(mask)
    }};
}
234
235fn validate_types(left: &DataType, right: &DataType) -> PolarsResult<()> {
236    use DataType::*;
237
238    match (left, right) {
239        (String, dt) | (dt, String) if dt.is_primitive_numeric() => {
240            polars_bail!(ComputeError: "cannot compare string with numeric type ({})", dt)
241        },
242        #[cfg(feature = "dtype-categorical")]
243        (Categorical(_, _) | Enum(_, _), dt) | (dt, Categorical(_, _) | Enum(_, _))
244            if !(dt.is_categorical() | dt.is_string() | dt.is_enum()) =>
245        {
246            polars_bail!(ComputeError: "cannot compare categorical with {}", dt);
247        },
248        _ => (),
249    };
250    Ok(())
251}
252
253impl ChunkCompareEq<&Series> for Series {
254    type Item = PolarsResult<BooleanChunked>;
255
256    /// Create a boolean mask by checking for equality.
257    fn equal(&self, rhs: &Series) -> Self::Item {
258        impl_eq_compare!(self, rhs, equal)
259    }
260
261    /// Create a boolean mask by checking for equality.
262    fn equal_missing(&self, rhs: &Series) -> Self::Item {
263        impl_eq_compare!(self, rhs, equal_missing)
264    }
265
266    /// Create a boolean mask by checking for inequality.
267    fn not_equal(&self, rhs: &Series) -> Self::Item {
268        impl_eq_compare!(self, rhs, not_equal)
269    }
270
271    /// Create a boolean mask by checking for inequality.
272    fn not_equal_missing(&self, rhs: &Series) -> Self::Item {
273        impl_eq_compare!(self, rhs, not_equal_missing)
274    }
275}
276
277impl ChunkCompareIneq<&Series> for Series {
278    type Item = PolarsResult<BooleanChunked>;
279
280    /// Create a boolean mask by checking if self > rhs.
281    fn gt(&self, rhs: &Series) -> Self::Item {
282        impl_ineq_compare!(self, rhs, gt, ">", lt)
283    }
284
285    /// Create a boolean mask by checking if self >= rhs.
286    fn gt_eq(&self, rhs: &Series) -> Self::Item {
287        impl_ineq_compare!(self, rhs, gt_eq, ">=", lt_eq)
288    }
289
290    /// Create a boolean mask by checking if self < rhs.
291    fn lt(&self, rhs: &Series) -> Self::Item {
292        impl_ineq_compare!(self, rhs, lt, "<", gt)
293    }
294
295    /// Create a boolean mask by checking if self <= rhs.
296    fn lt_eq(&self, rhs: &Series) -> Self::Item {
297        impl_ineq_compare!(self, rhs, lt_eq, "<=", gt_eq)
298    }
299}
300
301impl<Rhs> ChunkCompareEq<Rhs> for Series
302where
303    Rhs: NumericNative,
304{
305    type Item = PolarsResult<BooleanChunked>;
306
307    fn equal(&self, rhs: Rhs) -> Self::Item {
308        validate_types(self.dtype(), &DataType::Int8)?;
309        let s = self.to_physical_repr();
310        Ok(apply_method_physical_numeric!(&s, equal, rhs))
311    }
312
313    fn equal_missing(&self, rhs: Rhs) -> Self::Item {
314        validate_types(self.dtype(), &DataType::Int8)?;
315        let s = self.to_physical_repr();
316        Ok(apply_method_physical_numeric!(&s, equal_missing, rhs))
317    }
318
319    fn not_equal(&self, rhs: Rhs) -> Self::Item {
320        validate_types(self.dtype(), &DataType::Int8)?;
321        let s = self.to_physical_repr();
322        Ok(apply_method_physical_numeric!(&s, not_equal, rhs))
323    }
324
325    fn not_equal_missing(&self, rhs: Rhs) -> Self::Item {
326        validate_types(self.dtype(), &DataType::Int8)?;
327        let s = self.to_physical_repr();
328        Ok(apply_method_physical_numeric!(&s, not_equal_missing, rhs))
329    }
330}
331
332impl<Rhs> ChunkCompareIneq<Rhs> for Series
333where
334    Rhs: NumericNative,
335{
336    type Item = PolarsResult<BooleanChunked>;
337
338    fn gt(&self, rhs: Rhs) -> Self::Item {
339        validate_types(self.dtype(), &DataType::Int8)?;
340        let s = self.to_physical_repr();
341        Ok(apply_method_physical_numeric!(&s, gt, rhs))
342    }
343
344    fn gt_eq(&self, rhs: Rhs) -> Self::Item {
345        validate_types(self.dtype(), &DataType::Int8)?;
346        let s = self.to_physical_repr();
347        Ok(apply_method_physical_numeric!(&s, gt_eq, rhs))
348    }
349
350    fn lt(&self, rhs: Rhs) -> Self::Item {
351        validate_types(self.dtype(), &DataType::Int8)?;
352        let s = self.to_physical_repr();
353        Ok(apply_method_physical_numeric!(&s, lt, rhs))
354    }
355
356    fn lt_eq(&self, rhs: Rhs) -> Self::Item {
357        validate_types(self.dtype(), &DataType::Int8)?;
358        let s = self.to_physical_repr();
359        Ok(apply_method_physical_numeric!(&s, lt_eq, rhs))
360    }
361}
362
363impl ChunkCompareEq<&str> for Series {
364    type Item = PolarsResult<BooleanChunked>;
365
366    fn equal(&self, rhs: &str) -> PolarsResult<BooleanChunked> {
367        validate_types(self.dtype(), &DataType::String)?;
368        match self.dtype() {
369            DataType::String => Ok(self.str().unwrap().equal(rhs)),
370            #[cfg(feature = "dtype-categorical")]
371            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
372                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
373                    self.cat::<$C>().unwrap().equal(rhs)
374                }),
375            ),
376            #[cfg(feature = "dtype-extension")]
377            DataType::Extension(_, _) => self.ext().unwrap().storage().equal(rhs),
378            _ => Ok(BooleanChunked::full(self.name().clone(), false, self.len())),
379        }
380    }
381
382    fn equal_missing(&self, rhs: &str) -> Self::Item {
383        validate_types(self.dtype(), &DataType::String)?;
384        match self.dtype() {
385            DataType::String => Ok(self.str().unwrap().equal_missing(rhs)),
386            #[cfg(feature = "dtype-categorical")]
387            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
388                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
389                    self.cat::<$C>().unwrap().equal_missing(rhs)
390                }),
391            ),
392            #[cfg(feature = "dtype-extension")]
393            DataType::Extension(_, _) => self.ext().unwrap().storage().equal_missing(rhs),
394            _ => Ok(replace_non_null(
395                self.name().clone(),
396                self.0.chunks(),
397                false,
398            )),
399        }
400    }
401
402    fn not_equal(&self, rhs: &str) -> PolarsResult<BooleanChunked> {
403        validate_types(self.dtype(), &DataType::String)?;
404        match self.dtype() {
405            DataType::String => Ok(self.str().unwrap().not_equal(rhs)),
406            #[cfg(feature = "dtype-categorical")]
407            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
408                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
409                    self.cat::<$C>().unwrap().not_equal(rhs)
410                }),
411            ),
412            #[cfg(feature = "dtype-extension")]
413            DataType::Extension(_, _) => self.ext().unwrap().storage().not_equal(rhs),
414            _ => Ok(BooleanChunked::full(self.name().clone(), true, self.len())),
415        }
416    }
417
418    fn not_equal_missing(&self, rhs: &str) -> Self::Item {
419        validate_types(self.dtype(), &DataType::String)?;
420        match self.dtype() {
421            DataType::String => Ok(self.str().unwrap().not_equal_missing(rhs)),
422            #[cfg(feature = "dtype-categorical")]
423            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
424                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
425                    self.cat::<$C>().unwrap().not_equal_missing(rhs)
426                }),
427            ),
428            #[cfg(feature = "dtype-extension")]
429            DataType::Extension(_, _) => self.ext().unwrap().storage().not_equal_missing(rhs),
430            _ => Ok(replace_non_null(self.name().clone(), self.0.chunks(), true)),
431        }
432    }
433}
434
435impl ChunkCompareIneq<&str> for Series {
436    type Item = PolarsResult<BooleanChunked>;
437
438    fn gt(&self, rhs: &str) -> Self::Item {
439        validate_types(self.dtype(), &DataType::String)?;
440        match self.dtype() {
441            DataType::String => Ok(self.str().unwrap().gt(rhs)),
442            #[cfg(feature = "dtype-categorical")]
443            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
444                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
445                    self.cat::<$C>().unwrap().gt(rhs)
446                }),
447            ),
448            #[cfg(feature = "dtype-extension")]
449            DataType::Extension(_, _) => self.ext().unwrap().storage().gt(rhs),
450            _ => polars_bail!(
451                ComputeError: "cannot compare str value to series of type {}", self.dtype(),
452            ),
453        }
454    }
455
456    fn gt_eq(&self, rhs: &str) -> Self::Item {
457        validate_types(self.dtype(), &DataType::String)?;
458        match self.dtype() {
459            DataType::String => Ok(self.str().unwrap().gt_eq(rhs)),
460            #[cfg(feature = "dtype-categorical")]
461            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
462                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
463                    self.cat::<$C>().unwrap().gt_eq(rhs)
464                }),
465            ),
466            #[cfg(feature = "dtype-extension")]
467            DataType::Extension(_, _) => self.ext().unwrap().storage().gt_eq(rhs),
468            _ => polars_bail!(
469                ComputeError: "cannot compare str value to series of type {}", self.dtype(),
470            ),
471        }
472    }
473
474    fn lt(&self, rhs: &str) -> Self::Item {
475        validate_types(self.dtype(), &DataType::String)?;
476        match self.dtype() {
477            DataType::String => Ok(self.str().unwrap().lt(rhs)),
478            #[cfg(feature = "dtype-categorical")]
479            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
480                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
481                    self.cat::<$C>().unwrap().lt(rhs)
482                }),
483            ),
484            #[cfg(feature = "dtype-extension")]
485            DataType::Extension(_, _) => self.ext().unwrap().storage().lt(rhs),
486            _ => polars_bail!(
487                ComputeError: "cannot compare str value to series of type {}", self.dtype(),
488            ),
489        }
490    }
491
492    fn lt_eq(&self, rhs: &str) -> Self::Item {
493        validate_types(self.dtype(), &DataType::String)?;
494        match self.dtype() {
495            DataType::String => Ok(self.str().unwrap().lt_eq(rhs)),
496            #[cfg(feature = "dtype-categorical")]
497            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
498                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
499                    self.cat::<$C>().unwrap().lt_eq(rhs)
500                }),
501            ),
502            #[cfg(feature = "dtype-extension")]
503            DataType::Extension(_, _) => self.ext().unwrap().storage().lt_eq(rhs),
504            _ => polars_bail!(
505                ComputeError: "cannot compare str value to series of type {}", self.dtype(),
506            ),
507        }
508    }
509}