polars_utils/
total_ord.rs

1use std::cmp::Ordering;
2use std::hash::{BuildHasher, Hash, Hasher};
3
4use bytemuck::TransparentWrapper;
5
6use crate::hashing::{BytesHash, DirtyHash};
7use crate::nulls::IsNull;
8
9/// Converts an f32 into a canonical form, where -0 == 0 and all NaNs map to
10/// the same value.
11#[inline]
12pub fn canonical_f32(x: f32) -> f32 {
13    // -0.0 + 0.0 becomes 0.0.
14    let convert_zero = x + 0.0;
15    if convert_zero.is_nan() {
16        f32::from_bits(0x7fc00000) // Canonical quiet NaN.
17    } else {
18        convert_zero
19    }
20}
21
22/// Converts an f64 into a canonical form, where -0 == 0 and all NaNs map to
23/// the same value.
24#[inline]
25pub fn canonical_f64(x: f64) -> f64 {
26    // -0.0 + 0.0 becomes 0.0.
27    let convert_zero = x + 0.0;
28    if convert_zero.is_nan() {
29        f64::from_bits(0x7ff8000000000000) // Canonical quiet NaN.
30    } else {
31        convert_zero
32    }
33}
34
35/// Alternative trait for Eq. By consistently using this we can still be
36/// generic w.r.t Eq while getting a total ordering for floats.
37pub trait TotalEq {
38    fn tot_eq(&self, other: &Self) -> bool;
39
40    #[inline]
41    fn tot_ne(&self, other: &Self) -> bool {
42        !(self.tot_eq(other))
43    }
44}
45
46/// Alternative trait for Ord. By consistently using this we can still be
47/// generic w.r.t Ord while getting a total ordering for floats.
48pub trait TotalOrd: TotalEq {
49    fn tot_cmp(&self, other: &Self) -> Ordering;
50
51    #[inline]
52    fn tot_lt(&self, other: &Self) -> bool {
53        self.tot_cmp(other) == Ordering::Less
54    }
55
56    #[inline]
57    fn tot_gt(&self, other: &Self) -> bool {
58        self.tot_cmp(other) == Ordering::Greater
59    }
60
61    #[inline]
62    fn tot_le(&self, other: &Self) -> bool {
63        self.tot_cmp(other) != Ordering::Greater
64    }
65
66    #[inline]
67    fn tot_ge(&self, other: &Self) -> bool {
68        self.tot_cmp(other) != Ordering::Less
69    }
70}
71
72/// Alternative trait for Hash. By consistently using this we can still be
73/// generic w.r.t Hash while being able to hash floats.
74pub trait TotalHash {
75    fn tot_hash<H>(&self, state: &mut H)
76    where
77        H: Hasher;
78
79    fn tot_hash_slice<H>(data: &[Self], state: &mut H)
80    where
81        H: Hasher,
82        Self: Sized,
83    {
84        for piece in data {
85            piece.tot_hash(state)
86        }
87    }
88}
89
90pub trait BuildHasherTotalExt: BuildHasher {
91    fn tot_hash_one<T>(&self, x: T) -> u64
92    where
93        T: TotalHash,
94        Self: Sized,
95        <Self as BuildHasher>::Hasher: Hasher,
96    {
97        let mut hasher = self.build_hasher();
98        x.tot_hash(&mut hasher);
99        hasher.finish()
100    }
101}
102
103impl<T: BuildHasher> BuildHasherTotalExt for T {}
104
105#[repr(transparent)]
106pub struct TotalOrdWrap<T>(pub T);
107unsafe impl<T> TransparentWrapper<T> for TotalOrdWrap<T> {}
108
109impl<T: TotalOrd> PartialOrd for TotalOrdWrap<T> {
110    #[inline(always)]
111    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
112        Some(self.cmp(other))
113    }
114
115    #[inline(always)]
116    fn lt(&self, other: &Self) -> bool {
117        self.0.tot_lt(&other.0)
118    }
119
120    #[inline(always)]
121    fn le(&self, other: &Self) -> bool {
122        self.0.tot_le(&other.0)
123    }
124
125    #[inline(always)]
126    fn gt(&self, other: &Self) -> bool {
127        self.0.tot_gt(&other.0)
128    }
129
130    #[inline(always)]
131    fn ge(&self, other: &Self) -> bool {
132        self.0.tot_ge(&other.0)
133    }
134}
135
136impl<T: TotalOrd> Ord for TotalOrdWrap<T> {
137    #[inline(always)]
138    fn cmp(&self, other: &Self) -> Ordering {
139        self.0.tot_cmp(&other.0)
140    }
141}
142
143impl<T: TotalEq> PartialEq for TotalOrdWrap<T> {
144    #[inline(always)]
145    fn eq(&self, other: &Self) -> bool {
146        self.0.tot_eq(&other.0)
147    }
148
149    #[inline(always)]
150    #[allow(clippy::partialeq_ne_impl)]
151    fn ne(&self, other: &Self) -> bool {
152        self.0.tot_ne(&other.0)
153    }
154}
155
156impl<T: TotalEq> Eq for TotalOrdWrap<T> {}
157
158impl<T: TotalHash> Hash for TotalOrdWrap<T> {
159    #[inline(always)]
160    fn hash<H: Hasher>(&self, state: &mut H) {
161        self.0.tot_hash(state);
162    }
163}
164
165impl<T: Clone> Clone for TotalOrdWrap<T> {
166    #[inline]
167    fn clone(&self) -> Self {
168        Self(self.0.clone())
169    }
170}
171
172impl<T: Copy> Copy for TotalOrdWrap<T> {}
173
174impl<T: IsNull> IsNull for TotalOrdWrap<T> {
175    const HAS_NULLS: bool = T::HAS_NULLS;
176    type Inner = T::Inner;
177
178    #[inline(always)]
179    fn is_null(&self) -> bool {
180        self.0.is_null()
181    }
182
183    #[inline(always)]
184    fn unwrap_inner(self) -> Self::Inner {
185        self.0.unwrap_inner()
186    }
187}
188
189impl DirtyHash for f32 {
190    #[inline(always)]
191    fn dirty_hash(&self) -> u64 {
192        canonical_f32(*self).to_bits().dirty_hash()
193    }
194}
195
196impl DirtyHash for f64 {
197    #[inline(always)]
198    fn dirty_hash(&self) -> u64 {
199        canonical_f64(*self).to_bits().dirty_hash()
200    }
201}
202
203impl<T: DirtyHash> DirtyHash for TotalOrdWrap<T> {
204    #[inline(always)]
205    fn dirty_hash(&self) -> u64 {
206        self.0.dirty_hash()
207    }
208}
209
210macro_rules! impl_trivial_total {
211    ($T: ty) => {
212        impl TotalEq for $T {
213            #[inline(always)]
214            fn tot_eq(&self, other: &Self) -> bool {
215                self == other
216            }
217
218            #[inline(always)]
219            fn tot_ne(&self, other: &Self) -> bool {
220                self != other
221            }
222        }
223
224        impl TotalOrd for $T {
225            #[inline(always)]
226            fn tot_cmp(&self, other: &Self) -> Ordering {
227                self.cmp(other)
228            }
229
230            #[inline(always)]
231            fn tot_lt(&self, other: &Self) -> bool {
232                self < other
233            }
234
235            #[inline(always)]
236            fn tot_gt(&self, other: &Self) -> bool {
237                self > other
238            }
239
240            #[inline(always)]
241            fn tot_le(&self, other: &Self) -> bool {
242                self <= other
243            }
244
245            #[inline(always)]
246            fn tot_ge(&self, other: &Self) -> bool {
247                self >= other
248            }
249        }
250
251        impl TotalHash for $T {
252            #[inline(always)]
253            fn tot_hash<H>(&self, state: &mut H)
254            where
255                H: Hasher,
256            {
257                self.hash(state);
258            }
259        }
260    };
261}
262
263// We can't do a blanket impl because Rust complains f32 might implement
264// Ord / Eq someday.
265impl_trivial_total!(bool);
266impl_trivial_total!(u8);
267impl_trivial_total!(u16);
268impl_trivial_total!(u32);
269impl_trivial_total!(u64);
270impl_trivial_total!(u128);
271impl_trivial_total!(usize);
272impl_trivial_total!(i8);
273impl_trivial_total!(i16);
274impl_trivial_total!(i32);
275impl_trivial_total!(i64);
276impl_trivial_total!(i128);
277impl_trivial_total!(isize);
278impl_trivial_total!(char);
279impl_trivial_total!(&str);
280impl_trivial_total!(&[u8]);
281impl_trivial_total!(String);
282
283macro_rules! impl_float_eq_ord {
284    ($T:ty) => {
285        impl TotalEq for $T {
286            #[inline]
287            fn tot_eq(&self, other: &Self) -> bool {
288                if self.is_nan() {
289                    other.is_nan()
290                } else {
291                    self == other
292                }
293            }
294        }
295
296        impl TotalOrd for $T {
297            #[inline(always)]
298            fn tot_cmp(&self, other: &Self) -> Ordering {
299                if self.tot_lt(other) {
300                    Ordering::Less
301                } else if self.tot_gt(other) {
302                    Ordering::Greater
303                } else {
304                    Ordering::Equal
305                }
306            }
307
308            #[inline(always)]
309            fn tot_lt(&self, other: &Self) -> bool {
310                !self.tot_ge(other)
311            }
312
313            #[inline(always)]
314            fn tot_gt(&self, other: &Self) -> bool {
315                other.tot_lt(self)
316            }
317
318            #[inline(always)]
319            fn tot_le(&self, other: &Self) -> bool {
320                other.tot_ge(self)
321            }
322
323            #[inline(always)]
324            fn tot_ge(&self, other: &Self) -> bool {
325                // We consider all NaNs equal, and NaN is the largest possible
326                // value. Thus if self is NaN we always return true. Otherwise
327                // self >= other is correct. If other is not NaN it is trivially
328                // correct, and if it is we note that nothing can be greater or
329                // equal to NaN except NaN itself, which we already handled earlier.
330                self.is_nan() | (self >= other)
331            }
332        }
333    };
334}
335
336impl_float_eq_ord!(f32);
337impl_float_eq_ord!(f64);
338
339impl TotalHash for f32 {
340    #[inline(always)]
341    fn tot_hash<H>(&self, state: &mut H)
342    where
343        H: Hasher,
344    {
345        canonical_f32(*self).to_bits().hash(state)
346    }
347}
348
349impl TotalHash for f64 {
350    #[inline(always)]
351    fn tot_hash<H>(&self, state: &mut H)
352    where
353        H: Hasher,
354    {
355        canonical_f64(*self).to_bits().hash(state)
356    }
357}
358
359// Blanket implementations.
360impl<T: TotalEq> TotalEq for Option<T> {
361    #[inline(always)]
362    fn tot_eq(&self, other: &Self) -> bool {
363        match (self, other) {
364            (None, None) => true,
365            (Some(a), Some(b)) => a.tot_eq(b),
366            _ => false,
367        }
368    }
369
370    #[inline(always)]
371    fn tot_ne(&self, other: &Self) -> bool {
372        match (self, other) {
373            (None, None) => false,
374            (Some(a), Some(b)) => a.tot_ne(b),
375            _ => true,
376        }
377    }
378}
379
380impl<T: TotalOrd> TotalOrd for Option<T> {
381    #[inline(always)]
382    fn tot_cmp(&self, other: &Self) -> Ordering {
383        match (self, other) {
384            (None, None) => Ordering::Equal,
385            (None, Some(_)) => Ordering::Less,
386            (Some(_), None) => Ordering::Greater,
387            (Some(a), Some(b)) => a.tot_cmp(b),
388        }
389    }
390
391    #[inline(always)]
392    fn tot_lt(&self, other: &Self) -> bool {
393        match (self, other) {
394            (None, Some(_)) => true,
395            (Some(a), Some(b)) => a.tot_lt(b),
396            _ => false,
397        }
398    }
399
400    #[inline(always)]
401    fn tot_gt(&self, other: &Self) -> bool {
402        other.tot_lt(self)
403    }
404
405    #[inline(always)]
406    fn tot_le(&self, other: &Self) -> bool {
407        match (self, other) {
408            (Some(_), None) => false,
409            (Some(a), Some(b)) => a.tot_lt(b),
410            _ => true,
411        }
412    }
413
414    #[inline(always)]
415    fn tot_ge(&self, other: &Self) -> bool {
416        other.tot_le(self)
417    }
418}
419
420impl<T: TotalHash> TotalHash for Option<T> {
421    #[inline]
422    fn tot_hash<H>(&self, state: &mut H)
423    where
424        H: Hasher,
425    {
426        self.is_some().tot_hash(state);
427        if let Some(slf) = self {
428            slf.tot_hash(state)
429        }
430    }
431}
432
433impl<T: TotalEq + ?Sized> TotalEq for &T {
434    #[inline(always)]
435    fn tot_eq(&self, other: &Self) -> bool {
436        (*self).tot_eq(*other)
437    }
438
439    #[inline(always)]
440    fn tot_ne(&self, other: &Self) -> bool {
441        (*self).tot_ne(*other)
442    }
443}
444
445impl<T: TotalHash + ?Sized> TotalHash for &T {
446    #[inline(always)]
447    fn tot_hash<H>(&self, state: &mut H)
448    where
449        H: Hasher,
450    {
451        (*self).tot_hash(state)
452    }
453}
454
455impl<T: TotalEq, U: TotalEq> TotalEq for (T, U) {
456    #[inline]
457    fn tot_eq(&self, other: &Self) -> bool {
458        self.0.tot_eq(&other.0) && self.1.tot_eq(&other.1)
459    }
460}
461
462impl<T: TotalOrd, U: TotalOrd> TotalOrd for (T, U) {
463    #[inline]
464    fn tot_cmp(&self, other: &Self) -> Ordering {
465        self.0
466            .tot_cmp(&other.0)
467            .then_with(|| self.1.tot_cmp(&other.1))
468    }
469}
470
471impl TotalHash for BytesHash<'_> {
472    #[inline(always)]
473    fn tot_hash<H>(&self, state: &mut H)
474    where
475        H: Hasher,
476    {
477        self.hash(state)
478    }
479}
480
481impl TotalEq for BytesHash<'_> {
482    #[inline(always)]
483    fn tot_eq(&self, other: &Self) -> bool {
484        self == other
485    }
486}
487
488/// This elides creating a [`TotalOrdWrap`] for types that don't need it.
489pub trait ToTotalOrd {
490    type TotalOrdItem: Hash + Eq;
491    type SourceItem;
492
493    fn to_total_ord(&self) -> Self::TotalOrdItem;
494
495    fn peel_total_ord(ord_item: Self::TotalOrdItem) -> Self::SourceItem;
496}
497
498macro_rules! impl_to_total_ord_identity {
499    ($T: ty) => {
500        impl ToTotalOrd for $T {
501            type TotalOrdItem = $T;
502            type SourceItem = $T;
503
504            #[inline]
505            fn to_total_ord(&self) -> Self::TotalOrdItem {
506                self.clone()
507            }
508
509            #[inline]
510            fn peel_total_ord(ord_item: Self::TotalOrdItem) -> Self::SourceItem {
511                ord_item
512            }
513        }
514    };
515}
516
517impl_to_total_ord_identity!(bool);
518impl_to_total_ord_identity!(u8);
519impl_to_total_ord_identity!(u16);
520impl_to_total_ord_identity!(u32);
521impl_to_total_ord_identity!(u64);
522impl_to_total_ord_identity!(u128);
523impl_to_total_ord_identity!(usize);
524impl_to_total_ord_identity!(i8);
525impl_to_total_ord_identity!(i16);
526impl_to_total_ord_identity!(i32);
527impl_to_total_ord_identity!(i64);
528impl_to_total_ord_identity!(i128);
529impl_to_total_ord_identity!(isize);
530impl_to_total_ord_identity!(char);
531impl_to_total_ord_identity!(String);
532
533macro_rules! impl_to_total_ord_lifetimed_ref_identity {
534    ($T: ty) => {
535        impl<'a> ToTotalOrd for &'a $T {
536            type TotalOrdItem = &'a $T;
537            type SourceItem = &'a $T;
538
539            #[inline]
540            fn to_total_ord(&self) -> Self::TotalOrdItem {
541                *self
542            }
543
544            #[inline]
545            fn peel_total_ord(ord_item: Self::TotalOrdItem) -> Self::SourceItem {
546                ord_item
547            }
548        }
549    };
550}
551
552impl_to_total_ord_lifetimed_ref_identity!(str);
553impl_to_total_ord_lifetimed_ref_identity!([u8]);
554
555macro_rules! impl_to_total_ord_wrapped {
556    ($T: ty) => {
557        impl ToTotalOrd for $T {
558            type TotalOrdItem = TotalOrdWrap<$T>;
559            type SourceItem = $T;
560
561            #[inline]
562            fn to_total_ord(&self) -> Self::TotalOrdItem {
563                TotalOrdWrap(self.clone())
564            }
565
566            #[inline]
567            fn peel_total_ord(ord_item: Self::TotalOrdItem) -> Self::SourceItem {
568                ord_item.0
569            }
570        }
571    };
572}
573
574impl_to_total_ord_wrapped!(f32);
575impl_to_total_ord_wrapped!(f64);
576
577/// This is safe without needing to map the option value to TotalOrdWrap, since
578/// for example:
579/// `TotalOrdWrap<Option<T>>` implements `Eq + Hash`, iff:
580/// `Option<T>` implements `TotalEq + TotalHash`, iff:
581/// `T` implements `TotalEq + TotalHash`
582impl<T: Copy + TotalEq + TotalHash> ToTotalOrd for Option<T> {
583    type TotalOrdItem = TotalOrdWrap<Option<T>>;
584    type SourceItem = Option<T>;
585
586    #[inline]
587    fn to_total_ord(&self) -> Self::TotalOrdItem {
588        TotalOrdWrap(*self)
589    }
590
591    #[inline]
592    fn peel_total_ord(ord_item: Self::TotalOrdItem) -> Self::SourceItem {
593        ord_item.0
594    }
595}
596
597impl<T: ToTotalOrd> ToTotalOrd for &T {
598    type TotalOrdItem = T::TotalOrdItem;
599    type SourceItem = T::SourceItem;
600
601    #[inline]
602    fn to_total_ord(&self) -> Self::TotalOrdItem {
603        (*self).to_total_ord()
604    }
605
606    #[inline]
607    fn peel_total_ord(ord_item: Self::TotalOrdItem) -> Self::SourceItem {
608        T::peel_total_ord(ord_item)
609    }
610}
611
612impl<'a> ToTotalOrd for BytesHash<'a> {
613    type TotalOrdItem = BytesHash<'a>;
614    type SourceItem = BytesHash<'a>;
615
616    #[inline]
617    fn to_total_ord(&self) -> Self::TotalOrdItem {
618        *self
619    }
620
621    #[inline]
622    fn peel_total_ord(ord_item: Self::TotalOrdItem) -> Self::SourceItem {
623        ord_item
624    }
625}