polars_utils/
total_ord.rs

1use std::cmp::Ordering;
2use std::hash::{BuildHasher, Hash, Hasher};
3
4use bytemuck::TransparentWrapper;
5use num_traits::Zero;
6
7use crate::float16::pf16;
8use crate::hashing::{BytesHash, DirtyHash};
9use crate::nulls::IsNull;
10
11/// Converts an pf16 into a canonical form, where -0 == 0 and all NaNs map to
12/// the same value.
13#[inline]
14pub fn canonical_f16(x: pf16) -> pf16 {
15    // -0.0 + 0.0 becomes 0.0.
16    let convert_zero = x + pf16::zero(); // zero out the sign bit if the f16 is zero.
17    if convert_zero.is_nan() {
18        pf16(half::f16::from_bits(0x7e00)) // Canonical quiet NaN.
19    } else {
20        convert_zero
21    }
22}
23
24/// Converts an f32 into a canonical form, where -0 == 0 and all NaNs map to
25/// the same value.
26#[inline]
27pub fn canonical_f32(x: f32) -> f32 {
28    // -0.0 + 0.0 becomes 0.0.
29    let convert_zero = x + 0.0;
30    if convert_zero.is_nan() {
31        f32::from_bits(0x7fc00000) // Canonical quiet NaN.
32    } else {
33        convert_zero
34    }
35}
36
37/// Converts an f64 into a canonical form, where -0 == 0 and all NaNs map to
38/// the same value.
39#[inline]
40pub fn canonical_f64(x: f64) -> f64 {
41    // -0.0 + 0.0 becomes 0.0.
42    let convert_zero = x + 0.0;
43    if convert_zero.is_nan() {
44        f64::from_bits(0x7ff8000000000000) // Canonical quiet NaN.
45    } else {
46        convert_zero
47    }
48}
49
50/// Alternative trait for Eq. By consistently using this we can still be
51/// generic w.r.t Eq while getting a total ordering for floats.
52pub trait TotalEq {
53    fn tot_eq(&self, other: &Self) -> bool;
54
55    #[inline]
56    fn tot_ne(&self, other: &Self) -> bool {
57        !(self.tot_eq(other))
58    }
59}
60
61/// Alternative trait for Ord. By consistently using this we can still be
62/// generic w.r.t Ord while getting a total ordering for floats.
63pub trait TotalOrd: TotalEq {
64    fn tot_cmp(&self, other: &Self) -> Ordering;
65
66    #[inline]
67    fn tot_lt(&self, other: &Self) -> bool {
68        self.tot_cmp(other) == Ordering::Less
69    }
70
71    #[inline]
72    fn tot_gt(&self, other: &Self) -> bool {
73        self.tot_cmp(other) == Ordering::Greater
74    }
75
76    #[inline]
77    fn tot_le(&self, other: &Self) -> bool {
78        self.tot_cmp(other) != Ordering::Greater
79    }
80
81    #[inline]
82    fn tot_ge(&self, other: &Self) -> bool {
83        self.tot_cmp(other) != Ordering::Less
84    }
85}
86
87/// Alternative trait for Hash. By consistently using this we can still be
88/// generic w.r.t Hash while being able to hash floats.
89pub trait TotalHash {
90    fn tot_hash<H>(&self, state: &mut H)
91    where
92        H: Hasher;
93
94    fn tot_hash_slice<H>(data: &[Self], state: &mut H)
95    where
96        H: Hasher,
97        Self: Sized,
98    {
99        for piece in data {
100            piece.tot_hash(state)
101        }
102    }
103}
104
105pub trait BuildHasherTotalExt: BuildHasher {
106    fn tot_hash_one<T>(&self, x: T) -> u64
107    where
108        T: TotalHash,
109        Self: Sized,
110        <Self as BuildHasher>::Hasher: Hasher,
111    {
112        let mut hasher = self.build_hasher();
113        x.tot_hash(&mut hasher);
114        hasher.finish()
115    }
116}
117
118impl<T: BuildHasher> BuildHasherTotalExt for T {}
119
120#[derive(Debug)]
121#[cfg_attr(
122    feature = "serde",
123    derive(serde::Serialize, serde::Deserialize),
124    serde(transparent)
125)]
126#[cfg_attr(
127    feature = "dsl-schema",
128    derive(schemars::JsonSchema),
129    schemars(transparent)
130)]
131#[repr(transparent)]
132pub struct TotalOrdWrap<T>(pub T);
133unsafe impl<T> TransparentWrapper<T> for TotalOrdWrap<T> {}
134
135impl<T: TotalOrd> PartialOrd for TotalOrdWrap<T> {
136    #[inline(always)]
137    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
138        Some(self.cmp(other))
139    }
140
141    #[inline(always)]
142    fn lt(&self, other: &Self) -> bool {
143        self.0.tot_lt(&other.0)
144    }
145
146    #[inline(always)]
147    fn le(&self, other: &Self) -> bool {
148        self.0.tot_le(&other.0)
149    }
150
151    #[inline(always)]
152    fn gt(&self, other: &Self) -> bool {
153        self.0.tot_gt(&other.0)
154    }
155
156    #[inline(always)]
157    fn ge(&self, other: &Self) -> bool {
158        self.0.tot_ge(&other.0)
159    }
160}
161
162impl<T: TotalOrd> Ord for TotalOrdWrap<T> {
163    #[inline(always)]
164    fn cmp(&self, other: &Self) -> Ordering {
165        self.0.tot_cmp(&other.0)
166    }
167}
168
169impl<T: TotalEq> PartialEq for TotalOrdWrap<T> {
170    #[inline(always)]
171    fn eq(&self, other: &Self) -> bool {
172        self.0.tot_eq(&other.0)
173    }
174
175    #[inline(always)]
176    #[allow(clippy::partialeq_ne_impl)]
177    fn ne(&self, other: &Self) -> bool {
178        self.0.tot_ne(&other.0)
179    }
180}
181
182impl<T: TotalEq> Eq for TotalOrdWrap<T> {}
183
184impl<T: TotalHash> Hash for TotalOrdWrap<T> {
185    #[inline(always)]
186    fn hash<H: Hasher>(&self, state: &mut H) {
187        self.0.tot_hash(state);
188    }
189}
190
191impl<T: Clone> Clone for TotalOrdWrap<T> {
192    #[inline]
193    fn clone(&self) -> Self {
194        Self(self.0.clone())
195    }
196}
197
198impl<T: Copy> Copy for TotalOrdWrap<T> {}
199
200impl<T: IsNull> IsNull for TotalOrdWrap<T> {
201    const HAS_NULLS: bool = T::HAS_NULLS;
202    type Inner = T::Inner;
203
204    #[inline(always)]
205    fn is_null(&self) -> bool {
206        self.0.is_null()
207    }
208
209    #[inline(always)]
210    fn unwrap_inner(self) -> Self::Inner {
211        self.0.unwrap_inner()
212    }
213}
214
215impl DirtyHash for pf16 {
216    #[inline(always)]
217    fn dirty_hash(&self) -> u64 {
218        canonical_f16(*self).0.to_bits().dirty_hash()
219    }
220}
221
222impl DirtyHash for f32 {
223    #[inline(always)]
224    fn dirty_hash(&self) -> u64 {
225        canonical_f32(*self).to_bits().dirty_hash()
226    }
227}
228
229impl DirtyHash for f64 {
230    #[inline(always)]
231    fn dirty_hash(&self) -> u64 {
232        canonical_f64(*self).to_bits().dirty_hash()
233    }
234}
235
236impl<T: DirtyHash> DirtyHash for TotalOrdWrap<T> {
237    #[inline(always)]
238    fn dirty_hash(&self) -> u64 {
239        self.0.dirty_hash()
240    }
241}
242
243macro_rules! impl_trivial_total {
244    ($T: ty) => {
245        impl TotalEq for $T {
246            #[inline(always)]
247            fn tot_eq(&self, other: &Self) -> bool {
248                self == other
249            }
250
251            #[inline(always)]
252            fn tot_ne(&self, other: &Self) -> bool {
253                self != other
254            }
255        }
256
257        impl TotalOrd for $T {
258            #[inline(always)]
259            fn tot_cmp(&self, other: &Self) -> Ordering {
260                self.cmp(other)
261            }
262
263            #[inline(always)]
264            fn tot_lt(&self, other: &Self) -> bool {
265                self < other
266            }
267
268            #[inline(always)]
269            fn tot_gt(&self, other: &Self) -> bool {
270                self > other
271            }
272
273            #[inline(always)]
274            fn tot_le(&self, other: &Self) -> bool {
275                self <= other
276            }
277
278            #[inline(always)]
279            fn tot_ge(&self, other: &Self) -> bool {
280                self >= other
281            }
282        }
283
284        impl TotalHash for $T {
285            #[inline(always)]
286            fn tot_hash<H>(&self, state: &mut H)
287            where
288                H: Hasher,
289            {
290                self.hash(state);
291            }
292        }
293    };
294}
295
296// We can't do a blanket impl because Rust complains f32 might implement
297// Ord / Eq someday.
298impl_trivial_total!(());
299impl_trivial_total!(bool);
300impl_trivial_total!(u8);
301impl_trivial_total!(u16);
302impl_trivial_total!(u32);
303impl_trivial_total!(u64);
304impl_trivial_total!(u128);
305impl_trivial_total!(usize);
306impl_trivial_total!(i8);
307impl_trivial_total!(i16);
308impl_trivial_total!(i32);
309impl_trivial_total!(i64);
310impl_trivial_total!(i128);
311impl_trivial_total!(isize);
312impl_trivial_total!(char);
313impl_trivial_total!(&str);
314impl_trivial_total!(&[u8]);
315impl_trivial_total!(String);
316
317macro_rules! impl_float_eq_ord {
318    ($T:ty) => {
319        impl TotalEq for $T {
320            #[inline]
321            fn tot_eq(&self, other: &Self) -> bool {
322                if self.is_nan() {
323                    other.is_nan()
324                } else {
325                    self == other
326                }
327            }
328        }
329
330        impl TotalOrd for $T {
331            #[inline(always)]
332            fn tot_cmp(&self, other: &Self) -> Ordering {
333                if self.tot_lt(other) {
334                    Ordering::Less
335                } else if self.tot_gt(other) {
336                    Ordering::Greater
337                } else {
338                    Ordering::Equal
339                }
340            }
341
342            #[inline(always)]
343            fn tot_lt(&self, other: &Self) -> bool {
344                !self.tot_ge(other)
345            }
346
347            #[inline(always)]
348            fn tot_gt(&self, other: &Self) -> bool {
349                other.tot_lt(self)
350            }
351
352            #[inline(always)]
353            fn tot_le(&self, other: &Self) -> bool {
354                other.tot_ge(self)
355            }
356
357            #[inline(always)]
358            fn tot_ge(&self, other: &Self) -> bool {
359                // We consider all NaNs equal, and NaN is the largest possible
360                // value. Thus if self is NaN we always return true. Otherwise
361                // self >= other is correct. If other is not NaN it is trivially
362                // correct, and if it is we note that nothing can be greater or
363                // equal to NaN except NaN itself, which we already handled earlier.
364                self.is_nan() | (self >= other)
365            }
366        }
367    };
368}
369
370impl_float_eq_ord!(pf16);
371impl_float_eq_ord!(f32);
372impl_float_eq_ord!(f64);
373
374impl TotalHash for pf16 {
375    #[inline(always)]
376    fn tot_hash<H>(&self, state: &mut H)
377    where
378        H: Hasher,
379    {
380        canonical_f16(*self).to_bits().hash(state)
381    }
382}
383
384impl TotalHash for f32 {
385    #[inline(always)]
386    fn tot_hash<H>(&self, state: &mut H)
387    where
388        H: Hasher,
389    {
390        canonical_f32(*self).to_bits().hash(state)
391    }
392}
393
394impl TotalHash for f64 {
395    #[inline(always)]
396    fn tot_hash<H>(&self, state: &mut H)
397    where
398        H: Hasher,
399    {
400        canonical_f64(*self).to_bits().hash(state)
401    }
402}
403
404// Blanket implementations.
405impl<T: TotalEq> TotalEq for Option<T> {
406    #[inline(always)]
407    fn tot_eq(&self, other: &Self) -> bool {
408        match (self, other) {
409            (None, None) => true,
410            (Some(a), Some(b)) => a.tot_eq(b),
411            _ => false,
412        }
413    }
414
415    #[inline(always)]
416    fn tot_ne(&self, other: &Self) -> bool {
417        match (self, other) {
418            (None, None) => false,
419            (Some(a), Some(b)) => a.tot_ne(b),
420            _ => true,
421        }
422    }
423}
424
425impl<T: TotalOrd> TotalOrd for Option<T> {
426    #[inline(always)]
427    fn tot_cmp(&self, other: &Self) -> Ordering {
428        match (self, other) {
429            (None, None) => Ordering::Equal,
430            (None, Some(_)) => Ordering::Less,
431            (Some(_), None) => Ordering::Greater,
432            (Some(a), Some(b)) => a.tot_cmp(b),
433        }
434    }
435
436    #[inline(always)]
437    fn tot_lt(&self, other: &Self) -> bool {
438        match (self, other) {
439            (None, Some(_)) => true,
440            (Some(a), Some(b)) => a.tot_lt(b),
441            _ => false,
442        }
443    }
444
445    #[inline(always)]
446    fn tot_gt(&self, other: &Self) -> bool {
447        other.tot_lt(self)
448    }
449
450    #[inline(always)]
451    fn tot_le(&self, other: &Self) -> bool {
452        match (self, other) {
453            (Some(_), None) => false,
454            (Some(a), Some(b)) => a.tot_lt(b),
455            _ => true,
456        }
457    }
458
459    #[inline(always)]
460    fn tot_ge(&self, other: &Self) -> bool {
461        other.tot_le(self)
462    }
463}
464
465impl<T: TotalHash> TotalHash for Option<T> {
466    #[inline]
467    fn tot_hash<H>(&self, state: &mut H)
468    where
469        H: Hasher,
470    {
471        self.is_some().tot_hash(state);
472        if let Some(slf) = self {
473            slf.tot_hash(state)
474        }
475    }
476}
477
478impl<T: TotalEq + ?Sized> TotalEq for &T {
479    #[inline(always)]
480    fn tot_eq(&self, other: &Self) -> bool {
481        (*self).tot_eq(*other)
482    }
483
484    #[inline(always)]
485    fn tot_ne(&self, other: &Self) -> bool {
486        (*self).tot_ne(*other)
487    }
488}
489
490impl<T: TotalHash + ?Sized> TotalHash for &T {
491    #[inline(always)]
492    fn tot_hash<H>(&self, state: &mut H)
493    where
494        H: Hasher,
495    {
496        (*self).tot_hash(state)
497    }
498}
499
500impl<T: TotalEq, U: TotalEq> TotalEq for (T, U) {
501    #[inline]
502    fn tot_eq(&self, other: &Self) -> bool {
503        self.0.tot_eq(&other.0) && self.1.tot_eq(&other.1)
504    }
505}
506
507impl<T: TotalOrd, U: TotalOrd> TotalOrd for (T, U) {
508    #[inline]
509    fn tot_cmp(&self, other: &Self) -> Ordering {
510        self.0
511            .tot_cmp(&other.0)
512            .then_with(|| self.1.tot_cmp(&other.1))
513    }
514}
515
516impl TotalHash for BytesHash<'_> {
517    #[inline(always)]
518    fn tot_hash<H>(&self, state: &mut H)
519    where
520        H: Hasher,
521    {
522        self.hash(state)
523    }
524}
525
526impl TotalEq for BytesHash<'_> {
527    #[inline(always)]
528    fn tot_eq(&self, other: &Self) -> bool {
529        self == other
530    }
531}
532
533/// This elides creating a [`TotalOrdWrap`] for types that don't need it.
534pub trait ToTotalOrd {
535    type TotalOrdItem: Hash + Eq;
536    type SourceItem;
537
538    fn to_total_ord(&self) -> Self::TotalOrdItem;
539
540    fn peel_total_ord(ord_item: Self::TotalOrdItem) -> Self::SourceItem;
541}
542
543macro_rules! impl_to_total_ord_identity {
544    ($T: ty) => {
545        impl ToTotalOrd for $T {
546            type TotalOrdItem = $T;
547            type SourceItem = $T;
548
549            #[inline]
550            fn to_total_ord(&self) -> Self::TotalOrdItem {
551                self.clone()
552            }
553
554            #[inline]
555            fn peel_total_ord(ord_item: Self::TotalOrdItem) -> Self::SourceItem {
556                ord_item
557            }
558        }
559    };
560}
561
562impl_to_total_ord_identity!(bool);
563impl_to_total_ord_identity!(u8);
564impl_to_total_ord_identity!(u16);
565impl_to_total_ord_identity!(u32);
566impl_to_total_ord_identity!(u64);
567impl_to_total_ord_identity!(u128);
568impl_to_total_ord_identity!(usize);
569impl_to_total_ord_identity!(i8);
570impl_to_total_ord_identity!(i16);
571impl_to_total_ord_identity!(i32);
572impl_to_total_ord_identity!(i64);
573impl_to_total_ord_identity!(i128);
574impl_to_total_ord_identity!(isize);
575impl_to_total_ord_identity!(char);
576impl_to_total_ord_identity!(String);
577
578macro_rules! impl_to_total_ord_lifetimed_ref_identity {
579    ($T: ty) => {
580        impl<'a> ToTotalOrd for &'a $T {
581            type TotalOrdItem = &'a $T;
582            type SourceItem = &'a $T;
583
584            #[inline]
585            fn to_total_ord(&self) -> Self::TotalOrdItem {
586                *self
587            }
588
589            #[inline]
590            fn peel_total_ord(ord_item: Self::TotalOrdItem) -> Self::SourceItem {
591                ord_item
592            }
593        }
594    };
595}
596
597impl_to_total_ord_lifetimed_ref_identity!(str);
598impl_to_total_ord_lifetimed_ref_identity!([u8]);
599
600macro_rules! impl_to_total_ord_wrapped {
601    ($T: ty) => {
602        impl ToTotalOrd for $T {
603            type TotalOrdItem = TotalOrdWrap<$T>;
604            type SourceItem = $T;
605
606            #[inline]
607            fn to_total_ord(&self) -> Self::TotalOrdItem {
608                TotalOrdWrap(self.clone())
609            }
610
611            #[inline]
612            fn peel_total_ord(ord_item: Self::TotalOrdItem) -> Self::SourceItem {
613                ord_item.0
614            }
615        }
616    };
617}
618
619impl_to_total_ord_wrapped!(pf16);
620impl_to_total_ord_wrapped!(f32);
621impl_to_total_ord_wrapped!(f64);
622
623/// This is safe without needing to map the option value to TotalOrdWrap, since
624/// for example:
625/// `TotalOrdWrap<Option<T>>` implements `Eq + Hash`, iff:
626/// `Option<T>` implements `TotalEq + TotalHash`, iff:
627/// `T` implements `TotalEq + TotalHash`
628impl<T: Copy + TotalEq + TotalHash> ToTotalOrd for Option<T> {
629    type TotalOrdItem = TotalOrdWrap<Option<T>>;
630    type SourceItem = Option<T>;
631
632    #[inline]
633    fn to_total_ord(&self) -> Self::TotalOrdItem {
634        TotalOrdWrap(*self)
635    }
636
637    #[inline]
638    fn peel_total_ord(ord_item: Self::TotalOrdItem) -> Self::SourceItem {
639        ord_item.0
640    }
641}
642
643impl<T: ToTotalOrd> ToTotalOrd for &T {
644    type TotalOrdItem = T::TotalOrdItem;
645    type SourceItem = T::SourceItem;
646
647    #[inline]
648    fn to_total_ord(&self) -> Self::TotalOrdItem {
649        (*self).to_total_ord()
650    }
651
652    #[inline]
653    fn peel_total_ord(ord_item: Self::TotalOrdItem) -> Self::SourceItem {
654        T::peel_total_ord(ord_item)
655    }
656}
657
658impl<'a> ToTotalOrd for BytesHash<'a> {
659    type TotalOrdItem = BytesHash<'a>;
660    type SourceItem = BytesHash<'a>;
661
662    #[inline]
663    fn to_total_ord(&self) -> Self::TotalOrdItem {
664        *self
665    }
666
667    #[inline]
668    fn peel_total_ord(ord_item: Self::TotalOrdItem) -> Self::SourceItem {
669        ord_item
670    }
671}