Skip to main content

polars_utils/parma/raw/
probe.rs

1/*
2    To speed up probing we group table entries into groups of 8, and use
3    one-byte tags to quickly skip irrelevant entries. Each tag has the following
4    bit patterns:
5
6        1000_0000 = EMPTY
7        1111_1111 = DELETED
8        0hhh_hhhh = OCCUPIED (with 7 bits of key hash)
9
10    For the purposes of parallel probes, we allow false-positive probes (key
11    doesn't exist or is deleted but tag is set), but not false-negative probes
12    (key exists but tag is not set).
13*/
14
15use std::sync::atomic::{AtomicU64, Ordering};
16
17const fn tag(hash: usize) -> u8 {
18    (hash >> (usize::BITS - 7)) as u8
19}
20
21#[inline(always)]
22const fn repeat(tag: u8) -> u64 {
23    (tag as u64) * 0x01010101_01010101
24}
25
26#[repr(transparent)]
27pub struct TagGroup(AtomicU64);
28
29impl TagGroup {
30    #[inline(always)]
31    pub const fn all_empty() -> Self {
32        Self(AtomicU64::new(repeat(0x80)))
33    }
34
35    #[inline(always)]
36    pub fn all_occupied(hash: usize) -> Self {
37        Self(AtomicU64::new(repeat(tag(hash))))
38    }
39
40    #[inline(always)]
41    pub const fn idx_mask(num_entries: usize) -> usize {
42        num_entries / 8 - 1
43    }
44
45    #[inline(always)]
46    pub fn load(loc: &Self) -> Self {
47        Self(AtomicU64::new(loc.0.load(Ordering::Relaxed)))
48    }
49
50    #[inline(always)]
51    pub fn try_occupy(&self, cur: &mut Self, idx: usize, hash: usize) -> bool {
52        let shift = 8 * idx;
53        let update = ((0x80 | tag(hash)) as u64) << shift;
54        let cur_val = *cur.0.get_mut();
55        self.0
56            .compare_exchange(
57                cur_val,
58                cur_val ^ update,
59                Ordering::Relaxed,
60                Ordering::Relaxed,
61            )
62            .is_ok()
63    }
64
65    #[inline(always)]
66    pub fn occupy_mut(&mut self, idx: usize, hash: usize) {
67        *self.0.get_mut() ^= (0x80 | tag(hash) as u64) << (8 * idx);
68    }
69
70    pub fn delete(&self, idx: usize) {
71        self.0.fetch_or(0xff << (8 * idx), Ordering::Relaxed);
72    }
73
74    #[inline(always)]
75    pub fn empties(&mut self) -> TagMatches {
76        let t = *self.0.get_mut();
77        TagMatches(t & t.wrapping_add(repeat(1)) & repeat(0x80))
78    }
79
80    #[inline(always)]
81    pub fn matches(&mut self, other: &mut TagGroup) -> TagMatches {
82        let self_tags = *self.0.get_mut();
83        let other_tags = *other.0.get_mut();
84        TagMatches(((self_tags ^ other_tags).wrapping_sub(repeat(1))) & !self_tags & repeat(0x80))
85    }
86}
87
88#[repr(transparent)]
89#[derive(Copy, Clone)]
90pub struct TagMatches(u64);
91
92impl TagMatches {
93    #[inline(always)]
94    pub fn has_matches(&self) -> bool {
95        self.0 != 0
96    }
97
98    #[inline(always)]
99    pub fn get(&self) -> usize {
100        (self.0.trailing_zeros() / 8) as usize
101    }
102
103    pub fn has_match_at(&self, idx: usize) -> bool {
104        (self.0 >> (idx * 8)) & 0x80 != 0
105    }
106
107    #[inline(always)]
108    pub fn advance(&mut self) {
109        self.0 &= self.0 - 1;
110    }
111}
112
113impl std::ops::BitOr for TagMatches {
114    type Output = Self;
115
116    fn bitor(self, rhs: Self) -> Self::Output {
117        Self(self.0 | rhs.0)
118    }
119}
120
121/// A quadratic prober meant for power-of-two hash tables.
122///
123/// Does not reduce the index to be in-bounds, that's the responsibility of the
124/// caller.
125pub struct Prober {
126    idx: usize,
127    step: usize,
128}
129
130impl Prober {
131    #[inline(always)]
132    pub fn new(hash: usize) -> Self {
133        Self {
134            idx: hash >> 3,
135            step: 0,
136        }
137    }
138
139    #[inline(always)]
140    pub fn get(&self) -> usize {
141        self.idx
142    }
143
144    #[inline(always)]
145    pub fn advance(&mut self) {
146        self.step = self.step.wrapping_add(1);
147        self.idx = self.idx.wrapping_add(self.step);
148    }
149}
150
151pub fn max_load(num_entries: usize) -> usize {
152    num_entries * 3 / 4
153}
154
155pub fn min_entries_for_load(load: usize) -> usize {
156    (load * 4 / 3 + 1).next_power_of_two().max(8)
157}