polars_utils/
small_bytes.rs

1use crate::marked_usize::MarkedUsize;
2
/// Raw pointer type stored when the bytes are not kept inline.
type Pointer = *mut u8;
/// Largest number of bytes stored inline: the size of one `Pointer`, so the inline array and
/// the pointer occupy the same number of bytes.
const MAX_INLINE_SIZE: usize = core::mem::size_of::<Pointer>();

/// Inline storage: a fixed array of `MAX_INLINE_SIZE` bytes.
type Array = [u8; MAX_INLINE_SIZE];
/// All-zero inline storage, used as the starting value before bytes are copied in.
const ARRAY_DEFAULT: Array = [0; MAX_INLINE_SIZE];
8
/// Storage shared by the two representations: either a raw pointer or an inline byte array.
///
/// The union does not record which field is active; `SmallBytes` decides that from its stored
/// length (see `SmallBytes::is_inline`).
union PointerOrArray {
    /// Pointer to the bytes when they are not stored inline.
    ptr: Pointer,
    /// The bytes themselves when they are stored inline.
    array: Array,
}
13
/// Zero-filled inline storage; used as the payload of `SmallBytes::NULL`.
const DATA_DEFAULT: PointerOrArray = PointerOrArray {
    array: ARRAY_DEFAULT,
};
17
/// `Option<Box<[u8]>>` with inlining of `<= size_of::<*mut u8>()` bytes.
///
/// A value is either null or a byte string. Byte strings of at most `MAX_INLINE_SIZE` bytes
/// live directly inside the struct; longer ones live in a boxed slice referenced by pointer.
pub struct SmallBytes {
    /// Inline bytes when the stored length is at most `MAX_INLINE_SIZE`, otherwise a pointer to
    /// the bytes.
    data: PointerOrArray,
    /// Stored byte length; the value is null when the marker flag of this field is set.
    len: MarkedUsize,
}
23
// SAFETY: within this file a `SmallBytes` either stores its bytes inline or holds a pointer to a
// boxed slice that `from_slice` allocated and `Drop` frees, and no method here mutates the bytes
// through `&self`.
// NOTE(review): this relies on no other code copying or aliasing `data.ptr` — confirm.
unsafe impl Send for SmallBytes {}
unsafe impl Sync for SmallBytes {}

// Equality is defined by `PartialEq`, which compares the `Option<&[u8]>` views of both values.
impl Eq for SmallBytes {}
28
29impl SmallBytes {
30    pub const NULL: SmallBytes = SmallBytes {
31        data: DATA_DEFAULT,
32        len: MarkedUsize::new(0, true),
33    };
34
35    pub fn from_slice(slice: &[u8]) -> Self {
36        let len = slice.len();
37        assert!(len <= MarkedUsize::UNMARKED_MAX);
38
39        let data = if len <= MAX_INLINE_SIZE {
40            let mut array = ARRAY_DEFAULT;
41            array[..len].copy_from_slice(slice);
42
43            PointerOrArray { array }
44        } else {
45            let boxed: Box<[u8]> = slice.into();
46            let ptr: *mut [u8] = Box::into_raw(boxed);
47            let ptr: *mut u8 = ptr.cast();
48
49            PointerOrArray { ptr }
50        };
51
52        Self {
53            data,
54            len: MarkedUsize::new(len, false),
55        }
56    }
57
58    pub fn from_opt_slice(slice: Option<&[u8]>) -> Self {
59        if let Some(slice) = slice {
60            Self::from_slice(slice)
61        } else {
62            Self::NULL
63        }
64    }
65
66    #[inline]
67    fn as_slice(&self) -> Option<&[u8]> {
68        (!self.is_null()).then_some(unsafe {
69            if self.is_inline() {
70                self.inline_slice_unchecked()
71            } else {
72                self.non_inline_slice_unchecked()
73            }
74        })
75    }
76
77    #[inline]
78    fn is_inline(&self) -> bool {
79        self.len.to_usize() <= MAX_INLINE_SIZE
80    }
81
82    #[inline]
83    fn is_null(&self) -> bool {
84        self.len.marked()
85    }
86
87    /// # Safety
88    /// `self.is_inline()`
89    #[inline]
90    unsafe fn inline_slice_unchecked(&self) -> &[u8] {
91        unsafe { self.data.array.get_unchecked(..self.len.to_usize()) }
92    }
93
94    /// # Safety
95    /// `!self.is_inline()`
96    #[inline]
97    unsafe fn non_inline_slice_unchecked(&self) -> &[u8] {
98        unsafe { core::slice::from_raw_parts(self.data.ptr, self.len.to_usize()) }
99    }
100
101    /// # Safety
102    /// `!self.is_inline()`
103    #[inline]
104    unsafe fn non_inline_slice_unchecked_mut(&mut self) -> &mut [u8] {
105        unsafe { core::slice::from_raw_parts_mut(self.data.ptr, self.len.to_usize()) }
106    }
107}
108
109impl core::fmt::Debug for SmallBytes {
110    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
111        f.write_str("SmallBytes(")?;
112
113        if let Some(slice) = self.as_slice() {
114            core::fmt::Debug::fmt(slice, f)?;
115        } else {
116            f.write_str("null")?;
117        }
118
119        f.write_str(")")
120    }
121}
122
123impl Drop for SmallBytes {
124    fn drop(&mut self) {
125        if !self.is_inline() {
126            unsafe {
127                let ptr: *mut [u8] = self.non_inline_slice_unchecked_mut();
128                let v: Box<[u8]> = Box::from_raw(ptr);
129                drop(v);
130            }
131        }
132    }
133}
134
135impl PartialEq for SmallBytes {
136    fn eq(&self, other: &Self) -> bool {
137        self.as_slice() == other.as_slice()
138    }
139}
140
141impl core::hash::Hash for SmallBytes {
142    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
143        if let Some(slice) = self.as_slice() {
144            slice.hash(state)
145        }
146    }
147}
148
#[cfg(test)]
mod tests {
    use super::{MAX_INLINE_SIZE, PointerOrArray, SmallBytes};
    use crate::marked_usize::MarkedUsize;

    /// Hashes `value` with the standard library's default hasher.
    fn hash<T: core::hash::Hash>(value: &T) -> u64 {
        let mut hasher = std::hash::DefaultHasher::new();
        value.hash(&mut hasher);
        core::hash::Hasher::finish(&hasher)
    }

    /// Builds a `SmallBytes` straight from raw parts: inline storage filled with `fill`, a
    /// stored length of `len`, and the null marker set to `null`.
    fn raw_inline(fill: u8, len: usize, null: bool) -> SmallBytes {
        SmallBytes {
            data: PointerOrArray {
                array: [fill; MAX_INLINE_SIZE],
            },
            len: MarkedUsize::new(len, null),
        }
    }

    #[test]
    fn test_partition_key_eq() {
        // If both sides have the NULL bit set, hash / eq should all match regardless of the values
        // of any other bits.
        let null_a = raw_inline(0, 1, true);
        let null_b = raw_inline(1, 2, true);

        assert_eq!(null_a, null_b);
        assert_eq!(hash(&SmallBytes::NULL), hash(&null_b));
        assert_eq!(null_a, SmallBytes::NULL);
        assert_eq!(null_b, SmallBytes::NULL);

        // A heap-backed value that is marked null afterwards must also compare equal to null.
        let mut heap_null = SmallBytes::from_slice(&[1u8; MAX_INLINE_SIZE + 1]);
        assert!(!heap_null.is_null());
        heap_null.len = MarkedUsize::new(heap_null.len.to_usize(), true);
        assert!(heap_null.is_null());

        assert_eq!(null_a, heap_null);
        assert_eq!(heap_null, SmallBytes::NULL);

        // Same payload and length, differing only in the null marker: must not be equal.
        let marked = raw_inline(0, 2, true);
        let unmarked = raw_inline(0, 2, false);

        assert_ne!(marked, unmarked);
    }
}