polars_utils/
idx_vec.rs

1use std::fmt::{Debug, Formatter};
2use std::ops::Deref;
3
4use crate::index::{IdxSize, NonZeroIdxSize};
5
6pub type IdxVec = UnitVec<IdxSize>;
7
8/// A type logically equivalent to `Vec<T>`, but which does not do a
9/// memory allocation until at least two elements have been pushed, storing the
10/// first element in the data pointer directly.
11///
12/// Uses IdxSize internally to store lengths, will panic if trying to reserve
13/// for more elements.
14#[derive(Eq)]
15pub struct UnitVec<T> {
16    len: IdxSize,
17    capacity: NonZeroIdxSize,
18    data: *mut T,
19}
20
21unsafe impl<T: Send + Sync> Send for UnitVec<T> {}
22unsafe impl<T: Send + Sync> Sync for UnitVec<T> {}
23
24impl<T> UnitVec<T> {
25    #[inline(always)]
26    fn data_ptr_mut(&mut self) -> *mut T {
27        let external = self.data;
28        let inline = &mut self.data as *mut *mut T as *mut T;
29        if self.capacity.get() == 1 {
30            inline
31        } else {
32            external
33        }
34    }
35
36    #[inline(always)]
37    fn data_ptr(&self) -> *const T {
38        let external = self.data;
39        let inline = &self.data as *const *mut T as *mut T;
40        if self.capacity.get() == 1 {
41            inline
42        } else {
43            external
44        }
45    }
46
47    #[inline]
48    pub fn new() -> Self {
49        // This is optimized away, all const.
50        assert!(size_of::<T>() <= size_of::<*mut T>() && align_of::<T>() <= align_of::<*mut T>());
51        Self {
52            len: 0,
53            capacity: NonZeroIdxSize::new(1).unwrap(),
54            data: std::ptr::null_mut(),
55        }
56    }
57
58    #[inline(always)]
59    pub fn len(&self) -> usize {
60        self.len as usize
61    }
62
63    #[inline(always)]
64    pub fn is_empty(&self) -> bool {
65        self.len == 0
66    }
67
68    #[inline(always)]
69    pub fn capacity(&self) -> usize {
70        self.capacity.get() as usize
71    }
72
73    #[inline(always)]
74    pub fn clear(&mut self) {
75        self.len = 0;
76    }
77
78    #[inline(always)]
79    pub fn push(&mut self, idx: T) {
80        if self.len == self.capacity.get() {
81            self.reserve(1);
82        }
83
84        unsafe { self.push_unchecked(idx) }
85    }
86
87    #[inline(always)]
88    /// # Safety
89    /// Caller must ensure that `UnitVec` has enough capacity.
90    pub unsafe fn push_unchecked(&mut self, idx: T) {
91        unsafe {
92            self.data_ptr_mut().add(self.len as usize).write(idx);
93            self.len += 1;
94        }
95    }
96
97    #[cold]
98    #[inline(never)]
99    pub fn reserve(&mut self, additional: usize) {
100        let new_len = self
101            .len
102            .checked_add(additional.try_into().unwrap())
103            .unwrap();
104        if new_len > self.capacity.get() {
105            let double = self.capacity.get() * 2;
106            self.realloc(double.max(new_len).max(8));
107        }
108    }
109
110    /// # Panics
111    /// Panics if `new_cap <= 1` or `new_cap < self.len`
112    fn realloc(&mut self, new_cap: IdxSize) {
113        assert!(new_cap > 1 && new_cap >= self.len);
114        unsafe {
115            let mut me = std::mem::ManuallyDrop::new(Vec::with_capacity(new_cap as usize));
116            let buffer = me.as_mut_ptr();
117            std::ptr::copy(self.data_ptr(), buffer, self.len as usize);
118            self.dealloc();
119            self.data = buffer;
120            self.capacity = NonZeroIdxSize::new(new_cap).unwrap();
121        }
122    }
123
124    fn dealloc(&mut self) {
125        unsafe {
126            if self.capacity.get() > 1 {
127                let _ = Vec::from_raw_parts(self.data, self.len as usize, self.capacity());
128                self.capacity = NonZeroIdxSize::new(1).unwrap();
129            }
130        }
131    }
132
133    pub fn with_capacity(capacity: usize) -> Self {
134        if capacity <= 1 {
135            Self::new()
136        } else {
137            let mut me = std::mem::ManuallyDrop::new(Vec::with_capacity(capacity));
138            let data = me.as_mut_ptr();
139            Self {
140                len: 0,
141                capacity: NonZeroIdxSize::new(capacity.try_into().unwrap()).unwrap(),
142                data,
143            }
144        }
145    }
146
147    #[inline]
148    pub fn iter(&self) -> std::slice::Iter<'_, T> {
149        self.as_slice().iter()
150    }
151
152    #[inline]
153    pub fn iter_mut(&mut self) -> std::slice::IterMut<'_, T> {
154        self.as_mut_slice().iter_mut()
155    }
156
157    #[inline]
158    pub fn as_slice(&self) -> &[T] {
159        self.as_ref()
160    }
161
162    #[inline]
163    pub fn as_mut_slice(&mut self) -> &mut [T] {
164        self.as_mut()
165    }
166
167    #[inline]
168    pub fn pop(&mut self) -> Option<T> {
169        if self.len == 0 {
170            None
171        } else {
172            unsafe {
173                self.len -= 1;
174                Some(std::ptr::read(self.as_ptr().add(self.len())))
175            }
176        }
177    }
178}
179
180impl<T> Extend<T> for UnitVec<T> {
181    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
182        let iter = iter.into_iter();
183        self.reserve(iter.size_hint().0);
184        for v in iter {
185            self.push(v)
186        }
187    }
188}
189
190impl<T> Drop for UnitVec<T> {
191    fn drop(&mut self) {
192        self.dealloc()
193    }
194}
195
196impl<T> Clone for UnitVec<T> {
197    fn clone(&self) -> Self {
198        unsafe {
199            if self.capacity.get() == 1 {
200                Self { ..*self }
201            } else {
202                let mut copy = Self::with_capacity(self.len as usize);
203                std::ptr::copy(self.data_ptr(), copy.data_ptr_mut(), self.len as usize);
204                copy.len = self.len;
205                copy
206            }
207        }
208    }
209}
210
211impl<T: Debug> Debug for UnitVec<T> {
212    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
213        write!(f, "UnitVec: {:?}", self.as_slice())
214    }
215}
216
217impl<T> Default for UnitVec<T> {
218    fn default() -> Self {
219        Self {
220            len: 0,
221            capacity: NonZeroIdxSize::new(1).unwrap(),
222            data: std::ptr::null_mut(),
223        }
224    }
225}
226
227impl<T> Deref for UnitVec<T> {
228    type Target = [T];
229
230    fn deref(&self) -> &Self::Target {
231        self.as_slice()
232    }
233}
234
235impl<T> AsRef<[T]> for UnitVec<T> {
236    fn as_ref(&self) -> &[T] {
237        unsafe { std::slice::from_raw_parts(self.data_ptr(), self.len as usize) }
238    }
239}
240
241impl<T> AsMut<[T]> for UnitVec<T> {
242    fn as_mut(&mut self) -> &mut [T] {
243        unsafe { std::slice::from_raw_parts_mut(self.data_ptr_mut(), self.len as usize) }
244    }
245}
246
247impl<T: PartialEq> PartialEq for UnitVec<T> {
248    fn eq(&self, other: &Self) -> bool {
249        self.as_slice() == other.as_slice()
250    }
251}
252
253impl<T> FromIterator<T> for UnitVec<T> {
254    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
255        let iter = iter.into_iter();
256        if iter.size_hint().0 <= 1 {
257            let mut new = UnitVec::new();
258            for v in iter {
259                new.push(v)
260            }
261            new
262        } else {
263            let v = iter.collect::<Vec<_>>();
264            v.into()
265        }
266    }
267}
268
269impl<T> From<Vec<T>> for UnitVec<T> {
270    fn from(mut value: Vec<T>) -> Self {
271        if value.capacity() <= 1 {
272            let mut new = UnitVec::new();
273            if let Some(v) = value.pop() {
274                new.push(v)
275            }
276            new
277        } else {
278            let mut me = std::mem::ManuallyDrop::new(value);
279            UnitVec {
280                data: me.as_mut_ptr(),
281                capacity: NonZeroIdxSize::new(me.capacity().try_into().unwrap()).unwrap(),
282                len: me.len().try_into().unwrap(),
283            }
284        }
285    }
286}
287
288impl<T: Clone> From<&[T]> for UnitVec<T> {
289    fn from(value: &[T]) -> Self {
290        if value.len() <= 1 {
291            let mut new = UnitVec::new();
292            if let Some(v) = value.first() {
293                new.push(v.clone())
294            }
295            new
296        } else {
297            value.to_vec().into()
298        }
299    }
300}
301
302#[macro_export]
303macro_rules! unitvec {
304    () => (
305        $crate::idx_vec::UnitVec::new()
306    );
307    ($elem:expr; $n:expr) => (
308        let mut new = $crate::idx_vec::UnitVec::new();
309        for _ in 0..$n {
310            new.push($elem)
311        }
312        new
313    );
314    ($elem:expr) => (
315        {let mut new = $crate::idx_vec::UnitVec::new();
316        let v = $elem;
317        // SAFETY: first element always fits.
318        unsafe { new.push_unchecked(v) };
319        new}
320    );
321    ($($x:expr),+ $(,)?) => (
322            vec![$($x),+].into()
323    );
324}
325
326mod tests {
327
328    #[test]
329    #[should_panic]
330    fn test_unitvec_realloc_zero() {
331        super::UnitVec::<usize>::new().realloc(0);
332    }
333
334    #[test]
335    #[should_panic]
336    fn test_unitvec_realloc_one() {
337        super::UnitVec::<usize>::new().realloc(1);
338    }
339
340    #[test]
341    #[should_panic]
342    fn test_untivec_realloc_lt_len() {
343        super::UnitVec::<usize>::from(&[1, 2][..]).realloc(1)
344    }
345
346    #[test]
347    fn test_unitvec_clone() {
348        {
349            let v = unitvec![1usize];
350            assert_eq!(v, v.clone());
351        }
352
353        for n in [
354            26903816120209729usize,
355            42566276440897687,
356            44435161834424652,
357            49390731489933083,
358            51201454727649242,
359            83861672190814841,
360            92169290527847622,
361            92476373900398436,
362            95488551309275459,
363            97499984126814549,
364        ] {
365            let v = unitvec![n];
366            assert_eq!(v, v.clone());
367        }
368    }
369}