polars_utils/
idx_vec.rs

1use std::fmt::{Debug, Formatter};
2use std::ops::Deref;
3
4use crate::index::{IdxSize, NonZeroIdxSize};
5
6pub type IdxVec = UnitVec<IdxSize>;
7
8/// A type logically equivalent to `Vec<T>`, but which does not do a
9/// memory allocation until at least two elements have been pushed, storing the
10/// first element in the data pointer directly.
11///
12/// Uses IdxSize internally to store lengths, will panic if trying to reserve
13/// for more elements.
14#[derive(Eq)]
15pub struct UnitVec<T> {
16    len: IdxSize,
17    capacity: NonZeroIdxSize,
18    data: *mut T,
19}
20
21unsafe impl<T: Send + Sync> Send for UnitVec<T> {}
22unsafe impl<T: Send + Sync> Sync for UnitVec<T> {}
23
24impl<T> UnitVec<T> {
25    #[inline(always)]
26    fn data_ptr_mut(&mut self) -> *mut T {
27        let external = self.data;
28        let inline = &mut self.data as *mut *mut T as *mut T;
29        if self.is_inline() { inline } else { external }
30    }
31
32    #[inline(always)]
33    fn data_ptr(&self) -> *const T {
34        let external = self.data;
35        let inline = &self.data as *const *mut T as *mut T;
36        if self.is_inline() { inline } else { external }
37    }
38
39    #[inline]
40    pub fn new() -> Self {
41        // This is optimized away, all const.
42        assert!(size_of::<T>() <= size_of::<*mut T>() && align_of::<T>() <= align_of::<*mut T>());
43        Self {
44            len: 0,
45            capacity: NonZeroIdxSize::new(1).unwrap(),
46            data: std::ptr::null_mut(),
47        }
48    }
49
50    pub fn is_inline(&self) -> bool {
51        self.capacity.get() == 1
52    }
53
54    #[inline(always)]
55    pub fn len(&self) -> usize {
56        self.len as usize
57    }
58
59    #[inline(always)]
60    pub fn is_empty(&self) -> bool {
61        self.len == 0
62    }
63
64    #[inline(always)]
65    pub fn capacity(&self) -> usize {
66        self.capacity.get() as usize
67    }
68
69    #[inline(always)]
70    pub fn clear(&mut self) {
71        self.len = 0;
72    }
73
74    #[inline(always)]
75    pub fn push(&mut self, idx: T) {
76        if self.len == self.capacity.get() {
77            self.reserve(1);
78        }
79
80        unsafe { self.push_unchecked(idx) }
81    }
82
83    #[inline(always)]
84    /// # Safety
85    /// Caller must ensure that `UnitVec` has enough capacity.
86    pub unsafe fn push_unchecked(&mut self, idx: T) {
87        unsafe {
88            self.data_ptr_mut().add(self.len as usize).write(idx);
89            self.len += 1;
90        }
91    }
92
93    #[cold]
94    #[inline(never)]
95    pub fn reserve(&mut self, additional: usize) {
96        let new_len = self
97            .len
98            .checked_add(additional.try_into().unwrap())
99            .unwrap();
100        if new_len > self.capacity.get() {
101            let double = self.capacity.get() * 2;
102            self.realloc(double.max(new_len).max(8));
103        }
104    }
105
106    /// # Panics
107    /// Panics if `new_cap <= 1` or `new_cap < self.len`
108    fn realloc(&mut self, new_cap: IdxSize) {
109        assert!(new_cap > 1 && new_cap >= self.len);
110        unsafe {
111            let mut me = std::mem::ManuallyDrop::new(Vec::with_capacity(new_cap as usize));
112            let buffer = me.as_mut_ptr();
113            std::ptr::copy(self.data_ptr(), buffer, self.len as usize);
114            self.dealloc();
115            self.data = buffer;
116            self.capacity = NonZeroIdxSize::new(new_cap).unwrap();
117        }
118    }
119
120    fn dealloc(&mut self) {
121        unsafe {
122            if !self.is_inline() {
123                let _ = Vec::from_raw_parts(self.data, self.len as usize, self.capacity());
124                self.capacity = NonZeroIdxSize::new(1).unwrap();
125            }
126        }
127    }
128
129    pub fn with_capacity(capacity: usize) -> Self {
130        if capacity <= 1 {
131            Self::new()
132        } else {
133            let mut me = std::mem::ManuallyDrop::new(Vec::with_capacity(capacity));
134            let data = me.as_mut_ptr();
135            Self {
136                len: 0,
137                capacity: NonZeroIdxSize::new(capacity.try_into().unwrap()).unwrap(),
138                data,
139            }
140        }
141    }
142
143    #[inline]
144    pub fn iter(&self) -> std::slice::Iter<'_, T> {
145        self.as_slice().iter()
146    }
147
148    #[inline]
149    pub fn iter_mut(&mut self) -> std::slice::IterMut<'_, T> {
150        self.as_mut_slice().iter_mut()
151    }
152
153    #[inline]
154    pub fn as_slice(&self) -> &[T] {
155        self.as_ref()
156    }
157
158    #[inline]
159    pub fn as_mut_slice(&mut self) -> &mut [T] {
160        self.as_mut()
161    }
162
163    #[inline]
164    pub fn pop(&mut self) -> Option<T> {
165        if self.len == 0 {
166            None
167        } else {
168            unsafe {
169                self.len -= 1;
170                Some(std::ptr::read(self.as_ptr().add(self.len())))
171            }
172        }
173    }
174}
175
176impl<T> Extend<T> for UnitVec<T> {
177    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
178        let iter = iter.into_iter();
179        self.reserve(iter.size_hint().0);
180        for v in iter {
181            self.push(v)
182        }
183    }
184}
185
186impl<T> Drop for UnitVec<T> {
187    fn drop(&mut self) {
188        self.dealloc()
189    }
190}
191
192impl<T> Clone for UnitVec<T> {
193    fn clone(&self) -> Self {
194        unsafe {
195            if self.is_inline() {
196                Self { ..*self }
197            } else {
198                let mut copy = Self::with_capacity(self.len as usize);
199                std::ptr::copy(self.data_ptr(), copy.data_ptr_mut(), self.len as usize);
200                copy.len = self.len;
201                copy
202            }
203        }
204    }
205}
206
207impl<T: Debug> Debug for UnitVec<T> {
208    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
209        write!(f, "UnitVec: {:?}", self.as_slice())
210    }
211}
212
213impl<T> Default for UnitVec<T> {
214    fn default() -> Self {
215        Self {
216            len: 0,
217            capacity: NonZeroIdxSize::new(1).unwrap(),
218            data: std::ptr::null_mut(),
219        }
220    }
221}
222
223impl<T> Deref for UnitVec<T> {
224    type Target = [T];
225
226    fn deref(&self) -> &Self::Target {
227        self.as_slice()
228    }
229}
230
231impl<T> AsRef<[T]> for UnitVec<T> {
232    fn as_ref(&self) -> &[T] {
233        unsafe { std::slice::from_raw_parts(self.data_ptr(), self.len as usize) }
234    }
235}
236
237impl<T> AsMut<[T]> for UnitVec<T> {
238    fn as_mut(&mut self) -> &mut [T] {
239        unsafe { std::slice::from_raw_parts_mut(self.data_ptr_mut(), self.len as usize) }
240    }
241}
242
243impl<T: PartialEq> PartialEq for UnitVec<T> {
244    fn eq(&self, other: &Self) -> bool {
245        self.as_slice() == other.as_slice()
246    }
247}
248
249impl<T> FromIterator<T> for UnitVec<T> {
250    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
251        let mut iter = iter.into_iter();
252
253        let Some(first) = iter.next() else {
254            return Self::new();
255        };
256
257        let Some(second) = iter.next() else {
258            let mut out = Self::new();
259            out.push(first);
260            return out;
261        };
262
263        let mut vec = Vec::with_capacity(iter.size_hint().0 + 2);
264        vec.push(first);
265        vec.push(second);
266        vec.extend(iter);
267        Self::from(vec)
268    }
269}
270
271impl<T> IntoIterator for UnitVec<T> {
272    type Item = T;
273
274    type IntoIter = IntoIter<T>;
275
276    fn into_iter(mut self) -> Self::IntoIter {
277        if self.is_inline() {
278            IntoIter::Inline(self.pop().into_iter())
279        } else {
280            IntoIter::External(Vec::from(self).into_iter())
281        }
282    }
283}
284
285pub enum IntoIter<T> {
286    Inline(std::option::IntoIter<T>),
287    External(std::vec::IntoIter<T>),
288}
289
290impl<T> Iterator for IntoIter<T> {
291    type Item = T;
292
293    fn next(&mut self) -> Option<Self::Item> {
294        match self {
295            IntoIter::Inline(it) => it.next(),
296            IntoIter::External(it) => it.next(),
297        }
298    }
299}
300
301impl<T> From<Vec<T>> for UnitVec<T> {
302    fn from(mut value: Vec<T>) -> Self {
303        if value.capacity() <= 1 {
304            let mut new = UnitVec::new();
305            if let Some(v) = value.pop() {
306                new.push(v)
307            }
308            new
309        } else {
310            let mut me = std::mem::ManuallyDrop::new(value);
311            UnitVec {
312                data: me.as_mut_ptr(),
313                capacity: NonZeroIdxSize::new(me.capacity().try_into().unwrap()).unwrap(),
314                len: me.len().try_into().unwrap(),
315            }
316        }
317    }
318}
319
320impl<T> From<UnitVec<T>> for Vec<T> {
321    fn from(mut value: UnitVec<T>) -> Self {
322        if value.is_inline() {
323            let mut out = Vec::with_capacity(value.len());
324            if let Some(item) = value.pop() {
325                out.push(item);
326            }
327            out
328        } else {
329            // SAFETY: when not inline, the data points to a buffer allocated by a Vec.
330            let out =
331                unsafe { Vec::from_raw_parts(value.data, value.len as usize, value.capacity()) };
332            // Prevent deallocating the buffer
333            std::mem::forget(value);
334            out
335        }
336    }
337}
338
339impl<T: Clone> From<&[T]> for UnitVec<T> {
340    fn from(value: &[T]) -> Self {
341        if value.len() <= 1 {
342            let mut new = UnitVec::new();
343            if let Some(v) = value.first() {
344                new.push(v.clone())
345            }
346            new
347        } else {
348            value.to_vec().into()
349        }
350    }
351}
352
353#[macro_export]
354macro_rules! unitvec {
355    () => (
356        $crate::idx_vec::UnitVec::new()
357    );
358    ($elem:expr; $n:expr) => (
359        let mut new = $crate::idx_vec::UnitVec::new();
360        for _ in 0..$n {
361            new.push($elem)
362        }
363        new
364    );
365    ($elem:expr) => (
366        {let mut new = $crate::idx_vec::UnitVec::new();
367        let v = $elem;
368        // SAFETY: first element always fits.
369        unsafe { new.push_unchecked(v) };
370        new}
371    );
372    ($($x:expr),+ $(,)?) => (
373            vec![$($x),+].into()
374    );
375}
376
377mod tests {
378
379    #[test]
380    #[should_panic]
381    fn test_unitvec_realloc_zero() {
382        super::UnitVec::<usize>::new().realloc(0);
383    }
384
385    #[test]
386    #[should_panic]
387    fn test_unitvec_realloc_one() {
388        super::UnitVec::<usize>::new().realloc(1);
389    }
390
391    #[test]
392    #[should_panic]
393    fn test_untivec_realloc_lt_len() {
394        super::UnitVec::<usize>::from(&[1, 2][..]).realloc(1)
395    }
396
397    #[test]
398    fn test_unitvec_clone() {
399        {
400            let v = unitvec![1usize];
401            assert_eq!(v, v.clone());
402        }
403
404        for n in [
405            26903816120209729usize,
406            42566276440897687,
407            44435161834424652,
408            49390731489933083,
409            51201454727649242,
410            83861672190814841,
411            92169290527847622,
412            92476373900398436,
413            95488551309275459,
414            97499984126814549,
415        ] {
416            let v = unitvec![n];
417            assert_eq!(v, v.clone());
418        }
419    }
420}