polars_utils/
idx_vec.rs

1use std::fmt::{Debug, Formatter};
2use std::mem::ManuallyDrop;
3use std::ops::{Deref, DerefMut};
4
5use crate::index::{IdxSize, NonZeroIdxSize};
6
7pub type IdxVec = UnitVec<IdxSize>;
8
9union PointerOrValue<T> {
10    ptr: *mut T,
11    value: ManuallyDrop<T>,
12}
13
14/// A type logically equivalent to `Vec<T>`, but which does not do a
15/// memory allocation until at least two elements have been pushed, storing the
16/// first element inside the UnitVec directly.
17///
18/// Uses IdxSize internally to store lengths, will panic if trying to reserve
19/// for more elements.
20pub struct UnitVec<T> {
21    len: IdxSize,
22    capacity: NonZeroIdxSize,
23    data: PointerOrValue<T>,
24}
25
26unsafe impl<T: Send + Sync> Send for UnitVec<T> {}
27unsafe impl<T: Send + Sync> Sync for UnitVec<T> {}
28
29impl<T> UnitVec<T> {
30    #[inline(always)]
31    fn data_ptr_mut(&mut self) -> *mut T {
32        if self.is_inline() {
33            unsafe { &mut *self.data.value }
34        } else {
35            unsafe { self.data.ptr }
36        }
37    }
38
39    #[inline(always)]
40    fn data_ptr(&self) -> *const T {
41        if self.is_inline() {
42            unsafe { &*self.data.value }
43        } else {
44            unsafe { self.data.ptr }
45        }
46    }
47
48    #[inline]
49    pub fn new() -> Self {
50        Self {
51            len: 0,
52            capacity: NonZeroIdxSize::new(1).unwrap(),
53            data: PointerOrValue {
54                ptr: std::ptr::null_mut(),
55            },
56        }
57    }
58
59    pub fn is_inline(&self) -> bool {
60        self.capacity.get() == 1
61    }
62
63    #[inline(always)]
64    pub fn len(&self) -> usize {
65        self.len as usize
66    }
67
68    #[inline(always)]
69    pub fn is_empty(&self) -> bool {
70        self.len == 0
71    }
72
73    #[inline(always)]
74    pub fn capacity(&self) -> usize {
75        self.capacity.get() as usize
76    }
77
78    #[inline(always)]
79    pub fn clear(&mut self) {
80        if std::mem::needs_drop::<T>() {
81            while self.len > 0 {
82                self.pop();
83            }
84        } else {
85            self.len = 0;
86        }
87    }
88
89    #[inline(always)]
90    pub fn push(&mut self, idx: T) {
91        if self.len == self.capacity.get() {
92            self.reserve(1);
93        }
94
95        unsafe { self.push_unchecked(idx) }
96    }
97
98    #[inline(always)]
99    /// # Safety
100    /// Caller must ensure that `UnitVec` has enough capacity.
101    pub unsafe fn push_unchecked(&mut self, idx: T) {
102        unsafe {
103            self.data_ptr_mut().add(self.len as usize).write(idx);
104            self.len += 1;
105        }
106    }
107
108    #[inline]
109    pub fn pop(&mut self) -> Option<T> {
110        if self.len == 0 {
111            None
112        } else {
113            unsafe {
114                self.len -= 1;
115                Some(self.data_ptr().add(self.len as usize).read())
116            }
117        }
118    }
119
120    #[cold]
121    #[inline(never)]
122    pub fn reserve(&mut self, additional: usize) {
123        let new_len = self
124            .len
125            .checked_add(additional.try_into().unwrap())
126            .unwrap();
127        if new_len > self.capacity.get() {
128            let double = self.capacity.get() * 2;
129            self.realloc(double.max(new_len).max(8));
130        }
131    }
132
133    /// # Panics
134    /// Panics if `new_cap <= 1` or `new_cap < self.len`
135    fn realloc(&mut self, mut new_cap: IdxSize) {
136        assert!(new_cap > 1 && new_cap >= self.len);
137        unsafe {
138            let mut me = std::mem::ManuallyDrop::new(Vec::with_capacity(new_cap as usize));
139            new_cap = me.capacity().try_into().unwrap();
140            let buffer = me.as_mut_ptr();
141            std::ptr::copy(self.data_ptr(), buffer, self.len as usize);
142            self.dealloc();
143            self.data = PointerOrValue { ptr: buffer };
144            self.capacity = NonZeroIdxSize::new(new_cap).unwrap();
145        }
146    }
147
148    unsafe fn dealloc(&mut self) {
149        unsafe {
150            if !self.is_inline() {
151                drop(Vec::from_raw_parts(
152                    self.data.ptr.cast::<ManuallyDrop<T>>(),
153                    self.len as usize,
154                    self.capacity(),
155                ));
156            }
157        }
158    }
159
160    pub fn with_capacity(capacity: usize) -> Self {
161        if capacity <= 1 {
162            Self::new()
163        } else {
164            let mut me = std::mem::ManuallyDrop::new(Vec::with_capacity(capacity));
165            let cap = me.capacity().try_into().unwrap();
166            let ptr = me.as_mut_ptr();
167            Self {
168                len: 0,
169                capacity: NonZeroIdxSize::new(cap).unwrap(),
170                data: PointerOrValue { ptr },
171            }
172        }
173    }
174
175    #[inline]
176    pub fn iter(&self) -> std::slice::Iter<'_, T> {
177        self.as_slice().iter()
178    }
179
180    #[inline]
181    pub fn iter_mut(&mut self) -> std::slice::IterMut<'_, T> {
182        self.as_mut_slice().iter_mut()
183    }
184
185    #[inline]
186    pub fn as_slice(&self) -> &[T] {
187        self.as_ref()
188    }
189
190    #[inline]
191    pub fn as_mut_slice(&mut self) -> &mut [T] {
192        self.as_mut()
193    }
194}
195
196impl<T: Clone> UnitVec<T> {
197    pub fn from_slice(sl: &[T]) -> Self {
198        if sl.len() <= 1 {
199            let mut new = UnitVec::new();
200            if let Some(v) = sl.first() {
201                new.push(v.clone())
202            }
203            new
204        } else {
205            sl.to_vec().into()
206        }
207    }
208}
209
210impl<T> Extend<T> for UnitVec<T> {
211    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
212        let iter = iter.into_iter();
213        self.reserve(iter.size_hint().0);
214        for v in iter {
215            self.push(v)
216        }
217    }
218}
219
220impl<T> Drop for UnitVec<T> {
221    fn drop(&mut self) {
222        self.clear();
223        unsafe { self.dealloc() }
224    }
225}
226
227impl<T: Clone> Clone for UnitVec<T> {
228    fn clone(&self) -> Self {
229        Self::from_iter(self.iter().cloned())
230    }
231}
232
233impl<T: Debug> Debug for UnitVec<T> {
234    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
235        write!(f, "UnitVec: {:?}", self.as_slice())
236    }
237}
238
239impl<T> Default for UnitVec<T> {
240    fn default() -> Self {
241        Self::new()
242    }
243}
244
245impl<T> Deref for UnitVec<T> {
246    type Target = [T];
247
248    fn deref(&self) -> &Self::Target {
249        self.as_slice()
250    }
251}
252
253impl<T> DerefMut for UnitVec<T> {
254    fn deref_mut(&mut self) -> &mut Self::Target {
255        self.as_mut_slice()
256    }
257}
258
259impl<T> AsRef<[T]> for UnitVec<T> {
260    fn as_ref(&self) -> &[T] {
261        unsafe { std::slice::from_raw_parts(self.data_ptr(), self.len as usize) }
262    }
263}
264
265impl<T> AsMut<[T]> for UnitVec<T> {
266    fn as_mut(&mut self) -> &mut [T] {
267        unsafe { std::slice::from_raw_parts_mut(self.data_ptr_mut(), self.len as usize) }
268    }
269}
270
271impl<T: PartialEq> PartialEq for UnitVec<T> {
272    fn eq(&self, other: &Self) -> bool {
273        self.as_slice() == other.as_slice()
274    }
275}
276
277impl<T: Eq> Eq for UnitVec<T> {}
278
279impl<T> FromIterator<T> for UnitVec<T> {
280    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
281        let mut iter = iter.into_iter();
282
283        let Some(first) = iter.next() else {
284            return Self::new();
285        };
286
287        let Some(second) = iter.next() else {
288            let mut out = Self::new();
289            out.push(first);
290            return out;
291        };
292
293        let mut vec = Vec::with_capacity(iter.size_hint().0 + 2);
294        vec.push(first);
295        vec.push(second);
296        vec.extend(iter);
297        Self::from(vec)
298    }
299}
300
301impl<T> IntoIterator for UnitVec<T> {
302    type Item = T;
303
304    type IntoIter = IntoIter<T>;
305
306    fn into_iter(mut self) -> Self::IntoIter {
307        if self.is_inline() {
308            IntoIter::Inline(self.pop().into_iter())
309        } else {
310            IntoIter::External(Vec::from(self).into_iter())
311        }
312    }
313}
314
315pub enum IntoIter<T> {
316    Inline(std::option::IntoIter<T>),
317    External(std::vec::IntoIter<T>),
318}
319
320impl<T> Iterator for IntoIter<T> {
321    type Item = T;
322
323    fn next(&mut self) -> Option<Self::Item> {
324        match self {
325            IntoIter::Inline(it) => it.next(),
326            IntoIter::External(it) => it.next(),
327        }
328    }
329
330    fn size_hint(&self) -> (usize, Option<usize>) {
331        match self {
332            IntoIter::Inline(it) => it.size_hint(),
333            IntoIter::External(it) => it.size_hint(),
334        }
335    }
336}
337
338impl<T, const N: usize> From<[T; N]> for UnitVec<T> {
339    fn from(value: [T; N]) -> Self {
340        UnitVec::from_iter(value)
341    }
342}
343
344impl<T> ExactSizeIterator for IntoIter<T> {}
345
346impl<T> From<Vec<T>> for UnitVec<T> {
347    fn from(mut value: Vec<T>) -> Self {
348        if value.capacity() <= 1 {
349            let mut new = UnitVec::new();
350            if let Some(v) = value.pop() {
351                new.push(v)
352            }
353            new
354        } else {
355            let mut me = std::mem::ManuallyDrop::new(value);
356            UnitVec {
357                data: PointerOrValue {
358                    ptr: me.as_mut_ptr(),
359                },
360                capacity: NonZeroIdxSize::new(me.capacity().try_into().unwrap()).unwrap(),
361                len: me.len().try_into().unwrap(),
362            }
363        }
364    }
365}
366
367impl<T> From<UnitVec<T>> for Vec<T> {
368    fn from(mut value: UnitVec<T>) -> Self {
369        if value.is_inline() {
370            let mut out = Vec::with_capacity(value.len());
371            if let Some(item) = value.pop() {
372                out.push(item);
373            }
374            out
375        } else {
376            // SAFETY: when not inline, the data points to a buffer allocated by a Vec.
377            let out = unsafe {
378                Vec::from_raw_parts(value.data.ptr, value.len as usize, value.capacity())
379            };
380            // Prevent deallocating the buffer
381            std::mem::forget(value);
382            out
383        }
384    }
385}
386
387#[macro_export]
388macro_rules! unitvec {
389    () => {{
390        $crate::idx_vec::UnitVec::new()
391    }};
392    ($elem:expr; $n:expr) => {{
393        let mut new = $crate::idx_vec::UnitVec::new();
394        for _ in 0..$n {
395            new.push($elem)
396        }
397        new
398    }};
399    ($elem:expr) => {{
400        let mut new = $crate::idx_vec::UnitVec::new();
401        let v = $elem;
402        // SAFETY: first element always fits.
403        unsafe { new.push_unchecked(v) };
404        new
405    }};
406    ($($x:expr),+ $(,)?) => {{
407        vec![$($x),+].into()
408    }};
409}
410
411mod tests {
412
413    #[test]
414    #[should_panic]
415    fn test_unitvec_realloc_zero() {
416        super::UnitVec::<usize>::new().realloc(0);
417    }
418
419    #[test]
420    #[should_panic]
421    fn test_unitvec_realloc_one() {
422        super::UnitVec::<usize>::new().realloc(1);
423    }
424
425    #[test]
426    #[should_panic]
427    fn test_untivec_realloc_lt_len() {
428        super::UnitVec::<usize>::from([1, 2]).realloc(1)
429    }
430
431    #[test]
432    fn test_unitvec_clone() {
433        {
434            let v = unitvec![1usize];
435            assert_eq!(v, v.clone());
436        }
437
438        for n in [
439            26903816120209729usize,
440            42566276440897687,
441            44435161834424652,
442            49390731489933083,
443            51201454727649242,
444            83861672190814841,
445            92169290527847622,
446            92476373900398436,
447            95488551309275459,
448            97499984126814549,
449        ] {
450            let v = unitvec![n];
451            assert_eq!(v, v.clone());
452        }
453    }
454
455    #[test]
456    fn test_unitvec_repeat_n() {
457        assert_eq!(unitvec![5; 3].as_slice(), &[5, 5, 5])
458    }
459}