polars_utils/
idx_vec.rs

1use std::fmt::{Debug, Formatter};
2use std::mem::ManuallyDrop;
3use std::ops::{Deref, DerefMut};
4
5use crate::index::{IdxSize, NonZeroIdxSize};
6
7pub type IdxVec = UnitVec<IdxSize>;
8
9union PointerOrValue<T> {
10    ptr: *mut T,
11    value: ManuallyDrop<T>,
12}
13
14/// A type logically equivalent to `Vec<T>`, but which does not do a
15/// memory allocation until at least two elements have been pushed, storing the
16/// first element inside the UnitVec directly.
17///
18/// Uses IdxSize internally to store lengths, will panic if trying to reserve
19/// for more elements.
20pub struct UnitVec<T> {
21    len: IdxSize,
22    capacity: NonZeroIdxSize,
23    data: PointerOrValue<T>,
24}
25
26unsafe impl<T: Send + Sync> Send for UnitVec<T> {}
27unsafe impl<T: Send + Sync> Sync for UnitVec<T> {}
28
29impl<T> UnitVec<T> {
30    #[inline(always)]
31    fn data_ptr_mut(&mut self) -> *mut T {
32        if self.is_inline() {
33            unsafe { &mut *self.data.value }
34        } else {
35            unsafe { self.data.ptr }
36        }
37    }
38
39    #[inline(always)]
40    fn data_ptr(&self) -> *const T {
41        if self.is_inline() {
42            unsafe { &*self.data.value }
43        } else {
44            unsafe { self.data.ptr }
45        }
46    }
47
48    #[inline]
49    pub fn new() -> Self {
50        Self {
51            len: 0,
52            capacity: NonZeroIdxSize::new(1).unwrap(),
53            data: PointerOrValue {
54                ptr: std::ptr::null_mut(),
55            },
56        }
57    }
58
59    pub fn is_inline(&self) -> bool {
60        self.capacity.get() == 1
61    }
62
63    #[inline(always)]
64    pub fn len(&self) -> usize {
65        self.len as usize
66    }
67
68    #[inline(always)]
69    pub fn is_empty(&self) -> bool {
70        self.len == 0
71    }
72
73    #[inline(always)]
74    pub fn capacity(&self) -> usize {
75        self.capacity.get() as usize
76    }
77
78    #[inline(always)]
79    pub fn clear(&mut self) {
80        if std::mem::needs_drop::<T>() {
81            while self.len > 0 {
82                self.pop();
83            }
84        } else {
85            self.len = 0;
86        }
87    }
88
89    #[inline(always)]
90    pub fn push(&mut self, idx: T) {
91        if self.len == self.capacity.get() {
92            self.reserve(1);
93        }
94
95        unsafe { self.push_unchecked(idx) }
96    }
97
98    #[inline(always)]
99    /// # Safety
100    /// Caller must ensure that `UnitVec` has enough capacity.
101    pub unsafe fn push_unchecked(&mut self, idx: T) {
102        unsafe {
103            self.data_ptr_mut().add(self.len as usize).write(idx);
104            self.len += 1;
105        }
106    }
107
108    #[inline]
109    pub fn pop(&mut self) -> Option<T> {
110        if self.len == 0 {
111            None
112        } else {
113            unsafe {
114                self.len -= 1;
115                Some(self.data_ptr().add(self.len as usize).read())
116            }
117        }
118    }
119
120    #[cold]
121    #[inline(never)]
122    pub fn reserve(&mut self, additional: usize) {
123        let new_len = self
124            .len
125            .checked_add(additional.try_into().unwrap())
126            .unwrap();
127        if new_len > self.capacity.get() {
128            let double = self.capacity.get() * 2;
129            self.realloc(double.max(new_len).max(8));
130        }
131    }
132
133    /// # Panics
134    /// Panics if `new_cap <= 1` or `new_cap < self.len`
135    fn realloc(&mut self, mut new_cap: IdxSize) {
136        assert!(new_cap > 1 && new_cap >= self.len);
137        unsafe {
138            let mut me = std::mem::ManuallyDrop::new(Vec::with_capacity(new_cap as usize));
139            new_cap = me.capacity().try_into().unwrap();
140            let buffer = me.as_mut_ptr();
141            std::ptr::copy(self.data_ptr(), buffer, self.len as usize);
142            self.dealloc();
143            self.data = PointerOrValue { ptr: buffer };
144            self.capacity = NonZeroIdxSize::new(new_cap).unwrap();
145        }
146    }
147
148    unsafe fn dealloc(&mut self) {
149        unsafe {
150            if !self.is_inline() {
151                drop(Vec::from_raw_parts(
152                    self.data.ptr.cast::<ManuallyDrop<T>>(),
153                    self.len as usize,
154                    self.capacity(),
155                ));
156            }
157        }
158    }
159
160    pub fn with_capacity(capacity: usize) -> Self {
161        if capacity <= 1 {
162            Self::new()
163        } else {
164            let mut me = std::mem::ManuallyDrop::new(Vec::with_capacity(capacity));
165            let cap = me.capacity().try_into().unwrap();
166            let ptr = me.as_mut_ptr();
167            Self {
168                len: 0,
169                capacity: NonZeroIdxSize::new(cap).unwrap(),
170                data: PointerOrValue { ptr },
171            }
172        }
173    }
174
175    #[inline]
176    pub fn iter(&self) -> std::slice::Iter<'_, T> {
177        self.as_slice().iter()
178    }
179
180    #[inline]
181    pub fn iter_mut(&mut self) -> std::slice::IterMut<'_, T> {
182        self.as_mut_slice().iter_mut()
183    }
184
185    #[inline]
186    pub fn as_slice(&self) -> &[T] {
187        self.as_ref()
188    }
189
190    #[inline]
191    pub fn as_mut_slice(&mut self) -> &mut [T] {
192        self.as_mut()
193    }
194}
195
196impl<T: Copy> UnitVec<T> {
197    pub fn retain(&mut self, mut f: impl FnMut(T) -> bool) {
198        let mut i = 0;
199        for j in 0..self.len() {
200            if f(self[j]) {
201                self[i] = self[j];
202                i += 1;
203            }
204        }
205
206        if i == 0 {
207            *self = Self::new();
208        } else if i == 1 {
209            *self = Self::from_slice(&[self[0]]);
210        } else {
211            self.len = i as IdxSize;
212        }
213    }
214}
215
216impl<T: Clone> UnitVec<T> {
217    pub fn from_slice(sl: &[T]) -> Self {
218        if sl.len() <= 1 {
219            let mut new = UnitVec::new();
220            if let Some(v) = sl.first() {
221                new.push(v.clone())
222            }
223            new
224        } else {
225            sl.to_vec().into()
226        }
227    }
228}
229
230impl<T> Extend<T> for UnitVec<T> {
231    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
232        let iter = iter.into_iter();
233        self.reserve(iter.size_hint().0);
234        for v in iter {
235            self.push(v)
236        }
237    }
238}
239
240impl<T> Drop for UnitVec<T> {
241    fn drop(&mut self) {
242        self.clear();
243        unsafe { self.dealloc() }
244    }
245}
246
247impl<T: Clone> Clone for UnitVec<T> {
248    fn clone(&self) -> Self {
249        Self::from_iter(self.iter().cloned())
250    }
251}
252
253impl<T: Debug> Debug for UnitVec<T> {
254    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
255        write!(f, "UnitVec: {:?}", self.as_slice())
256    }
257}
258
259impl<T> Default for UnitVec<T> {
260    fn default() -> Self {
261        Self::new()
262    }
263}
264
265impl<T> Deref for UnitVec<T> {
266    type Target = [T];
267
268    fn deref(&self) -> &Self::Target {
269        self.as_slice()
270    }
271}
272
273impl<T> DerefMut for UnitVec<T> {
274    fn deref_mut(&mut self) -> &mut Self::Target {
275        self.as_mut_slice()
276    }
277}
278
279impl<T> AsRef<[T]> for UnitVec<T> {
280    fn as_ref(&self) -> &[T] {
281        unsafe { std::slice::from_raw_parts(self.data_ptr(), self.len as usize) }
282    }
283}
284
285impl<T> AsMut<[T]> for UnitVec<T> {
286    fn as_mut(&mut self) -> &mut [T] {
287        unsafe { std::slice::from_raw_parts_mut(self.data_ptr_mut(), self.len as usize) }
288    }
289}
290
291impl<T: PartialEq> PartialEq for UnitVec<T> {
292    fn eq(&self, other: &Self) -> bool {
293        self.as_slice() == other.as_slice()
294    }
295}
296
297impl<T: Eq> Eq for UnitVec<T> {}
298
299impl<T> FromIterator<T> for UnitVec<T> {
300    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
301        let mut iter = iter.into_iter();
302
303        let Some(first) = iter.next() else {
304            return Self::new();
305        };
306
307        let Some(second) = iter.next() else {
308            let mut out = Self::new();
309            out.push(first);
310            return out;
311        };
312
313        let mut vec = Vec::with_capacity(iter.size_hint().0 + 2);
314        vec.push(first);
315        vec.push(second);
316        vec.extend(iter);
317        Self::from(vec)
318    }
319}
320
321impl<T> IntoIterator for UnitVec<T> {
322    type Item = T;
323
324    type IntoIter = IntoIter<T>;
325
326    fn into_iter(mut self) -> Self::IntoIter {
327        if self.is_inline() {
328            IntoIter::Inline(self.pop().into_iter())
329        } else {
330            IntoIter::External(Vec::from(self).into_iter())
331        }
332    }
333}
334
335pub enum IntoIter<T> {
336    Inline(std::option::IntoIter<T>),
337    External(std::vec::IntoIter<T>),
338}
339
340impl<T> Iterator for IntoIter<T> {
341    type Item = T;
342
343    fn next(&mut self) -> Option<Self::Item> {
344        match self {
345            IntoIter::Inline(it) => it.next(),
346            IntoIter::External(it) => it.next(),
347        }
348    }
349
350    fn size_hint(&self) -> (usize, Option<usize>) {
351        match self {
352            IntoIter::Inline(it) => it.size_hint(),
353            IntoIter::External(it) => it.size_hint(),
354        }
355    }
356}
357
358impl<T, const N: usize> From<[T; N]> for UnitVec<T> {
359    fn from(value: [T; N]) -> Self {
360        UnitVec::from_iter(value)
361    }
362}
363
364impl<T> ExactSizeIterator for IntoIter<T> {}
365
366impl<T> From<Vec<T>> for UnitVec<T> {
367    fn from(mut value: Vec<T>) -> Self {
368        if value.capacity() <= 1 {
369            let mut new = UnitVec::new();
370            if let Some(v) = value.pop() {
371                new.push(v)
372            }
373            new
374        } else {
375            let mut me = std::mem::ManuallyDrop::new(value);
376            UnitVec {
377                data: PointerOrValue {
378                    ptr: me.as_mut_ptr(),
379                },
380                capacity: NonZeroIdxSize::new(me.capacity().try_into().unwrap()).unwrap(),
381                len: me.len().try_into().unwrap(),
382            }
383        }
384    }
385}
386
387impl<T> From<UnitVec<T>> for Vec<T> {
388    fn from(mut value: UnitVec<T>) -> Self {
389        if value.is_inline() {
390            let mut out = Vec::with_capacity(value.len());
391            if let Some(item) = value.pop() {
392                out.push(item);
393            }
394            out
395        } else {
396            // SAFETY: when not inline, the data points to a buffer allocated by a Vec.
397            let out = unsafe {
398                Vec::from_raw_parts(value.data.ptr, value.len as usize, value.capacity())
399            };
400            // Prevent deallocating the buffer
401            std::mem::forget(value);
402            out
403        }
404    }
405}
406
407#[macro_export]
408macro_rules! unitvec {
409    () => {{
410        $crate::idx_vec::UnitVec::new()
411    }};
412    ($elem:expr; $n:expr) => {{
413        let mut new = $crate::idx_vec::UnitVec::new();
414        for _ in 0..$n {
415            new.push($elem)
416        }
417        new
418    }};
419    ($elem:expr) => {{
420        let mut new = $crate::idx_vec::UnitVec::new();
421        let v = $elem;
422        // SAFETY: first element always fits.
423        unsafe { new.push_unchecked(v) };
424        new
425    }};
426    ($($x:expr),+ $(,)?) => {{
427        vec![$($x),+].into()
428    }};
429}
430
431mod tests {
432
433    #[test]
434    #[should_panic]
435    fn test_unitvec_realloc_zero() {
436        super::UnitVec::<usize>::new().realloc(0);
437    }
438
439    #[test]
440    #[should_panic]
441    fn test_unitvec_realloc_one() {
442        super::UnitVec::<usize>::new().realloc(1);
443    }
444
445    #[test]
446    #[should_panic]
447    fn test_untivec_realloc_lt_len() {
448        super::UnitVec::<usize>::from([1, 2]).realloc(1)
449    }
450
451    #[test]
452    fn test_unitvec_clone() {
453        {
454            let v = unitvec![1usize];
455            assert_eq!(v, v.clone());
456        }
457
458        for n in [
459            26903816120209729usize,
460            42566276440897687,
461            44435161834424652,
462            49390731489933083,
463            51201454727649242,
464            83861672190814841,
465            92169290527847622,
466            92476373900398436,
467            95488551309275459,
468            97499984126814549,
469        ] {
470            let v = unitvec![n];
471            assert_eq!(v, v.clone());
472        }
473    }
474
475    #[test]
476    fn test_unitvec_repeat_n() {
477        assert_eq!(unitvec![5; 3].as_slice(), &[5, 5, 5])
478    }
479}