polars_utils/
idx_vec.rs

1use std::fmt::{Debug, Formatter};
2use std::ops::Deref;
3
4use crate::index::{IdxSize, NonZeroIdxSize};
5
6pub type IdxVec = UnitVec<IdxSize>;
7
8/// A type logically equivalent to `Vec<T>`, but which does not do a
9/// memory allocation until at least two elements have been pushed, storing the
10/// first element in the data pointer directly.
11///
12/// Uses IdxSize internally to store lengths, will panic if trying to reserve
13/// for more elements.
14#[derive(Eq)]
15pub struct UnitVec<T> {
16    len: IdxSize,
17    capacity: NonZeroIdxSize,
18    data: *mut T,
19}
20
21unsafe impl<T: Send + Sync> Send for UnitVec<T> {}
22unsafe impl<T: Send + Sync> Sync for UnitVec<T> {}
23
24impl<T> UnitVec<T> {
25    #[inline(always)]
26    fn data_ptr_mut(&mut self) -> *mut T {
27        let external = self.data;
28        let inline = &mut self.data as *mut *mut T as *mut T;
29        if self.is_inline() { inline } else { external }
30    }
31
32    #[inline(always)]
33    fn data_ptr(&self) -> *const T {
34        let external = self.data;
35        let inline = &self.data as *const *mut T as *mut T;
36        if self.is_inline() { inline } else { external }
37    }
38
39    #[inline]
40    pub fn new() -> Self {
41        let fits_in_pointer = const {
42            let fits_in_pointer =
43                size_of::<T>() <= size_of::<*mut T>() && align_of::<T>() <= align_of::<*mut T>();
44
45            if cfg!(target_pointer_width = "64") {
46                assert!(fits_in_pointer)
47            }
48
49            fits_in_pointer
50        };
51
52        assert!(fits_in_pointer);
53
54        Self {
55            len: 0,
56            capacity: NonZeroIdxSize::new(1).unwrap(),
57            data: std::ptr::null_mut(),
58        }
59    }
60
61    pub fn is_inline(&self) -> bool {
62        self.capacity.get() == 1
63    }
64
65    #[inline(always)]
66    pub fn len(&self) -> usize {
67        self.len as usize
68    }
69
70    #[inline(always)]
71    pub fn is_empty(&self) -> bool {
72        self.len == 0
73    }
74
75    #[inline(always)]
76    pub fn capacity(&self) -> usize {
77        self.capacity.get() as usize
78    }
79
80    #[inline(always)]
81    pub fn clear(&mut self) {
82        self.len = 0;
83    }
84
85    #[inline(always)]
86    pub fn push(&mut self, idx: T) {
87        if self.len == self.capacity.get() {
88            self.reserve(1);
89        }
90
91        unsafe { self.push_unchecked(idx) }
92    }
93
94    #[inline(always)]
95    /// # Safety
96    /// Caller must ensure that `UnitVec` has enough capacity.
97    pub unsafe fn push_unchecked(&mut self, idx: T) {
98        unsafe {
99            self.data_ptr_mut().add(self.len as usize).write(idx);
100            self.len += 1;
101        }
102    }
103
104    #[cold]
105    #[inline(never)]
106    pub fn reserve(&mut self, additional: usize) {
107        let new_len = self
108            .len
109            .checked_add(additional.try_into().unwrap())
110            .unwrap();
111        if new_len > self.capacity.get() {
112            let double = self.capacity.get() * 2;
113            self.realloc(double.max(new_len).max(8));
114        }
115    }
116
117    /// # Panics
118    /// Panics if `new_cap <= 1` or `new_cap < self.len`
119    fn realloc(&mut self, new_cap: IdxSize) {
120        assert!(new_cap > 1 && new_cap >= self.len);
121        unsafe {
122            let mut me = std::mem::ManuallyDrop::new(Vec::with_capacity(new_cap as usize));
123            let buffer = me.as_mut_ptr();
124            std::ptr::copy(self.data_ptr(), buffer, self.len as usize);
125            self.dealloc();
126            self.data = buffer;
127            self.capacity = NonZeroIdxSize::new(new_cap).unwrap();
128        }
129    }
130
131    fn dealloc(&mut self) {
132        unsafe {
133            if !self.is_inline() {
134                let _ = Vec::from_raw_parts(self.data, self.len as usize, self.capacity());
135                self.capacity = NonZeroIdxSize::new(1).unwrap();
136            }
137        }
138    }
139
140    pub fn with_capacity(capacity: usize) -> Self {
141        if capacity <= 1 {
142            Self::new()
143        } else {
144            let mut me = std::mem::ManuallyDrop::new(Vec::with_capacity(capacity));
145            let data = me.as_mut_ptr();
146            Self {
147                len: 0,
148                capacity: NonZeroIdxSize::new(capacity.try_into().unwrap()).unwrap(),
149                data,
150            }
151        }
152    }
153
154    #[inline]
155    pub fn iter(&self) -> std::slice::Iter<'_, T> {
156        self.as_slice().iter()
157    }
158
159    #[inline]
160    pub fn iter_mut(&mut self) -> std::slice::IterMut<'_, T> {
161        self.as_mut_slice().iter_mut()
162    }
163
164    #[inline]
165    pub fn as_slice(&self) -> &[T] {
166        self.as_ref()
167    }
168
169    #[inline]
170    pub fn as_mut_slice(&mut self) -> &mut [T] {
171        self.as_mut()
172    }
173
174    #[inline]
175    pub fn pop(&mut self) -> Option<T> {
176        if self.len == 0 {
177            None
178        } else {
179            unsafe {
180                self.len -= 1;
181                Some(std::ptr::read(self.as_ptr().add(self.len())))
182            }
183        }
184    }
185}
186
187impl<T> Extend<T> for UnitVec<T> {
188    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
189        let iter = iter.into_iter();
190        self.reserve(iter.size_hint().0);
191        for v in iter {
192            self.push(v)
193        }
194    }
195}
196
197impl<T> Drop for UnitVec<T> {
198    fn drop(&mut self) {
199        self.dealloc()
200    }
201}
202
203impl<T> Clone for UnitVec<T> {
204    fn clone(&self) -> Self {
205        unsafe {
206            if self.is_inline() {
207                Self { ..*self }
208            } else {
209                let mut copy = Self::with_capacity(self.len as usize);
210                std::ptr::copy(self.data_ptr(), copy.data_ptr_mut(), self.len as usize);
211                copy.len = self.len;
212                copy
213            }
214        }
215    }
216}
217
218impl<T: Debug> Debug for UnitVec<T> {
219    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
220        write!(f, "UnitVec: {:?}", self.as_slice())
221    }
222}
223
224impl<T> Default for UnitVec<T> {
225    fn default() -> Self {
226        Self {
227            len: 0,
228            capacity: NonZeroIdxSize::new(1).unwrap(),
229            data: std::ptr::null_mut(),
230        }
231    }
232}
233
234impl<T> Deref for UnitVec<T> {
235    type Target = [T];
236
237    fn deref(&self) -> &Self::Target {
238        self.as_slice()
239    }
240}
241
242impl<T> std::ops::DerefMut for UnitVec<T> {
243    fn deref_mut(&mut self) -> &mut Self::Target {
244        self.as_mut_slice()
245    }
246}
247
248impl<T> AsRef<[T]> for UnitVec<T> {
249    fn as_ref(&self) -> &[T] {
250        unsafe { std::slice::from_raw_parts(self.data_ptr(), self.len as usize) }
251    }
252}
253
254impl<T> AsMut<[T]> for UnitVec<T> {
255    fn as_mut(&mut self) -> &mut [T] {
256        unsafe { std::slice::from_raw_parts_mut(self.data_ptr_mut(), self.len as usize) }
257    }
258}
259
260impl<T: PartialEq> PartialEq for UnitVec<T> {
261    fn eq(&self, other: &Self) -> bool {
262        self.as_slice() == other.as_slice()
263    }
264}
265
266impl<T> FromIterator<T> for UnitVec<T> {
267    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
268        let mut iter = iter.into_iter();
269
270        let Some(first) = iter.next() else {
271            return Self::new();
272        };
273
274        let Some(second) = iter.next() else {
275            let mut out = Self::new();
276            out.push(first);
277            return out;
278        };
279
280        let mut vec = Vec::with_capacity(iter.size_hint().0 + 2);
281        vec.push(first);
282        vec.push(second);
283        vec.extend(iter);
284        Self::from(vec)
285    }
286}
287
288impl<T> IntoIterator for UnitVec<T> {
289    type Item = T;
290
291    type IntoIter = IntoIter<T>;
292
293    fn into_iter(mut self) -> Self::IntoIter {
294        if self.is_inline() {
295            IntoIter::Inline(self.pop().into_iter())
296        } else {
297            IntoIter::External(Vec::from(self).into_iter())
298        }
299    }
300}
301
302pub enum IntoIter<T> {
303    Inline(std::option::IntoIter<T>),
304    External(std::vec::IntoIter<T>),
305}
306
307impl<T> Iterator for IntoIter<T> {
308    type Item = T;
309
310    fn next(&mut self) -> Option<Self::Item> {
311        match self {
312            IntoIter::Inline(it) => it.next(),
313            IntoIter::External(it) => it.next(),
314        }
315    }
316}
317
318impl<T> From<Vec<T>> for UnitVec<T> {
319    fn from(mut value: Vec<T>) -> Self {
320        if value.capacity() <= 1 {
321            let mut new = UnitVec::new();
322            if let Some(v) = value.pop() {
323                new.push(v)
324            }
325            new
326        } else {
327            let mut me = std::mem::ManuallyDrop::new(value);
328            UnitVec {
329                data: me.as_mut_ptr(),
330                capacity: NonZeroIdxSize::new(me.capacity().try_into().unwrap()).unwrap(),
331                len: me.len().try_into().unwrap(),
332            }
333        }
334    }
335}
336
337impl<T, const N: usize> From<[T; N]> for UnitVec<T> {
338    fn from(value: [T; N]) -> Self {
339        value.into_iter().collect()
340    }
341}
342
343impl<T> From<UnitVec<T>> for Vec<T> {
344    fn from(mut value: UnitVec<T>) -> Self {
345        if value.is_inline() {
346            let mut out = Vec::with_capacity(value.len());
347            if let Some(item) = value.pop() {
348                out.push(item);
349            }
350            out
351        } else {
352            // SAFETY: when not inline, the data points to a buffer allocated by a Vec.
353            let out =
354                unsafe { Vec::from_raw_parts(value.data, value.len as usize, value.capacity()) };
355            // Prevent deallocating the buffer
356            std::mem::forget(value);
357            out
358        }
359    }
360}
361
362impl<T: Clone> From<&[T]> for UnitVec<T> {
363    fn from(value: &[T]) -> Self {
364        if value.len() <= 1 {
365            let mut new = UnitVec::new();
366            if let Some(v) = value.first() {
367                new.push(v.clone())
368            }
369            new
370        } else {
371            value.to_vec().into()
372        }
373    }
374}
375
376#[macro_export]
377macro_rules! unitvec {
378    () => {{
379        $crate::idx_vec::UnitVec::new()
380    }};
381    ($elem:expr; $n:expr) => {{
382        let mut new = $crate::idx_vec::UnitVec::new();
383        for _ in 0..$n {
384            new.push($elem)
385        }
386        new
387    }};
388    ($elem:expr) => {{
389        let mut new = $crate::idx_vec::UnitVec::new();
390        let v = $elem;
391        // SAFETY: first element always fits.
392        unsafe { new.push_unchecked(v) };
393        new
394    }};
395    ($($x:expr),+ $(,)?) => {{
396        vec![$($x),+].into()
397    }};
398}
399
400mod tests {
401
402    #[test]
403    #[should_panic]
404    fn test_unitvec_realloc_zero() {
405        super::UnitVec::<usize>::new().realloc(0);
406    }
407
408    #[test]
409    #[should_panic]
410    fn test_unitvec_realloc_one() {
411        super::UnitVec::<usize>::new().realloc(1);
412    }
413
414    #[test]
415    #[should_panic]
416    fn test_untivec_realloc_lt_len() {
417        super::UnitVec::<usize>::from(&[1, 2][..]).realloc(1)
418    }
419
420    #[test]
421    fn test_unitvec_clone() {
422        {
423            let v = unitvec![1usize];
424            assert_eq!(v, v.clone());
425        }
426
427        for n in [
428            26903816120209729usize,
429            42566276440897687,
430            44435161834424652,
431            49390731489933083,
432            51201454727649242,
433            83861672190814841,
434            92169290527847622,
435            92476373900398436,
436            95488551309275459,
437            97499984126814549,
438        ] {
439            let v = unitvec![n];
440            assert_eq!(v, v.clone());
441        }
442    }
443
444    #[test]
445    fn test_unitvec_repeat_n() {
446        assert_eq!(unitvec![5; 3].as_slice(), &[5, 5, 5])
447    }
448}