polars_utils/vec.rs

#![allow(unsafe_op_in_unsafe_fn)]
use std::mem::MaybeUninit;

use num_traits::Zero;

pub trait IntoRawParts<T> {
    fn into_raw_parts(self) -> (*mut T, usize, usize);

    // Doesn't take ownership of the `Vec`.
    fn raw_parts(&self) -> (*mut T, usize, usize);
}

impl<T> IntoRawParts<T> for Vec<T> {
    fn into_raw_parts(self) -> (*mut T, usize, usize) {
        let mut me = std::mem::ManuallyDrop::new(self);
        (me.as_mut_ptr(), me.len(), me.capacity())
    }

    fn raw_parts(&self) -> (*mut T, usize, usize) {
        (self.as_ptr() as *mut T, self.len(), self.capacity())
    }
}
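
// A minimal usage sketch of `IntoRawParts` (illustrative test, not part of the API above):
// decompose a `Vec` into its raw parts and rebuild it with `Vec::from_raw_parts`.
#[cfg(test)]
mod into_raw_parts_example {
    use super::*;

    #[test]
    fn roundtrip() {
        let v = vec![1u32, 2, 3];
        let (ptr, len, cap) = IntoRawParts::into_raw_parts(v);
        // SAFETY: the parts come from a `Vec` we own and have not dropped.
        let rebuilt = unsafe { Vec::from_raw_parts(ptr, len, cap) };
        assert_eq!(rebuilt, [1, 2, 3]);
    }
}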

/// Fill the current allocation if its capacity is > 0,
/// otherwise allocate anew.
pub trait ResizeFaster<T: Copy> {
    fn fill_or_alloc(&mut self, new_len: usize, value: T);
}

impl<T: Copy + Zero + PartialEq> ResizeFaster<T> for Vec<T> {
    fn fill_or_alloc(&mut self, new_len: usize, value: T) {
        if self.capacity() == 0 {
            // It is faster to allocate zeroed memory, so if the capacity
            // is 0 we allocate anew (`value` might be zero).
            *self = vec![value; new_len]
        } else {
            // First clear, then reserve, so that the reserve doesn't have
            // to memcpy the old contents in case it needs to realloc.
            self.clear();
            self.reserve(new_len);

            // Initialize the uninitialized spare capacity.
            let spare = &mut self.spare_capacity_mut()[..new_len];
            let init_value = MaybeUninit::new(value);
            spare.fill(init_value);
            unsafe { self.set_len(new_len) }
        }
    }
}
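
// A minimal usage sketch of `fill_or_alloc` (illustrative test, not part of the API above):
// refill a buffer that already has capacity instead of reallocating it.
#[cfg(test)]
mod fill_or_alloc_example {
    use super::*;

    #[test]
    fn refill_existing_allocation() {
        let mut buf: Vec<u32> = Vec::with_capacity(8);
        buf.extend([1, 2, 3]);

        // Capacity is non-zero, so the existing allocation is cleared and refilled.
        buf.fill_or_alloc(5, 7);
        assert_eq!(buf, [7, 7, 7, 7, 7]);
        assert!(buf.capacity() >= 8);
    }
}
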
pub trait PushUnchecked<T> {
    /// Push an item without checking whether there is enough capacity.
    ///
    /// # Safety
    /// The caller must ensure the vector has enough capacity to hold an additional `T`.
    unsafe fn push_unchecked(&mut self, value: T);
}

impl<T> PushUnchecked<T> for Vec<T> {
    #[inline]
    unsafe fn push_unchecked(&mut self, value: T) {
        debug_assert!(self.capacity() > self.len());
        let end = self.as_mut_ptr().add(self.len());
        std::ptr::write(end, value);
        self.set_len(self.len() + 1);
    }
}
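
// A minimal usage sketch of `push_unchecked` (illustrative test, not part of the API above):
// reserve capacity up front, then push without per-element capacity checks.
#[cfg(test)]
mod push_unchecked_example {
    use super::*;

    #[test]
    fn push_within_reserved_capacity() {
        let mut v: Vec<u32> = Vec::with_capacity(2);
        // SAFETY: capacity for two elements was reserved above.
        unsafe {
            v.push_unchecked(10);
            v.push_unchecked(20);
        }
        assert_eq!(v, [10, 20]);
    }
}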

pub trait CapacityByFactor {
    fn with_capacity_by_factor(original_len: usize, factor: f64) -> Self;
}

impl<T> CapacityByFactor for Vec<T> {
    fn with_capacity_by_factor(original_len: usize, factor: f64) -> Self {
        let cap = (original_len as f64 * factor) as usize;
        Vec::with_capacity(cap)
    }
}
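
// A minimal usage sketch of `with_capacity_by_factor` (illustrative test, not part of the
// API above): size a new `Vec` relative to an existing length.
#[cfg(test)]
mod capacity_by_factor_example {
    use super::*;

    #[test]
    fn capacity_scales_with_factor() {
        let v: Vec<u8> = Vec::with_capacity_by_factor(100, 1.5);
        assert!(v.is_empty());
        assert!(v.capacity() >= 150);
    }
}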

// Trait to convert a `Vec`.
// The reason for this is to reduce code generation: conversion functions that are
// named functions should only generate the conversion loop once.
pub trait ConvertVec<Out> {
    type ItemIn;

    fn convert_owned<F: FnMut(Self::ItemIn) -> Out>(self, f: F) -> Vec<Out>;

    fn convert<F: FnMut(&Self::ItemIn) -> Out>(&self, f: F) -> Vec<Out>;
}

impl<T, Out> ConvertVec<Out> for Vec<T> {
    type ItemIn = T;

    fn convert_owned<F: FnMut(Self::ItemIn) -> Out>(self, f: F) -> Vec<Out> {
        self.into_iter().map(f).collect()
    }

    fn convert<F: FnMut(&Self::ItemIn) -> Out>(&self, f: F) -> Vec<Out> {
        self.iter().map(f).collect()
    }
}
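
// A minimal usage sketch of `ConvertVec` (illustrative test, not part of the API above):
// the same named conversion entry points work by reference and by value.
#[cfg(test)]
mod convert_vec_example {
    use super::*;

    #[test]
    fn convert_by_ref_and_by_value() {
        let src = vec![1i32, 2, 3];

        let as_strings: Vec<String> = src.convert(|v| v.to_string());
        let as_i64: Vec<i64> = src.convert_owned(|v| v as i64);

        assert_eq!(as_strings, ["1", "2", "3"]);
        assert_eq!(as_i64, [1, 2, 3]);
    }
}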

/// Perform an in-place `Iterator::filter_map` over two vectors at the same time.
pub fn inplace_zip_filtermap<T, U>(
    x: &mut Vec<T>,
    y: &mut Vec<U>,
    mut f: impl FnMut(T, U) -> Option<(T, U)>,
) {
    assert_eq!(x.len(), y.len());

    let length = x.len();

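    // Guard owning the last `length` not-yet-processed items, which end at `end`.
    // If `f` panics mid-loop, its `Drop` impl reads (and thereby drops) those items.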
    struct OwnedBuffer<T> {
        end: *mut T,
        length: usize,
    }

    impl<T> Drop for OwnedBuffer<T> {
        fn drop(&mut self) {
            for i in 0..self.length {
                unsafe { self.end.wrapping_sub(i + 1).read() };
            }
        }
    }

    let x_ptr = x.as_mut_ptr();
    let y_ptr = y.as_mut_ptr();

    let mut x_buf = OwnedBuffer {
        end: x_ptr.wrapping_add(length),
        length,
    };
    let mut y_buf = OwnedBuffer {
        end: y_ptr.wrapping_add(length),
        length,
    };

    // SAFETY: All items are now owned by `x_buf` and `y_buf`. Since we know that `x_buf` and
    // `y_buf` will be dropped before the vecs representing `x` and `y`, this is safe.
    unsafe {
        x.set_len(0);
        y.set_len(0);
    }

    // SAFETY:
    //
    // We know we have an exclusive reference to `x` and `y`.
    //
    // We know that `i` is always smaller than the original length of `x` and `y`, so the
    // reads below stay within the original allocations. Furthermore, the number of items
    // pushed back into `x` and `y` never exceeds `i + 1`, so a push never overwrites an
    // element that has not been read yet and never needs to reallocate.
    //
    // Items are dropped exactly once, even if `f` panics.
    for i in 0..length {
        let xi = unsafe { x_ptr.wrapping_add(i).read() };
        let yi = unsafe { y_ptr.wrapping_add(i).read() };

        x_buf.length -= 1;
        y_buf.length -= 1;

        // We hold the invariant here that every item that has not yet been deleted lives in
        // one of:
        // - `xi` or `yi`
        // - `x_buf` or `y_buf`
        // - `x` or `y`
        //
        // This way, if `f` ever panics, we are sure that all items are dropped exactly once.
        // Items that are filtered out are dropped when `f` consumes them.
        let result = f(xi, yi);

        if let Some((xi, yi)) = result {
            x.push(xi);
            y.push(yi);
        }
    }

    debug_assert_eq!(x_buf.length, 0);
    debug_assert_eq!(y_buf.length, 0);

    // We are safe to forget `x_buf` and `y_buf` here: their lengths are now 0, so their
    // `Drop` impls have nothing left to drop, and they never owned the allocations themselves.
    std::mem::forget(x_buf);
    std::mem::forget(y_buf);
}
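
// A minimal usage sketch of `inplace_zip_filtermap` (illustrative test, not part of the API
// above): filter two parallel vectors in lock-step while mapping the kept pairs, reusing the
// existing allocations.
#[cfg(test)]
mod inplace_zip_filtermap_example {
    use super::*;

    #[test]
    fn keeps_and_maps_matching_pairs() {
        let mut keys = vec![1u32, 2, 3, 4];
        let mut vals = vec!["a".to_string(), "b".into(), "c".into(), "d".into()];

        // Keep only pairs with an even key, doubling the kept keys.
        inplace_zip_filtermap(&mut keys, &mut vals, |k, v| {
            if k % 2 == 0 { Some((k * 2, v)) } else { None }
        });

        assert_eq!(keys, [4, 8]);
        assert_eq!(vals, ["b", "d"]);
    }
}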