// polars_utils/vec.rs

1#![allow(unsafe_op_in_unsafe_fn)]
2use std::mem::MaybeUninit;
3
4use bytemuck::Pod;
5use num_traits::Zero;
6
7use crate::with_drop::WithDrop;
8
/// Decompose a `Vec` into its raw components (pointer, length, capacity).
pub trait IntoRawParts<T> {
    /// Consumes the `Vec`; the caller becomes responsible for the allocation.
    fn into_raw_parts(self) -> (*mut T, usize, usize);

    /// Doesn't take ownership: the `Vec` still owns (and will free) the buffer,
    /// so the returned pointer must not outlive it.
    fn raw_parts(&self) -> (*mut T, usize, usize);
}
15
16impl<T> IntoRawParts<T> for Vec<T> {
17    fn into_raw_parts(self) -> (*mut T, usize, usize) {
18        let mut me = std::mem::ManuallyDrop::new(self);
19        (me.as_mut_ptr(), me.len(), me.capacity())
20    }
21
22    fn raw_parts(&self) -> (*mut T, usize, usize) {
23        (self.as_ptr() as *mut T, self.len(), self.capacity())
24    }
25}
26
/// Fill current allocation if > 0
/// otherwise realloc
pub trait ResizeFaster<T: Copy> {
    /// Resizes `self` to `new_len` copies of `value`, reusing the existing
    /// allocation when there is one.
    fn fill_or_alloc(&mut self, new_len: usize, value: T);
}
32
33impl<T: Copy + Zero + PartialEq> ResizeFaster<T> for Vec<T> {
34    fn fill_or_alloc(&mut self, new_len: usize, value: T) {
35        if self.capacity() == 0 {
36            // it is faster to allocate zeroed
37            // so if the capacity is 0, we alloc (value might be 0)
38            *self = vec![value; new_len]
39        } else {
40            // first clear then reserve so that the reserve doesn't have
41            // to memcpy in case it needs to realloc.
42            self.clear();
43            self.reserve(new_len);
44
45            // // init the uninit values
46            let spare = &mut self.spare_capacity_mut()[..new_len];
47            let init_value = MaybeUninit::new(value);
48            spare.fill(init_value);
49            unsafe { self.set_len(new_len) }
50        }
51    }
52}
/// Push onto a collection without checking capacity first.
pub trait PushUnchecked<T> {
    /// Will push an item and not check if there is enough capacity.
    ///
    /// # Safety
    /// Caller must ensure the array has enough capacity to hold `T`.
    unsafe fn push_unchecked(&mut self, value: T);
}
60
61impl<T> PushUnchecked<T> for Vec<T> {
62    #[inline]
63    unsafe fn push_unchecked(&mut self, value: T) {
64        debug_assert!(self.capacity() > self.len());
65        let end = self.as_mut_ptr().add(self.len());
66        std::ptr::write(end, value);
67        self.set_len(self.len() + 1);
68    }
69}
70
/// Construct with a capacity derived from an existing length and a growth factor.
pub trait CapacityByFactor {
    /// Returns an empty collection with capacity `original_len * factor`
    /// (truncated toward zero by the float-to-usize cast).
    fn with_capacity_by_factor(original_len: usize, factor: f64) -> Self;
}
74
75impl<T> CapacityByFactor for Vec<T> {
76    fn with_capacity_by_factor(original_len: usize, factor: f64) -> Self {
77        let cap = (original_len as f64 * factor) as usize;
78        Vec::with_capacity(cap)
79    }
80}
81
/// Trait to convert a Vec.
///
/// The reason for this is to reduce code-generation. Conversion functions that
/// are named functions should only generate the conversion loop once.
pub trait ConvertVec<Out> {
    /// Element type of the source `Vec`.
    type ItemIn;

    /// Consumes `self`, mapping each owned element through `f`.
    fn convert_owned<F: FnMut(Self::ItemIn) -> Out>(self, f: F) -> Vec<Out>;

    /// Maps each borrowed element through `f`, leaving `self` intact.
    fn convert<F: FnMut(&Self::ItemIn) -> Out>(&self, f: F) -> Vec<Out>;
}
92
93impl<T, Out> ConvertVec<Out> for Vec<T> {
94    type ItemIn = T;
95
96    fn convert_owned<F: FnMut(Self::ItemIn) -> Out>(self, f: F) -> Vec<Out> {
97        self.into_iter().map(f).collect()
98    }
99
100    fn convert<F: FnMut(&Self::ItemIn) -> Out>(&self, f: F) -> Vec<Out> {
101        self.iter().map(f).collect()
102    }
103}
104
/// Perform an in-place `Iterator::filter_map` over two vectors at the same time.
///
/// Both vectors must be of equal length (asserted). Each pair of owned
/// elements is passed to `f`; pairs mapped to `Some` are written back in
/// order, pairs mapped to `None` are dropped. Items are dropped exactly
/// once, even if `f` panics.
pub fn inplace_zip_filtermap<T, U>(
    x: &mut Vec<T>,
    y: &mut Vec<U>,
    mut f: impl FnMut(T, U) -> Option<(T, U)>,
) {
    assert_eq!(x.len(), y.len());

    let length = x.len();

    // Panic guard: owns the trailing `length` elements ending at `end`. If
    // `f` panics mid-loop, its Drop impl frees the not-yet-consumed tail.
    struct OwnedBuffer<T> {
        end: *mut T,
        length: usize,
    }

    impl<T> Drop for OwnedBuffer<T> {
        fn drop(&mut self) {
            // Read (and thereby drop) each still-owned element, back to front.
            for i in 0..self.length {
                unsafe { self.end.wrapping_sub(i + 1).read() };
            }
        }
    }

    let x_ptr = x.as_mut_ptr();
    let y_ptr = y.as_mut_ptr();

    let mut x_buf = OwnedBuffer {
        end: x_ptr.wrapping_add(length),
        length,
    };
    let mut y_buf = OwnedBuffer {
        end: y_ptr.wrapping_add(length),
        length,
    };

    // SAFETY: All items are now owned by `x_buf` and `y_buf`. Since we know that `x_buf` and
    // `y_buf` will be dropped before the vecs representing `x` and `y`, this is safe.
    unsafe {
        x.set_len(0);
        y.set_len(0);
    }

    // SAFETY:
    //
    // We know we have an exclusive reference to x and y.
    //
    // We know that `i` is always smaller than the original length, so reading at
    // offset `i` stays within the allocation. Pushing kept items back into `x`/`y`
    // never reallocates: at most `length` items are pushed and the capacity is
    // still at least `length`.
    //
    // Items are dropped exactly once, even if `f` panics.
    for i in 0..length {
        let xi = unsafe { x_ptr.wrapping_add(i).read() };
        let yi = unsafe { y_ptr.wrapping_add(i).read() };

        // Shrink the guards first: `xi`/`yi` are now owned by this frame (or by
        // `f` once it is called), so the guards must no longer cover them.
        x_buf.length -= 1;
        y_buf.length -= 1;

        // We hold the invariant here that all items that are not yet deleted are either in
        // - `xi` or `yi`
        // - `x_buf` or `y_buf`
        // - `x` or `y`
        //
        // This way if `f` ever panics, we are sure that all items are dropped exactly once.
        // Deleted items will be dropped when they are deleted.
        let result = f(xi, yi);

        if let Some((xi, yi)) = result {
            x.push(xi);
            y.push(yi);
        }
    }

    debug_assert_eq!(x_buf.length, 0);
    debug_assert_eq!(y_buf.length, 0);

    // We are safe to forget `x_buf` and `y_buf` here since they will not deallocate anything
    // anymore.
    std::mem::forget(x_buf);
    std::mem::forget(y_buf);
}
185
186pub fn with_cast_mut_vec<T: Pod, U: Pod, R, F: FnOnce(&mut Vec<U>) -> R>(
187    v: &mut Vec<T>,
188    f: F,
189) -> R {
190    let mut vu = WithDrop::new(bytemuck::cast_vec::<T, U>(core::mem::take(v)), |vu| {
191        *v = bytemuck::cast_vec::<U, T>(vu)
192    });
193    f(&mut vu)
194}