polars_utils/
vec.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
use std::mem::MaybeUninit;

use num_traits::Zero;

pub trait IntoRawParts<T> {
    fn into_raw_parts(self) -> (*mut T, usize, usize);

    // doesn't take ownership
    fn raw_parts(&self) -> (*mut T, usize, usize);
}

impl<T> IntoRawParts<T> for Vec<T> {
    fn into_raw_parts(self) -> (*mut T, usize, usize) {
        let mut me = std::mem::ManuallyDrop::new(self);
        (me.as_mut_ptr(), me.len(), me.capacity())
    }

    fn raw_parts(&self) -> (*mut T, usize, usize) {
        (self.as_ptr() as *mut T, self.len(), self.capacity())
    }
}

/// Fill current allocation if > 0
/// otherwise realloc
pub trait ResizeFaster<T: Copy> {
    fn fill_or_alloc(&mut self, new_len: usize, value: T);
}

impl<T: Copy + Zero + PartialEq> ResizeFaster<T> for Vec<T> {
    fn fill_or_alloc(&mut self, new_len: usize, value: T) {
        if self.capacity() == 0 {
            // it is faster to allocate zeroed
            // so if the capacity is 0, we alloc (value might be 0)
            *self = vec![value; new_len]
        } else {
            // first clear then reserve so that the reserve doesn't have
            // to memcpy in case it needs to realloc.
            self.clear();
            self.reserve(new_len);

            // // init the uninit values
            let spare = &mut self.spare_capacity_mut()[..new_len];
            let init_value = MaybeUninit::new(value);
            spare.fill(init_value);
            unsafe { self.set_len(new_len) }
        }
    }
}
pub trait PushUnchecked<T> {
    /// Will push an item and not check if there is enough capacity
    ///
    /// # Safety
    /// Caller must ensure the array has enough capacity to hold `T`.
    unsafe fn push_unchecked(&mut self, value: T);
}

impl<T> PushUnchecked<T> for Vec<T> {
    #[inline]
    unsafe fn push_unchecked(&mut self, value: T) {
        debug_assert!(self.capacity() > self.len());
        let end = self.as_mut_ptr().add(self.len());
        std::ptr::write(end, value);
        self.set_len(self.len() + 1);
    }
}

pub trait CapacityByFactor {
    fn with_capacity_by_factor(original_len: usize, factor: f64) -> Self;
}

impl<T> CapacityByFactor for Vec<T> {
    fn with_capacity_by_factor(original_len: usize, factor: f64) -> Self {
        let cap = (original_len as f64 * factor) as usize;
        Vec::with_capacity(cap)
    }
}

// Trait to convert a Vec.
// The reason for this is to reduce code-generation. Conversion functions that are named
// functions should only generate the conversion loop once.
pub trait ConvertVec<Out> {
    type ItemIn;

    fn convert_owned<F: FnMut(Self::ItemIn) -> Out>(self, f: F) -> Vec<Out>;

    fn convert<F: FnMut(&Self::ItemIn) -> Out>(&self, f: F) -> Vec<Out>;
}

impl<T, Out> ConvertVec<Out> for Vec<T> {
    type ItemIn = T;

    fn convert_owned<F: FnMut(Self::ItemIn) -> Out>(self, f: F) -> Vec<Out> {
        self.into_iter().map(f).collect()
    }

    fn convert<F: FnMut(&Self::ItemIn) -> Out>(&self, f: F) -> Vec<Out> {
        self.iter().map(f).collect()
    }
}

/// Perform an in-place `Iterator::filter_map` over two vectors at the same time.
pub fn inplace_zip_filtermap<T, U>(
    x: &mut Vec<T>,
    y: &mut Vec<U>,
    mut f: impl FnMut(T, U) -> Option<(T, U)>,
) {
    assert_eq!(x.len(), y.len());

    let length = x.len();

    struct OwnedBuffer<T> {
        end: *mut T,
        length: usize,
    }

    impl<T> Drop for OwnedBuffer<T> {
        fn drop(&mut self) {
            for i in 0..self.length {
                unsafe { self.end.wrapping_sub(i + 1).read() };
            }
        }
    }

    let x_ptr = x.as_mut_ptr();
    let y_ptr = y.as_mut_ptr();

    let mut x_buf = OwnedBuffer {
        end: x_ptr.wrapping_add(length),
        length,
    };
    let mut y_buf = OwnedBuffer {
        end: y_ptr.wrapping_add(length),
        length,
    };

    // SAFETY: All items are now owned by `x_buf` and `y_buf`. Since we know that `x_buf` and
    // `y_buf` will be dropped before the vecs representing `x` and `y`, this is safe.
    unsafe {
        x.set_len(0);
        y.set_len(0);
    }

    // SAFETY:
    //
    // We know we have a exclusive reference to x and y.
    //
    // We know that `i` is always smaller than `x.len()` and `y.len()`. Furthermore, we also know
    // that `i - num_deleted > 0`.
    //
    // Items are dropped exactly once, even if `f` panics.
    for i in 0..length {
        let xi = unsafe { x_ptr.wrapping_add(i).read() };
        let yi = unsafe { y_ptr.wrapping_add(i).read() };

        x_buf.length -= 1;
        y_buf.length -= 1;

        // We hold the invariant here that all items that are not yet deleted are either in
        // - `xi` or `yi`
        // - `x_buf` or `y_buf`
        // ` `x` or `y`
        //
        // This way if `f` ever panics, we are sure that all items are dropped exactly once.
        // Deleted items will be dropped when they are deleted.
        let result = f(xi, yi);

        if let Some((xi, yi)) = result {
            x.push(xi);
            y.push(yi);
        }
    }

    debug_assert_eq!(x_buf.length, 0);
    debug_assert_eq!(y_buf.length, 0);

    // We are safe to forget `x_buf` and `y_buf` here since they will not deallocate anything
    // anymore.
    std::mem::forget(x_buf);
    std::mem::forget(y_buf);
}