polars_utils/vec.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
use std::mem::MaybeUninit;
use num_traits::Zero;
pub trait IntoRawParts<T> {
fn into_raw_parts(self) -> (*mut T, usize, usize);
// doesn't take ownership
fn raw_parts(&self) -> (*mut T, usize, usize);
}
impl<T> IntoRawParts<T> for Vec<T> {
fn into_raw_parts(self) -> (*mut T, usize, usize) {
let mut me = std::mem::ManuallyDrop::new(self);
(me.as_mut_ptr(), me.len(), me.capacity())
}
fn raw_parts(&self) -> (*mut T, usize, usize) {
(self.as_ptr() as *mut T, self.len(), self.capacity())
}
}
/// Fill current allocation if > 0
/// otherwise realloc
pub trait ResizeFaster<T: Copy> {
fn fill_or_alloc(&mut self, new_len: usize, value: T);
}
impl<T: Copy + Zero + PartialEq> ResizeFaster<T> for Vec<T> {
fn fill_or_alloc(&mut self, new_len: usize, value: T) {
if self.capacity() == 0 {
// it is faster to allocate zeroed
// so if the capacity is 0, we alloc (value might be 0)
*self = vec![value; new_len]
} else {
// first clear then reserve so that the reserve doesn't have
// to memcpy in case it needs to realloc.
self.clear();
self.reserve(new_len);
// // init the uninit values
let spare = &mut self.spare_capacity_mut()[..new_len];
let init_value = MaybeUninit::new(value);
spare.fill(init_value);
unsafe { self.set_len(new_len) }
}
}
}
pub trait PushUnchecked<T> {
/// Will push an item and not check if there is enough capacity
///
/// # Safety
/// Caller must ensure the array has enough capacity to hold `T`.
unsafe fn push_unchecked(&mut self, value: T);
}
impl<T> PushUnchecked<T> for Vec<T> {
#[inline]
unsafe fn push_unchecked(&mut self, value: T) {
debug_assert!(self.capacity() > self.len());
let end = self.as_mut_ptr().add(self.len());
std::ptr::write(end, value);
self.set_len(self.len() + 1);
}
}
pub trait CapacityByFactor {
fn with_capacity_by_factor(original_len: usize, factor: f64) -> Self;
}
impl<T> CapacityByFactor for Vec<T> {
fn with_capacity_by_factor(original_len: usize, factor: f64) -> Self {
let cap = (original_len as f64 * factor) as usize;
Vec::with_capacity(cap)
}
}
// Trait to convert a Vec.
// The reason for this is to reduce code-generation. Conversion functions that are named
// functions should only generate the conversion loop once.
pub trait ConvertVec<Out> {
type ItemIn;
fn convert_owned<F: FnMut(Self::ItemIn) -> Out>(self, f: F) -> Vec<Out>;
fn convert<F: FnMut(&Self::ItemIn) -> Out>(&self, f: F) -> Vec<Out>;
}
impl<T, Out> ConvertVec<Out> for Vec<T> {
type ItemIn = T;
fn convert_owned<F: FnMut(Self::ItemIn) -> Out>(self, f: F) -> Vec<Out> {
self.into_iter().map(f).collect()
}
fn convert<F: FnMut(&Self::ItemIn) -> Out>(&self, f: F) -> Vec<Out> {
self.iter().map(f).collect()
}
}
/// Perform an in-place `Iterator::filter_map` over two vectors at the same time.
pub fn inplace_zip_filtermap<T, U>(
x: &mut Vec<T>,
y: &mut Vec<U>,
mut f: impl FnMut(T, U) -> Option<(T, U)>,
) {
assert_eq!(x.len(), y.len());
let length = x.len();
struct OwnedBuffer<T> {
end: *mut T,
length: usize,
}
impl<T> Drop for OwnedBuffer<T> {
fn drop(&mut self) {
for i in 0..self.length {
unsafe { self.end.wrapping_sub(i + 1).read() };
}
}
}
let x_ptr = x.as_mut_ptr();
let y_ptr = y.as_mut_ptr();
let mut x_buf = OwnedBuffer {
end: x_ptr.wrapping_add(length),
length,
};
let mut y_buf = OwnedBuffer {
end: y_ptr.wrapping_add(length),
length,
};
// SAFETY: All items are now owned by `x_buf` and `y_buf`. Since we know that `x_buf` and
// `y_buf` will be dropped before the vecs representing `x` and `y`, this is safe.
unsafe {
x.set_len(0);
y.set_len(0);
}
// SAFETY:
//
// We know we have a exclusive reference to x and y.
//
// We know that `i` is always smaller than `x.len()` and `y.len()`. Furthermore, we also know
// that `i - num_deleted > 0`.
//
// Items are dropped exactly once, even if `f` panics.
for i in 0..length {
let xi = unsafe { x_ptr.wrapping_add(i).read() };
let yi = unsafe { y_ptr.wrapping_add(i).read() };
x_buf.length -= 1;
y_buf.length -= 1;
// We hold the invariant here that all items that are not yet deleted are either in
// - `xi` or `yi`
// - `x_buf` or `y_buf`
// ` `x` or `y`
//
// This way if `f` ever panics, we are sure that all items are dropped exactly once.
// Deleted items will be dropped when they are deleted.
let result = f(xi, yi);
if let Some((xi, yi)) = result {
x.push(xi);
y.push(yi);
}
}
debug_assert_eq!(x_buf.length, 0);
debug_assert_eq!(y_buf.length, 0);
// We are safe to forget `x_buf` and `y_buf` here since they will not deallocate anything
// anymore.
std::mem::forget(x_buf);
std::mem::forget(y_buf);
}