polars_utils/
index.rs

1#![allow(unsafe_op_in_unsafe_fn)]
2use std::fmt::{Debug, Formatter};
3
4use polars_error::{PolarsResult, polars_ensure};
5
6use crate::nulls::IsNull;
7
8#[cfg(not(feature = "bigidx"))]
9pub type IdxSize = u32;
10#[cfg(feature = "bigidx")]
11pub type IdxSize = u64;
12
13#[cfg(not(feature = "bigidx"))]
14pub type NonZeroIdxSize = std::num::NonZeroU32;
15#[cfg(feature = "bigidx")]
16pub type NonZeroIdxSize = std::num::NonZeroU64;
17
18#[cfg(not(feature = "bigidx"))]
19pub type AtomicIdxSize = std::sync::atomic::AtomicU32;
20#[cfg(feature = "bigidx")]
21pub type AtomicIdxSize = std::sync::atomic::AtomicU64;
22
23#[derive(Clone, Copy)]
24#[repr(transparent)]
25pub struct NullableIdxSize {
26    pub inner: IdxSize,
27}
28
29impl PartialEq<Self> for NullableIdxSize {
30    fn eq(&self, other: &Self) -> bool {
31        self.inner == other.inner
32    }
33}
34
35impl Eq for NullableIdxSize {}
36
37unsafe impl bytemuck::Zeroable for NullableIdxSize {}
38unsafe impl bytemuck::AnyBitPattern for NullableIdxSize {}
39unsafe impl bytemuck::NoUninit for NullableIdxSize {}
40
41impl Debug for NullableIdxSize {
42    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
43        write!(f, "{:?}", self.inner)
44    }
45}
46
47impl NullableIdxSize {
48    #[inline(always)]
49    pub fn is_null_idx(&self) -> bool {
50        // The left/right join maintain_order algorithms depend on the special value for sorting
51        self.inner == IdxSize::MAX
52    }
53
54    #[inline(always)]
55    pub const fn null() -> Self {
56        Self {
57            inner: IdxSize::MAX,
58        }
59    }
60
61    #[inline(always)]
62    pub fn idx(&self) -> IdxSize {
63        self.inner
64    }
65
66    #[inline(always)]
67    pub fn to_opt(&self) -> Option<IdxSize> {
68        if self.is_null_idx() {
69            None
70        } else {
71            Some(self.idx())
72        }
73    }
74}
75
76impl From<IdxSize> for NullableIdxSize {
77    #[inline(always)]
78    fn from(value: IdxSize) -> Self {
79        Self { inner: value }
80    }
81}
82
83pub trait Bounded {
84    fn len(&self) -> usize;
85
86    fn is_empty(&self) -> bool {
87        self.len() == 0
88    }
89}
90
91pub trait NullCount {
92    fn null_count(&self) -> usize {
93        0
94    }
95}
96
97impl<T: NullCount> NullCount for &T {
98    fn null_count(&self) -> usize {
99        (*self).null_count()
100    }
101}
102
103impl<T> Bounded for &[T] {
104    fn len(&self) -> usize {
105        <[T]>::len(self)
106    }
107}
108
109impl<T> NullCount for &[T] {
110    fn null_count(&self) -> usize {
111        0
112    }
113}
114
115pub trait Indexable {
116    type Item: IsNull;
117
118    fn get(&self, i: usize) -> Self::Item;
119
120    /// # Safety
121    /// Doesn't do any bound checks.
122    unsafe fn get_unchecked(&self, i: usize) -> Self::Item;
123}
124
125impl<T: Copy + IsNull> Indexable for &[T] {
126    type Item = T;
127
128    fn get(&self, i: usize) -> Self::Item {
129        self[i]
130    }
131
132    /// # Safety
133    /// Doesn't do any bound checks.
134    unsafe fn get_unchecked(&self, i: usize) -> Self::Item {
135        *<[T]>::get_unchecked(self, i)
136    }
137}
138
139pub fn check_bounds(idx: &[IdxSize], len: IdxSize) -> PolarsResult<()> {
140    // We iterate in large uninterrupted chunks to help auto-vectorization.
141    let Some(max_idx) = idx.iter().copied().max() else {
142        return Ok(());
143    };
144
145    polars_ensure!(max_idx < len, OutOfBounds: "indices are out of bounds");
146    Ok(())
147}
148
149pub trait ToIdx {
150    fn to_idx(self, len: u64) -> IdxSize;
151}
152
153macro_rules! impl_to_idx {
154    ($ty:ty) => {
155        impl ToIdx for $ty {
156            #[inline]
157            fn to_idx(self, _len: u64) -> IdxSize {
158                self as IdxSize
159            }
160        }
161    };
162    ($ty:ty, $ity:ty) => {
163        impl ToIdx for $ty {
164            #[inline]
165            fn to_idx(self, len: u64) -> IdxSize {
166                let idx = self as $ity;
167                if idx < 0 {
168                    (idx + len as $ity) as IdxSize
169                } else {
170                    idx as IdxSize
171                }
172            }
173        }
174    };
175}
176
177impl_to_idx!(u8);
178impl_to_idx!(u16);
179impl_to_idx!(u32);
180impl_to_idx!(u64);
181impl_to_idx!(i8, i16);
182impl_to_idx!(i16, i32);
183impl_to_idx!(i32, i64);
184impl_to_idx!(i64, i64);
185
186// Allows for 2^24 (~16M) chunks
187// Leaves 2^40 (~1T) rows per chunk
188const DEFAULT_CHUNK_BITS: u64 = 24;
189
190#[derive(Clone, Copy)]
191#[repr(transparent)]
192pub struct ChunkId<const CHUNK_BITS: u64 = DEFAULT_CHUNK_BITS> {
193    swizzled: u64,
194}
195
196impl<const CHUNK_BITS: u64> Debug for ChunkId<CHUNK_BITS> {
197    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
198        if self.is_null() {
199            write!(f, "NULL")
200        } else {
201            let (chunk, row) = self.extract();
202            write!(f, "({chunk}, {row})")
203        }
204    }
205}
206
207impl<const CHUNK_BITS: u64> ChunkId<CHUNK_BITS> {
208    #[inline(always)]
209    pub const fn null() -> Self {
210        Self { swizzled: u64::MAX }
211    }
212
213    #[inline(always)]
214    pub fn is_null(&self) -> bool {
215        self.swizzled == u64::MAX
216    }
217
218    #[inline(always)]
219    #[allow(clippy::unnecessary_cast)]
220    pub fn store(chunk: IdxSize, row: IdxSize) -> Self {
221        debug_assert!(chunk < !(u64::MAX << CHUNK_BITS) as IdxSize);
222        let swizzled = ((row as u64) << CHUNK_BITS) | chunk as u64;
223
224        Self { swizzled }
225    }
226
227    #[inline(always)]
228    #[allow(clippy::unnecessary_cast)]
229    pub fn extract(self) -> (IdxSize, IdxSize) {
230        let row = (self.swizzled >> CHUNK_BITS) as IdxSize;
231        let mask = (1u64 << CHUNK_BITS) - 1;
232        let chunk = (self.swizzled & mask) as IdxSize;
233        (chunk, row)
234    }
235
236    #[inline(always)]
237    pub fn inner_mut(&mut self) -> &mut u64 {
238        &mut self.swizzled
239    }
240
241    pub fn from_inner(inner: u64) -> Self {
242        Self { swizzled: inner }
243    }
244
245    pub fn into_inner(self) -> u64 {
246        self.swizzled
247    }
248}
249
250#[cfg(test)]
251mod test {
252    use super::*;
253
254    #[test]
255    fn test_chunk_idx() {
256        let chunk = 213908;
257        let row = 813457;
258
259        let ci: ChunkId = ChunkId::store(chunk, row);
260        let (c, r) = ci.extract();
261
262        assert_eq!(c, chunk);
263        assert_eq!(r, row);
264    }
265}