Skip to main content

polars_utils/
index.rs

1#![allow(unsafe_op_in_unsafe_fn)]
2use std::fmt::{Debug, Formatter};
3
4use polars_error::{PolarsResult, polars_ensure};
5
6use crate::nulls::IsNull;
7
8#[cfg(not(feature = "bigidx"))]
9pub type IdxSize = u32;
10#[cfg(feature = "bigidx")]
11pub type IdxSize = u64;
12
13/// Avoids clippy::unnecessary_cast when compiling with bigidx enabled.
14#[inline]
15pub const fn idxsize_to_u64(
16    #[cfg(feature = "bigidx")] val: u64,
17    #[cfg(not(feature = "bigidx"))] val: u32,
18) -> u64 {
19    #[cfg(feature = "bigidx")]
20    {
21        val
22    }
23    #[cfg(not(feature = "bigidx"))]
24    {
25        val as u64
26    }
27}
28
29#[cfg(not(feature = "bigidx"))]
30pub type NonZeroIdxSize = std::num::NonZeroU32;
31#[cfg(feature = "bigidx")]
32pub type NonZeroIdxSize = std::num::NonZeroU64;
33
34#[cfg(not(feature = "bigidx"))]
35pub type AtomicIdxSize = std::sync::atomic::AtomicU32;
36#[cfg(feature = "bigidx")]
37pub type AtomicIdxSize = std::sync::atomic::AtomicU64;
38
39#[derive(Clone, Copy)]
40#[repr(transparent)]
41pub struct NullableIdxSize {
42    pub inner: IdxSize,
43}
44
45impl PartialEq<Self> for NullableIdxSize {
46    fn eq(&self, other: &Self) -> bool {
47        self.inner == other.inner
48    }
49}
50
51impl Eq for NullableIdxSize {}
52
53unsafe impl bytemuck::Zeroable for NullableIdxSize {}
54unsafe impl bytemuck::AnyBitPattern for NullableIdxSize {}
55unsafe impl bytemuck::NoUninit for NullableIdxSize {}
56
57impl Debug for NullableIdxSize {
58    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
59        write!(f, "{:?}", self.inner)
60    }
61}
62
63impl NullableIdxSize {
64    #[inline(always)]
65    pub fn is_null_idx(&self) -> bool {
66        // The left/right join maintain_order algorithms depend on the special value for sorting
67        self.inner == IdxSize::MAX
68    }
69
70    #[inline(always)]
71    pub const fn null() -> Self {
72        Self {
73            inner: IdxSize::MAX,
74        }
75    }
76
77    #[inline(always)]
78    pub fn idx(&self) -> IdxSize {
79        self.inner
80    }
81
82    #[inline(always)]
83    pub fn to_opt(&self) -> Option<IdxSize> {
84        if self.is_null_idx() {
85            None
86        } else {
87            Some(self.idx())
88        }
89    }
90}
91
92impl From<IdxSize> for NullableIdxSize {
93    #[inline(always)]
94    fn from(value: IdxSize) -> Self {
95        Self { inner: value }
96    }
97}
98
99pub trait Bounded {
100    fn len(&self) -> usize;
101
102    fn is_empty(&self) -> bool {
103        self.len() == 0
104    }
105}
106
107pub trait NullCount {
108    fn null_count(&self) -> usize {
109        0
110    }
111}
112
113impl<T: NullCount> NullCount for &T {
114    fn null_count(&self) -> usize {
115        (*self).null_count()
116    }
117}
118
119impl<T> Bounded for &[T] {
120    fn len(&self) -> usize {
121        <[T]>::len(self)
122    }
123}
124
125impl<T> NullCount for &[T] {
126    fn null_count(&self) -> usize {
127        0
128    }
129}
130
131pub trait Indexable {
132    type Item: IsNull;
133
134    fn get(&self, i: usize) -> Self::Item;
135
136    /// # Safety
137    /// Doesn't do any bound checks.
138    unsafe fn get_unchecked(&self, i: usize) -> Self::Item;
139}
140
141impl<T: Copy + IsNull> Indexable for &[T] {
142    type Item = T;
143
144    fn get(&self, i: usize) -> Self::Item {
145        self[i]
146    }
147
148    /// # Safety
149    /// Doesn't do any bound checks.
150    unsafe fn get_unchecked(&self, i: usize) -> Self::Item {
151        *<[T]>::get_unchecked(self, i)
152    }
153}
154
155pub fn check_bounds(idx: &[IdxSize], len: IdxSize) -> PolarsResult<()> {
156    // We iterate in large uninterrupted chunks to help auto-vectorization.
157    let Some(max_idx) = idx.iter().copied().max() else {
158        return Ok(());
159    };
160
161    polars_ensure!(max_idx < len, OutOfBounds: "indices are out of bounds");
162    Ok(())
163}
164
165pub trait ToIdx {
166    fn to_idx(self, len: u64) -> IdxSize;
167}
168
169macro_rules! impl_to_idx {
170    ($ty:ty) => {
171        impl ToIdx for $ty {
172            #[inline]
173            fn to_idx(self, _len: u64) -> IdxSize {
174                self as IdxSize
175            }
176        }
177    };
178    ($ty:ty, $ity:ty) => {
179        impl ToIdx for $ty {
180            #[inline]
181            fn to_idx(self, len: u64) -> IdxSize {
182                let idx = self as $ity;
183                if idx < 0 {
184                    (idx + len as $ity) as IdxSize
185                } else {
186                    idx as IdxSize
187                }
188            }
189        }
190    };
191}
192
193impl_to_idx!(u8);
194impl_to_idx!(u16);
195impl_to_idx!(u32);
196impl_to_idx!(u64);
197impl_to_idx!(i8, i16);
198impl_to_idx!(i16, i32);
199impl_to_idx!(i32, i64);
200impl_to_idx!(i64, i64);
201
202// Allows for 2^24 (~16M) chunks
203// Leaves 2^40 (~1T) rows per chunk
204const DEFAULT_CHUNK_BITS: u64 = 24;
205
206#[derive(Clone, Copy)]
207#[repr(transparent)]
208pub struct ChunkId<const CHUNK_BITS: u64 = DEFAULT_CHUNK_BITS> {
209    swizzled: u64,
210}
211
212impl<const CHUNK_BITS: u64> Debug for ChunkId<CHUNK_BITS> {
213    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
214        if self.is_null() {
215            write!(f, "NULL")
216        } else {
217            let (chunk, row) = self.extract();
218            write!(f, "({chunk}, {row})")
219        }
220    }
221}
222
223impl<const CHUNK_BITS: u64> ChunkId<CHUNK_BITS> {
224    #[inline(always)]
225    pub const fn null() -> Self {
226        Self { swizzled: u64::MAX }
227    }
228
229    #[inline(always)]
230    pub fn is_null(&self) -> bool {
231        self.swizzled == u64::MAX
232    }
233
234    #[inline(always)]
235    #[allow(clippy::unnecessary_cast)]
236    pub fn store(chunk: IdxSize, row: IdxSize) -> Self {
237        debug_assert!(chunk < !(u64::MAX << CHUNK_BITS) as IdxSize);
238        let swizzled = ((row as u64) << CHUNK_BITS) | chunk as u64;
239
240        Self { swizzled }
241    }
242
243    #[inline(always)]
244    #[allow(clippy::unnecessary_cast)]
245    pub fn extract(self) -> (IdxSize, IdxSize) {
246        let row = (self.swizzled >> CHUNK_BITS) as IdxSize;
247        let mask = (1u64 << CHUNK_BITS) - 1;
248        let chunk = (self.swizzled & mask) as IdxSize;
249        (chunk, row)
250    }
251
252    #[inline(always)]
253    pub fn inner_mut(&mut self) -> &mut u64 {
254        &mut self.swizzled
255    }
256
257    pub fn from_inner(inner: u64) -> Self {
258        Self { swizzled: inner }
259    }
260
261    pub fn into_inner(self) -> u64 {
262        self.swizzled
263    }
264}
265
266#[cfg(test)]
267mod test {
268    use super::*;
269
270    #[test]
271    fn test_chunk_idx() {
272        let chunk = 213908;
273        let row = 813457;
274
275        let ci: ChunkId = ChunkId::store(chunk, row);
276        let (c, r) = ci.extract();
277
278        assert_eq!(c, chunk);
279        assert_eq!(r, row);
280    }
281}