polars_core/chunked_array/ops/
downcast.rs

1#![allow(unsafe_op_in_unsafe_fn)]
2use std::marker::PhantomData;
3
4use arrow::array::*;
5use arrow::compute::utils::combine_validities_and;
6
7use crate::prelude::*;
8use crate::utils::{index_to_chunked_index, index_to_chunked_index_rev};
9
/// A typed view over a `ChunkedArray`'s chunks.
///
/// Borrows the type-erased `&[ArrayRef]` slice and hands out `&T`
/// references via unchecked downcasts; `T` exists only at the type
/// level (`PhantomData`), so the view itself stores no extra data.
pub struct Chunks<'a, T> {
    // The underlying type-erased chunk list.
    chunks: &'a [ArrayRef],
    // Records the concrete array type the chunks are assumed to be.
    phantom: PhantomData<T>,
}
14
15impl<'a, T> Chunks<'a, T> {
16    fn new(chunks: &'a [ArrayRef]) -> Self {
17        Chunks {
18            chunks,
19            phantom: PhantomData,
20        }
21    }
22
23    #[inline]
24    pub fn get(&self, index: usize) -> Option<&'a T> {
25        self.chunks.get(index).map(|arr| {
26            let arr = &**arr;
27            unsafe { &*(arr as *const dyn Array as *const T) }
28        })
29    }
30
31    #[inline]
32    pub unsafe fn get_unchecked(&self, index: usize) -> &'a T {
33        let arr = self.chunks.get_unchecked(index);
34        let arr = &**arr;
35        &*(arr as *const dyn Array as *const T)
36    }
37
38    pub fn len(&self) -> usize {
39        self.chunks.len()
40    }
41
42    #[inline]
43    pub fn last(&self) -> Option<&'a T> {
44        self.chunks.last().map(|arr| {
45            let arr = &**arr;
46            unsafe { &*(arr as *const dyn Array as *const T) }
47        })
48    }
49}
50
#[doc(hidden)]
impl<T: PolarsDataType> ChunkedArray<T> {
    /// Consume the array and yield each chunk as an owned, concretely-typed
    /// `T::Array` (the boxed `dyn Array` is reinterpreted in place).
    #[inline]
    pub fn downcast_into_iter(mut self) -> impl DoubleEndedIterator<Item = T::Array> {
        // Take ownership of the chunk list, leaving `self.chunks` empty.
        let chunks = std::mem::take(&mut self.chunks);
        chunks.into_iter().map(|arr| {
            // SAFETY: T::Array guarantees this is correct.
            let ptr = Box::into_raw(arr).cast::<T::Array>();
            unsafe { *Box::from_raw(ptr) }
        })
    }

    /// Iterate over the chunks as concretely-typed `&T::Array` references.
    #[inline]
    pub fn downcast_iter(&self) -> impl DoubleEndedIterator<Item = &T::Array> {
        self.chunks.iter().map(|arr| {
            // SAFETY: T::Array guarantees this is correct.
            let arr = &**arr;
            unsafe { &*(arr as *const dyn Array as *const T::Array) }
        })
    }

    /// Iterate over the chunks as contiguous value slices.
    ///
    /// Returns `None` when the array contains any nulls, or when the first
    /// chunk has no contiguous slice representation.
    ///
    /// NOTE(review): `next().unwrap()` relies on there being at least one
    /// chunk — presumably a ChunkedArray invariant; panics otherwise.
    #[inline]
    pub fn downcast_slices(&self) -> Option<impl DoubleEndedIterator<Item = &[T::Physical<'_>]>> {
        if self.null_count() != 0 {
            return None;
        }
        // Probe the first chunk only; the later `unwrap` per chunk assumes
        // all chunks share the same representation — TODO confirm.
        let arr = self.downcast_iter().next().unwrap();
        if arr.as_slice().is_some() {
            Some(self.downcast_iter().map(|arr| arr.as_slice().unwrap()))
        } else {
            None
        }
    }

    /// Iterate over the chunks as mutable `&mut T::Array` references.
    ///
    /// # Safety
    /// The caller must ensure:
    ///     * the length remains correct.
    ///     * the flags (sorted, etc) remain correct.
    #[inline]
    pub unsafe fn downcast_iter_mut(&mut self) -> impl DoubleEndedIterator<Item = &mut T::Array> {
        self.chunks.iter_mut().map(|arr| {
            // SAFETY: T::Array guarantees this is correct.
            let arr = &mut **arr;
            &mut *(arr as *mut dyn Array as *mut T::Array)
        })
    }

    /// A typed, indexable view over the chunk list.
    #[inline]
    pub fn downcast_chunks(&self) -> Chunks<'_, T::Array> {
        Chunks::new(&self.chunks)
    }

    /// Get chunk `idx` as `&T::Array`, or `None` if out of bounds.
    #[inline]
    pub fn downcast_get(&self, idx: usize) -> Option<&T::Array> {
        let arr = self.chunks.get(idx)?;
        // SAFETY: T::Array guarantees this is correct.
        let arr = &**arr;
        unsafe { Some(&*(arr as *const dyn Array as *const T::Array)) }
    }

    /// Get the single chunk as `&T::Array`.
    ///
    /// Panics unless the array holds exactly one chunk.
    #[inline]
    pub fn downcast_as_array(&self) -> &T::Array {
        assert_eq!(self.chunks.len(), 1);
        self.downcast_get(0).unwrap()
    }

    #[inline]
    /// Get chunk `idx` as `&T::Array` without bounds checking.
    ///
    /// # Safety
    /// It is up to the caller to ensure the chunk idx is in-bounds
    pub unsafe fn downcast_get_unchecked(&self, idx: usize) -> &T::Array {
        let arr = self.chunks.get_unchecked(idx);
        // SAFETY: T::Array guarantees this is correct.
        let arr = &**arr;
        unsafe { &*(arr as *const dyn Array as *const T::Array) }
    }

    /// Get the index of the chunk and the index of the value in that chunk.
    ///
    /// Note: in the single-chunk fast path, an out-of-bounds `index` yields
    /// the past-the-end pair `(1, index - len)` rather than panicking.
    #[inline]
    pub(crate) fn index_to_chunked_index(&self, index: usize) -> (usize, usize) {
        // Fast path.
        if self.chunks.len() == 1 {
            // SAFETY: chunks.len() == 1 guarantees this is correct.
            let len = unsafe { self.chunks.get_unchecked(0).len() };
            return if index < len {
                (0, index)
            } else {
                (1, index - len)
            };
        }
        let chunk_lens = self.chunk_lengths();
        let len = self.len();
        // Scan from whichever end is closer to `index`.
        if index <= len / 2 {
            // Access from lhs.
            index_to_chunked_index(chunk_lens, index)
        } else {
            // Access from rhs.
            let index_from_back = len - index;
            index_to_chunked_index_rev(chunk_lens.rev(), index_from_back, self.chunks.len())
        }
    }

    /// AND-combine each chunk's validity bitmap with the validity of the
    /// corresponding array in `chunks`, in place.
    ///
    /// # Panics
    /// Panics if chunks don't align
    pub fn merge_validities(&mut self, chunks: &[ArrayRef]) {
        assert_eq!(chunks.len(), self.chunks.len());
        unsafe {
            for (arr, other) in self.chunks_mut().iter_mut().zip(chunks) {
                let validity = combine_validities_and(arr.validity(), other.validity());
                *arr = arr.with_validity(validity);
            }
        }
        // Presumably refreshes cached length/null-count after the validity
        // change — TODO confirm against `compute_len`'s definition.
        self.compute_len();
    }
}
164}