polars_core/chunked_array/object/
mod.rs

1#![allow(unsafe_op_in_unsafe_fn)]
2use std::any::Any;
3use std::fmt::{Debug, Display};
4use std::hash::Hash;
5
6use arrow::bitmap::Bitmap;
7use arrow::bitmap::utils::{BitmapIter, ZipValidity};
8use arrow::buffer::Buffer;
9use polars_utils::total_ord::TotalHash;
10
11use crate::prelude::*;
12
13pub mod builder;
14#[cfg(feature = "object")]
15pub(crate) mod extension;
16mod is_valid;
17mod iterator;
18pub mod registry;
19
20pub use extension::set_polars_allow_extension;
21
22#[derive(Debug, Clone)]
23pub struct ObjectArray<T>
24where
25    T: PolarsObject,
26{
27    values: Buffer<T>,
28    validity: Option<Bitmap>,
29}
30
/// Object-safe subset of the polars object interface.
///
/// Everything here takes `&self`, so the trait can be used as `dyn PolarsObjectSafe`.
pub trait PolarsObjectSafe: Any + Debug + Send + Sync + Display {
    /// Name identifying the concrete object type behind the trait object.
    fn type_name(&self) -> &'static str;

    /// Borrows the value as `&dyn Any` so that callers can downcast it.
    fn as_any(&self) -> &dyn Any;

    /// Returns an owned, boxed trait object for this value.
    fn to_boxed(&self) -> Box<dyn PolarsObjectSafe>;

    /// Compares this value with another type-erased object.
    fn equal(&self, other: &dyn PolarsObjectSafe) -> bool;
}
41
42impl PartialEq for &dyn PolarsObjectSafe {
43    fn eq(&self, other: &Self) -> bool {
44        self.equal(*other)
45    }
46}
47
48/// Values need to implement this so that they can be stored into a Series and DataFrame
49pub trait PolarsObject:
50    Any + Debug + Clone + Send + Sync + Default + Display + Hash + TotalHash + PartialEq + Eq + TotalEq
51{
52    /// This should be used as type information. Consider this a part of the type system.
53    fn type_name() -> &'static str;
54}
55
56impl<T: PolarsObject> PolarsObjectSafe for T {
57    fn type_name(&self) -> &'static str {
58        T::type_name()
59    }
60
61    fn as_any(&self) -> &dyn Any {
62        self
63    }
64
65    fn to_boxed(&self) -> Box<dyn PolarsObjectSafe> {
66        Box::new(self.clone())
67    }
68
69    fn equal(&self, other: &dyn PolarsObjectSafe) -> bool {
70        let Some(other) = other.as_any().downcast_ref::<T>() else {
71            return false;
72        };
73        self == other
74    }
75}
76
/// Iterator over the raw values of an [`ObjectArray`]; it is a plain slice iterator
/// and does not consult the validity bitmap.
pub type ObjectValueIter<'a, T> = std::slice::Iter<'a, T>;
78
79impl<T> ObjectArray<T>
80where
81    T: PolarsObject,
82{
83    pub fn values_iter(&self) -> ObjectValueIter<'_, T> {
84        self.values.iter()
85    }
86
87    /// Returns an iterator of `Option<&T>` over every element of this array.
88    pub fn iter(&self) -> ZipValidity<&T, ObjectValueIter<'_, T>, BitmapIter> {
89        ZipValidity::new_with_validity(self.values_iter(), self.validity.as_ref())
90    }
91
92    /// Get a value at a certain index location
93    pub fn value(&self, index: usize) -> &T {
94        &self.values[index]
95    }
96
97    pub fn get(&self, index: usize) -> Option<&T> {
98        if self.is_valid(index) {
99            Some(unsafe { self.value_unchecked(index) })
100        } else {
101            None
102        }
103    }
104
105    /// Get a value at a certain index location
106    ///
107    /// # Safety
108    ///
109    /// This does not any bound checks. The caller needs to ensure the index is within
110    /// the size of the array.
111    pub unsafe fn value_unchecked(&self, index: usize) -> &T {
112        self.values.get_unchecked(index)
113    }
114
115    /// Check validity
116    ///
117    /// # Safety
118    /// No bounds checks
119    #[inline]
120    pub unsafe fn is_valid_unchecked(&self, i: usize) -> bool {
121        if let Some(b) = &self.validity {
122            b.get_bit_unchecked(i)
123        } else {
124            true
125        }
126    }
127
128    /// Check validity
129    ///
130    /// # Safety
131    /// No bounds checks
132    #[inline]
133    pub unsafe fn is_null_unchecked(&self, i: usize) -> bool {
134        !self.is_valid_unchecked(i)
135    }
136
137    /// Returns this array with a new validity.
138    /// # Panic
139    /// Panics iff `validity.len() != self.len()`.
140    #[must_use]
141    #[inline]
142    pub fn with_validity(mut self, validity: Option<Bitmap>) -> Self {
143        self.set_validity(validity);
144        self
145    }
146
147    /// Sets the validity of this array.
148    /// # Panics
149    /// This function panics iff `validity.len() != self.len()`.
150    #[inline]
151    pub fn set_validity(&mut self, validity: Option<Bitmap>) {
152        if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
153            panic!("validity must be equal to the array's length")
154        }
155        self.validity = validity;
156    }
157}
158
159impl<T> Array for ObjectArray<T>
160where
161    T: PolarsObject,
162{
163    fn as_any(&self) -> &dyn Any {
164        self
165    }
166
167    fn dtype(&self) -> &ArrowDataType {
168        &ArrowDataType::FixedSizeBinary(size_of::<T>())
169    }
170
171    fn slice(&mut self, offset: usize, length: usize) {
172        assert!(
173            offset + length <= self.len(),
174            "the offset of the new Buffer cannot exceed the existing length"
175        );
176        unsafe { self.slice_unchecked(offset, length) }
177    }
178
179    unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
180        self.validity = self
181            .validity
182            .take()
183            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
184            .filter(|bitmap| bitmap.unset_bits() > 0);
185        self.values.slice_unchecked(offset, length);
186    }
187
188    fn split_at_boxed(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>) {
189        let (lhs, rhs) = Splitable::split_at(self, offset);
190        (Box::new(lhs), Box::new(rhs))
191    }
192
193    unsafe fn split_at_boxed_unchecked(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>) {
194        let (lhs, rhs) = unsafe { Splitable::split_at_unchecked(self, offset) };
195        (Box::new(lhs), Box::new(rhs))
196    }
197
198    fn len(&self) -> usize {
199        self.values.len()
200    }
201
202    fn validity(&self) -> Option<&Bitmap> {
203        self.validity.as_ref()
204    }
205
206    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
207        Box::new(self.clone().with_validity(validity))
208    }
209
210    fn to_boxed(&self) -> Box<dyn Array> {
211        Box::new(self.clone())
212    }
213
214    fn as_any_mut(&mut self) -> &mut dyn Any {
215        unimplemented!()
216    }
217
218    fn null_count(&self) -> usize {
219        match &self.validity {
220            None => 0,
221            Some(validity) => validity.unset_bits(),
222        }
223    }
224}
225
226impl<T: PolarsObject> Splitable for ObjectArray<T> {
227    fn check_bound(&self, offset: usize) -> bool {
228        offset <= self.len()
229    }
230
231    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
232        let (left_values, right_values) = unsafe { self.values.split_at_unchecked(offset) };
233        let (left_validity, right_validity) = unsafe { self.validity.split_at_unchecked(offset) };
234        (
235            Self {
236                values: left_values,
237                validity: left_validity,
238            },
239            Self {
240                values: right_values,
241                validity: right_validity,
242            },
243        )
244    }
245}
246
247impl<T: PolarsObject> StaticArray for ObjectArray<T> {
248    type ValueT<'a> = &'a T;
249    type ZeroableValueT<'a> = Option<&'a T>;
250    type ValueIterT<'a> = ObjectValueIter<'a, T>;
251
252    #[inline]
253    unsafe fn value_unchecked(&self, idx: usize) -> Self::ValueT<'_> {
254        self.value_unchecked(idx)
255    }
256
257    fn values_iter(&self) -> Self::ValueIterT<'_> {
258        self.values_iter()
259    }
260
261    fn iter(&self) -> ZipValidity<Self::ValueT<'_>, Self::ValueIterT<'_>, BitmapIter> {
262        self.iter()
263    }
264
265    fn with_validity_typed(self, validity: Option<Bitmap>) -> Self {
266        self.with_validity(validity)
267    }
268
269    fn full_null(length: usize, _dtype: ArrowDataType) -> Self {
270        ObjectArray {
271            values: vec![T::default(); length].into(),
272            validity: Some(Bitmap::new_with_value(false, length)),
273        }
274    }
275}
276
277impl<T: PolarsObject> ParameterFreeDtypeStaticArray for ObjectArray<T> {
278    fn get_dtype() -> ArrowDataType {
279        ArrowDataType::FixedSizeBinary(size_of::<T>())
280    }
281}
282
283impl<T> ObjectChunked<T>
284where
285    T: PolarsObject,
286{
287    /// Get a hold to an object that can be formatted or downcasted via the Any trait.
288    ///
289    /// # Safety
290    ///
291    /// No bounds checks
292    pub unsafe fn get_object_unchecked(&self, index: usize) -> Option<&dyn PolarsObjectSafe> {
293        let (chunk_idx, idx) = self.index_to_chunked_index(index);
294        self.get_object_chunked_unchecked(chunk_idx, idx)
295    }
296
297    pub(crate) unsafe fn get_object_chunked_unchecked(
298        &self,
299        chunk: usize,
300        index: usize,
301    ) -> Option<&dyn PolarsObjectSafe> {
302        let chunks = self.downcast_chunks();
303        let arr = chunks.get_unchecked(chunk);
304        if arr.is_valid_unchecked(index) {
305            Some(arr.value(index))
306        } else {
307            None
308        }
309    }
310
311    /// Get a hold to an object that can be formatted or downcasted via the Any trait.
312    pub fn get_object(&self, index: usize) -> Option<&dyn PolarsObjectSafe> {
313        if index < self.len() {
314            unsafe { self.get_object_unchecked(index) }
315        } else {
316            None
317        }
318    }
319}
320
321impl<T: PolarsObject> From<Vec<T>> for ObjectArray<T> {
322    fn from(values: Vec<T>) -> Self {
323        Self {
324            values: values.into(),
325            validity: None,
326        }
327    }
328}