polars_core/chunked_array/object/
mod.rs

1#![allow(unsafe_op_in_unsafe_fn)]
2use std::any::Any;
3use std::fmt::{Debug, Display};
4use std::hash::Hash;
5
6use arrow::bitmap::Bitmap;
7use arrow::bitmap::utils::{BitmapIter, ZipValidity};
8use arrow::buffer::Buffer;
9use polars_utils::total_ord::TotalHash;
10
11use crate::prelude::*;
12
13pub mod builder;
14#[cfg(feature = "object")]
15pub(crate) mod extension;
16mod is_valid;
17mod iterator;
18pub mod registry;
19
20pub use extension::set_polars_allow_extension;
21
22#[derive(Debug, Clone)]
23pub struct ObjectArray<T>
24where
25    T: PolarsObject,
26{
27    dtype: ArrowDataType,
28    values: Buffer<T>,
29    validity: Option<Bitmap>,
30}
31
/// Object-safe counterpart of `PolarsObject`.
///
/// Every method takes `&self`, so values can be handled as
/// `&dyn PolarsObjectSafe` / `Box<dyn PolarsObjectSafe>` without knowing the
/// concrete type.
pub trait PolarsObjectSafe: Any + Send + Sync + Debug + Display {
    /// Name identifying the concrete type behind the trait object.
    fn type_name(&self) -> &'static str;

    /// Exposes the value as `&dyn Any` so callers can downcast it.
    fn as_any(&self) -> &dyn Any;

    /// Produces an owned, boxed trait object from this value.
    fn to_boxed(&self) -> Box<dyn PolarsObjectSafe>;

    /// Dynamic equality against another trait object.
    fn equal(&self, other: &dyn PolarsObjectSafe) -> bool;
}
42
43impl PartialEq for &dyn PolarsObjectSafe {
44    fn eq(&self, other: &Self) -> bool {
45        self.equal(*other)
46    }
47}
48
49/// Values need to implement this so that they can be stored into a Series and DataFrame
50pub trait PolarsObject:
51    Any + Debug + Clone + Send + Sync + Default + Display + Hash + TotalHash + PartialEq + Eq + TotalEq
52{
53    /// This should be used as type information. Consider this a part of the type system.
54    fn type_name() -> &'static str;
55}
56
57impl<T: PolarsObject> PolarsObjectSafe for T {
58    fn type_name(&self) -> &'static str {
59        T::type_name()
60    }
61
62    fn as_any(&self) -> &dyn Any {
63        self
64    }
65
66    fn to_boxed(&self) -> Box<dyn PolarsObjectSafe> {
67        Box::new(self.clone())
68    }
69
70    fn equal(&self, other: &dyn PolarsObjectSafe) -> bool {
71        let Some(other) = other.as_any().downcast_ref::<T>() else {
72            return false;
73        };
74        self == other
75    }
76}
77
/// Borrowing slice iterator over `T`; this is the type returned by
/// `ObjectArray::values_iter`.
78pub type ObjectValueIter<'a, T> = std::slice::Iter<'a, T>;
79
80impl<T> ObjectArray<T>
81where
82    T: PolarsObject,
83{
84    pub fn values_iter(&self) -> ObjectValueIter<'_, T> {
85        self.values.iter()
86    }
87
88    /// Returns an iterator of `Option<&T>` over every element of this array.
89    pub fn iter(&self) -> ZipValidity<&T, ObjectValueIter<'_, T>, BitmapIter<'_>> {
90        ZipValidity::new_with_validity(self.values_iter(), self.validity.as_ref())
91    }
92
93    /// Get a value at a certain index location
94    pub fn value(&self, index: usize) -> &T {
95        &self.values[index]
96    }
97
98    pub fn get(&self, index: usize) -> Option<&T> {
99        if self.is_valid(index) {
100            Some(unsafe { self.value_unchecked(index) })
101        } else {
102            None
103        }
104    }
105
106    /// Get a value at a certain index location
107    ///
108    /// # Safety
109    ///
110    /// This does not any bound checks. The caller needs to ensure the index is within
111    /// the size of the array.
112    pub unsafe fn value_unchecked(&self, index: usize) -> &T {
113        self.values.get_unchecked(index)
114    }
115
116    /// Check validity
117    ///
118    /// # Safety
119    /// No bounds checks
120    #[inline]
121    pub unsafe fn is_valid_unchecked(&self, i: usize) -> bool {
122        if let Some(b) = &self.validity {
123            b.get_bit_unchecked(i)
124        } else {
125            true
126        }
127    }
128
129    /// Check validity
130    ///
131    /// # Safety
132    /// No bounds checks
133    #[inline]
134    pub unsafe fn is_null_unchecked(&self, i: usize) -> bool {
135        !self.is_valid_unchecked(i)
136    }
137
138    /// Returns this array with a new validity.
139    /// # Panic
140    /// Panics iff `validity.len() != self.len()`.
141    #[must_use]
142    #[inline]
143    pub fn with_validity(mut self, validity: Option<Bitmap>) -> Self {
144        self.set_validity(validity);
145        self
146    }
147
148    /// Sets the validity of this array.
149    /// # Panics
150    /// This function panics iff `validity.len() != self.len()`.
151    #[inline]
152    pub fn set_validity(&mut self, validity: Option<Bitmap>) {
153        if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
154            panic!("validity must be equal to the array's length")
155        }
156        self.validity = validity;
157    }
158}
159
160impl<T> Array for ObjectArray<T>
161where
162    T: PolarsObject,
163{
164    fn as_any(&self) -> &dyn Any {
165        self
166    }
167
168    fn dtype(&self) -> &ArrowDataType {
169        &self.dtype
170    }
171
172    fn dtype_mut(&mut self) -> &mut ArrowDataType {
173        &mut self.dtype
174    }
175
176    fn slice(&mut self, offset: usize, length: usize) {
177        assert!(
178            offset + length <= self.len(),
179            "the offset of the new Buffer cannot exceed the existing length"
180        );
181        unsafe { self.slice_unchecked(offset, length) }
182    }
183
184    unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
185        self.validity = self
186            .validity
187            .take()
188            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
189            .filter(|bitmap| bitmap.unset_bits() > 0);
190        self.values.slice_unchecked(offset, length);
191    }
192
193    fn split_at_boxed(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>) {
194        let (lhs, rhs) = Splitable::split_at(self, offset);
195        (Box::new(lhs), Box::new(rhs))
196    }
197
198    unsafe fn split_at_boxed_unchecked(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>) {
199        let (lhs, rhs) = unsafe { Splitable::split_at_unchecked(self, offset) };
200        (Box::new(lhs), Box::new(rhs))
201    }
202
203    fn len(&self) -> usize {
204        self.values.len()
205    }
206
207    fn validity(&self) -> Option<&Bitmap> {
208        self.validity.as_ref()
209    }
210
211    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
212        Box::new(self.clone().with_validity(validity))
213    }
214
215    fn to_boxed(&self) -> Box<dyn Array> {
216        Box::new(self.clone())
217    }
218
219    fn as_any_mut(&mut self) -> &mut dyn Any {
220        unimplemented!()
221    }
222
223    fn null_count(&self) -> usize {
224        match &self.validity {
225            None => 0,
226            Some(validity) => validity.unset_bits(),
227        }
228    }
229}
230
231impl<T: PolarsObject> Splitable for ObjectArray<T> {
232    fn check_bound(&self, offset: usize) -> bool {
233        offset <= self.len()
234    }
235
236    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
237        let (left_values, right_values) = unsafe { self.values.split_at_unchecked(offset) };
238        let (left_validity, right_validity) = unsafe { self.validity.split_at_unchecked(offset) };
239        (
240            Self {
241                dtype: self.dtype.clone(),
242                values: left_values,
243                validity: left_validity,
244            },
245            Self {
246                dtype: self.dtype.clone(),
247                values: right_values,
248                validity: right_validity,
249            },
250        )
251    }
252}
253
254impl<T: PolarsObject> StaticArray for ObjectArray<T> {
255    type ValueT<'a> = &'a T;
256    type ZeroableValueT<'a> = Option<&'a T>;
257    type ValueIterT<'a> = ObjectValueIter<'a, T>;
258
259    #[inline]
260    unsafe fn value_unchecked(&self, idx: usize) -> Self::ValueT<'_> {
261        self.value_unchecked(idx)
262    }
263
264    fn values_iter(&self) -> Self::ValueIterT<'_> {
265        self.values_iter()
266    }
267
268    fn iter(&self) -> ZipValidity<Self::ValueT<'_>, Self::ValueIterT<'_>, BitmapIter<'_>> {
269        self.iter()
270    }
271
272    fn with_validity_typed(self, validity: Option<Bitmap>) -> Self {
273        self.with_validity(validity)
274    }
275
276    fn full_null(length: usize, dtype: ArrowDataType) -> Self {
277        ObjectArray {
278            dtype,
279            values: vec![T::default(); length].into(),
280            validity: Some(Bitmap::new_with_value(false, length)),
281        }
282    }
283}
284
285impl<T: PolarsObject> ParameterFreeDtypeStaticArray for ObjectArray<T> {
286    fn get_dtype() -> ArrowDataType {
287        ArrowDataType::FixedSizeBinary(size_of::<T>())
288    }
289}
290
291impl<T> ObjectChunked<T>
292where
293    T: PolarsObject,
294{
295    /// Get a hold to an object that can be formatted or downcasted via the Any trait.
296    ///
297    /// # Safety
298    ///
299    /// No bounds checks
300    pub unsafe fn get_object_unchecked(&self, index: usize) -> Option<&dyn PolarsObjectSafe> {
301        let (chunk_idx, idx) = self.index_to_chunked_index(index);
302        self.get_object_chunked_unchecked(chunk_idx, idx)
303    }
304
305    pub(crate) unsafe fn get_object_chunked_unchecked(
306        &self,
307        chunk: usize,
308        index: usize,
309    ) -> Option<&dyn PolarsObjectSafe> {
310        let chunks = self.downcast_chunks();
311        let arr = chunks.get_unchecked(chunk);
312        if arr.is_valid_unchecked(index) {
313            Some(arr.value(index))
314        } else {
315            None
316        }
317    }
318
319    /// Get a hold to an object that can be formatted or downcasted via the Any trait.
320    pub fn get_object(&self, index: usize) -> Option<&dyn PolarsObjectSafe> {
321        if index < self.len() {
322            unsafe { self.get_object_unchecked(index) }
323        } else {
324            None
325        }
326    }
327}
328
329impl<T: PolarsObject> From<Vec<T>> for ObjectArray<T> {
330    fn from(values: Vec<T>) -> Self {
331        Self {
332            dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
333            values: values.into(),
334            validity: None,
335        }
336    }
337}