polars_core/chunked_array/object/
mod.rs

1#![allow(unsafe_op_in_unsafe_fn)]
2use std::any::Any;
3use std::fmt::{Debug, Display};
4use std::hash::Hash;
5
6use arrow::bitmap::Bitmap;
7use arrow::bitmap::utils::{BitmapIter, ZipValidity};
8use polars_buffer::Buffer;
9use polars_utils::total_ord::TotalHash;
10
11use crate::prelude::*;
12
13pub mod builder;
14#[cfg(feature = "object")]
15pub(crate) mod extension;
16mod is_valid;
17mod iterator;
18pub mod registry;
19
20pub use extension::set_polars_allow_extension;
21
22#[derive(Debug, Clone)]
23pub struct ObjectArray<T>
24where
25    T: PolarsObject,
26{
27    dtype: ArrowDataType,
28    values: Buffer<T>,
29    validity: Option<Bitmap>,
30}
31
/// Object-safe, trimmed-down view of a polars object that can live behind `dyn`.
pub trait PolarsObjectSafe: Any + Debug + Display + Send + Sync {
    /// Name identifying the concrete object type.
    fn type_name(&self) -> &'static str;

    /// Upcasts to [`Any`] so that callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn Any;

    /// Produces a boxed trait object from this value.
    fn to_boxed(&self) -> Box<dyn PolarsObjectSafe>;

    /// Compares this value against another object-safe value.
    fn equal(&self, other: &dyn PolarsObjectSafe) -> bool;
}
42
43impl PartialEq for &dyn PolarsObjectSafe {
44    fn eq(&self, other: &Self) -> bool {
45        self.equal(*other)
46    }
47}
48
49/// Values need to implement this so that they can be stored into a Series and DataFrame
50pub trait PolarsObject:
51    Any + Debug + Clone + Send + Sync + Default + Display + Hash + TotalHash + PartialEq + Eq + TotalEq
52{
53    /// This should be used as type information. Consider this a part of the type system.
54    fn type_name() -> &'static str;
55}
56
57impl<T: PolarsObject> PolarsObjectSafe for T {
58    fn type_name(&self) -> &'static str {
59        T::type_name()
60    }
61
62    fn as_any(&self) -> &dyn Any {
63        self
64    }
65
66    fn to_boxed(&self) -> Box<dyn PolarsObjectSafe> {
67        Box::new(self.clone())
68    }
69
70    fn equal(&self, other: &dyn PolarsObjectSafe) -> bool {
71        let Some(other) = other.as_any().downcast_ref::<T>() else {
72            return false;
73        };
74        self == other
75    }
76}
77
/// Iterator over the stored values of an `ObjectArray`, without any validity information.
pub type ObjectValueIter<'a, T> = std::slice::Iter<'a, T>;
79
80impl<T> ObjectArray<T>
81where
82    T: PolarsObject,
83{
84    pub fn values_iter(&self) -> ObjectValueIter<'_, T> {
85        self.values.iter()
86    }
87
88    /// Returns an iterator of `Option<&T>` over every element of this array.
89    pub fn iter(&self) -> ZipValidity<&T, ObjectValueIter<'_, T>, BitmapIter<'_>> {
90        ZipValidity::new_with_validity(self.values_iter(), self.validity.as_ref())
91    }
92
93    /// Get a value at a certain index location
94    pub fn value(&self, index: usize) -> &T {
95        &self.values[index]
96    }
97
98    pub fn get(&self, index: usize) -> Option<&T> {
99        if self.is_valid(index) {
100            Some(unsafe { self.value_unchecked(index) })
101        } else {
102            None
103        }
104    }
105
106    /// Get a value at a certain index location
107    ///
108    /// # Safety
109    ///
110    /// This does not any bound checks. The caller needs to ensure the index is within
111    /// the size of the array.
112    pub unsafe fn value_unchecked(&self, index: usize) -> &T {
113        self.values.get_unchecked(index)
114    }
115
116    /// Check validity
117    ///
118    /// # Safety
119    /// No bounds checks
120    #[inline]
121    pub unsafe fn is_valid_unchecked(&self, i: usize) -> bool {
122        if let Some(b) = &self.validity {
123            b.get_bit_unchecked(i)
124        } else {
125            true
126        }
127    }
128
129    /// Check validity
130    ///
131    /// # Safety
132    /// No bounds checks
133    #[inline]
134    pub unsafe fn is_null_unchecked(&self, i: usize) -> bool {
135        !self.is_valid_unchecked(i)
136    }
137
138    /// Returns this array with a new validity.
139    /// # Panic
140    /// Panics iff `validity.len() != self.len()`.
141    #[must_use]
142    #[inline]
143    pub fn with_validity(mut self, validity: Option<Bitmap>) -> Self {
144        self.set_validity(validity);
145        self
146    }
147
148    /// Sets the validity of this array.
149    /// # Panics
150    /// This function panics iff `validity.len() != self.len()`.
151    #[inline]
152    pub fn set_validity(&mut self, validity: Option<Bitmap>) {
153        if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
154            panic!("validity must be equal to the array's length")
155        }
156        self.validity = validity;
157    }
158}
159
160impl<T> Array for ObjectArray<T>
161where
162    T: PolarsObject,
163{
164    fn as_any(&self) -> &dyn Any {
165        self
166    }
167
168    fn dtype(&self) -> &ArrowDataType {
169        &self.dtype
170    }
171
172    fn dtype_mut(&mut self) -> &mut ArrowDataType {
173        &mut self.dtype
174    }
175
176    fn slice(&mut self, offset: usize, length: usize) {
177        assert!(
178            offset + length <= self.len(),
179            "the offset of the new Buffer cannot exceed the existing length"
180        );
181        unsafe { self.slice_unchecked(offset, length) }
182    }
183
184    unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
185        self.validity = self
186            .validity
187            .take()
188            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
189            .filter(|bitmap| bitmap.unset_bits() > 0);
190        self.values
191            .slice_in_place_unchecked(offset..offset + length);
192    }
193
194    fn split_at_boxed(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>) {
195        let (lhs, rhs) = Splitable::split_at(self, offset);
196        (Box::new(lhs), Box::new(rhs))
197    }
198
199    unsafe fn split_at_boxed_unchecked(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>) {
200        let (lhs, rhs) = unsafe { Splitable::split_at_unchecked(self, offset) };
201        (Box::new(lhs), Box::new(rhs))
202    }
203
204    fn len(&self) -> usize {
205        self.values.len()
206    }
207
208    fn validity(&self) -> Option<&Bitmap> {
209        self.validity.as_ref()
210    }
211
212    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
213        Box::new(self.clone().with_validity(validity))
214    }
215
216    fn to_boxed(&self) -> Box<dyn Array> {
217        Box::new(self.clone())
218    }
219
220    fn as_any_mut(&mut self) -> &mut dyn Any {
221        unimplemented!()
222    }
223
224    fn null_count(&self) -> usize {
225        match &self.validity {
226            None => 0,
227            Some(validity) => validity.unset_bits(),
228        }
229    }
230}
231
232impl<T: PolarsObject> Splitable for ObjectArray<T> {
233    fn check_bound(&self, offset: usize) -> bool {
234        offset <= self.len()
235    }
236
237    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
238        let (left_values, right_values) = unsafe { self.values.split_at_unchecked(offset) };
239        let (left_validity, right_validity) = unsafe { self.validity.split_at_unchecked(offset) };
240        (
241            Self {
242                dtype: self.dtype.clone(),
243                values: left_values,
244                validity: left_validity,
245            },
246            Self {
247                dtype: self.dtype.clone(),
248                values: right_values,
249                validity: right_validity,
250            },
251        )
252    }
253}
254
255impl<T: PolarsObject> StaticArray for ObjectArray<T> {
256    type ValueT<'a> = &'a T;
257    type ZeroableValueT<'a> = Option<&'a T>;
258    type ValueIterT<'a> = ObjectValueIter<'a, T>;
259
260    #[inline]
261    unsafe fn value_unchecked(&self, idx: usize) -> Self::ValueT<'_> {
262        self.value_unchecked(idx)
263    }
264
265    fn values_iter(&self) -> Self::ValueIterT<'_> {
266        self.values_iter()
267    }
268
269    fn iter(&self) -> ZipValidity<Self::ValueT<'_>, Self::ValueIterT<'_>, BitmapIter<'_>> {
270        self.iter()
271    }
272
273    fn with_validity_typed(self, validity: Option<Bitmap>) -> Self {
274        self.with_validity(validity)
275    }
276
277    fn full_null(length: usize, dtype: ArrowDataType) -> Self {
278        ObjectArray {
279            dtype,
280            values: vec![T::default(); length].into(),
281            validity: Some(Bitmap::new_with_value(false, length)),
282        }
283    }
284}
285
286impl<T: PolarsObject> ParameterFreeDtypeStaticArray for ObjectArray<T> {
287    fn get_dtype() -> ArrowDataType {
288        ArrowDataType::FixedSizeBinary(size_of::<T>())
289    }
290}
291
292impl<T> ObjectChunked<T>
293where
294    T: PolarsObject,
295{
296    /// Get a hold to an object that can be formatted or downcasted via the Any trait.
297    ///
298    /// # Safety
299    ///
300    /// No bounds checks
301    pub unsafe fn get_object_unchecked(&self, index: usize) -> Option<&dyn PolarsObjectSafe> {
302        let (chunk_idx, idx) = self.index_to_chunked_index(index);
303        self.get_object_chunked_unchecked(chunk_idx, idx)
304    }
305
306    pub(crate) unsafe fn get_object_chunked_unchecked(
307        &self,
308        chunk: usize,
309        index: usize,
310    ) -> Option<&dyn PolarsObjectSafe> {
311        let chunks = self.downcast_chunks();
312        let arr = chunks.get_unchecked(chunk);
313        if arr.is_valid_unchecked(index) {
314            Some(arr.value(index))
315        } else {
316            None
317        }
318    }
319
320    /// Get a hold to an object that can be formatted or downcasted via the Any trait.
321    pub fn get_object(&self, index: usize) -> Option<&dyn PolarsObjectSafe> {
322        if index < self.len() {
323            unsafe { self.get_object_unchecked(index) }
324        } else {
325            None
326        }
327    }
328}
329
330impl<T: PolarsObject> From<Vec<T>> for ObjectArray<T> {
331    fn from(values: Vec<T>) -> Self {
332        Self {
333            dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
334            values: values.into(),
335            validity: None,
336        }
337    }
338}