Skip to main content

polars_core/chunked_array/object/
builder.rs

1use arrow::array::builder::{ArrayBuilder, ShareStrategy};
2use arrow::bitmap::BitmapBuilder;
3use arrow::datatypes::ExtensionType;
4use polars_utils::vec::PushUnchecked;
5
6use super::*;
7use crate::chunked_array::object::registry::run_with_gil;
8use crate::utils::get_iter_capacity;
9
/// Incremental builder for an `ObjectChunked<T>`.
///
/// Every appended slot adds one entry to `values` and one bit to
/// `bitmask_builder`, so both always describe the same number of slots.
pub struct ObjectChunkedBuilder<T> {
    /// Field (name and dtype) attached to the array produced by `finish`.
    field: Field,
    /// Validity mask under construction: `true` marks a value, `false` a null.
    bitmask_builder: BitmapBuilder,
    /// Slot values; a slot added through `append_null` stores `T::default()`
    /// as a placeholder.
    values: Vec<T>,
}
15
16impl<T> ObjectChunkedBuilder<T>
17where
18    T: PolarsObject,
19{
20    pub fn field(&self) -> &Field {
21        &self.field
22    }
23    pub fn new(name: PlSmallStr, capacity: usize) -> Self {
24        ObjectChunkedBuilder {
25            field: Field::new(name, DataType::Object(T::type_name())),
26            values: Vec::with_capacity(capacity),
27            bitmask_builder: BitmapBuilder::with_capacity(capacity),
28        }
29    }
30
31    /// Appends a value of type `T` into the builder
32    #[inline]
33    pub fn append_value(&mut self, v: T) {
34        self.values.push(v);
35        self.bitmask_builder.push(true);
36    }
37
38    /// Appends a null slot into the builder
39    #[inline]
40    pub fn append_null(&mut self) {
41        self.values.push(T::default());
42        self.bitmask_builder.push(false);
43    }
44
45    #[inline]
46    pub fn append_value_from_any(&mut self, v: &dyn Any) -> PolarsResult<()> {
47        let Some(v) = v.downcast_ref::<T>() else {
48            polars_bail!(SchemaMismatch: "cannot downcast any in ObjectBuilder");
49        };
50        self.append_value(v.clone());
51        Ok(())
52    }
53
54    #[inline]
55    pub fn append_option(&mut self, opt: Option<T>) {
56        match opt {
57            Some(s) => self.append_value(s),
58            None => self.append_null(),
59        }
60    }
61
62    pub fn finish(mut self) -> ObjectChunked<T> {
63        let null_bitmap: Option<Bitmap> = self.bitmask_builder.into_opt_validity();
64
65        let len = self.values.len();
66        let null_count = null_bitmap
67            .as_ref()
68            .map(|validity| validity.unset_bits())
69            .unwrap_or(0);
70
71        let arr = Box::new(ObjectArray {
72            dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
73            values: self.values.into(),
74            validity: null_bitmap,
75        });
76
77        self.field.dtype = get_object_type::<T>();
78
79        unsafe { ChunkedArray::new_with_dims(Arc::new(self.field), vec![arr], len, null_count) }
80    }
81}
82
83/// Initialize a polars Object data type. The type has got information needed to
84/// construct new objects.
85pub(crate) fn get_object_type<T: PolarsObject>() -> DataType {
86    DataType::Object(T::type_name())
87}
88
89impl<T> Default for ObjectChunkedBuilder<T>
90where
91    T: PolarsObject,
92{
93    fn default() -> Self {
94        ObjectChunkedBuilder::new(PlSmallStr::EMPTY, 0)
95    }
96}
97
98impl<T> NewChunkedArray<ObjectType<T>, T> for ObjectChunked<T>
99where
100    T: PolarsObject,
101{
102    fn from_slice(name: PlSmallStr, v: &[T]) -> Self {
103        Self::from_iter_values(name, v.iter().cloned())
104    }
105
106    fn from_slice_options(name: PlSmallStr, opt_v: &[Option<T>]) -> Self {
107        let mut builder = ObjectChunkedBuilder::<T>::new(name, opt_v.len());
108        opt_v
109            .iter()
110            .cloned()
111            .for_each(|opt| builder.append_option(opt));
112        builder.finish()
113    }
114
115    fn from_iter_options(
116        name: PlSmallStr,
117        it: impl Iterator<Item = Option<T>>,
118    ) -> ObjectChunked<T> {
119        let mut builder = ObjectChunkedBuilder::new(name, get_iter_capacity(&it));
120        it.for_each(|opt| builder.append_option(opt));
121        builder.finish()
122    }
123
124    /// Create a new ChunkedArray from an iterator.
125    fn from_iter_values(name: PlSmallStr, it: impl Iterator<Item = T>) -> ObjectChunked<T> {
126        let mut builder = ObjectChunkedBuilder::new(name, get_iter_capacity(&it));
127        it.for_each(|v| builder.append_value(v));
128        builder.finish()
129    }
130}
131
132impl<T> ObjectChunked<T>
133where
134    T: PolarsObject,
135{
136    pub fn new_from_vec(name: PlSmallStr, v: Vec<T>) -> Self {
137        let field = Arc::new(Field::new(name, DataType::Object(T::type_name())));
138        let len = v.len();
139        let arr = Box::new(ObjectArray {
140            dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
141            values: v.into(),
142            validity: None,
143        });
144
145        unsafe { ObjectChunked::new_with_dims(field, vec![arr], len, 0) }
146    }
147
148    pub fn new_from_vec_and_validity(
149        name: PlSmallStr,
150        v: Vec<T>,
151        validity: Option<Bitmap>,
152    ) -> Self {
153        let field = Arc::new(Field::new(name, DataType::Object(T::type_name())));
154        let len = v.len();
155        let null_count = validity.as_ref().map(|v| v.unset_bits()).unwrap_or(0);
156        let arr = Box::new(ObjectArray {
157            dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
158            values: v.into(),
159            validity,
160        });
161
162        unsafe { ObjectChunked::new_with_dims(field, vec![arr], len, null_count) }
163    }
164
165    pub fn new_empty(name: PlSmallStr) -> Self {
166        Self::new_from_vec(name, vec![])
167    }
168}
169
170/// Convert a Series of dtype object to an Arrow Array of FixedSizeBinary
171pub(crate) fn object_series_to_arrow_array(s: &Series) -> ArrayRef {
172    // The list builder knows how to create an arrow array
173    // we simply piggy back on that code.
174
175    // SAFETY: 0..len is in bounds
176    let list_s = unsafe {
177        let groups = vec![[0, s.len() as IdxSize]];
178        s.agg_list(&GroupsType::new_slice(groups, false, true))
179    };
180    let arr = &list_s.chunks()[0];
181    let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();
182
183    let mut arr: Box<dyn Array> = arr.values().clone();
184
185    if let ArrowDataType::Extension(ext_type) = arr.dtype()
186        && let ExtensionType {
187            name,
188            inner: ArrowDataType::FixedSizeBinary(8),
189            metadata: Some(_),
190        } = ext_type.as_ref()
191        && name == POLARS_OBJECT_EXTENSION_NAME
192    {
193        *arr.dtype_mut() = ArrowDataType::FixedSizeBinary(8);
194    } else {
195        panic!()
196    }
197
198    arr
199}
200
201impl<T: PolarsObject> ArrayBuilder for ObjectChunkedBuilder<T> {
202    fn dtype(&self) -> &ArrowDataType {
203        &ArrowDataType::FixedSizeBinary(size_of::<T>())
204    }
205
206    fn reserve(&mut self, additional: usize) {
207        self.bitmask_builder.reserve(additional);
208        self.values.reserve(additional);
209    }
210
211    fn freeze(self) -> Box<dyn Array> {
212        Box::new(ObjectArray {
213            dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
214            values: self.values.into(),
215            validity: self.bitmask_builder.into_opt_validity(),
216        })
217    }
218
219    fn freeze_reset(&mut self) -> Box<dyn Array> {
220        Box::new(ObjectArray {
221            dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
222            values: core::mem::take(&mut self.values).into(),
223            validity: core::mem::take(&mut self.bitmask_builder).into_opt_validity(),
224        })
225    }
226
227    fn len(&self) -> usize {
228        self.values.len()
229    }
230
231    fn extend_nulls(&mut self, length: usize) {
232        run_with_gil(|| {
233            self.values.resize(self.values.len() + length, T::default());
234        });
235        self.bitmask_builder.extend_constant(length, false);
236    }
237
238    fn subslice_extend(
239        &mut self,
240        other: &dyn Array,
241        start: usize,
242        length: usize,
243        _share: ShareStrategy,
244    ) {
245        run_with_gil(|| {
246            let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
247            self.values
248                .extend_from_slice(&other.values[start..start + length]);
249            self.bitmask_builder
250                .subslice_extend_from_opt_validity(other.validity(), start, length);
251        })
252    }
253
254    fn subslice_extend_repeated(
255        &mut self,
256        other: &dyn Array,
257        start: usize,
258        length: usize,
259        repeats: usize,
260        _share: ShareStrategy,
261    ) {
262        run_with_gil(|| {
263            for _ in 0..repeats {
264                let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
265                self.values
266                    .extend_from_slice(&other.values[start..start + length]);
267                self.bitmask_builder.subslice_extend_from_opt_validity(
268                    other.validity(),
269                    start,
270                    length,
271                );
272            }
273        })
274    }
275
276    fn subslice_extend_each_repeated(
277        &mut self,
278        other: &dyn Array,
279        start: usize,
280        length: usize,
281        repeats: usize,
282        _share: ShareStrategy,
283    ) {
284        run_with_gil(|| {
285            let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
286
287            self.values.reserve(length * repeats);
288            for value in other.values[start..start + length].iter() {
289                unsafe {
290                    for _ in 0..repeats {
291                        self.values.push_unchecked(value.clone());
292                    }
293                }
294            }
295        });
296
297        self.bitmask_builder
298            .subslice_extend_each_repeated_from_opt_validity(
299                other.validity(),
300                start,
301                length,
302                repeats,
303            );
304    }
305
306    unsafe fn gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], _share: ShareStrategy) {
307        run_with_gil(|| {
308            let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
309            let other_values_slice = other.values.as_slice();
310            self.values.extend(
311                idxs.iter()
312                    .map(|idx| other_values_slice.get_unchecked(*idx as usize).clone()),
313            );
314        });
315        self.bitmask_builder
316            .gather_extend_from_opt_validity(other.validity(), idxs, other.len());
317    }
318
319    fn opt_gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], _share: ShareStrategy) {
320        run_with_gil(|| {
321            let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
322            let other_values_slice = other.values.as_slice();
323            self.values.reserve(idxs.len());
324            unsafe {
325                for idx in idxs {
326                    let val = if (*idx as usize) < other.len() {
327                        other_values_slice.get_unchecked(*idx as usize).clone()
328                    } else {
329                        T::default()
330                    };
331                    self.values.push_unchecked(val);
332                }
333            }
334        });
335        self.bitmask_builder.opt_gather_extend_from_opt_validity(
336            other.validity(),
337            idxs,
338            other.len(),
339        );
340    }
341}