polars_core/chunked_array/object/
builder.rs

1use arrow::array::builder::{ArrayBuilder, ShareStrategy};
2use arrow::bitmap::BitmapBuilder;
3use polars_utils::vec::PushUnchecked;
4
5use super::*;
6use crate::utils::get_iter_capacity;
7
8pub struct ObjectChunkedBuilder<T> {
9    field: Field,
10    bitmask_builder: BitmapBuilder,
11    values: Vec<T>,
12}
13
14impl<T> ObjectChunkedBuilder<T>
15where
16    T: PolarsObject,
17{
18    pub fn field(&self) -> &Field {
19        &self.field
20    }
21    pub fn new(name: PlSmallStr, capacity: usize) -> Self {
22        ObjectChunkedBuilder {
23            field: Field::new(name, DataType::Object(T::type_name())),
24            values: Vec::with_capacity(capacity),
25            bitmask_builder: BitmapBuilder::with_capacity(capacity),
26        }
27    }
28
29    /// Appends a value of type `T` into the builder
30    #[inline]
31    pub fn append_value(&mut self, v: T) {
32        self.values.push(v);
33        self.bitmask_builder.push(true);
34    }
35
36    /// Appends a null slot into the builder
37    #[inline]
38    pub fn append_null(&mut self) {
39        self.values.push(T::default());
40        self.bitmask_builder.push(false);
41    }
42
43    #[inline]
44    pub fn append_value_from_any(&mut self, v: &dyn Any) -> PolarsResult<()> {
45        let Some(v) = v.downcast_ref::<T>() else {
46            polars_bail!(SchemaMismatch: "cannot downcast any in ObjectBuilder");
47        };
48        self.append_value(v.clone());
49        Ok(())
50    }
51
52    #[inline]
53    pub fn append_option(&mut self, opt: Option<T>) {
54        match opt {
55            Some(s) => self.append_value(s),
56            None => self.append_null(),
57        }
58    }
59
60    pub fn finish(mut self) -> ObjectChunked<T> {
61        let null_bitmap: Option<Bitmap> = self.bitmask_builder.into_opt_validity();
62
63        let len = self.values.len();
64        let null_count = null_bitmap
65            .as_ref()
66            .map(|validity| validity.unset_bits())
67            .unwrap_or(0);
68
69        let arr = Box::new(ObjectArray {
70            dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
71            values: self.values.into(),
72            validity: null_bitmap,
73        });
74
75        self.field.dtype = get_object_type::<T>();
76
77        unsafe { ChunkedArray::new_with_dims(Arc::new(self.field), vec![arr], len, null_count) }
78    }
79}
80
81/// Initialize a polars Object data type. The type has got information needed to
82/// construct new objects.
83pub(crate) fn get_object_type<T: PolarsObject>() -> DataType {
84    DataType::Object(T::type_name())
85}
86
87impl<T> Default for ObjectChunkedBuilder<T>
88where
89    T: PolarsObject,
90{
91    fn default() -> Self {
92        ObjectChunkedBuilder::new(PlSmallStr::EMPTY, 0)
93    }
94}
95
96impl<T> NewChunkedArray<ObjectType<T>, T> for ObjectChunked<T>
97where
98    T: PolarsObject,
99{
100    fn from_slice(name: PlSmallStr, v: &[T]) -> Self {
101        Self::from_iter_values(name, v.iter().cloned())
102    }
103
104    fn from_slice_options(name: PlSmallStr, opt_v: &[Option<T>]) -> Self {
105        let mut builder = ObjectChunkedBuilder::<T>::new(name, opt_v.len());
106        opt_v
107            .iter()
108            .cloned()
109            .for_each(|opt| builder.append_option(opt));
110        builder.finish()
111    }
112
113    fn from_iter_options(
114        name: PlSmallStr,
115        it: impl Iterator<Item = Option<T>>,
116    ) -> ObjectChunked<T> {
117        let mut builder = ObjectChunkedBuilder::new(name, get_iter_capacity(&it));
118        it.for_each(|opt| builder.append_option(opt));
119        builder.finish()
120    }
121
122    /// Create a new ChunkedArray from an iterator.
123    fn from_iter_values(name: PlSmallStr, it: impl Iterator<Item = T>) -> ObjectChunked<T> {
124        let mut builder = ObjectChunkedBuilder::new(name, get_iter_capacity(&it));
125        it.for_each(|v| builder.append_value(v));
126        builder.finish()
127    }
128}
129
130impl<T> ObjectChunked<T>
131where
132    T: PolarsObject,
133{
134    pub fn new_from_vec(name: PlSmallStr, v: Vec<T>) -> Self {
135        let field = Arc::new(Field::new(name, DataType::Object(T::type_name())));
136        let len = v.len();
137        let arr = Box::new(ObjectArray {
138            dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
139            values: v.into(),
140            validity: None,
141        });
142
143        unsafe { ObjectChunked::new_with_dims(field, vec![arr], len, 0) }
144    }
145
146    pub fn new_from_vec_and_validity(
147        name: PlSmallStr,
148        v: Vec<T>,
149        validity: Option<Bitmap>,
150    ) -> Self {
151        let field = Arc::new(Field::new(name, DataType::Object(T::type_name())));
152        let len = v.len();
153        let null_count = validity.as_ref().map(|v| v.unset_bits()).unwrap_or(0);
154        let arr = Box::new(ObjectArray {
155            dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
156            values: v.into(),
157            validity,
158        });
159
160        unsafe { ObjectChunked::new_with_dims(field, vec![arr], len, null_count) }
161    }
162
163    pub fn new_empty(name: PlSmallStr) -> Self {
164        Self::new_from_vec(name, vec![])
165    }
166}
167
168/// Convert a Series of dtype object to an Arrow Array of FixedSizeBinary
169pub(crate) fn object_series_to_arrow_array(s: &Series) -> ArrayRef {
170    // The list builder knows how to create an arrow array
171    // we simply piggy back on that code.
172
173    // SAFETY: 0..len is in bounds
174    let list_s = unsafe {
175        s.agg_list(&GroupsType::Slice {
176            groups: vec![[0, s.len() as IdxSize]],
177            overlapping: false,
178        })
179    };
180    let arr = &list_s.chunks()[0];
181    let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();
182    arr.values().to_boxed()
183}
184
185impl<T: PolarsObject> ArrayBuilder for ObjectChunkedBuilder<T> {
186    fn dtype(&self) -> &ArrowDataType {
187        &ArrowDataType::FixedSizeBinary(size_of::<T>())
188    }
189
190    fn reserve(&mut self, additional: usize) {
191        self.bitmask_builder.reserve(additional);
192        self.values.reserve(additional);
193    }
194
195    fn freeze(self) -> Box<dyn Array> {
196        Box::new(ObjectArray {
197            dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
198            values: self.values.into(),
199            validity: self.bitmask_builder.into_opt_validity(),
200        })
201    }
202
203    fn freeze_reset(&mut self) -> Box<dyn Array> {
204        Box::new(ObjectArray {
205            dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
206            values: core::mem::take(&mut self.values).into(),
207            validity: core::mem::take(&mut self.bitmask_builder).into_opt_validity(),
208        })
209    }
210
211    fn len(&self) -> usize {
212        self.values.len()
213    }
214
215    fn extend_nulls(&mut self, length: usize) {
216        self.values.resize(self.values.len() + length, T::default());
217        self.bitmask_builder.extend_constant(length, false);
218    }
219
220    fn subslice_extend(
221        &mut self,
222        other: &dyn Array,
223        start: usize,
224        length: usize,
225        _share: ShareStrategy,
226    ) {
227        let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
228        self.values
229            .extend_from_slice(&other.values[start..start + length]);
230        self.bitmask_builder
231            .subslice_extend_from_opt_validity(other.validity(), start, length);
232    }
233
234    fn subslice_extend_repeated(
235        &mut self,
236        other: &dyn Array,
237        start: usize,
238        length: usize,
239        repeats: usize,
240        share: ShareStrategy,
241    ) {
242        for _ in 0..repeats {
243            self.subslice_extend(other, start, length, share)
244        }
245    }
246
247    fn subslice_extend_each_repeated(
248        &mut self,
249        other: &dyn Array,
250        start: usize,
251        length: usize,
252        repeats: usize,
253        _share: ShareStrategy,
254    ) {
255        let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
256
257        self.values.reserve(length * repeats);
258        for value in other.values[start..start + length].iter() {
259            unsafe {
260                for _ in 0..repeats {
261                    self.values.push_unchecked(value.clone());
262                }
263            }
264        }
265        self.bitmask_builder
266            .subslice_extend_each_repeated_from_opt_validity(
267                other.validity(),
268                start,
269                length,
270                repeats,
271            );
272    }
273
274    unsafe fn gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], _share: ShareStrategy) {
275        let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
276        let other_values_slice = other.values.as_slice();
277        self.values.extend(
278            idxs.iter()
279                .map(|idx| other_values_slice.get_unchecked(*idx as usize).clone()),
280        );
281        self.bitmask_builder
282            .gather_extend_from_opt_validity(other.validity(), idxs, other.len());
283    }
284
285    fn opt_gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], _share: ShareStrategy) {
286        let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
287        let other_values_slice = other.values.as_slice();
288        self.values.reserve(idxs.len());
289        unsafe {
290            for idx in idxs {
291                let val = if (*idx as usize) < other.len() {
292                    other_values_slice.get_unchecked(*idx as usize).clone()
293                } else {
294                    T::default()
295                };
296                self.values.push_unchecked(val);
297            }
298        }
299        self.bitmask_builder.opt_gather_extend_from_opt_validity(
300            other.validity(),
301            idxs,
302            other.len(),
303        );
304    }
305}