polars_core/chunked_array/object/
builder.rs

1use arrow::array::builder::{ArrayBuilder, ShareStrategy};
2use arrow::bitmap::BitmapBuilder;
3use polars_utils::vec::PushUnchecked;
4
5use super::*;
6use crate::utils::get_iter_capacity;
7
8pub struct ObjectChunkedBuilder<T> {
9    field: Field,
10    bitmask_builder: BitmapBuilder,
11    values: Vec<T>,
12}
13
14impl<T> ObjectChunkedBuilder<T>
15where
16    T: PolarsObject,
17{
18    pub fn field(&self) -> &Field {
19        &self.field
20    }
21    pub fn new(name: PlSmallStr, capacity: usize) -> Self {
22        ObjectChunkedBuilder {
23            field: Field::new(name, DataType::Object(T::type_name())),
24            values: Vec::with_capacity(capacity),
25            bitmask_builder: BitmapBuilder::with_capacity(capacity),
26        }
27    }
28
29    /// Appends a value of type `T` into the builder
30    #[inline]
31    pub fn append_value(&mut self, v: T) {
32        self.values.push(v);
33        self.bitmask_builder.push(true);
34    }
35
36    /// Appends a null slot into the builder
37    #[inline]
38    pub fn append_null(&mut self) {
39        self.values.push(T::default());
40        self.bitmask_builder.push(false);
41    }
42
43    #[inline]
44    pub fn append_value_from_any(&mut self, v: &dyn Any) -> PolarsResult<()> {
45        let Some(v) = v.downcast_ref::<T>() else {
46            polars_bail!(SchemaMismatch: "cannot downcast any in ObjectBuilder");
47        };
48        self.append_value(v.clone());
49        Ok(())
50    }
51
52    #[inline]
53    pub fn append_option(&mut self, opt: Option<T>) {
54        match opt {
55            Some(s) => self.append_value(s),
56            None => self.append_null(),
57        }
58    }
59
60    pub fn finish(mut self) -> ObjectChunked<T> {
61        let null_bitmap: Option<Bitmap> = self.bitmask_builder.into_opt_validity();
62
63        let len = self.values.len();
64        let null_count = null_bitmap
65            .as_ref()
66            .map(|validity| validity.unset_bits())
67            .unwrap_or(0);
68
69        let arr = Box::new(ObjectArray {
70            values: self.values.into(),
71            validity: null_bitmap,
72        });
73
74        self.field.dtype = get_object_type::<T>();
75
76        unsafe { ChunkedArray::new_with_dims(Arc::new(self.field), vec![arr], len, null_count) }
77    }
78}
79
80/// Initialize a polars Object data type. The type has got information needed to
81/// construct new objects.
82pub(crate) fn get_object_type<T: PolarsObject>() -> DataType {
83    DataType::Object(T::type_name())
84}
85
86impl<T> Default for ObjectChunkedBuilder<T>
87where
88    T: PolarsObject,
89{
90    fn default() -> Self {
91        ObjectChunkedBuilder::new(PlSmallStr::EMPTY, 0)
92    }
93}
94
95impl<T> NewChunkedArray<ObjectType<T>, T> for ObjectChunked<T>
96where
97    T: PolarsObject,
98{
99    fn from_slice(name: PlSmallStr, v: &[T]) -> Self {
100        Self::from_iter_values(name, v.iter().cloned())
101    }
102
103    fn from_slice_options(name: PlSmallStr, opt_v: &[Option<T>]) -> Self {
104        let mut builder = ObjectChunkedBuilder::<T>::new(name, opt_v.len());
105        opt_v
106            .iter()
107            .cloned()
108            .for_each(|opt| builder.append_option(opt));
109        builder.finish()
110    }
111
112    fn from_iter_options(
113        name: PlSmallStr,
114        it: impl Iterator<Item = Option<T>>,
115    ) -> ObjectChunked<T> {
116        let mut builder = ObjectChunkedBuilder::new(name, get_iter_capacity(&it));
117        it.for_each(|opt| builder.append_option(opt));
118        builder.finish()
119    }
120
121    /// Create a new ChunkedArray from an iterator.
122    fn from_iter_values(name: PlSmallStr, it: impl Iterator<Item = T>) -> ObjectChunked<T> {
123        let mut builder = ObjectChunkedBuilder::new(name, get_iter_capacity(&it));
124        it.for_each(|v| builder.append_value(v));
125        builder.finish()
126    }
127}
128
129impl<T> ObjectChunked<T>
130where
131    T: PolarsObject,
132{
133    pub fn new_from_vec(name: PlSmallStr, v: Vec<T>) -> Self {
134        let field = Arc::new(Field::new(name, DataType::Object(T::type_name())));
135        let len = v.len();
136        let arr = Box::new(ObjectArray {
137            values: v.into(),
138            validity: None,
139        });
140
141        unsafe { ObjectChunked::new_with_dims(field, vec![arr], len, 0) }
142    }
143
144    pub fn new_from_vec_and_validity(
145        name: PlSmallStr,
146        v: Vec<T>,
147        validity: Option<Bitmap>,
148    ) -> Self {
149        let field = Arc::new(Field::new(name, DataType::Object(T::type_name())));
150        let len = v.len();
151        let null_count = validity.as_ref().map(|v| v.unset_bits()).unwrap_or(0);
152        let arr = Box::new(ObjectArray {
153            values: v.into(),
154            validity,
155        });
156
157        unsafe { ObjectChunked::new_with_dims(field, vec![arr], len, null_count) }
158    }
159
160    pub fn new_empty(name: PlSmallStr) -> Self {
161        Self::new_from_vec(name, vec![])
162    }
163}
164
165/// Convert a Series of dtype object to an Arrow Array of FixedSizeBinary
166pub(crate) fn object_series_to_arrow_array(s: &Series) -> ArrayRef {
167    // The list builder knows how to create an arrow array
168    // we simply piggy back on that code.
169
170    // SAFETY: 0..len is in bounds
171    let list_s = unsafe {
172        s.agg_list(&GroupsType::Slice {
173            groups: vec![[0, s.len() as IdxSize]],
174            rolling: false,
175        })
176    };
177    let arr = &list_s.chunks()[0];
178    let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();
179    arr.values().to_boxed()
180}
181
182impl<T: PolarsObject> ArrayBuilder for ObjectChunkedBuilder<T> {
183    fn dtype(&self) -> &ArrowDataType {
184        &ArrowDataType::FixedSizeBinary(size_of::<T>())
185    }
186
187    fn reserve(&mut self, additional: usize) {
188        self.bitmask_builder.reserve(additional);
189        self.values.reserve(additional);
190    }
191
192    fn freeze(self) -> Box<dyn Array> {
193        Box::new(ObjectArray {
194            values: self.values.into(),
195            validity: self.bitmask_builder.into_opt_validity(),
196        })
197    }
198
199    fn freeze_reset(&mut self) -> Box<dyn Array> {
200        Box::new(ObjectArray {
201            values: core::mem::take(&mut self.values).into(),
202            validity: core::mem::take(&mut self.bitmask_builder).into_opt_validity(),
203        })
204    }
205
206    fn len(&self) -> usize {
207        self.values.len()
208    }
209
210    fn extend_nulls(&mut self, length: usize) {
211        self.values.resize(self.values.len() + length, T::default());
212        self.bitmask_builder.extend_constant(length, false);
213    }
214
215    fn subslice_extend(
216        &mut self,
217        other: &dyn Array,
218        start: usize,
219        length: usize,
220        _share: ShareStrategy,
221    ) {
222        let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
223        self.values
224            .extend_from_slice(&other.values[start..start + length]);
225        self.bitmask_builder
226            .subslice_extend_from_opt_validity(other.validity(), start, length);
227    }
228
229    fn subslice_extend_repeated(
230        &mut self,
231        other: &dyn Array,
232        start: usize,
233        length: usize,
234        repeats: usize,
235        share: ShareStrategy,
236    ) {
237        for _ in 0..repeats {
238            self.subslice_extend(other, start, length, share)
239        }
240    }
241
242    unsafe fn gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], _share: ShareStrategy) {
243        let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
244        let other_values_slice = other.values.as_slice();
245        self.values.extend(
246            idxs.iter()
247                .map(|idx| other_values_slice.get_unchecked(*idx as usize).clone()),
248        );
249        self.bitmask_builder
250            .gather_extend_from_opt_validity(other.validity(), idxs, other.len());
251    }
252
253    fn opt_gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], _share: ShareStrategy) {
254        let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
255        let other_values_slice = other.values.as_slice();
256        self.values.reserve(idxs.len());
257        unsafe {
258            for idx in idxs {
259                let val = if (*idx as usize) < other.len() {
260                    other_values_slice.get_unchecked(*idx as usize).clone()
261                } else {
262                    T::default()
263                };
264                self.values.push_unchecked(val);
265            }
266        }
267        self.bitmask_builder.opt_gather_extend_from_opt_validity(
268            other.validity(),
269            idxs,
270            other.len(),
271        );
272    }
273}