// polars_core/chunked_array/object/builder.rs
use arrow::array::builder::{ArrayBuilder, ShareStrategy};
use arrow::bitmap::BitmapBuilder;
use arrow::datatypes::ExtensionType;
use polars_utils::vec::PushUnchecked;

use super::*;
use crate::chunked_array::object::registry::run_with_gil;
use crate::utils::get_iter_capacity;
9
/// Incrementally builds an [`ObjectChunked`] column of `T` values together
/// with a validity bitmap recording which slots are null.
pub struct ObjectChunkedBuilder<T> {
    // Name and dtype of the column being built.
    field: Field,
    // One bit per appended value: true = valid, false = null.
    bitmask_builder: BitmapBuilder,
    // Backing values; null slots hold `T::default()` placeholders.
    values: Vec<T>,
}
15
16impl<T> ObjectChunkedBuilder<T>
17where
18 T: PolarsObject,
19{
20 pub fn field(&self) -> &Field {
21 &self.field
22 }
23 pub fn new(name: PlSmallStr, capacity: usize) -> Self {
24 ObjectChunkedBuilder {
25 field: Field::new(name, DataType::Object(T::type_name())),
26 values: Vec::with_capacity(capacity),
27 bitmask_builder: BitmapBuilder::with_capacity(capacity),
28 }
29 }
30
31 #[inline]
33 pub fn append_value(&mut self, v: T) {
34 self.values.push(v);
35 self.bitmask_builder.push(true);
36 }
37
38 #[inline]
40 pub fn append_null(&mut self) {
41 self.values.push(T::default());
42 self.bitmask_builder.push(false);
43 }
44
45 #[inline]
46 pub fn append_value_from_any(&mut self, v: &dyn Any) -> PolarsResult<()> {
47 let Some(v) = v.downcast_ref::<T>() else {
48 polars_bail!(SchemaMismatch: "cannot downcast any in ObjectBuilder");
49 };
50 self.append_value(v.clone());
51 Ok(())
52 }
53
54 #[inline]
55 pub fn append_option(&mut self, opt: Option<T>) {
56 match opt {
57 Some(s) => self.append_value(s),
58 None => self.append_null(),
59 }
60 }
61
62 pub fn finish(mut self) -> ObjectChunked<T> {
63 let null_bitmap: Option<Bitmap> = self.bitmask_builder.into_opt_validity();
64
65 let len = self.values.len();
66 let null_count = null_bitmap
67 .as_ref()
68 .map(|validity| validity.unset_bits())
69 .unwrap_or(0);
70
71 let arr = Box::new(ObjectArray {
72 dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
73 values: self.values.into(),
74 validity: null_bitmap,
75 });
76
77 self.field.dtype = get_object_type::<T>();
78
79 unsafe { ChunkedArray::new_with_dims(Arc::new(self.field), vec![arr], len, null_count) }
80 }
81}
82
/// The logical Polars dtype for object columns holding values of type `T`.
pub(crate) fn get_object_type<T: PolarsObject>() -> DataType {
    DataType::Object(T::type_name())
}
88
89impl<T> Default for ObjectChunkedBuilder<T>
90where
91 T: PolarsObject,
92{
93 fn default() -> Self {
94 ObjectChunkedBuilder::new(PlSmallStr::EMPTY, 0)
95 }
96}
97
98impl<T> NewChunkedArray<ObjectType<T>, T> for ObjectChunked<T>
99where
100 T: PolarsObject,
101{
102 fn from_slice(name: PlSmallStr, v: &[T]) -> Self {
103 Self::from_iter_values(name, v.iter().cloned())
104 }
105
106 fn from_slice_options(name: PlSmallStr, opt_v: &[Option<T>]) -> Self {
107 let mut builder = ObjectChunkedBuilder::<T>::new(name, opt_v.len());
108 opt_v
109 .iter()
110 .cloned()
111 .for_each(|opt| builder.append_option(opt));
112 builder.finish()
113 }
114
115 fn from_iter_options(
116 name: PlSmallStr,
117 it: impl Iterator<Item = Option<T>>,
118 ) -> ObjectChunked<T> {
119 let mut builder = ObjectChunkedBuilder::new(name, get_iter_capacity(&it));
120 it.for_each(|opt| builder.append_option(opt));
121 builder.finish()
122 }
123
124 fn from_iter_values(name: PlSmallStr, it: impl Iterator<Item = T>) -> ObjectChunked<T> {
126 let mut builder = ObjectChunkedBuilder::new(name, get_iter_capacity(&it));
127 it.for_each(|v| builder.append_value(v));
128 builder.finish()
129 }
130}
131
132impl<T> ObjectChunked<T>
133where
134 T: PolarsObject,
135{
136 pub fn new_from_vec(name: PlSmallStr, v: Vec<T>) -> Self {
137 let field = Arc::new(Field::new(name, DataType::Object(T::type_name())));
138 let len = v.len();
139 let arr = Box::new(ObjectArray {
140 dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
141 values: v.into(),
142 validity: None,
143 });
144
145 unsafe { ObjectChunked::new_with_dims(field, vec![arr], len, 0) }
146 }
147
148 pub fn new_from_vec_and_validity(
149 name: PlSmallStr,
150 v: Vec<T>,
151 validity: Option<Bitmap>,
152 ) -> Self {
153 let field = Arc::new(Field::new(name, DataType::Object(T::type_name())));
154 let len = v.len();
155 let null_count = validity.as_ref().map(|v| v.unset_bits()).unwrap_or(0);
156 let arr = Box::new(ObjectArray {
157 dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
158 values: v.into(),
159 validity,
160 });
161
162 unsafe { ObjectChunked::new_with_dims(field, vec![arr], len, null_count) }
163 }
164
165 pub fn new_empty(name: PlSmallStr) -> Self {
166 Self::new_from_vec(name, vec![])
167 }
168}
169
170pub(crate) fn object_series_to_arrow_array(s: &Series) -> ArrayRef {
172 let list_s = unsafe {
177 let groups = vec![[0, s.len() as IdxSize]];
178 s.agg_list(&GroupsType::new_slice(groups, false, true))
179 };
180 let arr = &list_s.chunks()[0];
181 let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();
182
183 let mut arr: Box<dyn Array> = arr.values().clone();
184
185 if let ArrowDataType::Extension(ext_type) = arr.dtype()
186 && let ExtensionType {
187 name,
188 inner: ArrowDataType::FixedSizeBinary(8),
189 metadata: Some(_),
190 } = ext_type.as_ref()
191 && name == POLARS_OBJECT_EXTENSION_NAME
192 {
193 *arr.dtype_mut() = ArrowDataType::FixedSizeBinary(8);
194 } else {
195 panic!()
196 }
197
198 arr
199}
200
201impl<T: PolarsObject> ArrayBuilder for ObjectChunkedBuilder<T> {
202 fn dtype(&self) -> &ArrowDataType {
203 &ArrowDataType::FixedSizeBinary(size_of::<T>())
204 }
205
206 fn reserve(&mut self, additional: usize) {
207 self.bitmask_builder.reserve(additional);
208 self.values.reserve(additional);
209 }
210
211 fn freeze(self) -> Box<dyn Array> {
212 Box::new(ObjectArray {
213 dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
214 values: self.values.into(),
215 validity: self.bitmask_builder.into_opt_validity(),
216 })
217 }
218
219 fn freeze_reset(&mut self) -> Box<dyn Array> {
220 Box::new(ObjectArray {
221 dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
222 values: core::mem::take(&mut self.values).into(),
223 validity: core::mem::take(&mut self.bitmask_builder).into_opt_validity(),
224 })
225 }
226
227 fn len(&self) -> usize {
228 self.values.len()
229 }
230
231 fn extend_nulls(&mut self, length: usize) {
232 run_with_gil(|| {
233 self.values.resize(self.values.len() + length, T::default());
234 });
235 self.bitmask_builder.extend_constant(length, false);
236 }
237
238 fn subslice_extend(
239 &mut self,
240 other: &dyn Array,
241 start: usize,
242 length: usize,
243 _share: ShareStrategy,
244 ) {
245 run_with_gil(|| {
246 let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
247 self.values
248 .extend_from_slice(&other.values[start..start + length]);
249 self.bitmask_builder
250 .subslice_extend_from_opt_validity(other.validity(), start, length);
251 })
252 }
253
254 fn subslice_extend_repeated(
255 &mut self,
256 other: &dyn Array,
257 start: usize,
258 length: usize,
259 repeats: usize,
260 _share: ShareStrategy,
261 ) {
262 run_with_gil(|| {
263 for _ in 0..repeats {
264 let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
265 self.values
266 .extend_from_slice(&other.values[start..start + length]);
267 self.bitmask_builder.subslice_extend_from_opt_validity(
268 other.validity(),
269 start,
270 length,
271 );
272 }
273 })
274 }
275
276 fn subslice_extend_each_repeated(
277 &mut self,
278 other: &dyn Array,
279 start: usize,
280 length: usize,
281 repeats: usize,
282 _share: ShareStrategy,
283 ) {
284 run_with_gil(|| {
285 let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
286
287 self.values.reserve(length * repeats);
288 for value in other.values[start..start + length].iter() {
289 unsafe {
290 for _ in 0..repeats {
291 self.values.push_unchecked(value.clone());
292 }
293 }
294 }
295 });
296
297 self.bitmask_builder
298 .subslice_extend_each_repeated_from_opt_validity(
299 other.validity(),
300 start,
301 length,
302 repeats,
303 );
304 }
305
306 unsafe fn gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], _share: ShareStrategy) {
307 run_with_gil(|| {
308 let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
309 let other_values_slice = other.values.as_slice();
310 self.values.extend(
311 idxs.iter()
312 .map(|idx| other_values_slice.get_unchecked(*idx as usize).clone()),
313 );
314 });
315 self.bitmask_builder
316 .gather_extend_from_opt_validity(other.validity(), idxs, other.len());
317 }
318
319 fn opt_gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], _share: ShareStrategy) {
320 run_with_gil(|| {
321 let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
322 let other_values_slice = other.values.as_slice();
323 self.values.reserve(idxs.len());
324 unsafe {
325 for idx in idxs {
326 let val = if (*idx as usize) < other.len() {
327 other_values_slice.get_unchecked(*idx as usize).clone()
328 } else {
329 T::default()
330 };
331 self.values.push_unchecked(val);
332 }
333 }
334 });
335 self.bitmask_builder.opt_gather_extend_from_opt_validity(
336 other.validity(),
337 idxs,
338 other.len(),
339 );
340 }
341}