polars_core/chunked_array/object/builder.rs

use arrow::array::builder::{ArrayBuilder, ShareStrategy};
use arrow::bitmap::BitmapBuilder;
use polars_utils::vec::PushUnchecked;

use super::*;
use crate::utils::get_iter_capacity;
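/// Builder for `ObjectChunked<T>`: collects the values in a `Vec<T>` and the
/// null mask in a `BitmapBuilder`.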
pub struct ObjectChunkedBuilder<T> {
    field: Field,
    bitmask_builder: BitmapBuilder,
    values: Vec<T>,
}

impl<T> ObjectChunkedBuilder<T>
where
    T: PolarsObject,
{
    pub fn field(&self) -> &Field {
        &self.field
    }
    pub fn new(name: PlSmallStr, capacity: usize) -> Self {
        ObjectChunkedBuilder {
            field: Field::new(name, DataType::Object(T::type_name())),
            values: Vec::with_capacity(capacity),
            bitmask_builder: BitmapBuilder::with_capacity(capacity),
        }
    }

    #[inline]
    pub fn append_value(&mut self, v: T) {
        self.values.push(v);
        self.bitmask_builder.push(true);
    }
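    // Nulls still occupy a slot in `values`: a `T::default()` placeholder keeps
    // the buffer aligned with the validity bitmap, where the entry is marked unset.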
    #[inline]
    pub fn append_null(&mut self) {
        self.values.push(T::default());
        self.bitmask_builder.push(false);
    }

    #[inline]
    pub fn append_value_from_any(&mut self, v: &dyn Any) -> PolarsResult<()> {
        let Some(v) = v.downcast_ref::<T>() else {
            polars_bail!(SchemaMismatch: "cannot downcast any in ObjectBuilder");
        };
        self.append_value(v.clone());
        Ok(())
    }

    #[inline]
    pub fn append_option(&mut self, opt: Option<T>) {
        match opt {
            Some(s) => self.append_value(s),
            None => self.append_null(),
        }
    }
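    /// Consumes the builder and returns a single-chunk `ObjectChunked<T>` backed by
    /// an `ObjectArray` with Arrow dtype `FixedSizeBinary(size_of::<T>())`.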
    pub fn finish(mut self) -> ObjectChunked<T> {
        let null_bitmap: Option<Bitmap> = self.bitmask_builder.into_opt_validity();

        let len = self.values.len();
        let null_count = null_bitmap
            .as_ref()
            .map(|validity| validity.unset_bits())
            .unwrap_or(0);

        let arr = Box::new(ObjectArray {
            dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
            values: self.values.into(),
            validity: null_bitmap,
        });

        self.field.dtype = get_object_type::<T>();

        unsafe { ChunkedArray::new_with_dims(Arc::new(self.field), vec![arr], len, null_count) }
    }
}
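// A minimal usage sketch of the builder above (illustrative only; `MyObj` stands in
// for any type implementing `PolarsObject` and is not defined in this module):
//
//     let mut builder = ObjectChunkedBuilder::<MyObj>::new("objs".into(), 2);
//     builder.append_value(MyObj::default());
//     builder.append_null();
//     let ca: ObjectChunked<MyObj> = builder.finish();
//     assert_eq!(ca.null_count(), 1);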
pub(crate) fn get_object_type<T: PolarsObject>() -> DataType {
    DataType::Object(T::type_name())
}

impl<T> Default for ObjectChunkedBuilder<T>
where
    T: PolarsObject,
{
    fn default() -> Self {
        ObjectChunkedBuilder::new(PlSmallStr::EMPTY, 0)
    }
}

impl<T> NewChunkedArray<ObjectType<T>, T> for ObjectChunked<T>
where
    T: PolarsObject,
{
    fn from_slice(name: PlSmallStr, v: &[T]) -> Self {
        Self::from_iter_values(name, v.iter().cloned())
    }

    fn from_slice_options(name: PlSmallStr, opt_v: &[Option<T>]) -> Self {
        let mut builder = ObjectChunkedBuilder::<T>::new(name, opt_v.len());
        opt_v
            .iter()
            .cloned()
            .for_each(|opt| builder.append_option(opt));
        builder.finish()
    }

    fn from_iter_options(
        name: PlSmallStr,
        it: impl Iterator<Item = Option<T>>,
    ) -> ObjectChunked<T> {
        let mut builder = ObjectChunkedBuilder::new(name, get_iter_capacity(&it));
        it.for_each(|opt| builder.append_option(opt));
        builder.finish()
    }

    fn from_iter_values(name: PlSmallStr, it: impl Iterator<Item = T>) -> ObjectChunked<T> {
        let mut builder = ObjectChunkedBuilder::new(name, get_iter_capacity(&it));
        it.for_each(|v| builder.append_value(v));
        builder.finish()
    }
}

impl<T> ObjectChunked<T>
where
    T: PolarsObject,
{
    pub fn new_from_vec(name: PlSmallStr, v: Vec<T>) -> Self {
        let field = Arc::new(Field::new(name, DataType::Object(T::type_name())));
        let len = v.len();
        let arr = Box::new(ObjectArray {
            dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
            values: v.into(),
            validity: None,
        });

        unsafe { ObjectChunked::new_with_dims(field, vec![arr], len, 0) }
    }

    pub fn new_from_vec_and_validity(
        name: PlSmallStr,
        v: Vec<T>,
        validity: Option<Bitmap>,
    ) -> Self {
        let field = Arc::new(Field::new(name, DataType::Object(T::type_name())));
        let len = v.len();
        let null_count = validity.as_ref().map(|v| v.unset_bits()).unwrap_or(0);
        let arr = Box::new(ObjectArray {
            dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
            values: v.into(),
            validity,
        });

        unsafe { ObjectChunked::new_with_dims(field, vec![arr], len, null_count) }
    }

    pub fn new_empty(name: PlSmallStr) -> Self {
        Self::new_from_vec(name, vec![])
    }
}
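// Converts an object Series to its Arrow representation by aggregating the whole
// Series into a single list group and unwrapping the inner values array of the
// resulting `ListArray<i64>`.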
pub(crate) fn object_series_to_arrow_array(s: &Series) -> ArrayRef {
    let list_s = unsafe {
        s.agg_list(&GroupsType::Slice {
            groups: vec![[0, s.len() as IdxSize]],
            overlapping: false,
        })
    };
    let arr = &list_s.chunks()[0];
    let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();
    arr.values().to_boxed()
}
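// Generic `ArrayBuilder` implementation; object values are surfaced to Arrow with a
// `FixedSizeBinary` dtype of `size_of::<T>()` bytes per element.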
impl<T: PolarsObject> ArrayBuilder for ObjectChunkedBuilder<T> {
    fn dtype(&self) -> &ArrowDataType {
        &ArrowDataType::FixedSizeBinary(size_of::<T>())
    }

    fn reserve(&mut self, additional: usize) {
        self.bitmask_builder.reserve(additional);
        self.values.reserve(additional);
    }

    fn freeze(self) -> Box<dyn Array> {
        Box::new(ObjectArray {
            dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
            values: self.values.into(),
            validity: self.bitmask_builder.into_opt_validity(),
        })
    }
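    // Like `freeze`, but takes the internal buffers so the builder stays usable
    // (and empty) afterwards.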
    fn freeze_reset(&mut self) -> Box<dyn Array> {
        Box::new(ObjectArray {
            dtype: ArrowDataType::FixedSizeBinary(size_of::<T>()),
            values: core::mem::take(&mut self.values).into(),
            validity: core::mem::take(&mut self.bitmask_builder).into_opt_validity(),
        })
    }

    fn len(&self) -> usize {
        self.values.len()
    }

    fn extend_nulls(&mut self, length: usize) {
        self.values.resize(self.values.len() + length, T::default());
        self.bitmask_builder.extend_constant(length, false);
    }
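    // Appends `length` values of `other` starting at `start`, together with the
    // matching validity bits.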
    fn subslice_extend(
        &mut self,
        other: &dyn Array,
        start: usize,
        length: usize,
        _share: ShareStrategy,
    ) {
        let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
        self.values
            .extend_from_slice(&other.values[start..start + length]);
        self.bitmask_builder
            .subslice_extend_from_opt_validity(other.validity(), start, length);
    }

    fn subslice_extend_repeated(
        &mut self,
        other: &dyn Array,
        start: usize,
        length: usize,
        repeats: usize,
        share: ShareStrategy,
    ) {
        for _ in 0..repeats {
            self.subslice_extend(other, start, length, share)
        }
    }
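    // Element-wise repetition: each value in the subslice is pushed `repeats` times,
    // unlike `subslice_extend_repeated`, which repeats the whole subslice.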
    fn subslice_extend_each_repeated(
        &mut self,
        other: &dyn Array,
        start: usize,
        length: usize,
        repeats: usize,
        _share: ShareStrategy,
    ) {
        let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();

        self.values.reserve(length * repeats);
        for value in other.values[start..start + length].iter() {
            unsafe {
                for _ in 0..repeats {
                    self.values.push_unchecked(value.clone());
                }
            }
        }
        self.bitmask_builder
            .subslice_extend_each_repeated_from_opt_validity(
                other.validity(),
                start,
                length,
                repeats,
            );
    }
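    // SAFETY: the caller must guarantee that every index in `idxs` is in bounds for
    // `other`; values are read with `get_unchecked`.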
    unsafe fn gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], _share: ShareStrategy) {
        let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
        let other_values_slice = other.values.as_slice();
        self.values.extend(
            idxs.iter()
                .map(|idx| other_values_slice.get_unchecked(*idx as usize).clone()),
        );
        self.bitmask_builder
            .gather_extend_from_opt_validity(other.validity(), idxs, other.len());
    }
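    // Bounds-checked gather: out-of-range indices push `T::default()` into the values
    // buffer and are treated as null by the validity builder.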
    fn opt_gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], _share: ShareStrategy) {
        let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
        let other_values_slice = other.values.as_slice();
        self.values.reserve(idxs.len());
        unsafe {
            for idx in idxs {
                let val = if (*idx as usize) < other.len() {
                    other_values_slice.get_unchecked(*idx as usize).clone()
                } else {
                    T::default()
                };
                self.values.push_unchecked(val);
            }
        }
        self.bitmask_builder.opt_gather_extend_from_opt_validity(
            other.validity(),
            idxs,
            other.len(),
        );
    }
}