polars_core/chunked_array/object/
builder.rs1use arrow::array::builder::{ArrayBuilder, ShareStrategy};
2use arrow::bitmap::BitmapBuilder;
3use polars_utils::vec::PushUnchecked;
4
5use super::*;
6use crate::utils::get_iter_capacity;
7
8pub struct ObjectChunkedBuilder<T> {
9 field: Field,
10 bitmask_builder: BitmapBuilder,
11 values: Vec<T>,
12}
13
14impl<T> ObjectChunkedBuilder<T>
15where
16 T: PolarsObject,
17{
18 pub fn field(&self) -> &Field {
19 &self.field
20 }
21 pub fn new(name: PlSmallStr, capacity: usize) -> Self {
22 ObjectChunkedBuilder {
23 field: Field::new(name, DataType::Object(T::type_name())),
24 values: Vec::with_capacity(capacity),
25 bitmask_builder: BitmapBuilder::with_capacity(capacity),
26 }
27 }
28
29 #[inline]
31 pub fn append_value(&mut self, v: T) {
32 self.values.push(v);
33 self.bitmask_builder.push(true);
34 }
35
36 #[inline]
38 pub fn append_null(&mut self) {
39 self.values.push(T::default());
40 self.bitmask_builder.push(false);
41 }
42
43 #[inline]
44 pub fn append_value_from_any(&mut self, v: &dyn Any) -> PolarsResult<()> {
45 let Some(v) = v.downcast_ref::<T>() else {
46 polars_bail!(SchemaMismatch: "cannot downcast any in ObjectBuilder");
47 };
48 self.append_value(v.clone());
49 Ok(())
50 }
51
52 #[inline]
53 pub fn append_option(&mut self, opt: Option<T>) {
54 match opt {
55 Some(s) => self.append_value(s),
56 None => self.append_null(),
57 }
58 }
59
60 pub fn finish(mut self) -> ObjectChunked<T> {
61 let null_bitmap: Option<Bitmap> = self.bitmask_builder.into_opt_validity();
62
63 let len = self.values.len();
64 let null_count = null_bitmap
65 .as_ref()
66 .map(|validity| validity.unset_bits())
67 .unwrap_or(0);
68
69 let arr = Box::new(ObjectArray {
70 values: self.values.into(),
71 validity: null_bitmap,
72 });
73
74 self.field.dtype = get_object_type::<T>();
75
76 unsafe { ChunkedArray::new_with_dims(Arc::new(self.field), vec![arr], len, null_count) }
77 }
78}
79
80pub(crate) fn get_object_type<T: PolarsObject>() -> DataType {
83 DataType::Object(T::type_name())
84}
85
86impl<T> Default for ObjectChunkedBuilder<T>
87where
88 T: PolarsObject,
89{
90 fn default() -> Self {
91 ObjectChunkedBuilder::new(PlSmallStr::EMPTY, 0)
92 }
93}
94
95impl<T> NewChunkedArray<ObjectType<T>, T> for ObjectChunked<T>
96where
97 T: PolarsObject,
98{
99 fn from_slice(name: PlSmallStr, v: &[T]) -> Self {
100 Self::from_iter_values(name, v.iter().cloned())
101 }
102
103 fn from_slice_options(name: PlSmallStr, opt_v: &[Option<T>]) -> Self {
104 let mut builder = ObjectChunkedBuilder::<T>::new(name, opt_v.len());
105 opt_v
106 .iter()
107 .cloned()
108 .for_each(|opt| builder.append_option(opt));
109 builder.finish()
110 }
111
112 fn from_iter_options(
113 name: PlSmallStr,
114 it: impl Iterator<Item = Option<T>>,
115 ) -> ObjectChunked<T> {
116 let mut builder = ObjectChunkedBuilder::new(name, get_iter_capacity(&it));
117 it.for_each(|opt| builder.append_option(opt));
118 builder.finish()
119 }
120
121 fn from_iter_values(name: PlSmallStr, it: impl Iterator<Item = T>) -> ObjectChunked<T> {
123 let mut builder = ObjectChunkedBuilder::new(name, get_iter_capacity(&it));
124 it.for_each(|v| builder.append_value(v));
125 builder.finish()
126 }
127}
128
129impl<T> ObjectChunked<T>
130where
131 T: PolarsObject,
132{
133 pub fn new_from_vec(name: PlSmallStr, v: Vec<T>) -> Self {
134 let field = Arc::new(Field::new(name, DataType::Object(T::type_name())));
135 let len = v.len();
136 let arr = Box::new(ObjectArray {
137 values: v.into(),
138 validity: None,
139 });
140
141 unsafe { ObjectChunked::new_with_dims(field, vec![arr], len, 0) }
142 }
143
144 pub fn new_from_vec_and_validity(
145 name: PlSmallStr,
146 v: Vec<T>,
147 validity: Option<Bitmap>,
148 ) -> Self {
149 let field = Arc::new(Field::new(name, DataType::Object(T::type_name())));
150 let len = v.len();
151 let null_count = validity.as_ref().map(|v| v.unset_bits()).unwrap_or(0);
152 let arr = Box::new(ObjectArray {
153 values: v.into(),
154 validity,
155 });
156
157 unsafe { ObjectChunked::new_with_dims(field, vec![arr], len, null_count) }
158 }
159
160 pub fn new_empty(name: PlSmallStr) -> Self {
161 Self::new_from_vec(name, vec![])
162 }
163}
164
165pub(crate) fn object_series_to_arrow_array(s: &Series) -> ArrayRef {
167 let list_s = unsafe {
172 s.agg_list(&GroupsType::Slice {
173 groups: vec![[0, s.len() as IdxSize]],
174 rolling: false,
175 })
176 };
177 let arr = &list_s.chunks()[0];
178 let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();
179 arr.values().to_boxed()
180}
181
182impl<T: PolarsObject> ArrayBuilder for ObjectChunkedBuilder<T> {
183 fn dtype(&self) -> &ArrowDataType {
184 &ArrowDataType::FixedSizeBinary(size_of::<T>())
185 }
186
187 fn reserve(&mut self, additional: usize) {
188 self.bitmask_builder.reserve(additional);
189 self.values.reserve(additional);
190 }
191
192 fn freeze(self) -> Box<dyn Array> {
193 Box::new(ObjectArray {
194 values: self.values.into(),
195 validity: self.bitmask_builder.into_opt_validity(),
196 })
197 }
198
199 fn freeze_reset(&mut self) -> Box<dyn Array> {
200 Box::new(ObjectArray {
201 values: core::mem::take(&mut self.values).into(),
202 validity: core::mem::take(&mut self.bitmask_builder).into_opt_validity(),
203 })
204 }
205
206 fn len(&self) -> usize {
207 self.values.len()
208 }
209
210 fn extend_nulls(&mut self, length: usize) {
211 self.values.resize(self.values.len() + length, T::default());
212 self.bitmask_builder.extend_constant(length, false);
213 }
214
215 fn subslice_extend(
216 &mut self,
217 other: &dyn Array,
218 start: usize,
219 length: usize,
220 _share: ShareStrategy,
221 ) {
222 let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
223 self.values
224 .extend_from_slice(&other.values[start..start + length]);
225 self.bitmask_builder
226 .subslice_extend_from_opt_validity(other.validity(), start, length);
227 }
228
229 fn subslice_extend_repeated(
230 &mut self,
231 other: &dyn Array,
232 start: usize,
233 length: usize,
234 repeats: usize,
235 share: ShareStrategy,
236 ) {
237 for _ in 0..repeats {
238 self.subslice_extend(other, start, length, share)
239 }
240 }
241
242 unsafe fn gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], _share: ShareStrategy) {
243 let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
244 let other_values_slice = other.values.as_slice();
245 self.values.extend(
246 idxs.iter()
247 .map(|idx| other_values_slice.get_unchecked(*idx as usize).clone()),
248 );
249 self.bitmask_builder
250 .gather_extend_from_opt_validity(other.validity(), idxs, other.len());
251 }
252
253 fn opt_gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], _share: ShareStrategy) {
254 let other: &ObjectArray<T> = other.as_any().downcast_ref().unwrap();
255 let other_values_slice = other.values.as_slice();
256 self.values.reserve(idxs.len());
257 unsafe {
258 for idx in idxs {
259 let val = if (*idx as usize) < other.len() {
260 other_values_slice.get_unchecked(*idx as usize).clone()
261 } else {
262 T::default()
263 };
264 self.values.push_unchecked(val);
265 }
266 }
267 self.bitmask_builder.opt_gather_extend_from_opt_validity(
268 other.validity(),
269 idxs,
270 other.len(),
271 );
272 }
273}