1#[cfg(any(
2 feature = "dtype-datetime",
3 feature = "dtype-date",
4 feature = "dtype-duration",
5 feature = "dtype-time"
6))]
7use polars_compute::cast::cast_default;
8use polars_compute::cast::cast_unchecked;
9
10use crate::prelude::*;
11
12impl Series {
13 #[inline]
15 pub fn array_ref(&self, chunk_idx: usize) -> &ArrayRef {
16 &self.chunks()[chunk_idx] as &ArrayRef
17 }
18
19 pub fn to_arrow(&self, chunk_idx: usize, compat_level: CompatLevel) -> ArrayRef {
23 self.to_arrow_with_field(chunk_idx, compat_level, None)
24 .unwrap()
25 }
26
27 pub fn to_arrow_with_field(
28 &self,
29 chunk_idx: usize,
30 compat_level: CompatLevel,
31 output_arrow_field: Option<&ArrowField>,
32 ) -> PolarsResult<ArrayRef> {
33 ToArrowConverter {
34 compat_level,
35 #[cfg(feature = "dtype-categorical")]
36 categorical_converter: {
37 let mut categorical_converter =
38 crate::series::categorical_to_arrow::CategoricalToArrowConverter {
39 converters: Default::default(),
40 persist_remap: false,
41 output_keys_only: false,
42 };
43
44 categorical_converter.initialize(self.dtype());
45
46 categorical_converter
47 },
48 }
49 .array_to_arrow(
50 self.chunks().get(chunk_idx).unwrap().as_ref(),
51 self.dtype(),
52 output_arrow_field,
53 )
54 }
55}
56
/// State carried through a (possibly recursive) conversion of an array to its Arrow
/// representation; see `ToArrowConverter::array_to_arrow`.
pub struct ToArrowConverter {
    /// Compatibility level of the produced Arrow data. Among other things it decides
    /// whether `String` / `Binary` arrays are passed through unchanged (level >= 1) or
    /// cast to `LargeUtf8` / `LargeBinary`.
    pub compat_level: CompatLevel,
    /// Converter that `Categorical` and `Enum` arrays are delegated to.
    #[cfg(feature = "dtype-categorical")]
    pub categorical_converter: crate::series::categorical_to_arrow::CategoricalToArrowConverter,
}
62
63impl ToArrowConverter {
64 pub fn array_to_arrow(
66 &mut self,
67 array: &dyn Array,
68 dtype: &DataType,
69 output_arrow_field: Option<&ArrowField>,
70 ) -> PolarsResult<Box<dyn Array>> {
71 let out = self.array_to_arrow_impl(array, dtype, output_arrow_field)?;
72
73 if let Some(field) = output_arrow_field {
74 polars_ensure!(
75 field.is_nullable || !out.has_nulls(),
76 SchemaMismatch:
77 "to_arrow(): nullable is false but array contained {} NULLs (arrow field: {:?})",
78 out.null_count(), field,
79 );
80
81 if (!field.dtype().is_nested()
83 || matches!(field.dtype(), ArrowDataType::Dictionary(..)))
84 && out.dtype() != field.dtype()
85 {
86 polars_bail!(
87 SchemaMismatch:
88 "to_arrow(): provided dtype ({:?}) does not match output dtype ({:?})",
89 field.dtype(), out.dtype()
90 )
91 }
92 }
93
94 Ok(out)
95 }
96
97 fn array_to_arrow_impl(
98 &mut self,
99 array: &dyn Array,
100 dtype: &DataType,
101 output_arrow_field: Option<&ArrowField>,
102 ) -> PolarsResult<Box<dyn Array>> {
103 Ok(match dtype {
104 #[cfg(feature = "dtype-struct")]
106 DataType::Struct(fields) => {
107 use arrow::array::StructArray;
108 let arr: &StructArray = array.as_any().downcast_ref().unwrap();
109
110 let expected_output_fields: &[ArrowField] = match output_arrow_field {
111 Some(
112 field @ ArrowField {
113 name: _,
114 dtype: ArrowDataType::Struct(fields),
115 is_nullable: _,
116 metadata: _,
117 },
118 ) if fields.len() == arr.fields().len()
119 && fields
120 .iter()
121 .zip(arr.fields())
122 .all(|(l, r)| l.name() == r.name()) =>
123 {
124 fields.as_slice()
125 },
126 Some(ArrowField { dtype, .. }) => polars_bail!(
127 SchemaMismatch:
128 "to_arrow(): struct dtype mismatch: {:?} != expected: {:?}",
129 dtype, arr.dtype(),
130 ),
131 None => &[],
132 };
133
134 let values: Vec<ArrayRef> = arr
135 .values()
136 .iter()
137 .zip(fields.iter())
138 .enumerate()
139 .map(|(i, (values, field))| {
140 self.array_to_arrow(
141 values.as_ref(),
142 field.dtype(),
143 expected_output_fields.get(i),
144 )
145 })
146 .collect::<PolarsResult<_>>()?;
147
148 let converted_arrow_fields: Vec<ArrowField> = fields
149 .iter()
150 .map(|x| (x.name().clone(), x.dtype()))
151 .zip(values.iter().map(|x| x.dtype()))
152 .enumerate()
153 .map(|(i, ((name, dtype), converted_arrow_dtype))| {
154 create_arrow_field(
155 name,
156 dtype,
157 converted_arrow_dtype,
158 self.compat_level,
159 opt_field_is_nullable(expected_output_fields.get(i)),
160 )
161 })
162 .collect();
163
164 StructArray::new(
165 ArrowDataType::Struct(converted_arrow_fields),
166 arr.len(),
167 values,
168 arr.validity().cloned(),
169 )
170 .boxed()
171 },
172 DataType::List(inner) => {
173 let arr: &ListArray<i64> = array.as_any().downcast_ref().unwrap();
174
175 let expected_inner_output_field: Option<&ArrowField> = match output_arrow_field {
176 Some(ArrowField {
177 name: _,
178 dtype: ArrowDataType::LargeList(inner_field),
179 is_nullable: _,
180 metadata: _,
181 }) if inner_field.name() == &LIST_VALUES_NAME => Some(inner_field),
182 Some(ArrowField { dtype, .. }) => polars_bail!(
183 SchemaMismatch:
184 "to_arrow(): list dtype mismatch: {:?} != expected: {:?}",
185 dtype, arr.dtype(),
186 ),
187 None => None,
188 };
189
190 let new_values =
191 self.array_to_arrow(arr.values().as_ref(), inner, expected_inner_output_field)?;
192
193 let arr = ListArray::<i64>::new(
194 ArrowDataType::LargeList(Box::new(create_arrow_field(
195 LIST_VALUES_NAME,
196 inner.as_ref(),
197 new_values.dtype(),
198 self.compat_level,
199 opt_field_is_nullable(expected_inner_output_field),
200 ))),
201 arr.offsets().clone(),
202 new_values,
203 arr.validity().cloned(),
204 );
205 Box::new(arr)
206 },
207 #[cfg(feature = "dtype-array")]
208 DataType::Array(inner, width) => {
209 use arrow::array::FixedSizeListArray;
210
211 let arr: &FixedSizeListArray = array.as_any().downcast_ref().unwrap();
212
213 let expected_inner_output_field: Option<&ArrowField> = match output_arrow_field {
214 Some(
215 field @ ArrowField {
216 name: _,
217 dtype: ArrowDataType::FixedSizeList(inner_field, width),
218 is_nullable: _,
219 metadata: _,
220 },
221 ) if *width == arr.size() && inner_field.name() == &LIST_VALUES_NAME => {
222 Some(inner_field)
223 },
224 Some(ArrowField { dtype, .. }) => polars_bail!(
225 SchemaMismatch:
226 "to_arrow(): fixed-size list dtype mismatch: {:?} != expected: {:?}",
227 dtype, arr.dtype(),
228 ),
229 None => None,
230 };
231
232 let new_values =
233 self.array_to_arrow(arr.values().as_ref(), inner, expected_inner_output_field)?;
234
235 let arr = FixedSizeListArray::new(
236 ArrowDataType::FixedSizeList(
237 Box::new(create_arrow_field(
238 LIST_VALUES_NAME,
239 inner.as_ref(),
240 new_values.dtype(),
241 self.compat_level,
242 opt_field_is_nullable(expected_inner_output_field),
243 )),
244 *width,
245 ),
246 arr.len(),
247 new_values,
248 arr.validity().cloned(),
249 );
250 Box::new(arr)
251 },
252 #[cfg(feature = "dtype-categorical")]
253 DataType::Categorical(_, _) | DataType::Enum(_, _) => self
254 .categorical_converter
255 .array_to_arrow(array, dtype, self.compat_level),
256 #[cfg(feature = "dtype-date")]
257 DataType::Date => {
258 cast_default(array, &DataType::Date.to_arrow(self.compat_level)).unwrap()
259 },
260 #[cfg(feature = "dtype-datetime")]
261 DataType::Datetime(_, _) => {
262 cast_default(array, &dtype.to_arrow(self.compat_level)).unwrap()
263 },
264 #[cfg(feature = "dtype-duration")]
265 DataType::Duration(_) => {
266 cast_default(array, &dtype.to_arrow(self.compat_level)).unwrap()
267 },
268 #[cfg(feature = "dtype-time")]
269 DataType::Time => {
270 cast_default(array, &DataType::Time.to_arrow(self.compat_level)).unwrap()
271 },
272 #[cfg(feature = "dtype-decimal")]
273 DataType::Decimal(_, _) => array
274 .as_any()
275 .downcast_ref::<arrow::array::PrimitiveArray<i128>>()
276 .unwrap()
277 .clone()
278 .to(dtype.to_arrow(CompatLevel::newest()))
279 .to_boxed(),
280 #[cfg(feature = "object")]
281 DataType::Object(_) => {
282 use crate::chunked_array::object::builder::object_series_to_arrow_array;
283 object_series_to_arrow_array(&unsafe {
284 Series::from_chunks_and_dtype_unchecked(
285 PlSmallStr::EMPTY,
286 vec![array.to_boxed()],
287 dtype,
288 )
289 })
290 },
291 DataType::String => {
292 if self.compat_level.0 >= 1 {
293 array.to_boxed()
294 } else {
295 cast_unchecked(array, &ArrowDataType::LargeUtf8).unwrap()
296 }
297 },
298 DataType::Binary => {
299 if self.compat_level.0 >= 1 {
300 array.to_boxed()
301 } else {
302 cast_unchecked(array, &ArrowDataType::LargeBinary).unwrap()
303 }
304 },
305 #[cfg(feature = "dtype-extension")]
306 DataType::Extension(typ, storage_dtype) => {
307 use arrow::datatypes::ExtensionType;
308
309 let output_ext_name: PlSmallStr = typ.name().into();
310 let output_ext_md: Option<PlSmallStr> =
311 typ.serialize_metadata().map(|md| md.into());
312
313 let expected_inner_output_field: Option<ArrowField> = match output_arrow_field {
314 Some(
315 field @ ArrowField {
316 name: _,
317 dtype: ArrowDataType::Extension(ext_type),
318 is_nullable: _,
319 metadata: _,
320 },
321 ) if {
322 let ExtensionType {
323 name,
324 inner: _,
325 metadata,
326 } = ext_type.as_ref();
327
328 name == &output_ext_name
329 && metadata.as_ref().filter(|x| !x.is_empty())
330 == output_ext_md.as_ref().filter(|x| !x.is_empty())
331 } =>
332 {
333 let ExtensionType {
334 name,
335 inner,
336 metadata: _,
337 } = ext_type.as_ref();
338
339 Some(create_arrow_field(
340 name.clone(),
341 storage_dtype.as_ref(),
342 inner,
343 self.compat_level,
344 true,
345 ))
346 },
347 Some(ArrowField { dtype, .. }) => {
348 let expected_inner = self
349 .array_to_arrow(array.sliced(0, 0).as_ref(), storage_dtype, None)
350 .unwrap()
351 .dtype()
352 .clone();
353
354 let expected = ArrowDataType::Extension(Box::new(ExtensionType {
355 name: output_ext_name,
356 inner: expected_inner,
357 metadata: output_ext_md,
358 }));
359
360 polars_bail!(
361 SchemaMismatch:
362 "to_arrow(): extension dtype mismatch: {:?} != expected: {:?}",
363 dtype, expected,
364 )
365 },
366 None => None,
367 };
368
369 let mut arr = self.array_to_arrow(
370 array,
371 storage_dtype,
372 expected_inner_output_field.as_ref(),
373 )?;
374
375 *arr.dtype_mut() = ArrowDataType::Extension(Box::new(ExtensionType {
376 name: output_ext_name,
377 inner: arr.dtype().clone(),
378 metadata: output_ext_md,
379 }));
380 arr
381 },
382 _ => {
383 assert!(!dtype.is_logical());
384 array.to_boxed()
385 },
386 })
387 }
388}
389
390fn create_arrow_field(
391 name: PlSmallStr,
392 dtype: &DataType,
393 arrow_dtype: &ArrowDataType,
394 compat_level: CompatLevel,
395 is_nullable: bool,
396) -> ArrowField {
397 match (dtype, arrow_dtype) {
398 #[cfg(feature = "dtype-categorical")]
399 (DataType::Categorical(..) | DataType::Enum(..), ArrowDataType::Dictionary(_, _, _)) => {
400 let mut out = dtype.to_arrow_field(name, compat_level);
402 out.is_nullable = is_nullable;
403 out
404 },
405 _ => ArrowField::new(name, arrow_dtype.clone(), is_nullable),
406 }
407}
408
409fn opt_field_is_nullable(opt_field: Option<&ArrowField>) -> bool {
410 opt_field.is_none_or(|x| x.is_nullable)
411}