/// Returns early from the enclosing function with the "unhandled conversion" error for the
/// given Polars dtype / Arrow field pair.
///
/// Expands to a `return Err(..)`, so it can only be used inside a function whose return type
/// is a `Result` that accepts the error built by
/// `unhandled_arrow_conversion_dtype_pair_err`.
///
/// NOTE(review): this is declared ahead of `pub mod categorical;`, presumably so that it is in
/// textual scope for that submodule as well — confirm before moving it.
macro_rules! bail_unhandled_arrow_conversion_dtype_pair {
    ($pl_dtype:expr, $arrow_field:expr) => {{
        let err = $crate::series::arrow_export::unhandled_arrow_conversion_dtype_pair_err(
            $pl_dtype,
            $arrow_field,
        );
        return Err(err);
    }};
}
11
12#[cfg(feature = "dtype-categorical")]
13pub mod categorical;
14
15use std::borrow::Cow;
16use std::sync::Arc;
17
18use polars_compute::cast::cast_unchecked;
19use polars_error::{PolarsError, PolarsResult, polars_ensure, polars_err};
20
21use crate::prelude::{
22 Array, ArrayRef, ArrowDataType, ArrowField, BinaryViewArray, CompatLevel, DataType, ListArray,
23 PlSmallStr, PrimitiveArray, Series,
24};
25
/// Builds the `InvalidOperation` error reported when no conversion exists from a Polars dtype
/// to the requested Arrow field.
///
/// Both arguments are rendered with their `Debug` representation in the message. This function
/// only constructs the error; `bail_unhandled_arrow_conversion_dtype_pair!` is the macro that
/// returns it.
fn unhandled_arrow_conversion_dtype_pair_err(
    input_pl_dtype: &DataType,
    output_arrow_field: &ArrowField,
) -> PolarsError {
    polars_err!(
        InvalidOperation:
        "to_arrow() conversion failed: cannot convert \
        ({input_pl_dtype:?}) to ({output_arrow_field:?})",
    )
}
36
/// Downcasts `$arr` to a `PrimitiveArray<$native>`, clones it, relabels the clone with
/// `$target_dtype` via `.to(..)` and returns it boxed.
///
/// # Panics
/// Panics (through `unwrap`) if `$arr` is not a `PrimitiveArray<$native>`.
macro_rules! primitive_to_boxed_with_logical {
    ($arr:expr, $native:ty, $target_dtype:expr) => {{
        let typed = $arr
            .as_any()
            .downcast_ref::<PrimitiveArray<$native>>()
            .unwrap();
        typed.clone().to($target_dtype).to_boxed()
    }};
}
44
45fn ensure_no_nulls(array: &dyn Array) -> PolarsResult<()> {
46 polars_ensure!(
47 !array.has_nulls(),
48 SchemaMismatch:
49 "to_arrow() conversion failed: nullable is false but array contained {} NULLs (arrow dtype: {:?})",
50 array.null_count(), array.dtype(),
51 );
52
53 Ok(())
54}
55
56impl Series {
57 pub fn to_arrow(&self, chunk_idx: usize, compat_level: CompatLevel) -> ArrayRef {
60 self.to_arrow_with_field(
61 chunk_idx,
62 Cow::Owned(
63 self.dtype()
64 .to_arrow_field(self.name().clone(), compat_level),
65 ),
66 true,
67 )
68 .unwrap()
69 }
70
71 pub fn to_arrow_with_field<'a>(
74 &self,
75 chunk_idx: usize,
76 output_arrow_field: Cow<'a, ArrowField>,
77 skip_attach_pl_metadata: bool,
78 ) -> PolarsResult<ArrayRef> {
79 ToArrowConverter {
80 skip_attach_pl_metadata,
81 #[cfg(feature = "dtype-categorical")]
82 categorical_converter: {
83 let mut categorical_converter =
84 crate::series::arrow_export::categorical::CategoricalToArrowConverter {
85 converters: Default::default(),
86 persist_remap: false,
87 };
88
89 categorical_converter.initialize(self.dtype());
90
91 categorical_converter
92 },
93 }
94 .array_to_arrow(
95 self.chunks().get(chunk_idx).unwrap().as_ref(),
96 self.dtype(),
97 output_arrow_field,
98 )
99 }
100}
101
/// Stateful helper that converts Polars-typed arrays into Arrow arrays for a requested Arrow
/// field (see `array_to_arrow`).
pub struct ToArrowConverter {
    /// When `true`, `attach_pl_field_metadata` returns immediately and the metadata of nested
    /// Arrow fields is left untouched.
    pub skip_attach_pl_metadata: bool,
    /// Converter that the `Categorical` / `Enum` conversion arm delegates to.
    /// Only present when the `dtype-categorical` feature is enabled.
    #[cfg(feature = "dtype-categorical")]
    pub categorical_converter:
        crate::series::arrow_export::categorical::CategoricalToArrowConverter,
}
114
115impl ToArrowConverter {
116 pub fn array_to_arrow<'a>(
118 &mut self,
119 array: &dyn Array,
120 dtype: &DataType,
121 arrow_field: Cow<'a, ArrowField>,
122 ) -> PolarsResult<Box<dyn Array>> {
123 let nullable = arrow_field.is_nullable;
124 let out = self.array_to_arrow_impl(array, dtype, arrow_field)?;
125
126 if !nullable {
127 ensure_no_nulls(array)?
128 }
129
130 Ok(out)
131 }
132
133 fn array_to_arrow_impl<'a>(
134 &mut self,
135 array: &dyn Array,
136 polars_dtype: &DataType,
137 arrow_field: Cow<'a, ArrowField>,
138 ) -> PolarsResult<Box<dyn Array>> {
139 Ok(match (polars_dtype, arrow_field.dtype()) {
143 #[cfg(feature = "dtype-struct")]
144 (DataType::Struct(struct_fields), ArrowDataType::Struct(arrow_struct_fields)) => {
145 use arrow::array::StructArray;
146 let arr: &StructArray = array.as_any().downcast_ref().unwrap();
147
148 polars_ensure!(
149 arrow_struct_fields.len() == arr.fields().len()
150 && arrow_struct_fields
151 .iter()
152 .zip(arr.fields())
153 .all(|(l, r)| l.name() == r.name()),
154 SchemaMismatch:
155 "to_arrow() conversion failed: struct field names mismatch: {:?} != expected: {:?}",
156 arrow_field.dtype(), arr.dtype()
157 );
158
159 let mut arrow_dtype = to_owned_dtype(arrow_field);
160
161 let ArrowDataType::Struct(arrow_struct_fields) = &mut arrow_dtype else {
162 unreachable!()
163 };
164
165 self.attach_pl_field_metadata(
166 struct_fields
167 .iter()
168 .map(|x| x.dtype())
169 .zip(arrow_struct_fields.iter_mut()),
170 );
171
172 let values: Vec<ArrayRef> = arr
173 .values()
174 .iter()
175 .zip(struct_fields.iter())
176 .zip(arrow_struct_fields.iter())
177 .map(|((values, pl_field), arrow_field)| {
178 self.array_to_arrow(
179 values.as_ref(),
180 pl_field.dtype(),
181 Cow::Borrowed(arrow_field),
182 )
183 })
184 .collect::<PolarsResult<_>>()?;
185
186 let arr =
187 StructArray::try_new(arrow_dtype, arr.len(), values, arr.validity().cloned())?;
188
189 Box::new(arr)
190 },
191 (DataType::List(item_dtype), ArrowDataType::LargeList(_)) => {
192 let arr: &ListArray<i64> = array.as_any().downcast_ref().unwrap();
193
194 let mut arrow_dtype = to_owned_dtype(arrow_field);
195
196 let ArrowDataType::LargeList(arrow_item_field) = &mut arrow_dtype else {
197 unreachable!()
198 };
199
200 self.attach_pl_field_metadata(std::iter::once((
201 item_dtype.as_ref(),
202 arrow_item_field.as_mut(),
203 )));
204
205 let new_values = self.array_to_arrow(
206 arr.values().as_ref(),
207 item_dtype,
208 Cow::Borrowed(arrow_item_field.as_ref()),
209 )?;
210
211 let arr = ListArray::<i64>::new(
212 arrow_dtype,
213 arr.offsets().clone(),
214 new_values,
215 arr.validity().cloned(),
216 );
217
218 Box::new(arr)
219 },
220 #[cfg(feature = "dtype-array")]
221 (DataType::Array(item_dtype, width), ArrowDataType::FixedSizeList(_, arrow_width)) => {
222 use arrow::array::FixedSizeListArray;
223 let arr: &FixedSizeListArray = array.as_any().downcast_ref().unwrap();
224
225 polars_ensure!(
226 *arrow_width == *width,
227 SchemaMismatch:
228 "to_arrow() conversion failed: fixed-size list width mismatch \
229 ({arrow_width:?} != expected: {width:?})"
230 );
231
232 let mut arrow_dtype = to_owned_dtype(arrow_field);
233
234 let ArrowDataType::FixedSizeList(arrow_item_field, _) = &mut arrow_dtype else {
235 unreachable!()
236 };
237
238 self.attach_pl_field_metadata(std::iter::once((
239 item_dtype.as_ref(),
240 arrow_item_field.as_mut(),
241 )));
242
243 let new_values = self.array_to_arrow(
244 arr.values().as_ref(),
245 item_dtype,
246 Cow::Borrowed(arrow_item_field.as_ref()),
247 )?;
248
249 let arr = FixedSizeListArray::new(
250 arrow_dtype,
251 arr.len(),
252 new_values,
253 arr.validity().cloned(),
254 );
255
256 Box::new(arr)
257 },
258 #[cfg(feature = "dtype-categorical")]
259 (DataType::Categorical(_, _) | DataType::Enum(_, _), _) => {
260 self.categorical_converter.array_to_arrow(
261 array,
262 polars_dtype,
263 arrow_field.as_ref(),
264 )?
265 },
266 #[cfg(feature = "dtype-date")]
267 (DataType::Date, ArrowDataType::Date32) => {
268 primitive_to_boxed_with_logical!(array, i32, ArrowDataType::Date32)
269 },
270 #[cfg(feature = "dtype-datetime")]
271 (DataType::Datetime(tu, tz), ArrowDataType::Timestamp(atu, atz)) => {
272 use crate::prelude::TimeZone;
273
274 let matching = atu == &tu.to_arrow()
275 && TimeZone::eq_none_as_utc(
276 TimeZone::opt_try_new(atz.clone())?.as_ref(),
277 tz.as_ref(),
278 );
279
280 if !matching {
281 bail_unhandled_arrow_conversion_dtype_pair!(polars_dtype, &arrow_field)
282 }
283
284 primitive_to_boxed_with_logical!(array, i64, to_owned_dtype(arrow_field))
285 },
286 #[cfg(feature = "dtype-duration")]
287 (DataType::Duration(tu), ArrowDataType::Duration(atu)) => {
288 let matching = atu == &tu.to_arrow();
289
290 if !matching {
291 bail_unhandled_arrow_conversion_dtype_pair!(polars_dtype, &arrow_field)
292 }
293
294 primitive_to_boxed_with_logical!(array, i64, to_owned_dtype(arrow_field))
295 },
296 #[cfg(feature = "dtype-time")]
297 (DataType::Time, ArrowDataType::Time64(crate::prelude::ArrowTimeUnit::Nanosecond)) => {
298 primitive_to_boxed_with_logical!(array, i64, to_owned_dtype(arrow_field))
299 },
300 #[cfg(feature = "dtype-time")]
301 (DataType::Time, ArrowDataType::Time64(crate::prelude::ArrowTimeUnit::Microsecond)) => {
302 use polars_compute::cast::time64ns_to_time64us;
303
304 let array: &PrimitiveArray<i64> = array.as_any().downcast_ref().unwrap();
305
306 time64ns_to_time64us(array).boxed()
307 },
308 #[cfg(feature = "dtype-decimal")]
309 (DataType::Decimal(prec, scale), ArrowDataType::Decimal(a_prec, a_scale)) => {
310 let matching = *a_prec == *prec && *a_scale == *scale;
311
312 if !matching {
313 bail_unhandled_arrow_conversion_dtype_pair!(polars_dtype, &arrow_field)
314 }
315
316 primitive_to_boxed_with_logical!(array, i128, to_owned_dtype(arrow_field))
317 },
318 #[cfg(feature = "object")]
319 (DataType::Object(_), ArrowDataType::FixedSizeBinary(8)) => {
320 use crate::chunked_array::object::builder::object_series_to_arrow_array;
321
322 let out = object_series_to_arrow_array(&unsafe {
323 Series::from_chunks_and_dtype_unchecked(
324 PlSmallStr::EMPTY,
325 vec![array.to_boxed()],
326 polars_dtype,
327 )
328 });
329
330 assert_eq!(out.dtype(), &ArrowDataType::FixedSizeBinary(8));
331
332 out
333 },
334 (DataType::String, ArrowDataType::Utf8View) => array.to_boxed(),
335 (DataType::String, ArrowDataType::LargeUtf8) => {
336 cast_unchecked(array, &ArrowDataType::LargeUtf8).unwrap()
337 },
338 (DataType::Binary, ArrowDataType::BinaryView) => array.to_boxed(),
339 (DataType::Binary, ArrowDataType::LargeBinary) => {
340 cast_unchecked(array, &ArrowDataType::LargeBinary).unwrap()
341 },
342 (DataType::Binary, ArrowDataType::FixedSizeBinary(row_width)) => {
343 use polars_compute::cast::binview_to_fixed_binary;
344
345 let array: &BinaryViewArray = array.as_any().downcast_ref().unwrap();
346
347 binview_to_fixed_binary(array, *row_width)?.boxed()
348 },
349 (DataType::Binary, ArrowDataType::Extension(_)) => {
350 let arrow_dtype = to_owned_dtype(arrow_field);
351
352 let ArrowDataType::Extension(ext_type) = &arrow_dtype else {
353 unreachable!()
354 };
355
356 let storage_field =
357 ArrowField::new(ext_type.name.clone(), ext_type.inner.clone(), true);
358
359 let mut array =
360 self.array_to_arrow(array, &DataType::Binary, Cow::Owned(storage_field))?;
361
362 *array.dtype_mut() = arrow_dtype;
363
364 array.to_boxed()
365 },
366 #[cfg(feature = "dtype-extension")]
367 (
368 DataType::Extension(pl_ext_type, storage_dtype),
369 ArrowDataType::Extension(arrow_ext_type),
370 ) => {
371 use arrow::datatypes::ExtensionType;
372
373 let ExtensionType {
374 name,
375 inner: _,
376 metadata,
377 } = arrow_ext_type.as_ref();
378
379 if name != pl_ext_type.name().as_ref() {
380 bail_unhandled_arrow_conversion_dtype_pair!(polars_dtype, &arrow_field)
381 }
382
383 match (
384 metadata.as_deref(),
385 pl_ext_type.serialize_metadata().as_deref(),
386 ) {
387 (Some("") | None, Some("") | None) => {},
388 (l, r) => {
389 if l != r {
390 bail_unhandled_arrow_conversion_dtype_pair!(polars_dtype, &arrow_field)
391 }
392 },
393 };
394
395 let arrow_dtype = to_owned_dtype(arrow_field);
396
397 let ArrowDataType::Extension(arrow_ext_type) = &arrow_dtype else {
398 unreachable!()
399 };
400
401 let storage_arrow_field = ArrowField::new(
402 arrow_ext_type.name.clone(),
403 arrow_ext_type.inner.clone(),
404 true,
405 );
406
407 let mut arr =
408 self.array_to_arrow(array, storage_dtype, Cow::Owned(storage_arrow_field))?;
409
410 *arr.dtype_mut() = arrow_dtype;
411
412 arr
413 },
414 (pl_dtype, arrow_dtype) => {
415 if array.dtype() != arrow_dtype {
416 bail_unhandled_arrow_conversion_dtype_pair!(polars_dtype, &arrow_field)
417 }
418
419 if pl_dtype.is_logical() {
420 panic!("{pl_dtype:?}");
421 }
422
423 array.to_boxed()
424 },
425 })
426 }
427
428 #[inline]
429 fn attach_pl_field_metadata<'a, 'b, I>(&self, iter: I)
430 where
431 I: IntoIterator<Item = (&'a DataType, &'b mut ArrowField)>,
432 {
433 if self.skip_attach_pl_metadata {
434 return;
435 }
436
437 inner(&mut iter.into_iter());
438
439 #[inline(never)]
440 fn inner(iter: &mut dyn Iterator<Item = (&DataType, &mut ArrowField)>) {
441 for (pl_dtype, arrow_field) in iter {
442 match pl_dtype {
443 #[cfg(feature = "dtype-categorical")]
444 DataType::Categorical(..) | DataType::Enum(..) => {
445 if !matches!(arrow_field.dtype(), ArrowDataType::Dictionary(..)) {
446 continue;
449 }
450 },
451 _ => {},
452 }
453
454 let mut pl_md = pl_dtype.to_arrow_field_metadata();
455
456 if arrow_field.metadata.is_none() {
457 arrow_field.metadata = pl_md.take().map(|x| x.into());
458 }
459
460 if let Some(pl_md) = pl_md
462 && let Some(md) = arrow_field.metadata.as_mut()
463 {
464 for (k, v) in pl_md {
465 if !md.contains_key(&k) {
466 Arc::make_mut(md).insert(k, v);
467 }
468 }
469 }
470 }
471 }
472 }
473}
474
475fn to_owned_dtype(field: Cow<ArrowField>) -> ArrowDataType {
476 match field {
477 Cow::Borrowed(f) => f.dtype().clone(),
478 Cow::Owned(f) => f.dtype,
479 }
480}