1use arrow::datatypes::{IntervalUnit, Metadata};
2use arrow::offset::OffsetsBuffer;
3#[cfg(any(
4 feature = "dtype-date",
5 feature = "dtype-datetime",
6 feature = "dtype-time",
7 feature = "dtype-duration"
8))]
9use arrow::temporal_conversions::*;
10use arrow::types::months_days_ns;
11use polars_compute::cast::cast_unchecked as cast;
12#[cfg(feature = "dtype-decimal")]
13use polars_compute::decimal::dec128_fits;
14use polars_error::feature_gated;
15use polars_utils::itertools::Itertools;
16
17use crate::chunked_array::cast::{CastOptions, cast_chunks};
18#[cfg(feature = "object")]
19use crate::chunked_array::object::extension::polars_extension::PolarsExtension;
20#[cfg(feature = "object")]
21use crate::chunked_array::object::registry::get_object_builder;
22use crate::config::check_allow_importing_interval_as_struct;
23use crate::prelude::*;
24
25impl Series {
26 pub fn from_array<A: ParameterFreeDtypeStaticArray>(name: PlSmallStr, array: A) -> Self {
27 unsafe {
28 Self::from_chunks_and_dtype_unchecked(
29 name,
30 vec![Box::new(array)],
31 &DataType::from_arrow_dtype(&A::get_dtype()),
32 )
33 }
34 }
35
36 pub fn from_chunk_and_dtype(
37 name: PlSmallStr,
38 chunk: ArrayRef,
39 dtype: &DataType,
40 ) -> PolarsResult<Self> {
41 if &dtype.to_physical().to_arrow(CompatLevel::newest()) != chunk.dtype() {
42 polars_bail!(
43 InvalidOperation: "cannot create a series of type '{dtype}' of arrow chunk with type '{:?}'",
44 chunk.dtype()
45 );
46 }
47
48 let series = unsafe { Self::from_chunks_and_dtype_unchecked(name, vec![chunk], dtype) };
50 Ok(series)
51 }
52
    /// Builds a [`Series`] of logical type `dtype` directly from arrow `chunks`.
    ///
    /// The chunks are wrapped in the chunked-array type that backs `dtype` (for logical types
    /// the physical chunked array is built first and then converted, e.g. `Int32Chunked` ->
    /// date). No cast and no validation of the chunks is performed here.
    ///
    /// # Safety
    ///
    /// Nothing in this function checks that `chunks` fit `dtype`; the caller must guarantee
    /// it. Presumably the required arrow type is the physical arrow representation of
    /// `dtype`, which is what `from_chunk_and_dtype` verifies before calling this function.
    ///
    /// # Panics
    ///
    /// - if `dtype` is `Unknown`;
    /// - if `dtype` is not handled by any (enabled) branch;
    /// - for `Object`, if `chunks` is empty, or if the first chunk is a `FixedSizeBinaryArray`
    ///   and there is more than one chunk.
    pub unsafe fn from_chunks_and_dtype_unchecked(
        name: PlSmallStr,
        chunks: Vec<ArrayRef>,
        dtype: &DataType,
    ) -> Self {
        use DataType::*;
        match dtype {
            Int8 => Int8Chunked::from_chunks(name, chunks).into_series(),
            Int16 => Int16Chunked::from_chunks(name, chunks).into_series(),
            Int32 => Int32Chunked::from_chunks(name, chunks).into_series(),
            Int64 => Int64Chunked::from_chunks(name, chunks).into_series(),
            UInt8 => UInt8Chunked::from_chunks(name, chunks).into_series(),
            UInt16 => UInt16Chunked::from_chunks(name, chunks).into_series(),
            UInt32 => UInt32Chunked::from_chunks(name, chunks).into_series(),
            UInt64 => UInt64Chunked::from_chunks(name, chunks).into_series(),
            #[cfg(feature = "dtype-i128")]
            Int128 => Int128Chunked::from_chunks(name, chunks).into_series(),
            #[cfg(feature = "dtype-u128")]
            UInt128 => UInt128Chunked::from_chunks(name, chunks).into_series(),
            // Temporal and decimal types: build the physical integer array, then attach the
            // logical type.
            #[cfg(feature = "dtype-date")]
            Date => Int32Chunked::from_chunks(name, chunks)
                .into_date()
                .into_series(),
            #[cfg(feature = "dtype-time")]
            Time => Int64Chunked::from_chunks(name, chunks)
                .into_time()
                .into_series(),
            #[cfg(feature = "dtype-duration")]
            Duration(tu) => Int64Chunked::from_chunks(name, chunks)
                .into_duration(*tu)
                .into_series(),
            #[cfg(feature = "dtype-datetime")]
            Datetime(tu, tz) => Int64Chunked::from_chunks(name, chunks)
                .into_datetime(*tu, tz.clone())
                .into_series(),
            #[cfg(feature = "dtype-decimal")]
            Decimal(precision, scale) => Int128Chunked::from_chunks(name, chunks)
                .into_decimal_unchecked(*precision, *scale)
                .into_series(),
            // Nested types carry their full dtype because it cannot be derived from the
            // chunked-array type alone.
            #[cfg(feature = "dtype-array")]
            Array(_, _) => {
                ArrayChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone())
                    .into_series()
            },
            List(_) => ListChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone())
                .into_series(),
            String => StringChunked::from_chunks(name, chunks).into_series(),
            Binary => BinaryChunked::from_chunks(name, chunks).into_series(),
            // Categorical/Enum: the chunks are the physical category values; the physical
            // type is selected from the dtype.
            #[cfg(feature = "dtype-categorical")]
            dt @ (Categorical(_, _) | Enum(_, _)) => {
                with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {
                    let phys = ChunkedArray::from_chunks(name, chunks);
                    CategoricalChunked::<$C>::from_cats_and_dtype_unchecked(phys, dt.clone()).into_series()
                })
            },
            Boolean => BooleanChunked::from_chunks(name, chunks).into_series(),
            #[cfg(feature = "dtype-f16")]
            Float16 => Float16Chunked::from_chunks(name, chunks).into_series(),
            Float32 => Float32Chunked::from_chunks(name, chunks).into_series(),
            Float64 => Float64Chunked::from_chunks(name, chunks).into_series(),
            BinaryOffset => BinaryOffsetChunked::from_chunks(name, chunks).into_series(),
            // Extension: build the storage series recursively and wrap it in the extension
            // type.
            #[cfg(feature = "dtype-extension")]
            Extension(typ, storage) => ExtensionChunked::from_storage(
                typ.clone(),
                Series::from_chunks_and_dtype_unchecked(name, chunks, storage),
            )
            .into_series(),
            #[cfg(feature = "dtype-struct")]
            Struct(_) => {
                let mut ca =
                    StructChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone());
                StructChunked::propagate_nulls_mut(&mut ca);
                ca.into_series()
            },
            #[cfg(feature = "object")]
            Object(_) => {
                // A `FixedSizeBinaryArray` chunk is handed to `PolarsExtension` to obtain the
                // series; any other chunk type goes through the registered object builder.
                if let Some(arr) = chunks[0].as_any().downcast_ref::<FixedSizeBinaryArray>() {
                    assert_eq!(chunks.len(), 1);
                    {
                        let pe = PolarsExtension::new(arr.clone());
                        let s = pe.get_series(&name);
                        // NOTE(review): presumably prevents `pe` from releasing the data now
                        // used by `s` — confirm against `PolarsExtension`.
                        pe.take_and_forget();
                        s
                    }
                } else {
                    unsafe { get_object_builder(name, 0).from_chunks(chunks) }
                }
            },
            // Only the total length of the chunks is used for a null series.
            Null => new_null(name, &chunks),
            Unknown(_) => {
                panic!("dtype is unknown; consider supplying data-types for all operations")
            },
            // Reached for dtypes whose feature-gated branch is compiled out.
            #[allow(unreachable_patterns)]
            _ => unreachable!(),
        }
    }
160
161 pub unsafe fn _try_from_arrow_unchecked(
164 name: PlSmallStr,
165 chunks: Vec<ArrayRef>,
166 dtype: &ArrowDataType,
167 ) -> PolarsResult<Self> {
168 Self::_try_from_arrow_unchecked_with_md(name, chunks, dtype, None)
169 }
170
171 pub unsafe fn _try_from_arrow_unchecked_with_md(
176 name: PlSmallStr,
177 mut chunks: Vec<ArrayRef>,
178 dtype: &ArrowDataType,
179 md: Option<&Metadata>,
180 ) -> PolarsResult<Self> {
181 match dtype {
182 ArrowDataType::Utf8View => Ok(StringChunked::from_chunks(name, chunks).into_series()),
183 ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => {
184 let chunks =
185 cast_chunks(&chunks, &DataType::String, CastOptions::NonStrict).unwrap();
186 Ok(StringChunked::from_chunks(name, chunks).into_series())
187 },
188 ArrowDataType::BinaryView => Ok(BinaryChunked::from_chunks(name, chunks).into_series()),
189 ArrowDataType::LargeBinary => {
190 if let Some(md) = md {
191 if md.maintain_type() {
192 return Ok(BinaryOffsetChunked::from_chunks(name, chunks).into_series());
193 }
194 }
195 let chunks =
196 cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict).unwrap();
197 Ok(BinaryChunked::from_chunks(name, chunks).into_series())
198 },
199 ArrowDataType::Binary => {
200 let chunks =
201 cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict).unwrap();
202 Ok(BinaryChunked::from_chunks(name, chunks).into_series())
203 },
204 ArrowDataType::List(_) | ArrowDataType::LargeList(_) => {
205 let (chunks, dtype) = to_physical_and_dtype(chunks, md);
206 unsafe {
207 Ok(
208 ListChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype)
209 .into_series(),
210 )
211 }
212 },
213 #[cfg(feature = "dtype-array")]
214 ArrowDataType::FixedSizeList(_, _) => {
215 let (chunks, dtype) = to_physical_and_dtype(chunks, md);
216 unsafe {
217 Ok(
218 ArrayChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype)
219 .into_series(),
220 )
221 }
222 },
223 ArrowDataType::Boolean => Ok(BooleanChunked::from_chunks(name, chunks).into_series()),
224 #[cfg(feature = "dtype-u8")]
225 ArrowDataType::UInt8 => Ok(UInt8Chunked::from_chunks(name, chunks).into_series()),
226 #[cfg(feature = "dtype-u16")]
227 ArrowDataType::UInt16 => Ok(UInt16Chunked::from_chunks(name, chunks).into_series()),
228 ArrowDataType::UInt32 => Ok(UInt32Chunked::from_chunks(name, chunks).into_series()),
229 ArrowDataType::UInt64 => Ok(UInt64Chunked::from_chunks(name, chunks).into_series()),
230 ArrowDataType::UInt128 => feature_gated!(
231 "dtype-u128",
232 Ok(UInt128Chunked::from_chunks(name, chunks).into_series())
233 ),
234 #[cfg(feature = "dtype-i8")]
235 ArrowDataType::Int8 => Ok(Int8Chunked::from_chunks(name, chunks).into_series()),
236 #[cfg(feature = "dtype-i16")]
237 ArrowDataType::Int16 => Ok(Int16Chunked::from_chunks(name, chunks).into_series()),
238 ArrowDataType::Int32 => Ok(Int32Chunked::from_chunks(name, chunks).into_series()),
239 ArrowDataType::Int64 => Ok(Int64Chunked::from_chunks(name, chunks).into_series()),
240 ArrowDataType::Int128 => feature_gated!(
241 "dtype-i128",
242 Ok(Int128Chunked::from_chunks(name, chunks).into_series())
243 ),
244 #[cfg(feature = "dtype-f16")]
245 ArrowDataType::Float16 => {
246 let chunks =
247 cast_chunks(&chunks, &DataType::Float16, CastOptions::NonStrict).unwrap();
248 Ok(Float16Chunked::from_chunks(name, chunks).into_series())
249 },
250 ArrowDataType::Float32 => Ok(Float32Chunked::from_chunks(name, chunks).into_series()),
251 ArrowDataType::Float64 => Ok(Float64Chunked::from_chunks(name, chunks).into_series()),
252 #[cfg(feature = "dtype-date")]
253 ArrowDataType::Date32 => {
254 let chunks =
255 cast_chunks(&chunks, &DataType::Int32, CastOptions::Overflowing).unwrap();
256 Ok(Int32Chunked::from_chunks(name, chunks)
257 .into_date()
258 .into_series())
259 },
260 #[cfg(feature = "dtype-datetime")]
261 ArrowDataType::Date64 => {
262 let chunks =
263 cast_chunks(&chunks, &DataType::Int64, CastOptions::Overflowing).unwrap();
264 let ca = Int64Chunked::from_chunks(name, chunks);
265 Ok(ca.into_datetime(TimeUnit::Milliseconds, None).into_series())
266 },
267 #[cfg(feature = "dtype-datetime")]
268 ArrowDataType::Timestamp(tu, tz) => {
269 let tz = TimeZone::opt_try_new(tz.clone())?;
270 let chunks =
271 cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
272 let s = Int64Chunked::from_chunks(name, chunks)
273 .into_datetime(tu.into(), tz)
274 .into_series();
275 Ok(match tu {
276 ArrowTimeUnit::Second => &s * MILLISECONDS,
277 ArrowTimeUnit::Millisecond => s,
278 ArrowTimeUnit::Microsecond => s,
279 ArrowTimeUnit::Nanosecond => s,
280 })
281 },
282 #[cfg(feature = "dtype-duration")]
283 ArrowDataType::Duration(tu) => {
284 let chunks =
285 cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
286 let s = Int64Chunked::from_chunks(name, chunks)
287 .into_duration(tu.into())
288 .into_series();
289 Ok(match tu {
290 ArrowTimeUnit::Second => &s * MILLISECONDS,
291 ArrowTimeUnit::Millisecond => s,
292 ArrowTimeUnit::Microsecond => s,
293 ArrowTimeUnit::Nanosecond => s,
294 })
295 },
296 #[cfg(feature = "dtype-time")]
297 ArrowDataType::Time64(tu) | ArrowDataType::Time32(tu) => {
298 let mut chunks = chunks;
299 if matches!(dtype, ArrowDataType::Time32(_)) {
300 chunks =
301 cast_chunks(&chunks, &DataType::Int32, CastOptions::NonStrict).unwrap();
302 }
303 let chunks =
304 cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
305 let s = Int64Chunked::from_chunks(name, chunks)
306 .into_time()
307 .into_series();
308 Ok(match tu {
309 ArrowTimeUnit::Second => &s * NANOSECONDS,
310 ArrowTimeUnit::Millisecond => &s * 1_000_000,
311 ArrowTimeUnit::Microsecond => &s * 1_000,
312 ArrowTimeUnit::Nanosecond => s,
313 })
314 },
315 ArrowDataType::Decimal32(precision, scale) => {
316 feature_gated!("dtype-decimal", {
317 polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?;
318
319 let mut chunks = chunks;
320 for chunk in chunks.iter_mut() {
321 let old_chunk = chunk
322 .as_any_mut()
323 .downcast_mut::<PrimitiveArray<i32>>()
324 .unwrap();
325
326 let (_, values, validity) = std::mem::take(old_chunk).into_inner();
328 *chunk = PrimitiveArray::new(
329 ArrowDataType::Int128,
330 values.iter().map(|&v| v as i128).collect(),
331 validity,
332 )
333 .to_boxed();
334 }
335
336 let s = Int128Chunked::from_chunks(name, chunks)
337 .into_decimal_unchecked(*precision, *scale)
338 .into_series();
339 Ok(s)
340 })
341 },
342 ArrowDataType::Decimal64(precision, scale) => {
343 feature_gated!("dtype-decimal", {
344 polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?;
345
346 let mut chunks = chunks;
347 for chunk in chunks.iter_mut() {
348 let old_chunk = chunk
349 .as_any_mut()
350 .downcast_mut::<PrimitiveArray<i64>>()
351 .unwrap();
352
353 let (_, values, validity) = std::mem::take(old_chunk).into_inner();
355 *chunk = PrimitiveArray::new(
356 ArrowDataType::Int128,
357 values.iter().map(|&v| v as i128).collect(),
358 validity,
359 )
360 .to_boxed();
361 }
362
363 let s = Int128Chunked::from_chunks(name, chunks)
364 .into_decimal_unchecked(*precision, *scale)
365 .into_series();
366 Ok(s)
367 })
368 },
369 ArrowDataType::Decimal(precision, scale) => {
370 feature_gated!("dtype-decimal", {
371 polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?;
372
373 let mut chunks = chunks;
374 for chunk in chunks.iter_mut() {
375 *chunk = std::mem::take(
376 chunk
377 .as_any_mut()
378 .downcast_mut::<PrimitiveArray<i128>>()
379 .unwrap(),
380 )
381 .to(ArrowDataType::Int128)
382 .to_boxed();
383 }
384
385 let s = Int128Chunked::from_chunks(name, chunks)
386 .into_decimal_unchecked(*precision, *scale)
387 .into_series();
388 Ok(s)
389 })
390 },
391 ArrowDataType::Decimal256(precision, scale) => {
392 feature_gated!("dtype-decimal", {
393 use arrow::types::i256;
394
395 polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?;
396
397 let mut chunks = chunks;
398 for chunk in chunks.iter_mut() {
399 let arr = std::mem::take(
400 chunk
401 .as_any_mut()
402 .downcast_mut::<PrimitiveArray<i256>>()
403 .unwrap(),
404 );
405 let arr_128: PrimitiveArray<i128> = arr.iter().map(|opt_v| {
406 if let Some(v) = opt_v {
407 let smaller: Option<i128> = (*v).try_into().ok();
408 let smaller = smaller.filter(|v| dec128_fits(*v, *precision));
409 smaller.ok_or_else(|| {
410 polars_err!(ComputeError: "Decimal256 to Decimal128 conversion overflowed, Decimal256 is not (yet) supported in Polars")
411 }).map(Some)
412 } else {
413 Ok(None)
414 }
415 }).try_collect_arr_trusted()?;
416
417 *chunk = arr_128.to(ArrowDataType::Int128).to_boxed();
418 }
419
420 let s = Int128Chunked::from_chunks(name, chunks)
421 .into_decimal_unchecked(*precision, *scale)
422 .into_series();
423 Ok(s)
424 })
425 },
426 ArrowDataType::Null => Ok(new_null(name, &chunks)),
427 #[cfg(not(feature = "dtype-categorical"))]
428 ArrowDataType::Dictionary(_, _, _) => {
429 panic!("activate dtype-categorical to convert dictionary arrays")
430 },
431 #[cfg(feature = "dtype-categorical")]
432 ArrowDataType::Dictionary(key_type, _, _) => {
433 let polars_dtype = DataType::from_arrow(chunks[0].dtype(), md);
434
435 let mut series_iter = chunks.into_iter().map(|arr| {
436 import_arrow_dictionary_array(name.clone(), arr, key_type, &polars_dtype)
437 });
438
439 let mut first = series_iter.next().unwrap()?;
440
441 for s in series_iter {
442 first.append_owned(s?)?;
443 }
444
445 Ok(first)
446 },
447 #[cfg(feature = "object")]
448 ArrowDataType::Extension(ext)
449 if ext.name == POLARS_OBJECT_EXTENSION_NAME && ext.metadata.is_some() =>
450 {
451 assert_eq!(chunks.len(), 1);
452 let arr = chunks[0]
453 .as_any()
454 .downcast_ref::<FixedSizeBinaryArray>()
455 .unwrap();
456 let s = {
461 let pe = PolarsExtension::new(arr.clone());
462 let s = pe.get_series(&name);
463 pe.take_and_forget();
464 s
465 };
466 Ok(s)
467 },
468 #[cfg(feature = "dtype-extension")]
469 ArrowDataType::Extension(ext) => {
470 use crate::datatypes::extension::get_extension_type_or_storage;
471
472 for chunk in &mut chunks {
473 debug_assert!(
474 chunk.dtype() == dtype,
475 "expected chunk dtype to be {:?}, got {:?}",
476 dtype,
477 chunk.dtype()
478 );
479 *chunk.dtype_mut() = ext.inner.clone();
480 }
481 let storage = Series::_try_from_arrow_unchecked_with_md(
482 name.clone(),
483 chunks,
484 &ext.inner,
485 md,
486 )?;
487
488 Ok(
489 match get_extension_type_or_storage(
490 &ext.name,
491 storage.dtype(),
492 ext.metadata.as_deref(),
493 ) {
494 Some(typ) => ExtensionChunked::from_storage(typ, storage).into_series(),
495 None => storage,
496 },
497 )
498 },
499
500 #[cfg(feature = "dtype-struct")]
501 ArrowDataType::Struct(_) => {
502 let (chunks, dtype) = to_physical_and_dtype(chunks, md);
503
504 unsafe {
505 let mut ca =
506 StructChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype);
507 StructChunked::propagate_nulls_mut(&mut ca);
508 Ok(ca.into_series())
509 }
510 },
511 ArrowDataType::FixedSizeBinary(_) => {
512 let chunks = cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict)?;
513 Ok(BinaryChunked::from_chunks(name, chunks).into_series())
514 },
515 ArrowDataType::Map(field, _is_ordered) => {
516 let struct_arrays = chunks
517 .iter()
518 .map(|arr| {
519 let arr = arr.as_any().downcast_ref::<MapArray>().unwrap();
520 arr.field().clone()
521 })
522 .collect::<Vec<_>>();
523
524 let (phys_struct_arrays, dtype) =
525 to_physical_and_dtype(struct_arrays, field.metadata.as_deref());
526
527 let chunks = chunks
528 .iter()
529 .zip(phys_struct_arrays)
530 .map(|(arr, values)| {
531 let arr = arr.as_any().downcast_ref::<MapArray>().unwrap();
532 let offsets: &OffsetsBuffer<i32> = arr.offsets();
533
534 let validity = values.validity().cloned();
535
536 Box::from(ListArray::<i64>::new(
537 ListArray::<i64>::default_datatype(values.dtype().clone()),
538 OffsetsBuffer::<i64>::from(offsets),
539 values,
540 validity,
541 )) as ArrayRef
542 })
543 .collect();
544
545 unsafe {
546 let out = ListChunked::from_chunks_and_dtype_unchecked(
547 name,
548 chunks,
549 DataType::List(Box::new(dtype)),
550 );
551
552 Ok(out.into_series())
553 }
554 },
555 ArrowDataType::Interval(IntervalUnit::MonthDayNano) => {
556 check_allow_importing_interval_as_struct("month_day_nano_interval")?;
557
558 feature_gated!("dtype-struct", {
559 let chunks = chunks
560 .into_iter()
561 .map(convert_month_day_nano_to_struct)
562 .collect::<PolarsResult<Vec<_>>>()?;
563
564 Ok(StructChunked::from_chunks_and_dtype_unchecked(
565 name,
566 chunks,
567 DataType::_month_days_ns_struct_type(),
568 )
569 .into_series())
570 })
571 },
572
573 dt => polars_bail!(ComputeError: "cannot create series from {:?}", dt),
574 }
575 }
576
577 #[cfg(feature = "dtype-categorical")]
578 pub fn from_cats_and_dtype(
579 cats: &Series,
580 dtype: &DataType,
581 strict: bool,
582 ) -> PolarsResult<Series> {
583 let phys = dtype.cat_physical()?;
584 let phys_dtype = DataType::from(phys);
585 if cats.dtype() != &phys_dtype {
586 polars_bail!(
587 SchemaMismatch:
588 "cannot convert column of type {} to {} with physical type {}; \
589 column dtype must match the enum/categorical's physical type",
590 cats.dtype(), dtype, phys_dtype
591 )
592 }
593
594 let out = with_match_categorical_physical_type!(phys, |$C| {
595 type PhysCa = ChunkedArray<<$C as PolarsCategoricalType>::PolarsPhysical>;
597 let ca: &PhysCa = cats.as_ref().as_ref();
598 CategoricalChunked::<$C>::from_cats_and_dtype(ca.clone(), dtype.clone()).into_series()
599 });
600
601 if strict && out.null_count() != cats.null_count() {
602 polars_bail!(
603 ComputeError:
604 "found invalid category value when converting from physical to {dtype}",
605 );
606 }
607
608 Ok(out)
609 }
610}
611
612fn convert<F: Fn(&dyn Array) -> ArrayRef>(arr: &[ArrayRef], f: F) -> Vec<ArrayRef> {
613 arr.iter().map(|arr| f(&**arr)).collect()
614}
615
/// Converts arrow `arrays` into arrays Polars can hold and returns them together with the
/// resulting Polars [`DataType`].
///
/// The conversion is selected from the dtype of the first array only and recurses into the
/// children of list, fixed-size-list and struct arrays. For those nested types the metadata
/// of the child field is passed down; `md` itself is used by the dictionary, extension and
/// fallback branches.
///
/// # Safety
///
/// NOTE(review): the precise contract is not stated in this file. The body downcasts by
/// dtype and calls the unchecked `Series` constructors, so the arrays presumably must be
/// valid for, and all share, the dtype of `arrays[0]` — confirm against callers.
///
/// # Panics
///
/// Panics when `arrays` is empty, when a downcast fails, or when one of the unwrapped
/// casts/conversions fails.
#[allow(clippy::only_used_in_recursion)]
unsafe fn to_physical_and_dtype(
    arrays: Vec<ArrayRef>,
    md: Option<&Metadata>,
) -> (Vec<ArrayRef>, DataType) {
    match arrays[0].dtype() {
        ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => {
            let chunks = cast_chunks(&arrays, &DataType::String, CastOptions::NonStrict).unwrap();
            (chunks, DataType::String)
        },
        ArrowDataType::Binary | ArrowDataType::LargeBinary | ArrowDataType::FixedSizeBinary(_) => {
            let chunks = cast_chunks(&arrays, &DataType::Binary, CastOptions::NonStrict).unwrap();
            (chunks, DataType::Binary)
        },
        // Dictionary and extension arrays are imported through the `Series` constructor; the
        // chunks and dtype of the resulting series are returned.
        #[allow(unused_variables)]
        dt @ ArrowDataType::Dictionary(_, _, _) => {
            feature_gated!("dtype-categorical", {
                let s = unsafe {
                    let dt = dt.clone();
                    Series::_try_from_arrow_unchecked_with_md(PlSmallStr::EMPTY, arrays, &dt, md)
                }
                .unwrap();
                (s.chunks().clone(), s.dtype().clone())
            })
        },
        dt @ ArrowDataType::Extension(_) => {
            feature_gated!("dtype-extension", {
                let s = unsafe {
                    let dt = dt.clone();
                    Series::_try_from_arrow_unchecked_with_md(PlSmallStr::EMPTY, arrays, &dt, md)
                }
                .unwrap();
                (s.chunks().clone(), s.dtype().clone())
            })
        },
        // `List` is first cast to `LargeList` and then handled by the `LargeList` branch.
        ArrowDataType::List(field) => {
            let out = convert(&arrays, |arr| {
                cast(arr, &ArrowDataType::LargeList(field.clone())).unwrap()
            });
            to_physical_and_dtype(out, md)
        },
        #[cfg(feature = "dtype-array")]
        ArrowDataType::FixedSizeList(field, size) => {
            // Convert the child values, then rebuild each array around the converted values
            // while keeping its length and validity.
            let values = arrays
                .iter()
                .map(|arr| {
                    let arr = arr.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
                    arr.values().clone()
                })
                .collect::<Vec<_>>();

            let (converted_values, dtype) =
                to_physical_and_dtype(values, field.metadata.as_deref());

            let arrays = arrays
                .iter()
                .zip(converted_values)
                .map(|(arr, values)| {
                    let arr = arr.as_any().downcast_ref::<FixedSizeListArray>().unwrap();

                    let dtype = FixedSizeListArray::default_datatype(values.dtype().clone(), *size);
                    Box::from(FixedSizeListArray::new(
                        dtype,
                        arr.len(),
                        values,
                        arr.validity().cloned(),
                    )) as ArrayRef
                })
                .collect();
            (arrays, DataType::Array(Box::new(dtype), *size))
        },
        ArrowDataType::LargeList(field) => {
            // Convert the child values, then rebuild each list around the converted values
            // while keeping its offsets and validity.
            let values = arrays
                .iter()
                .map(|arr| {
                    let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();
                    arr.values().clone()
                })
                .collect::<Vec<_>>();

            let (converted_values, dtype) =
                to_physical_and_dtype(values, field.metadata.as_deref());

            let arrays = arrays
                .iter()
                .zip(converted_values)
                .map(|(arr, values)| {
                    let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();

                    let dtype = ListArray::<i64>::default_datatype(values.dtype().clone());
                    Box::from(ListArray::<i64>::new(
                        dtype,
                        arr.offsets().clone(),
                        values,
                        arr.validity().cloned(),
                    )) as ArrayRef
                })
                .collect();
            (arrays, DataType::List(Box::new(dtype)))
        },
        ArrowDataType::Struct(_fields) => {
            feature_gated!("dtype-struct", {
                // Polars fields are recorded once, from the first array that is converted.
                let mut pl_fields = None;
                let arrays = arrays
                    .iter()
                    .map(|arr| {
                        let arr = arr.as_any().downcast_ref::<StructArray>().unwrap();
                        // Convert every field array on its own, using that field's metadata.
                        let (values, dtypes): (Vec<_>, Vec<_>) = arr
                            .values()
                            .iter()
                            .zip(_fields.iter())
                            .map(|(value, field)| {
                                let mut out = to_physical_and_dtype(
                                    vec![value.clone()],
                                    field.metadata.as_deref(),
                                );
                                (out.0.pop().unwrap(), out.1)
                            })
                            .unzip();

                        // Rebuild the arrow struct with the converted field dtypes; every
                        // field is created with `true` as the third `ArrowField::new` argument.
                        let arrow_fields = values
                            .iter()
                            .zip(_fields.iter())
                            .map(|(arr, field)| {
                                ArrowField::new(field.name.clone(), arr.dtype().clone(), true)
                            })
                            .collect();
                        let arrow_array = Box::new(StructArray::new(
                            ArrowDataType::Struct(arrow_fields),
                            arr.len(),
                            values,
                            arr.validity().cloned(),
                        )) as ArrayRef;

                        if pl_fields.is_none() {
                            pl_fields = Some(
                                _fields
                                    .iter()
                                    .zip(dtypes)
                                    .map(|(field, dtype)| Field::new(field.name.clone(), dtype))
                                    .collect_vec(),
                            )
                        }

                        arrow_array
                    })
                    .collect_vec();

                (arrays, DataType::Struct(pl_fields.unwrap()))
            })
        },
        // These types are imported through the `Series` constructor (without metadata); the
        // chunks are then taken out of the resulting series.
        dt @ (ArrowDataType::Duration(_)
        | ArrowDataType::Time32(_)
        | ArrowDataType::Time64(_)
        | ArrowDataType::Timestamp(_, _)
        | ArrowDataType::Date32
        | ArrowDataType::Decimal(_, _)
        | ArrowDataType::Date64
        | ArrowDataType::Map(_, _)) => {
            let dt = dt.clone();
            let mut s = Series::_try_from_arrow_unchecked(PlSmallStr::EMPTY, arrays, &dt).unwrap();
            let dtype = s.dtype().clone();
            (std::mem::take(s.chunks_mut()), dtype)
        },
        // Everything else is returned unchanged; only the Polars dtype is derived.
        dt => {
            let dtype = DataType::from_arrow(dt, md);
            (arrays, dtype)
        },
    }
}
788
/// Imports a single arrow dictionary array `arr` with keys of type `key_type` as a
/// [`Series`] named `name`.
///
/// When `polars_dtype` is `Categorical` or `Enum`, the dictionary values are cast to
/// `Utf8View` and a categorical is built from the string looked up for every key. For any
/// other `polars_dtype`, the values are imported as a series and gathered with the keys, so
/// the dictionary encoding is removed.
///
/// # Safety
///
/// NOTE(review): the contract is not stated in this file; `arr` is downcast to
/// `DictionaryArray<K>` based on `key_type` and the values are imported through the
/// unchecked `Series` constructor — presumably `arr` must be a valid dictionary array with
/// that key type. Confirm against callers.
///
/// # Errors
///
/// Returns a `ComputeError` for an unsupported `key_type` and propagates errors of the value
/// cast, the categorical construction, the value import and the gather.
///
/// # Panics
///
/// Panics when `arr` is not a `DictionaryArray` with the key type given by `key_type`.
#[cfg(feature = "dtype-categorical")]
unsafe fn import_arrow_dictionary_array(
    name: PlSmallStr,
    arr: Box<dyn Array>,
    key_type: &arrow::datatypes::IntegerType,
    polars_dtype: &DataType,
) -> PolarsResult<Series> {
    use arrow::datatypes::IntegerType as I;

    if matches!(
        polars_dtype,
        DataType::Categorical(_, _) | DataType::Enum(_, _)
    ) {
        // Builds the categorical for one concrete key type `$dt`.
        macro_rules! unpack_categorical_chunked {
            ($dt:ty) => {{
                let arr = arr.as_any().downcast_ref::<DictionaryArray<$dt>>().unwrap();
                let keys = arr.keys();
                let values = arr.values();
                let values = cast(&**values, &ArrowDataType::Utf8View)?;
                let values = values.as_any().downcast_ref::<Utf8ViewArray>().unwrap();
                with_match_categorical_physical_type!(polars_dtype.cat_physical().unwrap(), |$C| {
                    let ca = CategoricalChunked::<$C>::from_str_iter(
                        name,
                        polars_dtype.clone(),
                        keys.iter().map(|k| {
                            // A null key, or a key that does not convert to `usize`, yields
                            // `None`.
                            let k: usize = (*k?).try_into().ok()?;
                            values.get(k)
                        }),
                    )?;
                    Ok(ca.into_series())
                })
            }};
        }

        match key_type {
            I::Int8 => unpack_categorical_chunked!(i8),
            I::UInt8 => unpack_categorical_chunked!(u8),
            I::Int16 => unpack_categorical_chunked!(i16),
            I::UInt16 => unpack_categorical_chunked!(u16),
            I::Int32 => unpack_categorical_chunked!(i32),
            I::UInt32 => unpack_categorical_chunked!(u32),
            I::Int64 => unpack_categorical_chunked!(i64),
            I::UInt64 => unpack_categorical_chunked!(u64),
            _ => polars_bail!(
                ComputeError: "unsupported arrow key type: {key_type:?}"
            ),
        }
    } else {
        // Casts the keys of one concrete key type `$dt` to the index type and returns them
        // together with the dictionary values.
        macro_rules! unpack_keys_values {
            ($dt:ty) => {{
                let arr = arr.as_any().downcast_ref::<DictionaryArray<$dt>>().unwrap();
                let keys = arr.keys();
                let keys = polars_compute::cast::primitive_to_primitive::<
                    $dt,
                    <IdxType as PolarsNumericType>::Native,
                >(keys, &IDX_DTYPE.to_arrow(CompatLevel::newest()));
                (keys, arr.values())
            }};
        }

        let (keys, values) = match key_type {
            I::Int8 => unpack_keys_values!(i8),
            I::UInt8 => unpack_keys_values!(u8),
            I::Int16 => unpack_keys_values!(i16),
            I::UInt16 => unpack_keys_values!(u16),
            I::Int32 => unpack_keys_values!(i32),
            I::UInt32 => unpack_keys_values!(u32),
            I::Int64 => unpack_keys_values!(i64),
            I::UInt64 => unpack_keys_values!(u64),
            _ => polars_bail!(
                ComputeError: "unsupported arrow key type: {key_type:?}"
            ),
        };

        // Import the dictionary values as a series (no metadata) ...
        let values = Series::_try_from_arrow_unchecked_with_md(
            name,
            vec![values.clone()],
            values.dtype(),
            None,
        )?;

        // ... and gather them with the keys.
        values.take(&IdxCa::from_chunks_and_dtype(
            PlSmallStr::EMPTY,
            vec![keys.to_boxed()],
            IDX_DTYPE,
        ))
    }
}
877
/// Converts a `months_days_ns` interval array into a struct array with three fields built
/// from the months, days and nanoseconds components (in that order).
///
/// The validity of the input array becomes the validity of the struct.
///
/// # Panics
///
/// Panics when `chunk` is not a `PrimitiveArray<months_days_ns>`.
#[cfg(feature = "dtype-struct")]
fn convert_month_day_nano_to_struct(chunk: Box<dyn Array>) -> PolarsResult<Box<dyn Array>> {
    let intervals: &PrimitiveArray<months_days_ns> = chunk.as_any().downcast_ref().unwrap();
    let len = intervals.len();

    // Split the interval components into one column per struct field.
    let mut months = Vec::<i32>::with_capacity(len);
    let mut days = Vec::<i32>::with_capacity(len);
    let mut nanoseconds = Vec::<i64>::with_capacity(len);
    for interval in intervals.values().iter() {
        months.push(interval.months());
        days.push(interval.days());
        nanoseconds.push(interval.ns());
    }

    let struct_dtype = DataType::_month_days_ns_struct_type()
        .to_physical()
        .to_arrow(CompatLevel::newest());
    let fields = vec![
        PrimitiveArray::<i32>::from_vec(months).boxed(),
        PrimitiveArray::<i32>::from_vec(days).boxed(),
        PrimitiveArray::<i64>::from_vec(nanoseconds).boxed(),
    ];
    let validity = intervals.validity().cloned();

    Ok(StructArray::new(struct_dtype, len, fields, validity).boxed())
}
904
905fn check_types(chunks: &[ArrayRef]) -> PolarsResult<ArrowDataType> {
906 let mut chunks_iter = chunks.iter();
907 let dtype: ArrowDataType = chunks_iter
908 .next()
909 .ok_or_else(|| polars_err!(NoData: "expected at least one array-ref"))?
910 .dtype()
911 .clone();
912
913 for chunk in chunks_iter {
914 if chunk.dtype() != &dtype {
915 polars_bail!(
916 ComputeError: "cannot create series from multiple arrays with different types"
917 );
918 }
919 }
920 Ok(dtype)
921}
922
923impl Series {
924 pub fn try_new<T>(
925 name: PlSmallStr,
926 data: T,
927 ) -> Result<Self, <(PlSmallStr, T) as TryInto<Self>>::Error>
928 where
929 (PlSmallStr, T): TryInto<Self>,
930 {
931 <(PlSmallStr, T) as TryInto<Self>>::try_into((name, data))
934 }
935}
936
937impl TryFrom<(PlSmallStr, Vec<ArrayRef>)> for Series {
938 type Error = PolarsError;
939
940 fn try_from(name_arr: (PlSmallStr, Vec<ArrayRef>)) -> PolarsResult<Self> {
941 let (name, chunks) = name_arr;
942
943 let dtype = check_types(&chunks)?;
944 unsafe { Series::_try_from_arrow_unchecked(name, chunks, &dtype) }
947 }
948}
949
950impl TryFrom<(PlSmallStr, ArrayRef)> for Series {
951 type Error = PolarsError;
952
953 fn try_from(name_arr: (PlSmallStr, ArrayRef)) -> PolarsResult<Self> {
954 let (name, arr) = name_arr;
955 Series::try_from((name, vec![arr]))
956 }
957}
958
959impl TryFrom<(&ArrowField, Vec<ArrayRef>)> for Series {
960 type Error = PolarsError;
961
962 fn try_from(field_arr: (&ArrowField, Vec<ArrayRef>)) -> PolarsResult<Self> {
963 let (field, chunks) = field_arr;
964 let arrow_dt = field.dtype();
965 let dtype = check_types(&chunks)?;
966 let compatible = match (&dtype, arrow_dt) {
967 (
969 ArrowDataType::Dictionary(int0, inner0, _ord0),
970 ArrowDataType::Dictionary(int1, inner1, _ord1),
971 ) => (int0, inner0) == (int1, inner1),
972 (l, r) => l == r,
973 };
974 polars_ensure!(compatible, ComputeError: "Arrow Field dtype does not match the ArrayRef dtypes");
975
976 unsafe {
979 Series::_try_from_arrow_unchecked_with_md(
980 field.name.clone(),
981 chunks,
982 &dtype,
983 field.metadata.as_deref(),
984 )
985 }
986 }
987}
988
989impl TryFrom<(&ArrowField, ArrayRef)> for Series {
990 type Error = PolarsError;
991
992 fn try_from(field_arr: (&ArrowField, ArrayRef)) -> PolarsResult<Self> {
993 let (field, arr) = field_arr;
994 Series::try_from((field, vec![arr]))
995 }
996}
997
/// Conversion of a value into a [`Series`].
///
/// # Safety
///
/// NOTE(review): the trait is declared `unsafe`, but the invariant implementors must uphold
/// is not visible in this file. It is presumably tied to the value returned by
/// [`IntoSeries::is_series`] — confirm before adding an implementation.
pub unsafe trait IntoSeries {
    /// Reports whether the implementing type is [`Series`] itself.
    ///
    /// The default implementation returns `false`.
    fn is_series() -> bool {
        false
    }

    /// Consumes `self` and returns it as a [`Series`].
    fn into_series(self) -> Series
    where
        Self: Sized;
}
1014
1015impl<T> From<ChunkedArray<T>> for Series
1016where
1017 T: PolarsDataType,
1018 ChunkedArray<T>: IntoSeries,
1019{
1020 fn from(ca: ChunkedArray<T>) -> Self {
1021 ca.into_series()
1022 }
1023}
1024
/// Converts a date chunked array into a [`Series`].
#[cfg(feature = "dtype-date")]
impl From<DateChunked> for Series {
    fn from(dates: DateChunked) -> Self {
        dates.into_series()
    }
}
1031
/// Converts a datetime chunked array into a [`Series`].
#[cfg(feature = "dtype-datetime")]
impl From<DatetimeChunked> for Series {
    fn from(datetimes: DatetimeChunked) -> Self {
        datetimes.into_series()
    }
}
1038
/// Converts a duration chunked array into a [`Series`].
#[cfg(feature = "dtype-duration")]
impl From<DurationChunked> for Series {
    fn from(durations: DurationChunked) -> Self {
        durations.into_series()
    }
}
1045
/// Converts a time chunked array into a [`Series`].
#[cfg(feature = "dtype-time")]
impl From<TimeChunked> for Series {
    fn from(times: TimeChunked) -> Self {
        times.into_series()
    }
}
1052
/// A shared `SeriesTrait` object converts into a [`Series`] by being wrapped directly.
///
/// `is_series` keeps the trait default (`false`).
unsafe impl IntoSeries for Arc<dyn SeriesTrait> {
    fn into_series(self) -> Series {
        // `Series` is a tuple struct around the shared trait object; no data is copied here.
        Series(self)
    }
}
1058
/// A [`Series`] converts into itself.
unsafe impl IntoSeries for Series {
    /// Overrides the trait default: this type is `Series` itself, so this returns `true`.
    fn is_series() -> bool {
        true
    }

    /// Returns `self` unchanged.
    fn into_series(self) -> Series {
        self
    }
}
1068
1069fn new_null(name: PlSmallStr, chunks: &[ArrayRef]) -> Series {
1070 let len = chunks.iter().map(|arr| arr.len()).sum();
1071 Series::new_null(name, len)
1072}