polars_core/series/
from.rs

1use arrow::datatypes::{IntervalUnit, Metadata};
2use arrow::offset::OffsetsBuffer;
3#[cfg(any(
4    feature = "dtype-date",
5    feature = "dtype-datetime",
6    feature = "dtype-time",
7    feature = "dtype-duration"
8))]
9use arrow::temporal_conversions::*;
10use arrow::types::months_days_ns;
11use polars_compute::cast::cast_unchecked as cast;
12#[cfg(feature = "dtype-decimal")]
13use polars_compute::decimal::dec128_fits;
14use polars_error::feature_gated;
15use polars_utils::check_allow_importing_interval_as_struct;
16use polars_utils::itertools::Itertools;
17
18use crate::chunked_array::cast::{CastOptions, cast_chunks};
19#[cfg(feature = "object")]
20use crate::chunked_array::object::extension::polars_extension::PolarsExtension;
21#[cfg(feature = "object")]
22use crate::chunked_array::object::registry::get_object_builder;
23use crate::prelude::*;
24
25impl Series {
26    pub fn from_array<A: ParameterFreeDtypeStaticArray>(name: PlSmallStr, array: A) -> Self {
27        unsafe {
28            Self::from_chunks_and_dtype_unchecked(
29                name,
30                vec![Box::new(array)],
31                &DataType::from_arrow_dtype(&A::get_dtype()),
32            )
33        }
34    }
35
36    pub fn from_chunk_and_dtype(
37        name: PlSmallStr,
38        chunk: ArrayRef,
39        dtype: &DataType,
40    ) -> PolarsResult<Self> {
41        if &dtype.to_physical().to_arrow(CompatLevel::newest()) != chunk.dtype() {
42            polars_bail!(
43                InvalidOperation: "cannot create a series of type '{dtype}' of arrow chunk with type '{:?}'",
44                chunk.dtype()
45            );
46        }
47
48        // SAFETY: We check that the datatype matches.
49        let series = unsafe { Self::from_chunks_and_dtype_unchecked(name, vec![chunk], dtype) };
50        Ok(series)
51    }
52
53    /// Takes chunks and a polars datatype and constructs the Series.
54    /// This is faster than creating from chunks and an arrow datatype because there is no
55    /// casting involved.
56    ///
57    /// # Safety
58    ///
59    /// The caller must ensure that the given `dtype`'s physical type matches all the `ArrayRef` dtypes.
60    pub unsafe fn from_chunks_and_dtype_unchecked(
61        name: PlSmallStr,
62        chunks: Vec<ArrayRef>,
63        dtype: &DataType,
64    ) -> Self {
65        use DataType::*;
66        match dtype {
67            Int8 => Int8Chunked::from_chunks(name, chunks).into_series(),
68            Int16 => Int16Chunked::from_chunks(name, chunks).into_series(),
69            Int32 => Int32Chunked::from_chunks(name, chunks).into_series(),
70            Int64 => Int64Chunked::from_chunks(name, chunks).into_series(),
71            UInt8 => UInt8Chunked::from_chunks(name, chunks).into_series(),
72            UInt16 => UInt16Chunked::from_chunks(name, chunks).into_series(),
73            UInt32 => UInt32Chunked::from_chunks(name, chunks).into_series(),
74            UInt64 => UInt64Chunked::from_chunks(name, chunks).into_series(),
75            #[cfg(feature = "dtype-i128")]
76            Int128 => Int128Chunked::from_chunks(name, chunks).into_series(),
77            #[cfg(feature = "dtype-u128")]
78            UInt128 => UInt128Chunked::from_chunks(name, chunks).into_series(),
79            #[cfg(feature = "dtype-date")]
80            Date => Int32Chunked::from_chunks(name, chunks)
81                .into_date()
82                .into_series(),
83            #[cfg(feature = "dtype-time")]
84            Time => Int64Chunked::from_chunks(name, chunks)
85                .into_time()
86                .into_series(),
87            #[cfg(feature = "dtype-duration")]
88            Duration(tu) => Int64Chunked::from_chunks(name, chunks)
89                .into_duration(*tu)
90                .into_series(),
91            #[cfg(feature = "dtype-datetime")]
92            Datetime(tu, tz) => Int64Chunked::from_chunks(name, chunks)
93                .into_datetime(*tu, tz.clone())
94                .into_series(),
95            #[cfg(feature = "dtype-decimal")]
96            Decimal(precision, scale) => Int128Chunked::from_chunks(name, chunks)
97                .into_decimal_unchecked(*precision, *scale)
98                .into_series(),
99            #[cfg(feature = "dtype-array")]
100            Array(_, _) => {
101                ArrayChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone())
102                    .into_series()
103            },
104            List(_) => ListChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone())
105                .into_series(),
106            String => StringChunked::from_chunks(name, chunks).into_series(),
107            Binary => BinaryChunked::from_chunks(name, chunks).into_series(),
108            #[cfg(feature = "dtype-categorical")]
109            dt @ (Categorical(_, _) | Enum(_, _)) => {
110                with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {
111                    let phys = ChunkedArray::from_chunks(name, chunks);
112                    CategoricalChunked::<$C>::from_cats_and_dtype_unchecked(phys, dt.clone()).into_series()
113                })
114            },
115            Boolean => BooleanChunked::from_chunks(name, chunks).into_series(),
116            Float32 => Float32Chunked::from_chunks(name, chunks).into_series(),
117            Float64 => Float64Chunked::from_chunks(name, chunks).into_series(),
118            BinaryOffset => BinaryOffsetChunked::from_chunks(name, chunks).into_series(),
119            #[cfg(feature = "dtype-extension")]
120            Extension(typ, storage) => ExtensionChunked::from_storage(
121                typ.clone(),
122                Series::from_chunks_and_dtype_unchecked(name, chunks, storage),
123            )
124            .into_series(),
125            #[cfg(feature = "dtype-struct")]
126            Struct(_) => {
127                let mut ca =
128                    StructChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone());
129                StructChunked::propagate_nulls_mut(&mut ca);
130                ca.into_series()
131            },
132            #[cfg(feature = "object")]
133            Object(_) => {
134                if let Some(arr) = chunks[0].as_any().downcast_ref::<FixedSizeBinaryArray>() {
135                    assert_eq!(chunks.len(), 1);
136                    // SAFETY:
137                    // this is highly unsafe. it will dereference a raw ptr on the heap
138                    // make sure the ptr is allocated and from this pid
139                    // (the pid is checked before dereference)
140                    {
141                        let pe = PolarsExtension::new(arr.clone());
142                        let s = pe.get_series(&name);
143                        pe.take_and_forget();
144                        s
145                    }
146                } else {
147                    unsafe { get_object_builder(name, 0).from_chunks(chunks) }
148                }
149            },
150            Null => new_null(name, &chunks),
151            Unknown(_) => {
152                panic!("dtype is unknown; consider supplying data-types for all operations")
153            },
154            #[allow(unreachable_patterns)]
155            _ => unreachable!(),
156        }
157    }
158
159    /// # Safety
160    /// The caller must ensure that the given `dtype` matches all the `ArrayRef` dtypes.
161    pub unsafe fn _try_from_arrow_unchecked(
162        name: PlSmallStr,
163        chunks: Vec<ArrayRef>,
164        dtype: &ArrowDataType,
165    ) -> PolarsResult<Self> {
166        Self::_try_from_arrow_unchecked_with_md(name, chunks, dtype, None)
167    }
168
169    /// Create a new Series without checking if the inner dtype of the chunks is correct
170    ///
171    /// # Safety
172    /// The caller must ensure that the given `dtype` matches all the `ArrayRef` dtypes.
173    pub unsafe fn _try_from_arrow_unchecked_with_md(
174        name: PlSmallStr,
175        mut chunks: Vec<ArrayRef>,
176        dtype: &ArrowDataType,
177        md: Option<&Metadata>,
178    ) -> PolarsResult<Self> {
179        match dtype {
180            ArrowDataType::Utf8View => Ok(StringChunked::from_chunks(name, chunks).into_series()),
181            ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => {
182                let chunks =
183                    cast_chunks(&chunks, &DataType::String, CastOptions::NonStrict).unwrap();
184                Ok(StringChunked::from_chunks(name, chunks).into_series())
185            },
186            ArrowDataType::BinaryView => Ok(BinaryChunked::from_chunks(name, chunks).into_series()),
187            ArrowDataType::LargeBinary => {
188                if let Some(md) = md {
189                    if md.maintain_type() {
190                        return Ok(BinaryOffsetChunked::from_chunks(name, chunks).into_series());
191                    }
192                }
193                let chunks =
194                    cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict).unwrap();
195                Ok(BinaryChunked::from_chunks(name, chunks).into_series())
196            },
197            ArrowDataType::Binary => {
198                let chunks =
199                    cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict).unwrap();
200                Ok(BinaryChunked::from_chunks(name, chunks).into_series())
201            },
202            ArrowDataType::List(_) | ArrowDataType::LargeList(_) => {
203                let (chunks, dtype) = to_physical_and_dtype(chunks, md);
204                unsafe {
205                    Ok(
206                        ListChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype)
207                            .into_series(),
208                    )
209                }
210            },
211            #[cfg(feature = "dtype-array")]
212            ArrowDataType::FixedSizeList(_, _) => {
213                let (chunks, dtype) = to_physical_and_dtype(chunks, md);
214                unsafe {
215                    Ok(
216                        ArrayChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype)
217                            .into_series(),
218                    )
219                }
220            },
221            ArrowDataType::Boolean => Ok(BooleanChunked::from_chunks(name, chunks).into_series()),
222            #[cfg(feature = "dtype-u8")]
223            ArrowDataType::UInt8 => Ok(UInt8Chunked::from_chunks(name, chunks).into_series()),
224            #[cfg(feature = "dtype-u16")]
225            ArrowDataType::UInt16 => Ok(UInt16Chunked::from_chunks(name, chunks).into_series()),
226            ArrowDataType::UInt32 => Ok(UInt32Chunked::from_chunks(name, chunks).into_series()),
227            ArrowDataType::UInt64 => Ok(UInt64Chunked::from_chunks(name, chunks).into_series()),
228            ArrowDataType::UInt128 => feature_gated!(
229                "dtype-u128",
230                Ok(UInt128Chunked::from_chunks(name, chunks).into_series())
231            ),
232            #[cfg(feature = "dtype-i8")]
233            ArrowDataType::Int8 => Ok(Int8Chunked::from_chunks(name, chunks).into_series()),
234            #[cfg(feature = "dtype-i16")]
235            ArrowDataType::Int16 => Ok(Int16Chunked::from_chunks(name, chunks).into_series()),
236            ArrowDataType::Int32 => Ok(Int32Chunked::from_chunks(name, chunks).into_series()),
237            ArrowDataType::Int64 => Ok(Int64Chunked::from_chunks(name, chunks).into_series()),
238            ArrowDataType::Int128 => feature_gated!(
239                "dtype-i128",
240                Ok(Int128Chunked::from_chunks(name, chunks).into_series())
241            ),
242            ArrowDataType::Float16 => {
243                let chunks =
244                    cast_chunks(&chunks, &DataType::Float32, CastOptions::NonStrict).unwrap();
245                Ok(Float32Chunked::from_chunks(name, chunks).into_series())
246            },
247            ArrowDataType::Float32 => Ok(Float32Chunked::from_chunks(name, chunks).into_series()),
248            ArrowDataType::Float64 => Ok(Float64Chunked::from_chunks(name, chunks).into_series()),
249            #[cfg(feature = "dtype-date")]
250            ArrowDataType::Date32 => {
251                let chunks =
252                    cast_chunks(&chunks, &DataType::Int32, CastOptions::Overflowing).unwrap();
253                Ok(Int32Chunked::from_chunks(name, chunks)
254                    .into_date()
255                    .into_series())
256            },
257            #[cfg(feature = "dtype-datetime")]
258            ArrowDataType::Date64 => {
259                let chunks =
260                    cast_chunks(&chunks, &DataType::Int64, CastOptions::Overflowing).unwrap();
261                let ca = Int64Chunked::from_chunks(name, chunks);
262                Ok(ca.into_datetime(TimeUnit::Milliseconds, None).into_series())
263            },
264            #[cfg(feature = "dtype-datetime")]
265            ArrowDataType::Timestamp(tu, tz) => {
266                let tz = TimeZone::opt_try_new(tz.clone())?;
267                let chunks =
268                    cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
269                let s = Int64Chunked::from_chunks(name, chunks)
270                    .into_datetime(tu.into(), tz)
271                    .into_series();
272                Ok(match tu {
273                    ArrowTimeUnit::Second => &s * MILLISECONDS,
274                    ArrowTimeUnit::Millisecond => s,
275                    ArrowTimeUnit::Microsecond => s,
276                    ArrowTimeUnit::Nanosecond => s,
277                })
278            },
279            #[cfg(feature = "dtype-duration")]
280            ArrowDataType::Duration(tu) => {
281                let chunks =
282                    cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
283                let s = Int64Chunked::from_chunks(name, chunks)
284                    .into_duration(tu.into())
285                    .into_series();
286                Ok(match tu {
287                    ArrowTimeUnit::Second => &s * MILLISECONDS,
288                    ArrowTimeUnit::Millisecond => s,
289                    ArrowTimeUnit::Microsecond => s,
290                    ArrowTimeUnit::Nanosecond => s,
291                })
292            },
293            #[cfg(feature = "dtype-time")]
294            ArrowDataType::Time64(tu) | ArrowDataType::Time32(tu) => {
295                let mut chunks = chunks;
296                if matches!(dtype, ArrowDataType::Time32(_)) {
297                    chunks =
298                        cast_chunks(&chunks, &DataType::Int32, CastOptions::NonStrict).unwrap();
299                }
300                let chunks =
301                    cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
302                let s = Int64Chunked::from_chunks(name, chunks)
303                    .into_time()
304                    .into_series();
305                Ok(match tu {
306                    ArrowTimeUnit::Second => &s * NANOSECONDS,
307                    ArrowTimeUnit::Millisecond => &s * 1_000_000,
308                    ArrowTimeUnit::Microsecond => &s * 1_000,
309                    ArrowTimeUnit::Nanosecond => s,
310                })
311            },
312            ArrowDataType::Decimal32(precision, scale) => {
313                feature_gated!("dtype-decimal", {
314                    polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?;
315
316                    let mut chunks = chunks;
317                    for chunk in chunks.iter_mut() {
318                        let old_chunk = chunk
319                            .as_any_mut()
320                            .downcast_mut::<PrimitiveArray<i32>>()
321                            .unwrap();
322
323                        // For now, we just cast the whole data to i128.
324                        let (_, values, validity) = std::mem::take(old_chunk).into_inner();
325                        *chunk = PrimitiveArray::new(
326                            ArrowDataType::Int128,
327                            values.iter().map(|&v| v as i128).collect(),
328                            validity,
329                        )
330                        .to_boxed();
331                    }
332
333                    let s = Int128Chunked::from_chunks(name, chunks)
334                        .into_decimal_unchecked(*precision, *scale)
335                        .into_series();
336                    Ok(s)
337                })
338            },
339            ArrowDataType::Decimal64(precision, scale) => {
340                feature_gated!("dtype-decimal", {
341                    polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?;
342
343                    let mut chunks = chunks;
344                    for chunk in chunks.iter_mut() {
345                        let old_chunk = chunk
346                            .as_any_mut()
347                            .downcast_mut::<PrimitiveArray<i64>>()
348                            .unwrap();
349
350                        // For now, we just cast the whole data to i128.
351                        let (_, values, validity) = std::mem::take(old_chunk).into_inner();
352                        *chunk = PrimitiveArray::new(
353                            ArrowDataType::Int128,
354                            values.iter().map(|&v| v as i128).collect(),
355                            validity,
356                        )
357                        .to_boxed();
358                    }
359
360                    let s = Int128Chunked::from_chunks(name, chunks)
361                        .into_decimal_unchecked(*precision, *scale)
362                        .into_series();
363                    Ok(s)
364                })
365            },
366            ArrowDataType::Decimal(precision, scale) => {
367                feature_gated!("dtype-decimal", {
368                    polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?;
369
370                    let mut chunks = chunks;
371                    for chunk in chunks.iter_mut() {
372                        *chunk = std::mem::take(
373                            chunk
374                                .as_any_mut()
375                                .downcast_mut::<PrimitiveArray<i128>>()
376                                .unwrap(),
377                        )
378                        .to(ArrowDataType::Int128)
379                        .to_boxed();
380                    }
381
382                    let s = Int128Chunked::from_chunks(name, chunks)
383                        .into_decimal_unchecked(*precision, *scale)
384                        .into_series();
385                    Ok(s)
386                })
387            },
388            ArrowDataType::Decimal256(precision, scale) => {
389                feature_gated!("dtype-decimal", {
390                    use arrow::types::i256;
391
392                    polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?;
393
394                    let mut chunks = chunks;
395                    for chunk in chunks.iter_mut() {
396                        let arr = std::mem::take(
397                            chunk
398                                .as_any_mut()
399                                .downcast_mut::<PrimitiveArray<i256>>()
400                                .unwrap(),
401                        );
402                        let arr_128: PrimitiveArray<i128> = arr.iter().map(|opt_v| {
403                            if let Some(v) = opt_v {
404                                let smaller: Option<i128> = (*v).try_into().ok();
405                                let smaller = smaller.filter(|v| dec128_fits(*v, *precision));
406                                smaller.ok_or_else(|| {
407                                    polars_err!(ComputeError: "Decimal256 to Decimal128 conversion overflowed, Decimal256 is not (yet) supported in Polars")
408                                }).map(Some)
409                            } else {
410                                Ok(None)
411                            }
412                        }).try_collect_arr_trusted()?;
413
414                        *chunk = arr_128.to(ArrowDataType::Int128).to_boxed();
415                    }
416
417                    let s = Int128Chunked::from_chunks(name, chunks)
418                        .into_decimal_unchecked(*precision, *scale)
419                        .into_series();
420                    Ok(s)
421                })
422            },
423            ArrowDataType::Null => Ok(new_null(name, &chunks)),
424            #[cfg(not(feature = "dtype-categorical"))]
425            ArrowDataType::Dictionary(_, _, _) => {
426                panic!("activate dtype-categorical to convert dictionary arrays")
427            },
428            #[cfg(feature = "dtype-categorical")]
429            ArrowDataType::Dictionary(key_type, _, _) => {
430                let polars_dtype = DataType::from_arrow(chunks[0].dtype(), md);
431
432                let mut series_iter = chunks.into_iter().map(|arr| {
433                    import_arrow_dictionary_array(name.clone(), arr, key_type, &polars_dtype)
434                });
435
436                let mut first = series_iter.next().unwrap()?;
437
438                for s in series_iter {
439                    first.append_owned(s?)?;
440                }
441
442                Ok(first)
443            },
444            #[cfg(feature = "object")]
445            ArrowDataType::Extension(ext)
446                if ext.name == POLARS_OBJECT_EXTENSION_NAME && ext.metadata.is_some() =>
447            {
448                assert_eq!(chunks.len(), 1);
449                let arr = chunks[0]
450                    .as_any()
451                    .downcast_ref::<FixedSizeBinaryArray>()
452                    .unwrap();
453                // SAFETY:
454                // this is highly unsafe. it will dereference a raw ptr on the heap
455                // make sure the ptr is allocated and from this pid
456                // (the pid is checked before dereference)
457                let s = {
458                    let pe = PolarsExtension::new(arr.clone());
459                    let s = pe.get_series(&name);
460                    pe.take_and_forget();
461                    s
462                };
463                Ok(s)
464            },
465            #[cfg(feature = "dtype-extension")]
466            ArrowDataType::Extension(ext) => {
467                use crate::datatypes::extension::get_extension_type_or_storage;
468
469                for chunk in &mut chunks {
470                    debug_assert!(
471                        chunk.dtype() == dtype,
472                        "expected chunk dtype to be {:?}, got {:?}",
473                        dtype,
474                        chunk.dtype()
475                    );
476                    *chunk.dtype_mut() = ext.inner.clone();
477                }
478                let storage = Series::_try_from_arrow_unchecked_with_md(
479                    name.clone(),
480                    chunks,
481                    &ext.inner,
482                    md,
483                )?;
484
485                Ok(
486                    match get_extension_type_or_storage(
487                        &ext.name,
488                        storage.dtype(),
489                        ext.metadata.as_deref(),
490                    ) {
491                        Some(typ) => ExtensionChunked::from_storage(typ, storage).into_series(),
492                        None => storage,
493                    },
494                )
495            },
496
497            #[cfg(feature = "dtype-struct")]
498            ArrowDataType::Struct(_) => {
499                let (chunks, dtype) = to_physical_and_dtype(chunks, md);
500
501                unsafe {
502                    let mut ca =
503                        StructChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype);
504                    StructChunked::propagate_nulls_mut(&mut ca);
505                    Ok(ca.into_series())
506                }
507            },
508            ArrowDataType::FixedSizeBinary(_) => {
509                let chunks = cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict)?;
510                Ok(BinaryChunked::from_chunks(name, chunks).into_series())
511            },
512            ArrowDataType::Map(field, _is_ordered) => {
513                let struct_arrays = chunks
514                    .iter()
515                    .map(|arr| {
516                        let arr = arr.as_any().downcast_ref::<MapArray>().unwrap();
517                        arr.field().clone()
518                    })
519                    .collect::<Vec<_>>();
520
521                let (phys_struct_arrays, dtype) =
522                    to_physical_and_dtype(struct_arrays, field.metadata.as_deref());
523
524                let chunks = chunks
525                    .iter()
526                    .zip(phys_struct_arrays)
527                    .map(|(arr, values)| {
528                        let arr = arr.as_any().downcast_ref::<MapArray>().unwrap();
529                        let offsets: &OffsetsBuffer<i32> = arr.offsets();
530
531                        let validity = values.validity().cloned();
532
533                        Box::from(ListArray::<i64>::new(
534                            ListArray::<i64>::default_datatype(values.dtype().clone()),
535                            OffsetsBuffer::<i64>::from(offsets),
536                            values,
537                            validity,
538                        )) as ArrayRef
539                    })
540                    .collect();
541
542                unsafe {
543                    let out = ListChunked::from_chunks_and_dtype_unchecked(
544                        name,
545                        chunks,
546                        DataType::List(Box::new(dtype)),
547                    );
548
549                    Ok(out.into_series())
550                }
551            },
552            ArrowDataType::Interval(IntervalUnit::MonthDayNano) => {
553                check_allow_importing_interval_as_struct("month_day_nano_interval")?;
554
555                feature_gated!("dtype-struct", {
556                    let chunks = chunks
557                        .into_iter()
558                        .map(convert_month_day_nano_to_struct)
559                        .collect::<PolarsResult<Vec<_>>>()?;
560
561                    Ok(StructChunked::from_chunks_and_dtype_unchecked(
562                        name,
563                        chunks,
564                        DataType::_month_days_ns_struct_type(),
565                    )
566                    .into_series())
567                })
568            },
569
570            dt => polars_bail!(ComputeError: "cannot create series from {:?}", dt),
571        }
572    }
573}
574
575fn convert<F: Fn(&dyn Array) -> ArrayRef>(arr: &[ArrayRef], f: F) -> Vec<ArrayRef> {
576    arr.iter().map(|arr| f(&**arr)).collect()
577}
578
579/// Converts to physical types and bubbles up the correct [`DataType`].
580#[allow(clippy::only_used_in_recursion)]
581unsafe fn to_physical_and_dtype(
582    arrays: Vec<ArrayRef>,
583    md: Option<&Metadata>,
584) -> (Vec<ArrayRef>, DataType) {
585    match arrays[0].dtype() {
586        ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => {
587            let chunks = cast_chunks(&arrays, &DataType::String, CastOptions::NonStrict).unwrap();
588            (chunks, DataType::String)
589        },
590        ArrowDataType::Binary | ArrowDataType::LargeBinary | ArrowDataType::FixedSizeBinary(_) => {
591            let chunks = cast_chunks(&arrays, &DataType::Binary, CastOptions::NonStrict).unwrap();
592            (chunks, DataType::Binary)
593        },
594        #[allow(unused_variables)]
595        dt @ ArrowDataType::Dictionary(_, _, _) => {
596            feature_gated!("dtype-categorical", {
597                let s = unsafe {
598                    let dt = dt.clone();
599                    Series::_try_from_arrow_unchecked_with_md(PlSmallStr::EMPTY, arrays, &dt, md)
600                }
601                .unwrap();
602                (s.chunks().clone(), s.dtype().clone())
603            })
604        },
605        dt @ ArrowDataType::Extension(_) => {
606            feature_gated!("dtype-extension", {
607                let s = unsafe {
608                    let dt = dt.clone();
609                    Series::_try_from_arrow_unchecked_with_md(PlSmallStr::EMPTY, arrays, &dt, md)
610                }
611                .unwrap();
612                (s.chunks().clone(), s.dtype().clone())
613            })
614        },
615        ArrowDataType::List(field) => {
616            let out = convert(&arrays, |arr| {
617                cast(arr, &ArrowDataType::LargeList(field.clone())).unwrap()
618            });
619            to_physical_and_dtype(out, md)
620        },
621        #[cfg(feature = "dtype-array")]
622        ArrowDataType::FixedSizeList(field, size) => {
623            let values = arrays
624                .iter()
625                .map(|arr| {
626                    let arr = arr.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
627                    arr.values().clone()
628                })
629                .collect::<Vec<_>>();
630
631            let (converted_values, dtype) =
632                to_physical_and_dtype(values, field.metadata.as_deref());
633
634            let arrays = arrays
635                .iter()
636                .zip(converted_values)
637                .map(|(arr, values)| {
638                    let arr = arr.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
639
640                    let dtype = FixedSizeListArray::default_datatype(values.dtype().clone(), *size);
641                    Box::from(FixedSizeListArray::new(
642                        dtype,
643                        arr.len(),
644                        values,
645                        arr.validity().cloned(),
646                    )) as ArrayRef
647                })
648                .collect();
649            (arrays, DataType::Array(Box::new(dtype), *size))
650        },
651        ArrowDataType::LargeList(field) => {
652            let values = arrays
653                .iter()
654                .map(|arr| {
655                    let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();
656                    arr.values().clone()
657                })
658                .collect::<Vec<_>>();
659
660            let (converted_values, dtype) =
661                to_physical_and_dtype(values, field.metadata.as_deref());
662
663            let arrays = arrays
664                .iter()
665                .zip(converted_values)
666                .map(|(arr, values)| {
667                    let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();
668
669                    let dtype = ListArray::<i64>::default_datatype(values.dtype().clone());
670                    Box::from(ListArray::<i64>::new(
671                        dtype,
672                        arr.offsets().clone(),
673                        values,
674                        arr.validity().cloned(),
675                    )) as ArrayRef
676                })
677                .collect();
678            (arrays, DataType::List(Box::new(dtype)))
679        },
680        ArrowDataType::Struct(_fields) => {
681            feature_gated!("dtype-struct", {
682                let mut pl_fields = None;
683                let arrays = arrays
684                    .iter()
685                    .map(|arr| {
686                        let arr = arr.as_any().downcast_ref::<StructArray>().unwrap();
687                        let (values, dtypes): (Vec<_>, Vec<_>) = arr
688                            .values()
689                            .iter()
690                            .zip(_fields.iter())
691                            .map(|(value, field)| {
692                                let mut out = to_physical_and_dtype(
693                                    vec![value.clone()],
694                                    field.metadata.as_deref(),
695                                );
696                                (out.0.pop().unwrap(), out.1)
697                            })
698                            .unzip();
699
700                        let arrow_fields = values
701                            .iter()
702                            .zip(_fields.iter())
703                            .map(|(arr, field)| {
704                                ArrowField::new(field.name.clone(), arr.dtype().clone(), true)
705                            })
706                            .collect();
707                        let arrow_array = Box::new(StructArray::new(
708                            ArrowDataType::Struct(arrow_fields),
709                            arr.len(),
710                            values,
711                            arr.validity().cloned(),
712                        )) as ArrayRef;
713
714                        if pl_fields.is_none() {
715                            pl_fields = Some(
716                                _fields
717                                    .iter()
718                                    .zip(dtypes)
719                                    .map(|(field, dtype)| Field::new(field.name.clone(), dtype))
720                                    .collect_vec(),
721                            )
722                        }
723
724                        arrow_array
725                    })
726                    .collect_vec();
727
728                (arrays, DataType::Struct(pl_fields.unwrap()))
729            })
730        },
731        // Use Series architecture to convert nested logical types to physical.
732        dt @ (ArrowDataType::Duration(_)
733        | ArrowDataType::Time32(_)
734        | ArrowDataType::Time64(_)
735        | ArrowDataType::Timestamp(_, _)
736        | ArrowDataType::Date32
737        | ArrowDataType::Decimal(_, _)
738        | ArrowDataType::Date64
739        | ArrowDataType::Map(_, _)) => {
740            let dt = dt.clone();
741            let mut s = Series::_try_from_arrow_unchecked(PlSmallStr::EMPTY, arrays, &dt).unwrap();
742            let dtype = s.dtype().clone();
743            (std::mem::take(s.chunks_mut()), dtype)
744        },
745        dt => {
746            let dtype = DataType::from_arrow(dt, md);
747            (arrays, dtype)
748        },
749    }
750}
751
/// Imports an arrow dictionary array as a [`Series`] of the requested `polars_dtype`.
///
/// Two strategies are used, selected by `polars_dtype`:
///
/// * `Categorical` / `Enum`: the dictionary values are cast to `Utf8View`, every key is
///   resolved to its string value, and a `CategoricalChunked` is built from those strings.
/// * any other dtype: the dictionary values are converted into a `Series` and the keys,
///   cast to the index type, are used to `take` from it.
///
/// # Errors
///
/// Returns a `ComputeError` when `key_type` is not one of `Int8`, `UInt8`, `Int16`,
/// `UInt16`, `Int32`, `UInt32`, `Int64` or `UInt64`. Errors from the value cast, from
/// building the categorical array, from converting the values and from `take` are
/// propagated.
///
/// # Panics
///
/// Panics if `arr` is not a `DictionaryArray` whose key type matches `key_type`.
///
/// # Safety
///
/// NOTE(review): the caller obligations are not spelled out in this part of the file.
/// The function forwards to `Series::_try_from_arrow_unchecked_with_md` and
/// `IdxCa::from_chunks_and_dtype`, so presumably `arr` must be a valid array for its arrow
/// dtype — confirm against those functions' contracts.
#[cfg(feature = "dtype-categorical")]
unsafe fn import_arrow_dictionary_array(
    name: PlSmallStr,
    arr: Box<dyn Array>,
    key_type: &arrow::datatypes::IntegerType,
    polars_dtype: &DataType,
) -> PolarsResult<Series> {
    use arrow::datatypes::IntegerType as I;

    if matches!(
        polars_dtype,
        DataType::Categorical(_, _) | DataType::Enum(_, _)
    ) {
        // Expands to the categorical import for one concrete arrow key type `$dt`.
        macro_rules! unpack_categorical_chunked {
            ($dt:ty) => {{
                let arr = arr.as_any().downcast_ref::<DictionaryArray<$dt>>().unwrap();
                let keys = arr.keys();
                let values = arr.values();
                // `cast` is `polars_compute::cast::cast_unchecked` (see the imports).
                let values = cast(&**values, &ArrowDataType::Utf8View)?;
                let values = values.as_any().downcast_ref::<Utf8ViewArray>().unwrap();
                with_match_categorical_physical_type!(polars_dtype.cat_physical().unwrap(), |$C| {
                    let ca = CategoricalChunked::<$C>::from_str_iter(
                        name,
                        polars_dtype.clone(),
                        keys.iter().map(|k| {
                            // A null key, or a key that does not convert to `usize`
                            // (e.g. a negative one), yields `None`.
                            let k: usize = (*k?).try_into().ok()?;
                            values.get(k)
                        }),
                    )?;
                    Ok(ca.into_series())
                })
            }};
        }

        match key_type {
            I::Int8 => unpack_categorical_chunked!(i8),
            I::UInt8 => unpack_categorical_chunked!(u8),
            I::Int16 => unpack_categorical_chunked!(i16),
            I::UInt16 => unpack_categorical_chunked!(u16),
            I::Int32 => unpack_categorical_chunked!(i32),
            I::UInt32 => unpack_categorical_chunked!(u32),
            I::Int64 => unpack_categorical_chunked!(i64),
            I::UInt64 => unpack_categorical_chunked!(u64),
            _ => polars_bail!(
                ComputeError: "unsupported arrow key type: {key_type:?}"
            ),
        }
    } else {
        // Expands to `(keys cast to the index type, dictionary values)` for one concrete
        // arrow key type `$dt`.
        macro_rules! unpack_keys_values {
            ($dt:ty) => {{
                let arr = arr.as_any().downcast_ref::<DictionaryArray<$dt>>().unwrap();
                let keys = arr.keys();
                let keys = polars_compute::cast::primitive_to_primitive::<
                    $dt,
                    <IdxType as PolarsNumericType>::Native,
                >(keys, &IDX_DTYPE.to_arrow(CompatLevel::newest()));
                (keys, arr.values())
            }};
        }

        let (keys, values) = match key_type {
            I::Int8 => unpack_keys_values!(i8),
            I::UInt8 => unpack_keys_values!(u8),
            I::Int16 => unpack_keys_values!(i16),
            I::UInt16 => unpack_keys_values!(u16),
            I::Int32 => unpack_keys_values!(i32),
            I::UInt32 => unpack_keys_values!(u32),
            I::Int64 => unpack_keys_values!(i64),
            I::UInt64 => unpack_keys_values!(u64),
            _ => polars_bail!(
                ComputeError: "unsupported arrow key type: {key_type:?}"
            ),
        };

        // Convert the dictionary values on their own (no field metadata is passed), then
        // gather them with the keys.
        let values = Series::_try_from_arrow_unchecked_with_md(
            name,
            vec![values.clone()],
            values.dtype(),
            None,
        )?;

        values.take(&IdxCa::from_chunks_and_dtype(
            PlSmallStr::EMPTY,
            vec![keys.to_boxed()],
            IDX_DTYPE,
        ))
    }
}
840
/// Rewrites a chunk of arrow `months_days_ns` interval values as a struct array with one
/// child array per component (months, days, nanoseconds), carrying over the input validity.
///
/// # Panics
///
/// Panics if `chunk` is not a `PrimitiveArray<months_days_ns>`.
#[cfg(feature = "dtype-struct")]
fn convert_month_day_nano_to_struct(chunk: Box<dyn Array>) -> PolarsResult<Box<dyn Array>> {
    let intervals = chunk
        .as_any()
        .downcast_ref::<PrimitiveArray<months_days_ns>>()
        .unwrap();
    let len = intervals.len();

    // Split every interval into its three components, one output column each.
    let mut months = Vec::with_capacity(len);
    let mut days = Vec::with_capacity(len);
    let mut nanoseconds = Vec::with_capacity(len);
    for interval in intervals.values().iter() {
        months.push(interval.months());
        days.push(interval.days());
        nanoseconds.push(interval.ns());
    }

    let struct_dtype = DataType::_month_days_ns_struct_type()
        .to_physical()
        .to_arrow(CompatLevel::newest());
    let fields = vec![
        PrimitiveArray::<i32>::from_vec(months).boxed(),
        PrimitiveArray::<i32>::from_vec(days).boxed(),
        PrimitiveArray::<i64>::from_vec(nanoseconds).boxed(),
    ];

    Ok(StructArray::new(struct_dtype, len, fields, intervals.validity().cloned()).boxed())
}
867
868fn check_types(chunks: &[ArrayRef]) -> PolarsResult<ArrowDataType> {
869    let mut chunks_iter = chunks.iter();
870    let dtype: ArrowDataType = chunks_iter
871        .next()
872        .ok_or_else(|| polars_err!(NoData: "expected at least one array-ref"))?
873        .dtype()
874        .clone();
875
876    for chunk in chunks_iter {
877        if chunk.dtype() != &dtype {
878            polars_bail!(
879                ComputeError: "cannot create series from multiple arrays with different types"
880            );
881        }
882    }
883    Ok(dtype)
884}
885
886impl Series {
887    pub fn try_new<T>(
888        name: PlSmallStr,
889        data: T,
890    ) -> Result<Self, <(PlSmallStr, T) as TryInto<Self>>::Error>
891    where
892        (PlSmallStr, T): TryInto<Self>,
893    {
894        // # TODO
895        // * Remove the TryFrom<tuple> impls in favor of this
896        <(PlSmallStr, T) as TryInto<Self>>::try_into((name, data))
897    }
898}
899
900impl TryFrom<(PlSmallStr, Vec<ArrayRef>)> for Series {
901    type Error = PolarsError;
902
903    fn try_from(name_arr: (PlSmallStr, Vec<ArrayRef>)) -> PolarsResult<Self> {
904        let (name, chunks) = name_arr;
905
906        let dtype = check_types(&chunks)?;
907        // SAFETY:
908        // dtype is checked
909        unsafe { Series::_try_from_arrow_unchecked(name, chunks, &dtype) }
910    }
911}
912
913impl TryFrom<(PlSmallStr, ArrayRef)> for Series {
914    type Error = PolarsError;
915
916    fn try_from(name_arr: (PlSmallStr, ArrayRef)) -> PolarsResult<Self> {
917        let (name, arr) = name_arr;
918        Series::try_from((name, vec![arr]))
919    }
920}
921
922impl TryFrom<(&ArrowField, Vec<ArrayRef>)> for Series {
923    type Error = PolarsError;
924
925    fn try_from(field_arr: (&ArrowField, Vec<ArrayRef>)) -> PolarsResult<Self> {
926        let (field, chunks) = field_arr;
927
928        let dtype = check_types(&chunks)?;
929        polars_ensure!(dtype == *field.dtype(), ComputeError: "Arrow Field dtype does not match the ArrayRef dtypes");
930
931        // SAFETY:
932        // dtype is checked
933        unsafe {
934            Series::_try_from_arrow_unchecked_with_md(
935                field.name.clone(),
936                chunks,
937                &dtype,
938                field.metadata.as_deref(),
939            )
940        }
941    }
942}
943
944impl TryFrom<(&ArrowField, ArrayRef)> for Series {
945    type Error = PolarsError;
946
947    fn try_from(field_arr: (&ArrowField, ArrayRef)) -> PolarsResult<Self> {
948        let (field, arr) = field_arr;
949        Series::try_from((field, vec![arr]))
950    }
951}
952
/// Used to convert a [`ChunkedArray`], `Arc<dyn SeriesTrait>` and [`Series`]
/// into a [`Series`].
///
/// # Safety
///
/// This trait is marked `unsafe` as the `is_series` return is used
/// to transmute to `Series`. This must always return `false` except
/// for `Series` structs.
pub unsafe trait IntoSeries {
    /// Reports whether the implementing type is [`Series`] itself.
    ///
    /// The default implementation returns `false`; see the trait-level safety note
    /// before overriding it.
    fn is_series() -> bool {
        false
    }

    /// Consumes `self` and returns it as a [`Series`].
    fn into_series(self) -> Series
    where
        Self: Sized;
}
969
970impl<T> From<ChunkedArray<T>> for Series
971where
972    T: PolarsDataType,
973    ChunkedArray<T>: IntoSeries,
974{
975    fn from(ca: ChunkedArray<T>) -> Self {
976        ca.into_series()
977    }
978}
979
/// Converts a `DateChunked` into a [`Series`] via its [`IntoSeries`] implementation.
#[cfg(feature = "dtype-date")]
impl From<DateChunked> for Series {
    fn from(dates: DateChunked) -> Self {
        IntoSeries::into_series(dates)
    }
}
986
/// Converts a `DatetimeChunked` into a [`Series`] via its [`IntoSeries`] implementation.
#[cfg(feature = "dtype-datetime")]
impl From<DatetimeChunked> for Series {
    fn from(datetimes: DatetimeChunked) -> Self {
        IntoSeries::into_series(datetimes)
    }
}
993
/// Converts a `DurationChunked` into a [`Series`] via its [`IntoSeries`] implementation.
#[cfg(feature = "dtype-duration")]
impl From<DurationChunked> for Series {
    fn from(durations: DurationChunked) -> Self {
        IntoSeries::into_series(durations)
    }
}
1000
/// Converts a `TimeChunked` into a [`Series`] via its [`IntoSeries`] implementation.
#[cfg(feature = "dtype-time")]
impl From<TimeChunked> for Series {
    fn from(times: TimeChunked) -> Self {
        IntoSeries::into_series(times)
    }
}
1007
// `is_series` is not overridden here, so it keeps the trait default of `false`.
unsafe impl IntoSeries for Arc<dyn SeriesTrait> {
    /// Wraps the shared trait object directly in the [`Series`] newtype.
    fn into_series(self) -> Series {
        Series(self)
    }
}
1013
// The one implementation in this file that overrides `is_series` to return `true`.
unsafe impl IntoSeries for Series {
    /// Always `true`: the implementing type is [`Series`] itself.
    fn is_series() -> bool {
        true
    }

    /// Identity conversion; returns `self` unchanged.
    fn into_series(self) -> Series {
        self
    }
}
1023
1024fn new_null(name: PlSmallStr, chunks: &[ArrayRef]) -> Series {
1025    let len = chunks.iter().map(|arr| arr.len()).sum();
1026    Series::new_null(name, len)
1027}