// polars_core/series/from.rs

1use arrow::datatypes::{IntervalUnit, Metadata};
2use arrow::offset::OffsetsBuffer;
3#[cfg(any(
4    feature = "dtype-date",
5    feature = "dtype-datetime",
6    feature = "dtype-time",
7    feature = "dtype-duration"
8))]
9use arrow::temporal_conversions::*;
10use arrow::types::months_days_ns;
11use polars_compute::cast::cast_unchecked as cast;
12#[cfg(feature = "dtype-decimal")]
13use polars_compute::decimal::dec128_fits;
14use polars_error::feature_gated;
15use polars_utils::itertools::Itertools;
16
17use crate::chunked_array::cast::{CastOptions, cast_chunks};
18#[cfg(feature = "object")]
19use crate::chunked_array::object::extension::polars_extension::PolarsExtension;
20#[cfg(feature = "object")]
21use crate::chunked_array::object::registry::get_object_builder;
22use crate::config::check_allow_importing_interval_as_struct;
23use crate::prelude::*;
24
25impl Series {
26    pub fn from_array<A: ParameterFreeDtypeStaticArray>(name: PlSmallStr, array: A) -> Self {
27        unsafe {
28            Self::from_chunks_and_dtype_unchecked(
29                name,
30                vec![Box::new(array)],
31                &DataType::from_arrow_dtype(&A::get_dtype()),
32            )
33        }
34    }
35
36    pub fn from_chunk_and_dtype(
37        name: PlSmallStr,
38        chunk: ArrayRef,
39        dtype: &DataType,
40    ) -> PolarsResult<Self> {
41        if &dtype.to_physical().to_arrow(CompatLevel::newest()) != chunk.dtype() {
42            polars_bail!(
43                InvalidOperation: "cannot create a series of type '{dtype}' of arrow chunk with type '{:?}'",
44                chunk.dtype()
45            );
46        }
47
48        // SAFETY: We check that the datatype matches.
49        let series = unsafe { Self::from_chunks_and_dtype_unchecked(name, vec![chunk], dtype) };
50        Ok(series)
51    }
52
53    /// Takes chunks and a polars datatype and constructs the Series.
54    /// This is faster than creating from chunks and an arrow datatype because there is no
55    /// casting involved.
56    ///
57    /// # Safety
58    ///
59    /// The caller must ensure that the given `dtype`'s physical type matches all the `ArrayRef` dtypes.
60    pub unsafe fn from_chunks_and_dtype_unchecked(
61        name: PlSmallStr,
62        chunks: Vec<ArrayRef>,
63        dtype: &DataType,
64    ) -> Self {
65        use DataType::*;
66        match dtype {
67            Int8 => Int8Chunked::from_chunks(name, chunks).into_series(),
68            Int16 => Int16Chunked::from_chunks(name, chunks).into_series(),
69            Int32 => Int32Chunked::from_chunks(name, chunks).into_series(),
70            Int64 => Int64Chunked::from_chunks(name, chunks).into_series(),
71            UInt8 => UInt8Chunked::from_chunks(name, chunks).into_series(),
72            UInt16 => UInt16Chunked::from_chunks(name, chunks).into_series(),
73            UInt32 => UInt32Chunked::from_chunks(name, chunks).into_series(),
74            UInt64 => UInt64Chunked::from_chunks(name, chunks).into_series(),
75            #[cfg(feature = "dtype-i128")]
76            Int128 => Int128Chunked::from_chunks(name, chunks).into_series(),
77            #[cfg(feature = "dtype-u128")]
78            UInt128 => UInt128Chunked::from_chunks(name, chunks).into_series(),
79            #[cfg(feature = "dtype-date")]
80            Date => Int32Chunked::from_chunks(name, chunks)
81                .into_date()
82                .into_series(),
83            #[cfg(feature = "dtype-time")]
84            Time => Int64Chunked::from_chunks(name, chunks)
85                .into_time()
86                .into_series(),
87            #[cfg(feature = "dtype-duration")]
88            Duration(tu) => Int64Chunked::from_chunks(name, chunks)
89                .into_duration(*tu)
90                .into_series(),
91            #[cfg(feature = "dtype-datetime")]
92            Datetime(tu, tz) => Int64Chunked::from_chunks(name, chunks)
93                .into_datetime(*tu, tz.clone())
94                .into_series(),
95            #[cfg(feature = "dtype-decimal")]
96            Decimal(precision, scale) => Int128Chunked::from_chunks(name, chunks)
97                .into_decimal_unchecked(*precision, *scale)
98                .into_series(),
99            #[cfg(feature = "dtype-array")]
100            Array(_, _) => {
101                ArrayChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone())
102                    .into_series()
103            },
104            List(_) => ListChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone())
105                .into_series(),
106            String => StringChunked::from_chunks(name, chunks).into_series(),
107            Binary => BinaryChunked::from_chunks(name, chunks).into_series(),
108            #[cfg(feature = "dtype-categorical")]
109            dt @ (Categorical(_, _) | Enum(_, _)) => {
110                with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {
111                    let phys = ChunkedArray::from_chunks(name, chunks);
112                    CategoricalChunked::<$C>::from_cats_and_dtype_unchecked(phys, dt.clone()).into_series()
113                })
114            },
115            Boolean => BooleanChunked::from_chunks(name, chunks).into_series(),
116            #[cfg(feature = "dtype-f16")]
117            Float16 => Float16Chunked::from_chunks(name, chunks).into_series(),
118            Float32 => Float32Chunked::from_chunks(name, chunks).into_series(),
119            Float64 => Float64Chunked::from_chunks(name, chunks).into_series(),
120            BinaryOffset => BinaryOffsetChunked::from_chunks(name, chunks).into_series(),
121            #[cfg(feature = "dtype-extension")]
122            Extension(typ, storage) => ExtensionChunked::from_storage(
123                typ.clone(),
124                Series::from_chunks_and_dtype_unchecked(name, chunks, storage),
125            )
126            .into_series(),
127            #[cfg(feature = "dtype-struct")]
128            Struct(_) => {
129                let mut ca =
130                    StructChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone());
131                StructChunked::propagate_nulls_mut(&mut ca);
132                ca.into_series()
133            },
134            #[cfg(feature = "object")]
135            Object(_) => {
136                if let Some(arr) = chunks[0].as_any().downcast_ref::<FixedSizeBinaryArray>() {
137                    assert_eq!(chunks.len(), 1);
138                    // SAFETY:
139                    // this is highly unsafe. it will dereference a raw ptr on the heap
140                    // make sure the ptr is allocated and from this pid
141                    // (the pid is checked before dereference)
142                    {
143                        let pe = PolarsExtension::new(arr.clone());
144                        let s = pe.get_series(&name);
145                        pe.take_and_forget();
146                        s
147                    }
148                } else {
149                    unsafe { get_object_builder(name, 0).from_chunks(chunks) }
150                }
151            },
152            Null => new_null(name, &chunks),
153            Unknown(_) => {
154                panic!("dtype is unknown; consider supplying data-types for all operations")
155            },
156            #[allow(unreachable_patterns)]
157            _ => unreachable!(),
158        }
159    }
160
161    /// # Safety
162    /// The caller must ensure that the given `dtype` matches all the `ArrayRef` dtypes.
163    pub unsafe fn _try_from_arrow_unchecked(
164        name: PlSmallStr,
165        chunks: Vec<ArrayRef>,
166        dtype: &ArrowDataType,
167    ) -> PolarsResult<Self> {
168        Self::_try_from_arrow_unchecked_with_md(name, chunks, dtype, None)
169    }
170
171    /// Create a new Series without checking if the inner dtype of the chunks is correct
172    ///
173    /// # Safety
174    /// The caller must ensure that the given `dtype` matches all the `ArrayRef` dtypes.
175    pub unsafe fn _try_from_arrow_unchecked_with_md(
176        name: PlSmallStr,
177        mut chunks: Vec<ArrayRef>,
178        dtype: &ArrowDataType,
179        md: Option<&Metadata>,
180    ) -> PolarsResult<Self> {
181        match dtype {
182            ArrowDataType::Utf8View => Ok(StringChunked::from_chunks(name, chunks).into_series()),
183            ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => {
184                let chunks =
185                    cast_chunks(&chunks, &DataType::String, CastOptions::NonStrict).unwrap();
186                Ok(StringChunked::from_chunks(name, chunks).into_series())
187            },
188            ArrowDataType::BinaryView => Ok(BinaryChunked::from_chunks(name, chunks).into_series()),
189            ArrowDataType::LargeBinary => {
190                if let Some(md) = md {
191                    if md.maintain_type() {
192                        return Ok(BinaryOffsetChunked::from_chunks(name, chunks).into_series());
193                    }
194                }
195                let chunks =
196                    cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict).unwrap();
197                Ok(BinaryChunked::from_chunks(name, chunks).into_series())
198            },
199            ArrowDataType::Binary => {
200                let chunks =
201                    cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict).unwrap();
202                Ok(BinaryChunked::from_chunks(name, chunks).into_series())
203            },
204            ArrowDataType::List(_) | ArrowDataType::LargeList(_) => {
205                let (chunks, dtype) = to_physical_and_dtype(chunks, md);
206                unsafe {
207                    Ok(
208                        ListChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype)
209                            .into_series(),
210                    )
211                }
212            },
213            #[cfg(feature = "dtype-array")]
214            ArrowDataType::FixedSizeList(_, _) => {
215                let (chunks, dtype) = to_physical_and_dtype(chunks, md);
216                unsafe {
217                    Ok(
218                        ArrayChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype)
219                            .into_series(),
220                    )
221                }
222            },
223            ArrowDataType::Boolean => Ok(BooleanChunked::from_chunks(name, chunks).into_series()),
224            #[cfg(feature = "dtype-u8")]
225            ArrowDataType::UInt8 => Ok(UInt8Chunked::from_chunks(name, chunks).into_series()),
226            #[cfg(feature = "dtype-u16")]
227            ArrowDataType::UInt16 => Ok(UInt16Chunked::from_chunks(name, chunks).into_series()),
228            ArrowDataType::UInt32 => Ok(UInt32Chunked::from_chunks(name, chunks).into_series()),
229            ArrowDataType::UInt64 => Ok(UInt64Chunked::from_chunks(name, chunks).into_series()),
230            ArrowDataType::UInt128 => feature_gated!(
231                "dtype-u128",
232                Ok(UInt128Chunked::from_chunks(name, chunks).into_series())
233            ),
234            #[cfg(feature = "dtype-i8")]
235            ArrowDataType::Int8 => Ok(Int8Chunked::from_chunks(name, chunks).into_series()),
236            #[cfg(feature = "dtype-i16")]
237            ArrowDataType::Int16 => Ok(Int16Chunked::from_chunks(name, chunks).into_series()),
238            ArrowDataType::Int32 => Ok(Int32Chunked::from_chunks(name, chunks).into_series()),
239            ArrowDataType::Int64 => Ok(Int64Chunked::from_chunks(name, chunks).into_series()),
240            ArrowDataType::Int128 => feature_gated!(
241                "dtype-i128",
242                Ok(Int128Chunked::from_chunks(name, chunks).into_series())
243            ),
244            #[cfg(feature = "dtype-f16")]
245            ArrowDataType::Float16 => {
246                let chunks =
247                    cast_chunks(&chunks, &DataType::Float16, CastOptions::NonStrict).unwrap();
248                Ok(Float16Chunked::from_chunks(name, chunks).into_series())
249            },
250            ArrowDataType::Float32 => Ok(Float32Chunked::from_chunks(name, chunks).into_series()),
251            ArrowDataType::Float64 => Ok(Float64Chunked::from_chunks(name, chunks).into_series()),
252            #[cfg(feature = "dtype-date")]
253            ArrowDataType::Date32 => {
254                let chunks =
255                    cast_chunks(&chunks, &DataType::Int32, CastOptions::Overflowing).unwrap();
256                Ok(Int32Chunked::from_chunks(name, chunks)
257                    .into_date()
258                    .into_series())
259            },
260            #[cfg(feature = "dtype-datetime")]
261            ArrowDataType::Date64 => {
262                let chunks =
263                    cast_chunks(&chunks, &DataType::Int64, CastOptions::Overflowing).unwrap();
264                let ca = Int64Chunked::from_chunks(name, chunks);
265                Ok(ca.into_datetime(TimeUnit::Milliseconds, None).into_series())
266            },
267            #[cfg(feature = "dtype-datetime")]
268            ArrowDataType::Timestamp(tu, tz) => {
269                let tz = TimeZone::opt_try_new(tz.clone())?;
270                let chunks =
271                    cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
272                let s = Int64Chunked::from_chunks(name, chunks)
273                    .into_datetime(tu.into(), tz)
274                    .into_series();
275                Ok(match tu {
276                    ArrowTimeUnit::Second => &s * MILLISECONDS,
277                    ArrowTimeUnit::Millisecond => s,
278                    ArrowTimeUnit::Microsecond => s,
279                    ArrowTimeUnit::Nanosecond => s,
280                })
281            },
282            #[cfg(feature = "dtype-duration")]
283            ArrowDataType::Duration(tu) => {
284                let chunks =
285                    cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
286                let s = Int64Chunked::from_chunks(name, chunks)
287                    .into_duration(tu.into())
288                    .into_series();
289                Ok(match tu {
290                    ArrowTimeUnit::Second => &s * MILLISECONDS,
291                    ArrowTimeUnit::Millisecond => s,
292                    ArrowTimeUnit::Microsecond => s,
293                    ArrowTimeUnit::Nanosecond => s,
294                })
295            },
296            #[cfg(feature = "dtype-time")]
297            ArrowDataType::Time64(tu) | ArrowDataType::Time32(tu) => {
298                let mut chunks = chunks;
299                if matches!(dtype, ArrowDataType::Time32(_)) {
300                    chunks =
301                        cast_chunks(&chunks, &DataType::Int32, CastOptions::NonStrict).unwrap();
302                }
303                let chunks =
304                    cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
305                let s = Int64Chunked::from_chunks(name, chunks)
306                    .into_time()
307                    .into_series();
308                Ok(match tu {
309                    ArrowTimeUnit::Second => &s * NANOSECONDS,
310                    ArrowTimeUnit::Millisecond => &s * 1_000_000,
311                    ArrowTimeUnit::Microsecond => &s * 1_000,
312                    ArrowTimeUnit::Nanosecond => s,
313                })
314            },
315            ArrowDataType::Decimal32(precision, scale) => {
316                feature_gated!("dtype-decimal", {
317                    polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?;
318
319                    let mut chunks = chunks;
320                    for chunk in chunks.iter_mut() {
321                        let old_chunk = chunk
322                            .as_any_mut()
323                            .downcast_mut::<PrimitiveArray<i32>>()
324                            .unwrap();
325
326                        // For now, we just cast the whole data to i128.
327                        let (_, values, validity) = std::mem::take(old_chunk).into_inner();
328                        *chunk = PrimitiveArray::new(
329                            ArrowDataType::Int128,
330                            values.iter().map(|&v| v as i128).collect(),
331                            validity,
332                        )
333                        .to_boxed();
334                    }
335
336                    let s = Int128Chunked::from_chunks(name, chunks)
337                        .into_decimal_unchecked(*precision, *scale)
338                        .into_series();
339                    Ok(s)
340                })
341            },
342            ArrowDataType::Decimal64(precision, scale) => {
343                feature_gated!("dtype-decimal", {
344                    polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?;
345
346                    let mut chunks = chunks;
347                    for chunk in chunks.iter_mut() {
348                        let old_chunk = chunk
349                            .as_any_mut()
350                            .downcast_mut::<PrimitiveArray<i64>>()
351                            .unwrap();
352
353                        // For now, we just cast the whole data to i128.
354                        let (_, values, validity) = std::mem::take(old_chunk).into_inner();
355                        *chunk = PrimitiveArray::new(
356                            ArrowDataType::Int128,
357                            values.iter().map(|&v| v as i128).collect(),
358                            validity,
359                        )
360                        .to_boxed();
361                    }
362
363                    let s = Int128Chunked::from_chunks(name, chunks)
364                        .into_decimal_unchecked(*precision, *scale)
365                        .into_series();
366                    Ok(s)
367                })
368            },
369            ArrowDataType::Decimal(precision, scale) => {
370                feature_gated!("dtype-decimal", {
371                    polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?;
372
373                    let mut chunks = chunks;
374                    for chunk in chunks.iter_mut() {
375                        *chunk = std::mem::take(
376                            chunk
377                                .as_any_mut()
378                                .downcast_mut::<PrimitiveArray<i128>>()
379                                .unwrap(),
380                        )
381                        .to(ArrowDataType::Int128)
382                        .to_boxed();
383                    }
384
385                    let s = Int128Chunked::from_chunks(name, chunks)
386                        .into_decimal_unchecked(*precision, *scale)
387                        .into_series();
388                    Ok(s)
389                })
390            },
391            ArrowDataType::Decimal256(precision, scale) => {
392                feature_gated!("dtype-decimal", {
393                    use arrow::types::i256;
394
395                    polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?;
396
397                    let mut chunks = chunks;
398                    for chunk in chunks.iter_mut() {
399                        let arr = std::mem::take(
400                            chunk
401                                .as_any_mut()
402                                .downcast_mut::<PrimitiveArray<i256>>()
403                                .unwrap(),
404                        );
405                        let arr_128: PrimitiveArray<i128> = arr.iter().map(|opt_v| {
406                            if let Some(v) = opt_v {
407                                let smaller: Option<i128> = (*v).try_into().ok();
408                                let smaller = smaller.filter(|v| dec128_fits(*v, *precision));
409                                smaller.ok_or_else(|| {
410                                    polars_err!(ComputeError: "Decimal256 to Decimal128 conversion overflowed, Decimal256 is not (yet) supported in Polars")
411                                }).map(Some)
412                            } else {
413                                Ok(None)
414                            }
415                        }).try_collect_arr_trusted()?;
416
417                        *chunk = arr_128.to(ArrowDataType::Int128).to_boxed();
418                    }
419
420                    let s = Int128Chunked::from_chunks(name, chunks)
421                        .into_decimal_unchecked(*precision, *scale)
422                        .into_series();
423                    Ok(s)
424                })
425            },
426            ArrowDataType::Null => Ok(new_null(name, &chunks)),
427            #[cfg(not(feature = "dtype-categorical"))]
428            ArrowDataType::Dictionary(_, _, _) => {
429                panic!("activate dtype-categorical to convert dictionary arrays")
430            },
431            #[cfg(feature = "dtype-categorical")]
432            ArrowDataType::Dictionary(key_type, _, _) => {
433                let polars_dtype = DataType::from_arrow(chunks[0].dtype(), md);
434
435                let mut series_iter = chunks.into_iter().map(|arr| {
436                    import_arrow_dictionary_array(name.clone(), arr, key_type, &polars_dtype)
437                });
438
439                let mut first = series_iter.next().unwrap()?;
440
441                for s in series_iter {
442                    first.append_owned(s?)?;
443                }
444
445                Ok(first)
446            },
447            #[cfg(feature = "object")]
448            ArrowDataType::Extension(ext)
449                if ext.name == POLARS_OBJECT_EXTENSION_NAME && ext.metadata.is_some() =>
450            {
451                assert_eq!(chunks.len(), 1);
452                let arr = chunks[0]
453                    .as_any()
454                    .downcast_ref::<FixedSizeBinaryArray>()
455                    .unwrap();
456                // SAFETY:
457                // this is highly unsafe. it will dereference a raw ptr on the heap
458                // make sure the ptr is allocated and from this pid
459                // (the pid is checked before dereference)
460                let s = {
461                    let pe = PolarsExtension::new(arr.clone());
462                    let s = pe.get_series(&name);
463                    pe.take_and_forget();
464                    s
465                };
466                Ok(s)
467            },
468            #[cfg(feature = "dtype-extension")]
469            ArrowDataType::Extension(ext) => {
470                use crate::datatypes::extension::get_extension_type_or_storage;
471
472                for chunk in &mut chunks {
473                    debug_assert!(
474                        chunk.dtype() == dtype,
475                        "expected chunk dtype to be {:?}, got {:?}",
476                        dtype,
477                        chunk.dtype()
478                    );
479                    *chunk.dtype_mut() = ext.inner.clone();
480                }
481                let storage = Series::_try_from_arrow_unchecked_with_md(
482                    name.clone(),
483                    chunks,
484                    &ext.inner,
485                    md,
486                )?;
487
488                Ok(
489                    match get_extension_type_or_storage(
490                        &ext.name,
491                        storage.dtype(),
492                        ext.metadata.as_deref(),
493                    ) {
494                        Some(typ) => ExtensionChunked::from_storage(typ, storage).into_series(),
495                        None => storage,
496                    },
497                )
498            },
499
500            #[cfg(feature = "dtype-struct")]
501            ArrowDataType::Struct(_) => {
502                let (chunks, dtype) = to_physical_and_dtype(chunks, md);
503
504                unsafe {
505                    let mut ca =
506                        StructChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype);
507                    StructChunked::propagate_nulls_mut(&mut ca);
508                    Ok(ca.into_series())
509                }
510            },
511            ArrowDataType::FixedSizeBinary(_) => {
512                let chunks = cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict)?;
513                Ok(BinaryChunked::from_chunks(name, chunks).into_series())
514            },
515            ArrowDataType::Map(field, _is_ordered) => {
516                let struct_arrays = chunks
517                    .iter()
518                    .map(|arr| {
519                        let arr = arr.as_any().downcast_ref::<MapArray>().unwrap();
520                        arr.field().clone()
521                    })
522                    .collect::<Vec<_>>();
523
524                let (phys_struct_arrays, dtype) =
525                    to_physical_and_dtype(struct_arrays, field.metadata.as_deref());
526
527                let chunks = chunks
528                    .iter()
529                    .zip(phys_struct_arrays)
530                    .map(|(arr, values)| {
531                        let arr = arr.as_any().downcast_ref::<MapArray>().unwrap();
532                        let offsets: &OffsetsBuffer<i32> = arr.offsets();
533
534                        let validity = values.validity().cloned();
535
536                        Box::from(ListArray::<i64>::new(
537                            ListArray::<i64>::default_datatype(values.dtype().clone()),
538                            OffsetsBuffer::<i64>::from(offsets),
539                            values,
540                            validity,
541                        )) as ArrayRef
542                    })
543                    .collect();
544
545                unsafe {
546                    let out = ListChunked::from_chunks_and_dtype_unchecked(
547                        name,
548                        chunks,
549                        DataType::List(Box::new(dtype)),
550                    );
551
552                    Ok(out.into_series())
553                }
554            },
555            ArrowDataType::Interval(IntervalUnit::MonthDayNano) => {
556                check_allow_importing_interval_as_struct("month_day_nano_interval")?;
557
558                feature_gated!("dtype-struct", {
559                    let chunks = chunks
560                        .into_iter()
561                        .map(convert_month_day_nano_to_struct)
562                        .collect::<PolarsResult<Vec<_>>>()?;
563
564                    Ok(StructChunked::from_chunks_and_dtype_unchecked(
565                        name,
566                        chunks,
567                        DataType::_month_days_ns_struct_type(),
568                    )
569                    .into_series())
570                })
571            },
572
573            dt => polars_bail!(ComputeError: "cannot create series from {:?}", dt),
574        }
575    }
576
577    #[cfg(feature = "dtype-categorical")]
578    pub fn from_cats_and_dtype(
579        cats: &Series,
580        dtype: &DataType,
581        strict: bool,
582    ) -> PolarsResult<Series> {
583        let phys = dtype.cat_physical()?;
584        let phys_dtype = DataType::from(phys);
585        if cats.dtype() != &phys_dtype {
586            polars_bail!(
587                SchemaMismatch:
588                "cannot convert column of type {} to {} with physical type {}; \
589                column dtype must match the enum/categorical's physical type",
590                cats.dtype(), dtype, phys_dtype
591            )
592        }
593
594        let out = with_match_categorical_physical_type!(phys, |$C| {
595            // SAFETY: we are guarded by the type system.
596            type PhysCa = ChunkedArray<<$C as PolarsCategoricalType>::PolarsPhysical>;
597            let ca: &PhysCa = cats.as_ref().as_ref();
598            CategoricalChunked::<$C>::from_cats_and_dtype(ca.clone(), dtype.clone()).into_series()
599        });
600
601        if strict && out.null_count() != cats.null_count() {
602            polars_bail!(
603                ComputeError:
604                "found invalid category value when converting from physical to {dtype}",
605            );
606        }
607
608        Ok(out)
609    }
610}
611
612fn convert<F: Fn(&dyn Array) -> ArrayRef>(arr: &[ArrayRef], f: F) -> Vec<ArrayRef> {
613    arr.iter().map(|arr| f(&**arr)).collect()
614}
615
616/// Converts to physical types and bubbles up the correct [`DataType`].
617#[allow(clippy::only_used_in_recursion)]
618unsafe fn to_physical_and_dtype(
619    arrays: Vec<ArrayRef>,
620    md: Option<&Metadata>,
621) -> (Vec<ArrayRef>, DataType) {
622    match arrays[0].dtype() {
623        ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => {
624            let chunks = cast_chunks(&arrays, &DataType::String, CastOptions::NonStrict).unwrap();
625            (chunks, DataType::String)
626        },
627        ArrowDataType::Binary | ArrowDataType::LargeBinary | ArrowDataType::FixedSizeBinary(_) => {
628            let chunks = cast_chunks(&arrays, &DataType::Binary, CastOptions::NonStrict).unwrap();
629            (chunks, DataType::Binary)
630        },
631        #[allow(unused_variables)]
632        dt @ ArrowDataType::Dictionary(_, _, _) => {
633            feature_gated!("dtype-categorical", {
634                let s = unsafe {
635                    let dt = dt.clone();
636                    Series::_try_from_arrow_unchecked_with_md(PlSmallStr::EMPTY, arrays, &dt, md)
637                }
638                .unwrap();
639                (s.chunks().clone(), s.dtype().clone())
640            })
641        },
642        dt @ ArrowDataType::Extension(_) => {
643            feature_gated!("dtype-extension", {
644                let s = unsafe {
645                    let dt = dt.clone();
646                    Series::_try_from_arrow_unchecked_with_md(PlSmallStr::EMPTY, arrays, &dt, md)
647                }
648                .unwrap();
649                (s.chunks().clone(), s.dtype().clone())
650            })
651        },
652        ArrowDataType::List(field) => {
653            let out = convert(&arrays, |arr| {
654                cast(arr, &ArrowDataType::LargeList(field.clone())).unwrap()
655            });
656            to_physical_and_dtype(out, md)
657        },
658        #[cfg(feature = "dtype-array")]
659        ArrowDataType::FixedSizeList(field, size) => {
660            let values = arrays
661                .iter()
662                .map(|arr| {
663                    let arr = arr.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
664                    arr.values().clone()
665                })
666                .collect::<Vec<_>>();
667
668            let (converted_values, dtype) =
669                to_physical_and_dtype(values, field.metadata.as_deref());
670
671            let arrays = arrays
672                .iter()
673                .zip(converted_values)
674                .map(|(arr, values)| {
675                    let arr = arr.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
676
677                    let dtype = FixedSizeListArray::default_datatype(values.dtype().clone(), *size);
678                    Box::from(FixedSizeListArray::new(
679                        dtype,
680                        arr.len(),
681                        values,
682                        arr.validity().cloned(),
683                    )) as ArrayRef
684                })
685                .collect();
686            (arrays, DataType::Array(Box::new(dtype), *size))
687        },
688        ArrowDataType::LargeList(field) => {
689            let values = arrays
690                .iter()
691                .map(|arr| {
692                    let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();
693                    arr.values().clone()
694                })
695                .collect::<Vec<_>>();
696
697            let (converted_values, dtype) =
698                to_physical_and_dtype(values, field.metadata.as_deref());
699
700            let arrays = arrays
701                .iter()
702                .zip(converted_values)
703                .map(|(arr, values)| {
704                    let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();
705
706                    let dtype = ListArray::<i64>::default_datatype(values.dtype().clone());
707                    Box::from(ListArray::<i64>::new(
708                        dtype,
709                        arr.offsets().clone(),
710                        values,
711                        arr.validity().cloned(),
712                    )) as ArrayRef
713                })
714                .collect();
715            (arrays, DataType::List(Box::new(dtype)))
716        },
717        ArrowDataType::Struct(_fields) => {
718            feature_gated!("dtype-struct", {
719                let mut pl_fields = None;
720                let arrays = arrays
721                    .iter()
722                    .map(|arr| {
723                        let arr = arr.as_any().downcast_ref::<StructArray>().unwrap();
724                        let (values, dtypes): (Vec<_>, Vec<_>) = arr
725                            .values()
726                            .iter()
727                            .zip(_fields.iter())
728                            .map(|(value, field)| {
729                                let mut out = to_physical_and_dtype(
730                                    vec![value.clone()],
731                                    field.metadata.as_deref(),
732                                );
733                                (out.0.pop().unwrap(), out.1)
734                            })
735                            .unzip();
736
737                        let arrow_fields = values
738                            .iter()
739                            .zip(_fields.iter())
740                            .map(|(arr, field)| {
741                                ArrowField::new(field.name.clone(), arr.dtype().clone(), true)
742                            })
743                            .collect();
744                        let arrow_array = Box::new(StructArray::new(
745                            ArrowDataType::Struct(arrow_fields),
746                            arr.len(),
747                            values,
748                            arr.validity().cloned(),
749                        )) as ArrayRef;
750
751                        if pl_fields.is_none() {
752                            pl_fields = Some(
753                                _fields
754                                    .iter()
755                                    .zip(dtypes)
756                                    .map(|(field, dtype)| Field::new(field.name.clone(), dtype))
757                                    .collect_vec(),
758                            )
759                        }
760
761                        arrow_array
762                    })
763                    .collect_vec();
764
765                (arrays, DataType::Struct(pl_fields.unwrap()))
766            })
767        },
768        // Use Series architecture to convert nested logical types to physical.
769        dt @ (ArrowDataType::Duration(_)
770        | ArrowDataType::Time32(_)
771        | ArrowDataType::Time64(_)
772        | ArrowDataType::Timestamp(_, _)
773        | ArrowDataType::Date32
774        | ArrowDataType::Decimal(_, _)
775        | ArrowDataType::Date64
776        | ArrowDataType::Map(_, _)) => {
777            let dt = dt.clone();
778            let mut s = Series::_try_from_arrow_unchecked(PlSmallStr::EMPTY, arrays, &dt).unwrap();
779            let dtype = s.dtype().clone();
780            (std::mem::take(s.chunks_mut()), dtype)
781        },
782        dt => {
783            let dtype = DataType::from_arrow(dt, md);
784            (arrays, dtype)
785        },
786    }
787}
788
/// Builds a [`Series`] from a single arrow dictionary array.
///
/// Two import strategies are used, selected by `polars_dtype`:
///
/// * `Categorical` / `Enum`: the dictionary values are cast to `Utf8View`, every key is
///   resolved to the string it points at, and the resulting string iterator is handed to
///   `CategoricalChunked::from_str_iter`. A null key, or a key that cannot be converted to
///   `usize`, yields a `None` entry.
/// * anything else: the dictionary values are imported as a `Series` and then gathered
///   (`take`) with the keys cast to the index type.
///
/// # Safety
///
/// The body forwards the dictionary values to `Series::_try_from_arrow_unchecked_with_md`
/// and wraps the cast keys with `IdxCa::from_chunks_and_dtype`.
/// NOTE(review): the exact caller contract is not spelled out here; presumably the
/// requirements of those functions apply to `arr` -- confirm against their docs.
///
/// # Errors
///
/// Returns a `ComputeError` for a key type other than the eight fixed-width integer types
/// handled below, and propagates any error from the value cast, the categorical
/// construction, the value import, or the final `take`.
///
/// # Panics
///
/// Panics if `arr` is not a `DictionaryArray` keyed by `key_type` (the downcast is
/// unwrapped).
#[cfg(feature = "dtype-categorical")]
unsafe fn import_arrow_dictionary_array(
    name: PlSmallStr,
    arr: Box<dyn Array>,
    key_type: &arrow::datatypes::IntegerType,
    polars_dtype: &DataType,
) -> PolarsResult<Series> {
    use arrow::datatypes::IntegerType as I;

    match polars_dtype {
        DataType::Categorical(_, _) | DataType::Enum(_, _) => {
            // Resolves every key to its string and rebuilds the categorical from strings.
            macro_rules! unpack_categorical_chunked {
                ($dt:ty) => {{
                    let dict = arr.as_any().downcast_ref::<DictionaryArray<$dt>>().unwrap();
                    let dict_keys = dict.keys();
                    let utf8_values = cast(&**dict.values(), &ArrowDataType::Utf8View)?;
                    let categories = utf8_values
                        .as_any()
                        .downcast_ref::<Utf8ViewArray>()
                        .unwrap();
                    with_match_categorical_physical_type!(polars_dtype.cat_physical().unwrap(), |$C| {
                        let ca = CategoricalChunked::<$C>::from_str_iter(
                            name,
                            polars_dtype.clone(),
                            dict_keys.iter().map(|key| {
                                // Null keys and keys that do not fit in `usize` become `None`.
                                let pos: usize = (*key?).try_into().ok()?;
                                categories.get(pos)
                            }),
                        )?;
                        Ok(ca.into_series())
                    })
                }};
            }

            match key_type {
                I::Int8 => unpack_categorical_chunked!(i8),
                I::UInt8 => unpack_categorical_chunked!(u8),
                I::Int16 => unpack_categorical_chunked!(i16),
                I::UInt16 => unpack_categorical_chunked!(u16),
                I::Int32 => unpack_categorical_chunked!(i32),
                I::UInt32 => unpack_categorical_chunked!(u32),
                I::Int64 => unpack_categorical_chunked!(i64),
                I::UInt64 => unpack_categorical_chunked!(u64),
                _ => polars_bail!(
                    ComputeError: "unsupported arrow key type: {key_type:?}"
                ),
            }
        },
        _ => {
            // Casts the keys to the index type and hands back the dictionary values untouched.
            macro_rules! unpack_keys_values {
                ($dt:ty) => {{
                    let dict = arr.as_any().downcast_ref::<DictionaryArray<$dt>>().unwrap();
                    let idx_keys = polars_compute::cast::primitive_to_primitive::<
                        $dt,
                        <IdxType as PolarsNumericType>::Native,
                    >(dict.keys(), &IDX_DTYPE.to_arrow(CompatLevel::newest()));
                    (idx_keys, dict.values())
                }};
            }

            let (idx_keys, dict_values) = match key_type {
                I::Int8 => unpack_keys_values!(i8),
                I::UInt8 => unpack_keys_values!(u8),
                I::Int16 => unpack_keys_values!(i16),
                I::UInt16 => unpack_keys_values!(u16),
                I::Int32 => unpack_keys_values!(i32),
                I::UInt32 => unpack_keys_values!(u32),
                I::Int64 => unpack_keys_values!(i64),
                I::UInt64 => unpack_keys_values!(u64),
                _ => polars_bail!(
                    ComputeError: "unsupported arrow key type: {key_type:?}"
                ),
            };

            // Import the dictionary values first, then gather them by key.
            let dictionary = Series::_try_from_arrow_unchecked_with_md(
                name,
                vec![dict_values.clone()],
                dict_values.dtype(),
                None,
            )?;

            let indices = IdxCa::from_chunks_and_dtype(
                PlSmallStr::EMPTY,
                vec![idx_keys.to_boxed()],
                IDX_DTYPE,
            );
            dictionary.take(&indices)
        },
    }
}
877
/// Splits a `months_days_ns` primitive array into a three-field struct array.
///
/// The struct children are, in order, the months (`i32`), days (`i32`) and nanoseconds
/// (`i64`) of every value; the validity of the input array is carried over to the struct.
/// The struct dtype is the physical arrow form of `DataType::_month_days_ns_struct_type()`.
///
/// # Panics
///
/// Panics if `chunk` is not a `PrimitiveArray<months_days_ns>`.
#[cfg(feature = "dtype-struct")]
fn convert_month_day_nano_to_struct(chunk: Box<dyn Array>) -> PolarsResult<Box<dyn Array>> {
    let intervals = chunk
        .as_any()
        .downcast_ref::<PrimitiveArray<months_days_ns>>()
        .unwrap();

    let raw: &[months_days_ns] = intervals.values();

    // One output column per component of the interval.
    let mut months: Vec<i32> = Vec::with_capacity(raw.len());
    let mut days: Vec<i32> = Vec::with_capacity(raw.len());
    let mut nanoseconds: Vec<i64> = Vec::with_capacity(raw.len());
    for interval in raw {
        months.push(interval.months());
        days.push(interval.days());
        nanoseconds.push(interval.ns());
    }

    let struct_dtype = DataType::_month_days_ns_struct_type()
        .to_physical()
        .to_arrow(CompatLevel::newest());
    let children = vec![
        PrimitiveArray::<i32>::from_vec(months).boxed(),
        PrimitiveArray::<i32>::from_vec(days).boxed(),
        PrimitiveArray::<i64>::from_vec(nanoseconds).boxed(),
    ];

    let converted = StructArray::new(
        struct_dtype,
        intervals.len(),
        children,
        intervals.validity().cloned(),
    );
    Ok(converted.boxed())
}
904
905fn check_types(chunks: &[ArrayRef]) -> PolarsResult<ArrowDataType> {
906    let mut chunks_iter = chunks.iter();
907    let dtype: ArrowDataType = chunks_iter
908        .next()
909        .ok_or_else(|| polars_err!(NoData: "expected at least one array-ref"))?
910        .dtype()
911        .clone();
912
913    for chunk in chunks_iter {
914        if chunk.dtype() != &dtype {
915            polars_bail!(
916                ComputeError: "cannot create series from multiple arrays with different types"
917            );
918        }
919    }
920    Ok(dtype)
921}
922
923impl Series {
924    pub fn try_new<T>(
925        name: PlSmallStr,
926        data: T,
927    ) -> Result<Self, <(PlSmallStr, T) as TryInto<Self>>::Error>
928    where
929        (PlSmallStr, T): TryInto<Self>,
930    {
931        // # TODO
932        // * Remove the TryFrom<tuple> impls in favor of this
933        <(PlSmallStr, T) as TryInto<Self>>::try_into((name, data))
934    }
935}
936
937impl TryFrom<(PlSmallStr, Vec<ArrayRef>)> for Series {
938    type Error = PolarsError;
939
940    fn try_from(name_arr: (PlSmallStr, Vec<ArrayRef>)) -> PolarsResult<Self> {
941        let (name, chunks) = name_arr;
942
943        let dtype = check_types(&chunks)?;
944        // SAFETY:
945        // dtype is checked
946        unsafe { Series::_try_from_arrow_unchecked(name, chunks, &dtype) }
947    }
948}
949
950impl TryFrom<(PlSmallStr, ArrayRef)> for Series {
951    type Error = PolarsError;
952
953    fn try_from(name_arr: (PlSmallStr, ArrayRef)) -> PolarsResult<Self> {
954        let (name, arr) = name_arr;
955        Series::try_from((name, vec![arr]))
956    }
957}
958
959impl TryFrom<(&ArrowField, Vec<ArrayRef>)> for Series {
960    type Error = PolarsError;
961
962    fn try_from(field_arr: (&ArrowField, Vec<ArrayRef>)) -> PolarsResult<Self> {
963        let (field, chunks) = field_arr;
964        let arrow_dt = field.dtype();
965        let dtype = check_types(&chunks)?;
966        let compatible = match (&dtype, arrow_dt) {
967            // See #26174, we don't care about dictionary ordering.
968            (
969                ArrowDataType::Dictionary(int0, inner0, _ord0),
970                ArrowDataType::Dictionary(int1, inner1, _ord1),
971            ) => (int0, inner0) == (int1, inner1),
972            (l, r) => l == r,
973        };
974        polars_ensure!(compatible, ComputeError: "Arrow Field dtype does not match the ArrayRef dtypes");
975
976        // SAFETY:
977        // dtype is checked
978        unsafe {
979            Series::_try_from_arrow_unchecked_with_md(
980                field.name.clone(),
981                chunks,
982                &dtype,
983                field.metadata.as_deref(),
984            )
985        }
986    }
987}
988
989impl TryFrom<(&ArrowField, ArrayRef)> for Series {
990    type Error = PolarsError;
991
992    fn try_from(field_arr: (&ArrowField, ArrayRef)) -> PolarsResult<Self> {
993        let (field, arr) = field_arr;
994        Series::try_from((field, vec![arr]))
995    }
996}
997
998/// Used to convert a [`ChunkedArray`], `&dyn SeriesTrait` and [`Series`]
999/// into a [`Series`].
1000/// # Safety
1001///
1002/// This trait is marked `unsafe` as the `is_series` return is used
1003/// to transmute to `Series`. This must always return `false` except
1004/// for `Series` structs.
1005pub unsafe trait IntoSeries {
1006    fn is_series() -> bool {
1007        false
1008    }
1009
1010    fn into_series(self) -> Series
1011    where
1012        Self: Sized;
1013}
1014
1015impl<T> From<ChunkedArray<T>> for Series
1016where
1017    T: PolarsDataType,
1018    ChunkedArray<T>: IntoSeries,
1019{
1020    fn from(ca: ChunkedArray<T>) -> Self {
1021        ca.into_series()
1022    }
1023}
1024
/// Converts a `DateChunked` into a [`Series`] through `into_series`.
#[cfg(feature = "dtype-date")]
impl From<DateChunked> for Series {
    fn from(dates: DateChunked) -> Self {
        dates.into_series()
    }
}
1031
/// Converts a `DatetimeChunked` into a [`Series`] through `into_series`.
#[cfg(feature = "dtype-datetime")]
impl From<DatetimeChunked> for Series {
    fn from(datetimes: DatetimeChunked) -> Self {
        datetimes.into_series()
    }
}
1038
/// Converts a `DurationChunked` into a [`Series`] through `into_series`.
#[cfg(feature = "dtype-duration")]
impl From<DurationChunked> for Series {
    fn from(durations: DurationChunked) -> Self {
        durations.into_series()
    }
}
1045
/// Converts a `TimeChunked` into a [`Series`] through `into_series`.
#[cfg(feature = "dtype-time")]
impl From<TimeChunked> for Series {
    fn from(times: TimeChunked) -> Self {
        times.into_series()
    }
}
1052
1053unsafe impl IntoSeries for Arc<dyn SeriesTrait> {
1054    fn into_series(self) -> Series {
1055        Series(self)
1056    }
1057}
1058
1059unsafe impl IntoSeries for Series {
1060    fn is_series() -> bool {
1061        true
1062    }
1063
1064    fn into_series(self) -> Series {
1065        self
1066    }
1067}
1068
1069fn new_null(name: PlSmallStr, chunks: &[ArrayRef]) -> Series {
1070    let len = chunks.iter().map(|arr| arr.len()).sum();
1071    Series::new_null(name, len)
1072}