Skip to main content

polars_core/datatypes/
dtype.rs

1use std::borrow::Cow;
2use std::collections::BTreeMap;
3
4use arrow::datatypes::{
5    DTYPE_CATEGORICAL_NEW, DTYPE_ENUM_VALUES_LEGACY, DTYPE_ENUM_VALUES_NEW, MAINTAIN_PL_TYPE,
6    Metadata, PL_KEY,
7};
8#[cfg(feature = "dtype-array")]
9use polars_utils::format_tuple;
10use polars_utils::itertools::Itertools;
11#[cfg(any(feature = "serde-lazy", feature = "serde"))]
12use serde::{Deserialize, Serialize};
13pub use temporal::time_zone::TimeZone;
14
15use super::*;
16#[cfg(feature = "object")]
17use crate::chunked_array::object::registry::get_object_physical_type;
18#[cfg(feature = "dtype-extension")]
19pub use crate::datatypes::extension::ExtensionTypeInstance;
20use crate::utils::materialize_dyn_int;
21
22pub trait MetaDataExt: IntoMetadata {
23    fn pl_enum_metadata(&self) -> Option<&str> {
24        let md = self.into_metadata_ref();
25        let values = md
26            .get(DTYPE_ENUM_VALUES_NEW)
27            .or_else(|| md.get(DTYPE_ENUM_VALUES_LEGACY));
28        Some(values?.as_str())
29    }
30
31    fn pl_categorical_metadata(&self) -> Option<&str> {
32        // We ignore DTYPE_CATEGORICAL_LEGACY here, as we already map all
33        // string-typed arrow dictionaries to the global Categories, and the
34        // legacy metadata format only specifies the now-removed physical
35        // ordering parameter.
36        Some(
37            self.into_metadata_ref()
38                .get(DTYPE_CATEGORICAL_NEW)?
39                .as_str(),
40        )
41    }
42
43    fn maintain_type(&self) -> bool {
44        let metadata = self.into_metadata_ref();
45        metadata.get(PL_KEY).map(|s| s.as_str()) == Some(MAINTAIN_PL_TYPE)
46    }
47}
48
// `Metadata` gets every accessor from the default methods of `MetaDataExt`.
impl MetaDataExt for Metadata {}
/// Gives borrowed access to a [`Metadata`] map.
pub trait IntoMetadata {
    /// Borrow the metadata map.
    // The `into_` prefix conventionally takes `self` by value; this method only
    // borrows, which is why the clippy lint is silenced.
    #[allow(clippy::wrong_self_convention)]
    fn into_metadata_ref(&self) -> &Metadata;
}
54
// A `Metadata` map is trivially its own metadata reference.
impl IntoMetadata for Metadata {
    /// Returns `self` unchanged.
    fn into_metadata_ref(&self) -> &Metadata {
        self
    }
}
60
// Describes what is known about a dtype that has not been resolved yet.
// `UnknownKind::materialize` maps each kind to a concrete `DataType`,
// except `Any`, which stays unresolved.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
#[cfg_attr(
    any(feature = "serde", feature = "serde-lazy"),
    derive(Serialize, Deserialize)
)]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum UnknownKind {
    // Hold the value to determine the concrete size.
    Int(i128),
    // Materializes to `Float64`.
    Float,
    // Can be Categorical or String
    Str,
    // Nothing is known; this is the default and cannot be materialized.
    #[default]
    Any,
}
76
77impl UnknownKind {
78    pub fn materialize(&self) -> Option<DataType> {
79        let dtype = match self {
80            UnknownKind::Int(v) => materialize_dyn_int(*v).dtype(),
81            UnknownKind::Float => DataType::Float64,
82            UnknownKind::Str => DataType::String,
83            UnknownKind::Any => return None,
84        };
85        Some(dtype)
86    }
87}
88
/// The data types a column can have.
///
/// Several variants only exist when the matching cargo feature is enabled.
/// `Hash`, `PartialEq` and `Eq` are implemented by hand rather than derived.
#[derive(Clone)]
pub enum DataType {
    Boolean,
    UInt8,
    UInt16,
    UInt32,
    UInt64,
    UInt128,
    Int8,
    Int16,
    Int32,
    Int64,
    Int128,
    Float16,
    Float32,
    Float64,
    /// Fixed point decimal type with a precision and a non-negative scale.
    /// This is backed by a signed 128-bit integer which allows for up to 38 significant digits.
    /// Meaning max precision is 38.
    #[cfg(feature = "dtype-decimal")]
    Decimal(usize, usize), // (precision, scale), invariant: 1 <= precision <= 38.
    /// String data
    String,
    /// Binary data
    Binary,
    // NOTE(review): presumably binary data with offset-based storage; confirm
    // against the array implementation.
    BinaryOffset,
    /// A 32-bit date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in days (32 bits).
    Date,
    /// A 64-bit date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in the given timeunit (64 bits), with an optional time zone.
    Datetime(TimeUnit, Option<TimeZone>),
    /// 64-bit integer representing difference between times in the given timeunit
    Duration(TimeUnit),
    /// A 64-bit time representing the elapsed time since midnight in nanoseconds
    Time,
    /// A nested list with a fixed size in each row
    #[cfg(feature = "dtype-array")]
    Array(Box<DataType>, usize),
    /// A nested list with a variable size in each row
    List(Box<DataType>),
    /// A generic type that can be used in a `Series`
    /// &'static str can be used to determine/set inner type
    #[cfg(feature = "object")]
    Object(&'static str),
    Null,
    /// Categorical data: the shared categories plus their mapping.
    #[cfg(feature = "dtype-categorical")]
    Categorical(Arc<Categories>, Arc<CategoricalMapping>),
    // Same payload shape as `Categorical`, but over frozen categories, so that
    // matching Enum/Categoricals can take the same guards.
    #[cfg(feature = "dtype-categorical")]
    Enum(Arc<FrozenCategories>, Arc<CategoricalMapping>),
    /// A struct, described by its fields.
    #[cfg(feature = "dtype-struct")]
    Struct(Vec<Field>),
    /// An extension type instance together with its storage dtype.
    #[cfg(feature = "dtype-extension")]
    Extension(ExtensionTypeInstance, Box<DataType>),
    // some logical types we cannot know statically, e.g. Datetime
    Unknown(UnknownKind),
}
146
/// Implemented by types that can hand out a reference to a [`DataType`].
pub trait AsRefDataType {
    /// Borrow the associated [`DataType`].
    fn as_ref_dtype(&self) -> &DataType;
}
150
151impl Hash for DataType {
152    fn hash<H: Hasher>(&self, state: &mut H) {
153        std::mem::discriminant(self).hash(state)
154    }
155}
156
157impl PartialEq for DataType {
158    fn eq(&self, other: &Self) -> bool {
159        use DataType::*;
160        {
161            match (self, other) {
162                #[cfg(feature = "dtype-categorical")]
163                (Categorical(cats_l, _), Categorical(cats_r, _)) => Arc::ptr_eq(cats_l, cats_r),
164                #[cfg(feature = "dtype-categorical")]
165                (Enum(fcats_l, _), Enum(fcats_r, _)) => Arc::ptr_eq(fcats_l, fcats_r),
166                (Datetime(tu_l, tz_l), Datetime(tu_r, tz_r)) => tu_l == tu_r && tz_l == tz_r,
167                (List(left_inner), List(right_inner)) => left_inner == right_inner,
168                #[cfg(feature = "dtype-duration")]
169                (Duration(tu_l), Duration(tu_r)) => tu_l == tu_r,
170                #[cfg(feature = "dtype-decimal")]
171                (Decimal(p1, s1), Decimal(p2, s2)) => (p1, s1) == (p2, s2),
172                #[cfg(feature = "object")]
173                (Object(lhs), Object(rhs)) => lhs == rhs,
174                #[cfg(feature = "dtype-struct")]
175                (Struct(lhs), Struct(rhs)) => {
176                    std::ptr::eq(Vec::as_ptr(lhs), Vec::as_ptr(rhs)) || lhs == rhs
177                },
178                #[cfg(feature = "dtype-array")]
179                (Array(left_inner, left_width), Array(right_inner, right_width)) => {
180                    left_width == right_width && left_inner == right_inner
181                },
182                (Unknown(l), Unknown(r)) => match (l, r) {
183                    (UnknownKind::Int(_), UnknownKind::Int(_)) => true,
184                    _ => l == r,
185                },
186                _ => std::mem::discriminant(self) == std::mem::discriminant(other),
187            }
188        }
189    }
190}
191
// Marker impl; the comparison itself lives in the `PartialEq` impl.
impl Eq for DataType {}
193
194impl DataType {
    /// `UInt32` by default, `UInt64` when the `bigidx` feature is enabled.
    // NOTE(review): presumably the dtype used for row indices; confirm against callers.
    pub const IDX_DTYPE: Self = {
        #[cfg(not(feature = "bigidx"))]
        {
            DataType::UInt32
        }
        #[cfg(feature = "bigidx")]
        {
            DataType::UInt64
        }
    };
205
206    pub fn pretty_format(&self) -> String {
207        match self {
208            #[cfg(feature = "dtype-struct")]
209            Self::Struct(fields) => {
210                let formatted_fields = fields
211                    .iter()
212                    .map(|field| format!("{}: {}", field.name, field.dtype.pretty_format()))
213                    .collect::<Vec<String>>()
214                    .join(", ");
215                format!("struct {{{}}}", formatted_fields)
216            },
217            Self::List(inner_dtype) => {
218                let formatted_dtype = inner_dtype.pretty_format();
219                format!("list[{}]", formatted_dtype)
220            },
221            #[cfg(feature = "dtype-array")]
222            Self::Array(inner_dtype, size) => {
223                let formatted_dtype = inner_dtype.pretty_format();
224                format!("array[{}, {}]", formatted_dtype, size)
225            },
226            _ => {
227                format!("{}", self)
228            },
229        }
230    }
231
232    pub fn value_within_range(&self, other: AnyValue) -> bool {
233        use DataType::*;
234        match self {
235            UInt8 => other.extract::<u8>().is_some(),
236            #[cfg(feature = "dtype-u16")]
237            UInt16 => other.extract::<u16>().is_some(),
238            UInt32 => other.extract::<u32>().is_some(),
239            UInt64 => other.extract::<u64>().is_some(),
240            #[cfg(feature = "dtype-u128")]
241            UInt128 => other.extract::<u128>().is_some(),
242            #[cfg(feature = "dtype-i8")]
243            Int8 => other.extract::<i8>().is_some(),
244            #[cfg(feature = "dtype-i16")]
245            Int16 => other.extract::<i16>().is_some(),
246            Int32 => other.extract::<i32>().is_some(),
247            Int64 => other.extract::<i64>().is_some(),
248            #[cfg(feature = "dtype-i128")]
249            Int128 => other.extract::<i128>().is_some(),
250            _ => false,
251        }
252    }
253
254    /// Struct representation of the arrow `month_day_nano_interval` type.
255    #[cfg(feature = "dtype-struct")]
256    pub fn _month_days_ns_struct_type() -> Self {
257        DataType::Struct(vec![
258            Field::new(PlSmallStr::from_static("months"), DataType::Int32),
259            Field::new(PlSmallStr::from_static("days"), DataType::Int32),
260            Field::new(
261                PlSmallStr::from_static("nanoseconds"),
262                DataType::Duration(TimeUnit::Nanoseconds),
263            ),
264        ])
265    }
266
267    /// Check if the whole dtype is known.
268    pub fn is_known(&self) -> bool {
269        match self {
270            DataType::List(inner) => inner.is_known(),
271            #[cfg(feature = "dtype-array")]
272            DataType::Array(inner, _) => inner.is_known(),
273            #[cfg(feature = "dtype-struct")]
274            DataType::Struct(fields) => fields.iter().all(|fld| fld.dtype.is_known()),
275            DataType::Unknown(_) => false,
276            _ => true,
277        }
278    }
279
280    /// Materialize this datatype if it is unknown. All other datatypes
281    /// are left unchanged.
282    pub fn materialize_unknown(self, allow_unknown: bool) -> PolarsResult<DataType> {
283        match self {
284            DataType::Unknown(u) => match u.materialize() {
285                Some(known) => Ok(known),
286                None => {
287                    if allow_unknown {
288                        Ok(DataType::Unknown(u))
289                    } else {
290                        polars_bail!(SchemaMismatch: "failed to materialize unknown type")
291                    }
292                },
293            },
294            DataType::List(inner) => Ok(DataType::List(Box::new(
295                inner.materialize_unknown(allow_unknown)?,
296            ))),
297            #[cfg(feature = "dtype-array")]
298            DataType::Array(inner, size) => Ok(DataType::Array(
299                Box::new(inner.materialize_unknown(allow_unknown)?),
300                size,
301            )),
302            #[cfg(feature = "dtype-struct")]
303            DataType::Struct(fields) => Ok(DataType::Struct(
304                fields
305                    .into_iter()
306                    .map(|f| {
307                        PolarsResult::Ok(Field::new(
308                            f.name,
309                            f.dtype.materialize_unknown(allow_unknown)?,
310                        ))
311                    })
312                    .try_collect_vec()?,
313            )),
314            _ => Ok(self),
315        }
316    }
317
318    #[cfg(feature = "dtype-array")]
319    /// Get the full shape of a multidimensional array.
320    pub fn get_shape(&self) -> Option<Vec<usize>> {
321        fn get_shape_impl(dt: &DataType, shape: &mut Vec<usize>) {
322            if let DataType::Array(inner, size) = dt {
323                shape.push(*size);
324                get_shape_impl(inner, shape);
325            }
326        }
327
328        if let DataType::Array(inner, size) = self {
329            let mut shape = vec![*size];
330            get_shape_impl(inner, &mut shape);
331            Some(shape)
332        } else {
333            None
334        }
335    }
336
337    /// Get the inner data type of a nested type.
338    pub fn inner_dtype(&self) -> Option<&DataType> {
339        match self {
340            DataType::List(inner) => Some(inner),
341            #[cfg(feature = "dtype-array")]
342            DataType::Array(inner, _) => Some(inner),
343            _ => None,
344        }
345    }
346
347    /// Get the inner data type of a nested type.
348    pub fn into_inner_dtype(self) -> Option<DataType> {
349        match self {
350            DataType::List(inner) => Some(*inner),
351            #[cfg(feature = "dtype-array")]
352            DataType::Array(inner, _) => Some(*inner),
353            _ => None,
354        }
355    }
356
357    /// Get the inner data type of a nested type.
358    pub fn try_into_inner_dtype(self) -> PolarsResult<DataType> {
359        match self {
360            DataType::List(inner) => Ok(*inner),
361            #[cfg(feature = "dtype-array")]
362            DataType::Array(inner, _) => Ok(*inner),
363            dt => polars_bail!(InvalidOperation: "cannot get inner datatype of `{dt}`"),
364        }
365    }
366
367    /// Get the absolute inner data type of a nested type.
368    pub fn leaf_dtype(&self) -> &DataType {
369        let mut prev = self;
370        while let Some(dtype) = prev.inner_dtype() {
371            prev = dtype
372        }
373        prev
374    }
375
376    #[cfg(feature = "dtype-array")]
377    /// Get the inner data type of a multidimensional array.
378    pub fn array_leaf_dtype(&self) -> Option<&DataType> {
379        let mut prev = self;
380        match prev {
381            DataType::Array(_, _) => {
382                while let DataType::Array(inner, _) = &prev {
383                    prev = inner;
384                }
385                Some(prev)
386            },
387            _ => None,
388        }
389    }
390
391    /// Cast the leaf types of Lists/Arrays and keep the nesting.
392    pub fn cast_leaf(&self, to: DataType) -> DataType {
393        use DataType::*;
394        match self {
395            List(inner) => List(Box::new(inner.cast_leaf(to))),
396            #[cfg(feature = "dtype-array")]
397            Array(inner, size) => Array(Box::new(inner.cast_leaf(to)), *size),
398            _ => to,
399        }
400    }
401
402    /// Map all leaf types of nested dtypes (list, array, struct) using the
403    /// supplied function.
404    pub fn map_leaves<F: FnMut(DataType) -> DataType>(self, f: &mut F) -> DataType {
405        use DataType::*;
406        match self {
407            List(inner) => List(Box::new(inner.map_leaves(f))),
408            #[cfg(feature = "dtype-array")]
409            Array(inner, size) => Array(Box::new(inner.map_leaves(f)), size),
410            #[cfg(feature = "dtype-struct")]
411            Struct(fields) => {
412                let new_fields = fields
413                    .into_iter()
414                    .map(|fld| Field::new(fld.name, fld.dtype.map_leaves(f)))
415                    .collect();
416                Struct(new_fields)
417            },
418            #[cfg(feature = "dtype-extension")]
419            Extension(ext, storage) => Extension(ext, Box::new(storage.map_leaves(f))),
420            _ => f(self),
421        }
422    }
423
    /// Return whether the cast to `to` makes sense.
    ///
    /// If it is `None`, we are not sure.
    ///
    /// `Some(true)` is returned up front for identical dtypes, for casts
    /// between primitive numeric dtypes, and when `self` is `Null`. The
    /// remaining rules are checked in order, so earlier arms take precedence.
    pub fn can_cast_to(&self, to: &DataType) -> Option<bool> {
        if self == to {
            return Some(true);
        }
        if self.is_primitive_numeric() && to.is_primitive_numeric() {
            return Some(true);
        }

        if self.is_null() {
            return Some(true);
        }

        use DataType as D;
        Some(match (self, to) {
            #[cfg(feature = "dtype-categorical")]
            (D::Categorical(_, _) | D::Enum(_, _), D::Binary)
            | (D::Binary, D::Categorical(_, _) | D::Enum(_, _)) => false, // TODO @ cat-rework: why can we not cast to Binary?

            #[cfg(feature = "dtype-categorical")]
            (D::Categorical(_, _) | D::Enum(_, _), D::String)
            | (D::String, D::Categorical(_, _) | D::Enum(_, _)) => true,

            // Objects can only be cast to other objects.
            #[cfg(feature = "object")]
            (D::Object(_), D::Object(_)) => true,
            #[cfg(feature = "object")]
            (D::Object(_), _) | (_, D::Object(_)) => false,

            // Boolean casts are symmetric: `dt` is whichever side is not Boolean.
            (D::Boolean, dt) | (dt, D::Boolean) => match dt {
                dt if dt.is_primitive_numeric() => true,
                #[cfg(feature = "dtype-decimal")]
                D::Decimal(_, _) => true,
                D::String | D::Binary => true,
                _ => false,
            },

            // Nested types defer to their element dtypes; an uncertain (`None`)
            // element answer propagates through `?`.
            (D::List(from), D::List(to)) => from.can_cast_to(to)?,
            #[cfg(feature = "dtype-array")]
            (D::Array(from, l_width), D::Array(to, r_width)) => {
                l_width == r_width && from.can_cast_to(to)?
            },
            #[cfg(feature = "dtype-struct")]
            (D::Struct(l_fields), D::Struct(r_fields)) => {
                // A struct without fields is considered castable to any struct.
                if l_fields.is_empty() {
                    return Some(true);
                }

                if l_fields.len() != r_fields.len() {
                    return Some(false);
                }

                // Fields are compared by position; names are not checked here.
                for (l, r) in l_fields.iter().zip(r_fields) {
                    if !l.dtype().can_cast_to(r.dtype())? {
                        return Some(false);
                    }
                }

                true
            },

            // @NOTE: we are being conservative
            _ => return None,
        })
    }
490
491    pub fn implode(self) -> DataType {
492        DataType::List(Box::new(self))
493    }
494
495    /// Convert to the physical data type
496    #[must_use]
497    pub fn to_physical(&self) -> DataType {
498        use DataType::*;
499        match self {
500            Date => Int32,
501            Datetime(_, _) => Int64,
502            Duration(_) => Int64,
503            Time => Int64,
504            #[cfg(feature = "dtype-decimal")]
505            Decimal(_, _) => Int128,
506            #[cfg(feature = "dtype-categorical")]
507            Categorical(cats, _) => cats.physical().dtype(),
508            #[cfg(feature = "dtype-categorical")]
509            Enum(fcats, _) => fcats.physical().dtype(),
510            #[cfg(feature = "dtype-array")]
511            Array(dt, width) => Array(Box::new(dt.to_physical()), *width),
512            List(dt) => List(Box::new(dt.to_physical())),
513            #[cfg(feature = "dtype-struct")]
514            Struct(fields) => {
515                let new_fields = fields
516                    .iter()
517                    .map(|s| Field::new(s.name().clone(), s.dtype().to_physical()))
518                    .collect();
519                Struct(new_fields)
520            },
521            #[cfg(feature = "dtype-extension")]
522            Extension(_, storage) => storage.to_physical(),
523            _ => self.clone(),
524        }
525    }
526
527    #[must_use]
528    pub fn to_storage(&self) -> DataType {
529        use DataType::*;
530        match self {
531            #[cfg(feature = "dtype-extension")]
532            Extension(_, storage) => storage.to_storage(),
533            _ => self.clone(),
534        }
535    }
536
537    pub fn is_supported_list_arithmetic_input(&self) -> bool {
538        self.is_primitive_numeric() || self.is_bool() || self.is_null()
539    }
540
541    /// Check if this [`DataType`] is a logical type
542    pub fn is_logical(&self) -> bool {
543        self != &self.to_physical()
544    }
545
546    /// Check if this [`DataType`] is a temporal type
547    pub fn is_temporal(&self) -> bool {
548        use DataType::*;
549        matches!(self, Date | Datetime(_, _) | Duration(_) | Time)
550    }
551
552    /// Check if datatype is a primitive type. By that we mean that
553    /// it is not a nested or logical type.
554    pub fn is_primitive(&self) -> bool {
555        self.is_primitive_numeric()
556            | matches!(
557                self,
558                DataType::Boolean | DataType::String | DataType::Binary
559            )
560    }
561
562    /// Check if this [`DataType`] is a primitive numeric type (excludes Decimal).
563    pub fn is_primitive_numeric(&self) -> bool {
564        self.is_float() || self.is_integer()
565    }
566
567    /// Check if this [`DataType`] is a boolean.
568    pub fn is_bool(&self) -> bool {
569        matches!(self, DataType::Boolean)
570    }
571
572    /// Check if this [`DataType`] is a list.
573    pub fn is_list(&self) -> bool {
574        matches!(self, DataType::List(_))
575    }
576
577    /// Check if this [`DataType`] is an array.
578    pub fn is_array(&self) -> bool {
579        #[cfg(feature = "dtype-array")]
580        {
581            matches!(self, DataType::Array(_, _))
582        }
583        #[cfg(not(feature = "dtype-array"))]
584        {
585            false
586        }
587    }
588
589    pub fn is_nested(&self) -> bool {
590        match self {
591            DataType::List(_) => true,
592            #[cfg(feature = "dtype-array")]
593            DataType::Array(_, _) => true,
594            #[cfg(feature = "dtype-struct")]
595            DataType::Struct(_) => true,
596            #[cfg(feature = "dtype-extension")]
597            DataType::Extension(_, storage) => storage.is_nested(),
598            _ => false,
599        }
600    }
601
602    /// Check if this [`DataType`] is a struct
603    pub fn is_struct(&self) -> bool {
604        #[cfg(feature = "dtype-struct")]
605        {
606            matches!(self, DataType::Struct(_))
607        }
608        #[cfg(not(feature = "dtype-struct"))]
609        {
610            false
611        }
612    }
613
614    pub fn is_binary(&self) -> bool {
615        matches!(self, DataType::Binary)
616    }
617
618    pub fn is_date(&self) -> bool {
619        matches!(self, DataType::Date)
620    }
621    pub fn is_datetime(&self) -> bool {
622        matches!(self, DataType::Datetime(..))
623    }
624
625    pub fn is_duration(&self) -> bool {
626        matches!(self, DataType::Duration(..))
627    }
628
629    pub fn is_object(&self) -> bool {
630        #[cfg(feature = "object")]
631        {
632            matches!(self, DataType::Object(_))
633        }
634        #[cfg(not(feature = "object"))]
635        {
636            false
637        }
638    }
639
640    pub fn is_null(&self) -> bool {
641        matches!(self, DataType::Null)
642    }
643
644    pub fn contains_views(&self) -> bool {
645        use DataType::*;
646        match self {
647            Binary | String => true,
648            List(inner) => inner.contains_views(),
649            #[cfg(feature = "dtype-array")]
650            Array(inner, _) => inner.contains_views(),
651            #[cfg(feature = "dtype-struct")]
652            Struct(fields) => fields.iter().any(|field| field.dtype.contains_views()),
653            _ => false,
654        }
655    }
656
657    pub fn contains_categoricals(&self) -> bool {
658        use DataType::*;
659        match self {
660            #[cfg(feature = "dtype-categorical")]
661            Categorical(_, _) => true,
662            List(inner) => inner.contains_categoricals(),
663            #[cfg(feature = "dtype-array")]
664            Array(inner, _) => inner.contains_categoricals(),
665            #[cfg(feature = "dtype-struct")]
666            Struct(fields) => fields
667                .iter()
668                .any(|field| field.dtype.contains_categoricals()),
669            _ => false,
670        }
671    }
672
673    pub fn contains_enums(&self) -> bool {
674        use DataType::*;
675        match self {
676            #[cfg(feature = "dtype-categorical")]
677            Enum(_, _) => true,
678            List(inner) => inner.contains_enums(),
679            #[cfg(feature = "dtype-array")]
680            Array(inner, _) => inner.contains_enums(),
681            #[cfg(feature = "dtype-struct")]
682            Struct(fields) => fields.iter().any(|field| field.dtype.contains_enums()),
683            _ => false,
684        }
685    }
686
687    pub fn contains_objects(&self) -> bool {
688        use DataType::*;
689        match self {
690            #[cfg(feature = "object")]
691            Object(_) => true,
692            List(inner) => inner.contains_objects(),
693            #[cfg(feature = "dtype-array")]
694            Array(inner, _) => inner.contains_objects(),
695            #[cfg(feature = "dtype-struct")]
696            Struct(fields) => fields.iter().any(|field| field.dtype.contains_objects()),
697            _ => false,
698        }
699    }
700
701    pub fn contains_list_recursive(&self) -> bool {
702        use DataType as D;
703        match self {
704            D::List(_) => true,
705            #[cfg(feature = "dtype-array")]
706            D::Array(inner, _) => inner.contains_list_recursive(),
707            #[cfg(feature = "dtype-struct")]
708            D::Struct(fields) => fields
709                .iter()
710                .any(|field| field.dtype.contains_list_recursive()),
711            _ => false,
712        }
713    }
714
715    pub fn contains_unknown(&self) -> bool {
716        use DataType as D;
717        match self {
718            D::Unknown(_) => true,
719            D::List(inner) => inner.contains_unknown(),
720            #[cfg(feature = "dtype-array")]
721            D::Array(inner, _) => inner.contains_unknown(),
722            #[cfg(feature = "dtype-struct")]
723            D::Struct(fields) => fields.iter().any(|field| field.dtype.contains_unknown()),
724            _ => false,
725        }
726    }
727
728    pub fn contains_dtype_recursive(&self, dtype: &DataType) -> bool {
729        if self == dtype {
730            return true;
731        }
732        use DataType as D;
733        match self {
734            D::List(inner) => inner.contains_dtype_recursive(dtype),
735            #[cfg(feature = "dtype-array")]
736            D::Array(inner, _) => inner.contains_dtype_recursive(dtype),
737            #[cfg(feature = "dtype-struct")]
738            D::Struct(fields) => fields
739                .iter()
740                .any(|field| field.dtype.contains_dtype_recursive(dtype)),
741            _ => false,
742        }
743    }
744
745    /// Check if type is sortable
746    pub fn is_ord(&self) -> bool {
747        let phys = self.to_physical();
748        phys.is_primitive_numeric()
749            || self.is_decimal()
750            || matches!(
751                phys,
752                DataType::Binary | DataType::String | DataType::Boolean
753            )
754    }
755
756    /// Check if this [`DataType`] is a Decimal type (of any scale/precision).
757    pub fn is_decimal(&self) -> bool {
758        match self {
759            #[cfg(feature = "dtype-decimal")]
760            DataType::Decimal(_, _) => true,
761            _ => false,
762        }
763    }
764
765    /// Check if this [`DataType`] is a basic floating point type (excludes Decimal).
766    /// Note, this also includes `Unknown(UnknownKind::Float)`.
767    pub fn is_float(&self) -> bool {
768        matches!(
769            self,
770            DataType::Float16
771                | DataType::Float32
772                | DataType::Float64
773                | DataType::Unknown(UnknownKind::Float)
774        )
775    }
776
777    /// Check if this [`DataType`] is an integer. Note, this also includes `Unknown(UnknownKind::Int)`.
778    pub fn is_integer(&self) -> bool {
779        matches!(
780            self,
781            DataType::Int8
782                | DataType::Int16
783                | DataType::Int32
784                | DataType::Int64
785                | DataType::Int128
786                | DataType::UInt8
787                | DataType::UInt16
788                | DataType::UInt32
789                | DataType::UInt64
790                | DataType::UInt128
791                | DataType::Unknown(UnknownKind::Int(_))
792        )
793    }
794
795    pub fn is_signed_integer(&self) -> bool {
796        // allow because it cannot be replaced when object feature is activated
797        matches!(
798            self,
799            DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 | DataType::Int128
800        )
801    }
802
803    pub fn is_unsigned_integer(&self) -> bool {
804        matches!(
805            self,
806            DataType::UInt8
807                | DataType::UInt16
808                | DataType::UInt32
809                | DataType::UInt64
810                | DataType::UInt128,
811        )
812    }
813
814    pub fn is_string(&self) -> bool {
815        matches!(self, DataType::String | DataType::Unknown(UnknownKind::Str))
816    }
817
818    pub fn is_categorical(&self) -> bool {
819        #[cfg(feature = "dtype-categorical")]
820        {
821            matches!(self, DataType::Categorical(_, _))
822        }
823        #[cfg(not(feature = "dtype-categorical"))]
824        {
825            false
826        }
827    }
828
829    pub fn is_enum(&self) -> bool {
830        #[cfg(feature = "dtype-categorical")]
831        {
832            matches!(self, DataType::Enum(_, _))
833        }
834        #[cfg(not(feature = "dtype-categorical"))]
835        {
836            false
837        }
838    }
839
840    pub fn is_extension(&self) -> bool {
841        #[cfg(feature = "dtype-extension")]
842        {
843            matches!(self, DataType::Extension(_, _))
844        }
845        #[cfg(not(feature = "dtype-extension"))]
846        {
847            false
848        }
849    }
850
851    /// Convert to an Arrow Field.
852    pub fn to_arrow_field(&self, name: PlSmallStr, compat_level: CompatLevel) -> ArrowField {
853        let field = ArrowField::new(name, self.to_arrow(compat_level), true);
854
855        if let Some(metadata) = self.to_arrow_field_metadata() {
856            field.with_metadata(metadata)
857        } else {
858            field
859        }
860    }
861
862    pub fn to_arrow_field_metadata(&self) -> Option<Metadata> {
863        match self {
864            #[cfg(feature = "dtype-categorical")]
865            DataType::Enum(fcats, _map) => {
866                let cats = fcats.categories();
867                let strings_size: usize = cats
868                    .values_iter()
869                    .map(|s| (s.len() + 1).ilog10() as usize + 1 + s.len())
870                    .sum();
871                let mut encoded = String::with_capacity(strings_size);
872                for cat in cats.values_iter() {
873                    encoded.push_str(itoa::Buffer::new().format(cat.len()));
874                    encoded.push(';');
875                    encoded.push_str(cat);
876                }
877                Some(BTreeMap::from([(
878                    PlSmallStr::from_static(DTYPE_ENUM_VALUES_NEW),
879                    PlSmallStr::from_string(encoded),
880                )]))
881            },
882            #[cfg(feature = "dtype-categorical")]
883            DataType::Categorical(cats, _) => {
884                let mut encoded = String::new();
885                encoded.push_str(itoa::Buffer::new().format(cats.name().len()));
886                encoded.push(';');
887                encoded.push_str(cats.name());
888                encoded.push_str(itoa::Buffer::new().format(cats.namespace().len()));
889                encoded.push(';');
890                encoded.push_str(cats.namespace());
891                encoded.push_str(cats.physical().as_str());
892                encoded.push(';');
893
894                Some(BTreeMap::from([(
895                    PlSmallStr::from_static(DTYPE_CATEGORICAL_NEW),
896                    PlSmallStr::from_string(encoded),
897                )]))
898            },
899            DataType::BinaryOffset => Some(BTreeMap::from([(
900                PlSmallStr::from_static(PL_KEY),
901                PlSmallStr::from_static(MAINTAIN_PL_TYPE),
902            )])),
903            _ => None,
904        }
905    }
906
907    /// Try to get the maximum value for this datatype.
908    pub fn max(&self) -> PolarsResult<Scalar> {
909        use DataType::*;
910        let v = match self {
911            Int8 => Scalar::from(i8::MAX),
912            Int16 => Scalar::from(i16::MAX),
913            Int32 => Scalar::from(i32::MAX),
914            Int64 => Scalar::from(i64::MAX),
915            Int128 => Scalar::from(i128::MAX),
916            UInt8 => Scalar::from(u8::MAX),
917            UInt16 => Scalar::from(u16::MAX),
918            UInt32 => Scalar::from(u32::MAX),
919            UInt64 => Scalar::from(u64::MAX),
920            UInt128 => Scalar::from(u128::MAX),
921            Float16 => Scalar::from(pf16::INFINITY),
922            Float32 => Scalar::from(f32::INFINITY),
923            Float64 => Scalar::from(f64::INFINITY),
924            #[cfg(feature = "dtype-time")]
925            Time => Scalar::new(Time, AnyValue::Time(NS_IN_DAY - 1)),
926            dt => polars_bail!(ComputeError: "cannot determine upper bound for dtype `{dt}`"),
927        };
928        Ok(v)
929    }
930
931    /// Try to get the minimum value for this datatype.
932    pub fn min(&self) -> PolarsResult<Scalar> {
933        use DataType::*;
934        let v = match self {
935            Int8 => Scalar::from(i8::MIN),
936            Int16 => Scalar::from(i16::MIN),
937            Int32 => Scalar::from(i32::MIN),
938            Int64 => Scalar::from(i64::MIN),
939            Int128 => Scalar::from(i128::MIN),
940            UInt8 => Scalar::from(u8::MIN),
941            UInt16 => Scalar::from(u16::MIN),
942            UInt32 => Scalar::from(u32::MIN),
943            UInt64 => Scalar::from(u64::MIN),
944            UInt128 => Scalar::from(u128::MIN),
945            Float16 => Scalar::from(pf16::NEG_INFINITY),
946            Float32 => Scalar::from(f32::NEG_INFINITY),
947            Float64 => Scalar::from(f64::NEG_INFINITY),
948            #[cfg(feature = "dtype-time")]
949            Time => Scalar::new(Time, AnyValue::Time(0)),
950            dt => polars_bail!(ComputeError: "cannot determine lower bound for dtype `{}`", dt),
951        };
952        Ok(v)
953    }
954
955    /// Convert to an Arrow data type.
956    #[inline]
957    pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowDataType {
958        self.try_to_arrow(compat_level).unwrap()
959    }
960
961    #[inline]
962    pub fn try_to_arrow(&self, compat_level: CompatLevel) -> PolarsResult<ArrowDataType> {
963        use DataType::*;
964        match self {
965            Boolean => Ok(ArrowDataType::Boolean),
966            UInt8 => Ok(ArrowDataType::UInt8),
967            UInt16 => Ok(ArrowDataType::UInt16),
968            UInt32 => Ok(ArrowDataType::UInt32),
969            UInt64 => Ok(ArrowDataType::UInt64),
970            UInt128 => Ok(ArrowDataType::UInt128),
971            Int8 => Ok(ArrowDataType::Int8),
972            Int16 => Ok(ArrowDataType::Int16),
973            Int32 => Ok(ArrowDataType::Int32),
974            Int64 => Ok(ArrowDataType::Int64),
975            Int128 => Ok(ArrowDataType::Int128),
976            Float16 => Ok(ArrowDataType::Float16),
977            Float32 => Ok(ArrowDataType::Float32),
978            Float64 => Ok(ArrowDataType::Float64),
979            #[cfg(feature = "dtype-decimal")]
980            Decimal(precision, scale) => {
981                assert!(*precision >= 1 && *precision <= 38);
982                Ok(ArrowDataType::Decimal(*precision, *scale))
983            },
984            String => {
985                let dt = if compat_level.0 >= 1 {
986                    ArrowDataType::Utf8View
987                } else {
988                    ArrowDataType::LargeUtf8
989                };
990                Ok(dt)
991            },
992            Binary => {
993                let dt = if compat_level.0 >= 1 {
994                    ArrowDataType::BinaryView
995                } else {
996                    ArrowDataType::LargeBinary
997                };
998                Ok(dt)
999            },
1000            Date => Ok(ArrowDataType::Date32),
1001            Datetime(unit, tz) => Ok(ArrowDataType::Timestamp(
1002                unit.to_arrow(),
1003                tz.as_deref().cloned(),
1004            )),
1005            Duration(unit) => Ok(ArrowDataType::Duration(unit.to_arrow())),
1006            Time => Ok(ArrowDataType::Time64(ArrowTimeUnit::Nanosecond)),
1007            #[cfg(feature = "dtype-array")]
1008            Array(dt, width) => Ok(ArrowDataType::FixedSizeList(
1009                Box::new(dt.to_arrow_field(LIST_VALUES_NAME, compat_level)),
1010                *width,
1011            )),
1012            List(dt) => Ok(ArrowDataType::LargeList(Box::new(
1013                dt.to_arrow_field(LIST_VALUES_NAME, compat_level),
1014            ))),
1015            Null => Ok(ArrowDataType::Null),
1016            #[cfg(feature = "object")]
1017            Object(_) => Ok(get_object_physical_type()),
1018            #[cfg(feature = "dtype-categorical")]
1019            Categorical(_, _) | Enum(_, _) => {
1020                let arrow_phys = match self.cat_physical().unwrap() {
1021                    CategoricalPhysical::U8 => IntegerType::UInt8,
1022                    CategoricalPhysical::U16 => IntegerType::UInt16,
1023                    CategoricalPhysical::U32 => IntegerType::UInt32,
1024                };
1025
1026                let values = if compat_level.0 >= 1 {
1027                    ArrowDataType::Utf8View
1028                } else {
1029                    ArrowDataType::LargeUtf8
1030                };
1031
1032                Ok(ArrowDataType::Dictionary(
1033                    arrow_phys,
1034                    Box::new(values),
1035                    false,
1036                ))
1037            },
1038            #[cfg(feature = "dtype-struct")]
1039            Struct(fields) => {
1040                let fields = fields
1041                    .iter()
1042                    .map(|fld| fld.to_arrow(compat_level))
1043                    .collect();
1044                Ok(ArrowDataType::Struct(fields))
1045            },
1046            BinaryOffset => Ok(ArrowDataType::LargeBinary),
1047            #[cfg(feature = "dtype-extension")]
1048            Extension(typ, inner) => Ok(ArrowDataType::Extension(Box::new(
1049                arrow::datatypes::ExtensionType {
1050                    name: typ.name().into(),
1051                    inner: inner.try_to_arrow(compat_level)?,
1052                    metadata: typ.serialize_metadata().map(|m| m.into()),
1053                },
1054            ))),
1055            Unknown(kind) => {
1056                let dt = match kind {
1057                    UnknownKind::Any => ArrowDataType::Unknown,
1058                    UnknownKind::Float => ArrowDataType::Float64,
1059                    UnknownKind::Str => ArrowDataType::Utf8View,
1060                    UnknownKind::Int(v) => {
1061                        return materialize_dyn_int(*v).dtype().try_to_arrow(compat_level);
1062                    },
1063                };
1064                Ok(dt)
1065            },
1066        }
1067    }
1068
1069    pub fn is_nested_null(&self) -> bool {
1070        use DataType::*;
1071        match self {
1072            Null => true,
1073            List(field) => field.is_nested_null(),
1074            #[cfg(feature = "dtype-array")]
1075            Array(field, _) => field.is_nested_null(),
1076            #[cfg(feature = "dtype-struct")]
1077            Struct(fields) => fields.iter().all(|fld| fld.dtype.is_nested_null()),
1078            _ => false,
1079        }
1080    }
1081
1082    /// Answers if this type matches the given type of a schema.
1083    ///
1084    /// Allows (nested) Null types in this type to match any type in the schema,
1085    /// but not vice versa. In such a case Ok(true) is returned, because a cast
1086    /// is necessary. If no cast is necessary Ok(false) is returned, and an
1087    /// error is returned if the types are incompatible.
1088    pub fn matches_schema_type(&self, schema_type: &DataType) -> PolarsResult<bool> {
1089        match (self, schema_type) {
1090            (DataType::List(l), DataType::List(r)) => l.matches_schema_type(r),
1091            #[cfg(feature = "dtype-array")]
1092            (DataType::Array(l, sl), DataType::Array(r, sr)) => {
1093                Ok(l.matches_schema_type(r)? && sl == sr)
1094            },
1095            #[cfg(feature = "dtype-struct")]
1096            (DataType::Struct(l), DataType::Struct(r)) => {
1097                if l.len() != r.len() {
1098                    polars_bail!(SchemaMismatch: "structs have different number of fields: {} vs {}", l.len(), r.len());
1099                }
1100                let mut must_cast = false;
1101                for (l, r) in l.iter().zip(r.iter()) {
1102                    must_cast |= l.dtype.matches_schema_type(&r.dtype)?;
1103                }
1104                Ok(must_cast)
1105            },
1106            (DataType::Null, DataType::Null) => Ok(false),
1107            #[cfg(feature = "dtype-decimal")]
1108            (DataType::Decimal(p1, s1), DataType::Decimal(p2, s2)) => Ok((p1, s1) != (p2, s2)),
1109            // We don't allow the other way around, only if our current type is
1110            // null and the schema isn't we allow it.
1111            (DataType::Null, _) => Ok(true),
1112            #[cfg(feature = "dtype-categorical")]
1113            (DataType::Categorical(l, _), DataType::Categorical(r, _)) => {
1114                ensure_same_categories(l, r)?;
1115                Ok(false)
1116            },
1117            #[cfg(feature = "dtype-categorical")]
1118            (DataType::Enum(l, _), DataType::Enum(r, _)) => {
1119                ensure_same_frozen_categories(l, r)?;
1120                Ok(false)
1121            },
1122
1123            (l, r) if l == r => Ok(false),
1124            (l, r) => {
1125                polars_bail!(SchemaMismatch: "type {:?} is incompatible with expected type {:?}", l, r)
1126            },
1127        }
1128    }
1129
1130    #[inline]
1131    pub fn is_unknown(&self) -> bool {
1132        matches!(self, DataType::Unknown(_))
1133    }
1134
1135    pub fn nesting_level(&self) -> usize {
1136        let mut level = 0;
1137        let mut slf = self;
1138        while let Some(inner_dtype) = slf.inner_dtype() {
1139            level += 1;
1140            slf = inner_dtype;
1141        }
1142        level
1143    }
1144
1145    /// If this dtype is a Categorical or Enum, returns the physical backing type.
1146    #[cfg(feature = "dtype-categorical")]
1147    pub fn cat_physical(&self) -> PolarsResult<CategoricalPhysical> {
1148        match self {
1149            DataType::Categorical(cats, _) => Ok(cats.physical()),
1150            DataType::Enum(fcats, _) => Ok(fcats.physical()),
1151            _ => {
1152                polars_bail!(SchemaMismatch: "invalid dtype: expected an Enum or Categorical type, received '{:?}'", self)
1153            },
1154        }
1155    }
1156
1157    /// If this dtype is a Categorical or Enum, returns the underlying mapping.
1158    #[cfg(feature = "dtype-categorical")]
1159    pub fn cat_mapping(&self) -> PolarsResult<&Arc<CategoricalMapping>> {
1160        match self {
1161            DataType::Categorical(_, mapping) | DataType::Enum(_, mapping) => Ok(mapping),
1162            _ => {
1163                polars_bail!(SchemaMismatch: "invalid dtype: expected an Enum or Categorical type, received '{:?}'", self)
1164            },
1165        }
1166    }
1167
1168    #[cfg(feature = "dtype-categorical")]
1169    pub fn from_categories(cats: Arc<Categories>) -> Self {
1170        let mapping = cats.mapping();
1171        Self::Categorical(cats, mapping)
1172    }
1173
1174    #[cfg(feature = "dtype-categorical")]
1175    pub fn from_frozen_categories(fcats: Arc<FrozenCategories>) -> Self {
1176        let mapping = fcats.mapping().clone();
1177        Self::Enum(fcats, mapping)
1178    }
1179
1180    pub fn is_numeric(&self) -> bool {
1181        self.is_integer() || self.is_float() || self.is_decimal()
1182    }
1183
1184    pub fn numeric_to_unsigned_bit_repr(&self) -> Option<DataType> {
1185        use DataType::*;
1186
1187        Some(match self {
1188            Int8 | UInt8 => UInt8,
1189            Int16 | UInt16 | Float16 => UInt16,
1190            Int32 | UInt32 | Float32 => UInt32,
1191            Int64 | UInt64 | Float64 => UInt64,
1192            Int128 | UInt128 => UInt128,
1193            _ => return None,
1194        })
1195    }
1196}
1197
1198impl Display for DataType {
1199    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1200        let s = match self {
1201            DataType::Null => "null",
1202            DataType::Boolean => "bool",
1203            DataType::UInt8 => "u8",
1204            DataType::UInt16 => "u16",
1205            DataType::UInt32 => "u32",
1206            DataType::UInt64 => "u64",
1207            DataType::UInt128 => "u128",
1208            DataType::Int8 => "i8",
1209            DataType::Int16 => "i16",
1210            DataType::Int32 => "i32",
1211            DataType::Int64 => "i64",
1212            DataType::Int128 => "i128",
1213            DataType::Float16 => "f16",
1214            DataType::Float32 => "f32",
1215            DataType::Float64 => "f64",
1216            #[cfg(feature = "dtype-decimal")]
1217            DataType::Decimal(p, s) => return write!(f, "decimal[{p},{s}]"),
1218            DataType::String => "str",
1219            DataType::Binary => "binary",
1220            DataType::BinaryOffset => "binary[offset]",
1221            DataType::Date => "date",
1222            DataType::Datetime(tu, None) => return write!(f, "datetime[{tu}]"),
1223            DataType::Datetime(tu, Some(tz)) => return write!(f, "datetime[{tu}, {tz}]"),
1224            DataType::Duration(tu) => return write!(f, "duration[{tu}]"),
1225            DataType::Time => "time",
1226            #[cfg(feature = "dtype-array")]
1227            DataType::Array(_, _) => {
1228                let tp = self.array_leaf_dtype().unwrap();
1229
1230                let dims = self.get_shape().unwrap();
1231                let shape = if dims.len() == 1 {
1232                    format!("{}", dims[0])
1233                } else {
1234                    format_tuple!(dims)
1235                };
1236                return write!(f, "array[{tp}, {shape}]");
1237            },
1238            DataType::List(tp) => return write!(f, "list[{tp}]"),
1239            #[cfg(feature = "object")]
1240            DataType::Object(s) => s,
1241            #[cfg(feature = "dtype-categorical")]
1242            DataType::Categorical(_, _) => "cat",
1243            #[cfg(feature = "dtype-categorical")]
1244            DataType::Enum(_, _) => "enum",
1245            #[cfg(feature = "dtype-struct")]
1246            DataType::Struct(fields) => return write!(f, "struct[{}]", fields.len()),
1247            #[cfg(feature = "dtype-extension")]
1248            DataType::Extension(typ, _) => return write!(f, "ext[{}]", typ.0.dyn_display()),
1249            DataType::Unknown(kind) => match kind {
1250                UnknownKind::Any => "unknown",
1251                UnknownKind::Int(_) => "dyn int",
1252                UnknownKind::Float => "dyn float",
1253                UnknownKind::Str => "dyn str",
1254            },
1255        };
1256        f.write_str(s)
1257    }
1258}
1259
1260impl std::fmt::Debug for DataType {
1261    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1262        use DataType::*;
1263        match self {
1264            Boolean => write!(f, "Boolean"),
1265            UInt8 => write!(f, "UInt8"),
1266            UInt16 => write!(f, "UInt16"),
1267            UInt32 => write!(f, "UInt32"),
1268            UInt64 => write!(f, "UInt64"),
1269            UInt128 => write!(f, "UInt128"),
1270            Int8 => write!(f, "Int8"),
1271            Int16 => write!(f, "Int16"),
1272            Int32 => write!(f, "Int32"),
1273            Int64 => write!(f, "Int64"),
1274            Int128 => write!(f, "Int128"),
1275            Float16 => write!(f, "Float16"),
1276            Float32 => write!(f, "Float32"),
1277            Float64 => write!(f, "Float64"),
1278            String => write!(f, "String"),
1279            Binary => write!(f, "Binary"),
1280            BinaryOffset => write!(f, "BinaryOffset"),
1281            Date => write!(f, "Date"),
1282            Time => write!(f, "Time"),
1283            Duration(unit) => write!(f, "Duration('{unit}')"),
1284            Datetime(unit, opt_tz) => {
1285                if let Some(tz) = opt_tz {
1286                    write!(f, "Datetime('{unit}', '{tz}')")
1287                } else {
1288                    write!(f, "Datetime('{unit}')")
1289                }
1290            },
1291            #[cfg(feature = "dtype-decimal")]
1292            Decimal(p, s) => write!(f, "Decimal({p}, {s})"),
1293            #[cfg(feature = "dtype-array")]
1294            Array(inner, size) => write!(f, "Array({inner:?}, {size})"),
1295            List(inner) => write!(f, "List({inner:?})"),
1296            #[cfg(feature = "dtype-struct")]
1297            Struct(fields) => {
1298                let mut first = true;
1299                write!(f, "Struct({{")?;
1300                for field in fields {
1301                    if !first {
1302                        write!(f, ", ")?;
1303                    }
1304                    write!(f, "'{}': {:?}", field.name(), field.dtype())?;
1305                    first = false;
1306                }
1307                write!(f, "}})")
1308            },
1309            #[cfg(feature = "dtype-categorical")]
1310            Categorical(cats, _) => {
1311                if cats.is_global() {
1312                    write!(f, "Categorical")
1313                } else if cats.namespace().is_empty() && cats.physical() == CategoricalPhysical::U32
1314                {
1315                    write!(f, "Categorical('{}')", cats.name())
1316                } else {
1317                    write!(
1318                        f,
1319                        "Categorical('{}', '{}', {:?})",
1320                        cats.name(),
1321                        cats.namespace(),
1322                        cats.physical()
1323                    )
1324                }
1325            },
1326            #[cfg(feature = "dtype-categorical")]
1327            Enum(_, _) => write!(f, "Enum([...])"),
1328            #[cfg(feature = "object")]
1329            Object(_) => write!(f, "Object"),
1330            Null => write!(f, "Null"),
1331            #[cfg(feature = "dtype-extension")]
1332            Extension(typ, inner) => write!(f, "Extension({}, {inner:?})", typ.0.dyn_debug()),
1333            Unknown(kind) => write!(f, "Unknown({kind:?})"),
1334        }
1335    }
1336}
1337
1338pub fn merge_dtypes(left: &DataType, right: &DataType) -> PolarsResult<DataType> {
1339    use DataType::*;
1340    Ok(match (left, right) {
1341        #[cfg(feature = "dtype-categorical")]
1342        (Categorical(cats_l, map), Categorical(cats_r, _)) => {
1343            ensure_same_categories(cats_l, cats_r)?;
1344            Categorical(cats_l.clone(), map.clone())
1345        },
1346        #[cfg(feature = "dtype-categorical")]
1347        (Enum(fcats_l, map), Enum(fcats_r, _)) => {
1348            ensure_same_frozen_categories(fcats_l, fcats_r)?;
1349            Enum(fcats_l.clone(), map.clone())
1350        },
1351        (List(inner_l), List(inner_r)) => {
1352            let merged = merge_dtypes(inner_l, inner_r)?;
1353            List(Box::new(merged))
1354        },
1355        #[cfg(feature = "dtype-struct")]
1356        (Struct(inner_l), Struct(inner_r)) => {
1357            polars_ensure!(inner_l.len() == inner_r.len(), ComputeError: "cannot combine structs with differing amounts of fields ({} != {})", inner_l.len(), inner_r.len());
1358            let fields = inner_l.iter().zip(inner_r.iter()).map(|(l, r)| {
1359                polars_ensure!(l.name() == r.name(), ComputeError: "cannot combine structs with different fields ({} != {})", l.name(), r.name());
1360                let merged = merge_dtypes(l.dtype(), r.dtype())?;
1361                Ok(Field::new(l.name().clone(), merged))
1362            }).collect::<PolarsResult<Vec<_>>>()?;
1363            Struct(fields)
1364        },
1365        #[cfg(feature = "dtype-array")]
1366        (Array(inner_l, width_l), Array(inner_r, width_r)) => {
1367            polars_ensure!(width_l == width_r, ComputeError: "widths of FixedSizeWidth Series are not equal");
1368            let merged = merge_dtypes(inner_l, inner_r)?;
1369            Array(Box::new(merged), *width_l)
1370        },
1371        (left, right) if left == right => left.clone(),
1372        _ => polars_bail!(ComputeError: "unable to merge datatypes"),
1373    })
1374}
1375
1376fn collect_nested_types(
1377    dtype: &DataType,
1378    result: &mut PlHashSet<DataType>,
1379    include_compound_types: bool,
1380) {
1381    match dtype {
1382        DataType::List(inner) => {
1383            if include_compound_types {
1384                result.insert(dtype.clone());
1385            }
1386            collect_nested_types(inner, result, include_compound_types);
1387        },
1388        #[cfg(feature = "dtype-array")]
1389        DataType::Array(inner, _) => {
1390            if include_compound_types {
1391                result.insert(dtype.clone());
1392            }
1393            collect_nested_types(inner, result, include_compound_types);
1394        },
1395        #[cfg(feature = "dtype-struct")]
1396        DataType::Struct(fields) => {
1397            if include_compound_types {
1398                result.insert(dtype.clone());
1399            }
1400            for field in fields {
1401                collect_nested_types(field.dtype(), result, include_compound_types);
1402            }
1403        },
1404        _ => {
1405            result.insert(dtype.clone());
1406        },
1407    }
1408}
1409
1410pub fn unpack_dtypes(dtype: &DataType, include_compound_types: bool) -> PlHashSet<DataType> {
1411    let mut result = PlHashSet::new();
1412    collect_nested_types(dtype, &mut result, include_compound_types);
1413    result
1414}
1415
1416#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
1417#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1418#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
1419pub struct CompatLevel(pub(crate) u16);
1420
1421impl CompatLevel {
1422    pub const fn newest() -> CompatLevel {
1423        CompatLevel(1)
1424    }
1425
1426    pub const fn oldest() -> CompatLevel {
1427        CompatLevel(0)
1428    }
1429
1430    // The following methods are only used internally
1431
1432    #[doc(hidden)]
1433    pub fn with_level(level: u16) -> PolarsResult<CompatLevel> {
1434        if level > CompatLevel::newest().0 {
1435            polars_bail!(InvalidOperation: "invalid compat level");
1436        }
1437        Ok(CompatLevel(level))
1438    }
1439
1440    #[doc(hidden)]
1441    pub fn get_level(&self) -> u16 {
1442        self.0
1443    }
1444
1445    /// Whether this compat level uses Utf8View/BinaryView types.
1446    pub fn uses_binview_types(&self) -> bool {
1447        *self != CompatLevel::oldest()
1448    }
1449}
1450
1451impl DataType {
1452    pub fn visit_with(&self, mut visitor_fn: impl FnMut(&DataType)) {
1453        self.try_visit_with(|dtype| {
1454            visitor_fn(dtype);
1455            Ok(())
1456        })
1457        .unwrap();
1458    }
1459
1460    pub fn try_visit_with(
1461        &self,
1462        mut visitor_fn: impl FnMut(&DataType) -> PolarsResult<()>,
1463    ) -> PolarsResult<()> {
1464        DataType::try_mutate_with(Cow::Borrowed(self), |dtype| {
1465            visitor_fn(dtype.as_ref()).map(|_| dtype)
1466        })
1467        .map(|_| ())
1468    }
1469
1470    pub fn try_mutate_with<'d>(
1471        dtype: Cow<'d, DataType>,
1472        mut visitor_fn: impl FnMut(Cow<'d, DataType>) -> PolarsResult<Cow<'d, DataType>>,
1473    ) -> PolarsResult<Cow<'d, DataType>> {
1474        DtypeVisitor {
1475            visitor_fn: &mut visitor_fn,
1476        }
1477        .visit_rec(dtype)
1478    }
1479}
1480
1481struct DtypeVisitor<'d, 'f> {
1482    visitor_fn: &'f mut dyn FnMut(Cow<'d, DataType>) -> PolarsResult<Cow<'d, DataType>>,
1483}
1484
1485impl<'d, 'f> DtypeVisitor<'d, 'f> {
1486    fn visit_rec(&mut self, dtype: Cow<'d, DataType>) -> PolarsResult<Cow<'d, DataType>> {
1487        let dtype = match dtype.as_ref() {
1488            DataType::List(_) => match dtype {
1489                Cow::Owned(DataType::List(mut inner)) => {
1490                    self.visit_ref_mut(inner.as_mut())?;
1491                    Cow::Owned(DataType::List(inner))
1492                },
1493                Cow::Borrowed(DataType::List(inner)) => {
1494                    let ret = self.visit_rec(Cow::Borrowed(inner.as_ref()))?;
1495
1496                    if std::ptr::eq(ret.as_ref(), inner.as_ref()) {
1497                        dtype
1498                    } else {
1499                        Cow::Owned(DataType::List(Box::new(ret.into_owned())))
1500                    }
1501                },
1502                _ => unreachable!(),
1503            },
1504            #[cfg(feature = "dtype-array")]
1505            DataType::Array(..) => match dtype {
1506                Cow::Owned(DataType::Array(mut inner, width)) => {
1507                    self.visit_ref_mut(inner.as_mut())?;
1508                    Cow::Owned(DataType::Array(inner, width))
1509                },
1510                Cow::Borrowed(DataType::Array(inner, width)) => {
1511                    let ret = self.visit_rec(Cow::Borrowed(inner.as_ref()))?;
1512
1513                    if std::ptr::eq(ret.as_ref(), inner.as_ref()) {
1514                        dtype
1515                    } else {
1516                        Cow::Owned(DataType::Array(Box::new(ret.into_owned()), *width))
1517                    }
1518                },
1519                _ => unreachable!(),
1520            },
1521            #[cfg(feature = "dtype-struct")]
1522            DataType::Struct(_) => match dtype {
1523                Cow::Owned(DataType::Struct(mut fields)) => {
1524                    for f in &mut fields {
1525                        self.visit_ref_mut(&mut f.dtype)?;
1526                    }
1527
1528                    Cow::Owned(DataType::Struct(fields))
1529                },
1530                Cow::Borrowed(DataType::Struct(fields)) => {
1531                    let mut new_fields = vec![];
1532
1533                    for (i, f) in fields.iter().enumerate() {
1534                        let ret = self.visit_rec(Cow::Borrowed(f.dtype()))?;
1535
1536                        if std::ptr::eq(ret.as_ref(), f.dtype()) && new_fields.is_empty() {
1537                            continue;
1538                        }
1539
1540                        if new_fields.is_empty() {
1541                            new_fields.reserve_exact(fields.len());
1542                            new_fields.extend(fields.iter().take(i).cloned());
1543                        }
1544
1545                        new_fields.push(Field::new(f.name().clone(), ret.into_owned()));
1546                    }
1547
1548                    if new_fields.is_empty() {
1549                        dtype
1550                    } else {
1551                        assert_eq!(new_fields.len(), fields.len());
1552                        Cow::Owned(DataType::Struct(new_fields))
1553                    }
1554                },
1555                _ => unreachable!(),
1556            },
1557            #[cfg(feature = "dtype-extension")]
1558            DataType::Extension(..) => match dtype {
1559                Cow::Owned(DataType::Extension(ext, mut storage)) => {
1560                    self.visit_ref_mut(storage.as_mut())?;
1561                    Cow::Owned(DataType::Extension(ext, storage))
1562                },
1563                Cow::Borrowed(DataType::Extension(ext, storage)) => {
1564                    let ret = self.visit_rec(Cow::Borrowed(storage.as_ref()))?;
1565
1566                    if std::ptr::eq(ret.as_ref(), storage.as_ref()) {
1567                        dtype
1568                    } else {
1569                        Cow::Owned(DataType::Extension(ext.clone(), Box::new(ret.into_owned())))
1570                    }
1571                },
1572                _ => unreachable!(),
1573            },
1574            _ => {
1575                debug_assert!(!dtype.is_nested());
1576                dtype
1577            },
1578        };
1579
1580        (self.visitor_fn)(dtype)
1581    }
1582
1583    /// `dtype` will be set to an unspecified value if this returns an error.
1584    fn visit_ref_mut(&mut self, dtype: &mut DataType) -> PolarsResult<()> {
1585        *dtype = self
1586            .visit_rec(Cow::Owned(std::mem::replace(dtype, DataType::Null)))?
1587            .into_owned();
1588
1589        Ok(())
1590    }
1591}
1592
#[cfg(test)]
mod tests {
    use super::*;

    /// Without compound types only the leaf dtype is reported.
    #[cfg(feature = "dtype-array")]
    #[test]
    fn test_unpack_primitive_dtypes() {
        let array = DataType::Array(Box::new(DataType::Float64), 10);
        let nested = DataType::List(Box::new(array));

        let unpacked = unpack_dtypes(&nested, false);

        let mut leaves_only = PlHashSet::new();
        leaves_only.insert(DataType::Float64);

        assert_eq!(unpacked, leaves_only)
    }

    /// With compound types every level of the nesting is reported.
    #[cfg(feature = "dtype-array")]
    #[test]
    fn test_unpack_compound_dtypes() {
        let array = DataType::Array(Box::new(DataType::Float64), 10);
        let nested = DataType::List(Box::new(array.clone()));

        let unpacked = unpack_dtypes(&nested, true);

        let mut every_level = PlHashSet::new();
        every_level.insert(nested);
        every_level.insert(array);
        every_level.insert(DataType::Float64);

        assert_eq!(unpacked, every_level)
    }
}