polars_core/datatypes/
dtype.rs

1use std::collections::BTreeMap;
2
3use arrow::datatypes::{
4    DTYPE_CATEGORICAL_NEW, DTYPE_ENUM_VALUES_LEGACY, DTYPE_ENUM_VALUES_NEW, MAINTAIN_PL_TYPE,
5    Metadata, PL_KEY,
6};
7#[cfg(feature = "dtype-array")]
8use polars_utils::format_tuple;
9use polars_utils::itertools::Itertools;
10#[cfg(any(feature = "serde-lazy", feature = "serde"))]
11use serde::{Deserialize, Serialize};
12pub use temporal::time_zone::TimeZone;
13
14use super::*;
15#[cfg(feature = "object")]
16use crate::chunked_array::object::registry::get_object_physical_type;
17#[cfg(feature = "dtype-extension")]
18pub use crate::datatypes::extension::ExtensionTypeInstance;
19use crate::utils::materialize_dyn_int;
20
21pub trait MetaDataExt: IntoMetadata {
22    fn pl_enum_metadata(&self) -> Option<&str> {
23        let md = self.into_metadata_ref();
24        let values = md
25            .get(DTYPE_ENUM_VALUES_NEW)
26            .or_else(|| md.get(DTYPE_ENUM_VALUES_LEGACY));
27        Some(values?.as_str())
28    }
29
30    fn pl_categorical_metadata(&self) -> Option<&str> {
31        // We ignore DTYPE_CATEGORICAL_LEGACY here, as we already map all
32        // string-typed arrow dictionaries to the global Categories, and the
33        // legacy metadata format only specifies the now-removed physical
34        // ordering parameter.
35        Some(
36            self.into_metadata_ref()
37                .get(DTYPE_CATEGORICAL_NEW)?
38                .as_str(),
39        )
40    }
41
42    fn maintain_type(&self) -> bool {
43        let metadata = self.into_metadata_ref();
44        metadata.get(PL_KEY).map(|s| s.as_str()) == Some(MAINTAIN_PL_TYPE)
45    }
46}
47
// `Metadata` picks up every `MetaDataExt` method from the trait's default implementations.
impl MetaDataExt for Metadata {}
/// Types that can hand out a borrowed [`Metadata`] map.
pub trait IntoMetadata {
    // The method borrows `self` despite its `into_` prefix, hence the silenced clippy lint.
    #[allow(clippy::wrong_self_convention)]
    /// Borrow the metadata map.
    fn into_metadata_ref(&self) -> &Metadata;
}
53
// A `Metadata` map is its own metadata: the borrow is simply `self`.
impl IntoMetadata for Metadata {
    fn into_metadata_ref(&self) -> &Metadata {
        self
    }
}
59
// The kind of a dtype that has not been resolved to a concrete `DataType` yet.
// `Any` is the default kind.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
#[cfg_attr(
    any(feature = "serde", feature = "serde-lazy"),
    derive(Serialize, Deserialize)
)]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum UnknownKind {
    // Hold the value to determine the concrete size.
    Int(i128),
    // A float of undetermined width; `materialize` resolves it to `Float64`.
    Float,
    // Can be Categorical or String
    Str,
    // No hint at all; `materialize` returns `None` for this kind.
    #[default]
    Any,
}
75
76impl UnknownKind {
77    pub fn materialize(&self) -> Option<DataType> {
78        let dtype = match self {
79            UnknownKind::Int(v) => materialize_dyn_int(*v).dtype(),
80            UnknownKind::Float => DataType::Float64,
81            UnknownKind::Str => DataType::String,
82            UnknownKind::Any => return None,
83        };
84        Some(dtype)
85    }
86}
87
/// The logical data type of a column or value.
///
/// Several variants only exist when the matching cargo feature is enabled.
#[derive(Clone)]
pub enum DataType {
    Boolean,
    UInt8,
    UInt16,
    UInt32,
    UInt64,
    UInt128,
    Int8,
    Int16,
    Int32,
    Int64,
    Int128,
    Float16,
    Float32,
    Float64,
    /// Fixed point decimal type with a precision and a non-negative scale.
    /// This is backed by a signed 128-bit integer which allows for up to 38 significant digits.
    /// Meaning max precision is 38.
    #[cfg(feature = "dtype-decimal")]
    Decimal(usize, usize), // (precision, scale), invariant: 1 <= precision <= 38.
    /// String data
    String,
    Binary,
    BinaryOffset,
    /// A 32-bit date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in days (32 bits).
    Date,
    /// A 64-bit date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in the given timeunit (64 bits).
    Datetime(TimeUnit, Option<TimeZone>),
    /// 64-bit integer representing the difference between times in the given timeunit
    Duration(TimeUnit),
    /// A 64-bit time representing the elapsed time since midnight in nanoseconds
    Time,
    /// A nested list with a fixed size in each row
    #[cfg(feature = "dtype-array")]
    Array(Box<DataType>, usize),
    /// A nested list with a variable size in each row
    List(Box<DataType>),
    /// A generic type that can be used in a `Series`
    /// &'static str can be used to determine/set inner type
    #[cfg(feature = "object")]
    Object(&'static str),
    Null,
    #[cfg(feature = "dtype-categorical")]
    Categorical(Arc<Categories>, Arc<CategoricalMapping>),
    // Same payload layout as `Categorical` (categories + mapping), so both variants
    // can be matched with the same two-field patterns.
    #[cfg(feature = "dtype-categorical")]
    Enum(Arc<FrozenCategories>, Arc<CategoricalMapping>),
    #[cfg(feature = "dtype-struct")]
    Struct(Vec<Field>),
    // An extension type instance together with its storage dtype.
    #[cfg(feature = "dtype-extension")]
    Extension(ExtensionTypeInstance, Box<DataType>),
    // some logical types we cannot know statically, e.g. Datetime
    Unknown(UnknownKind),
}
145
146impl Default for DataType {
147    fn default() -> Self {
148        DataType::Unknown(UnknownKind::Any)
149    }
150}
151
/// Types that can expose a borrowed [`DataType`].
pub trait AsRefDataType {
    /// Borrow the data type.
    fn as_ref_dtype(&self) -> &DataType;
}
155
156impl Hash for DataType {
157    fn hash<H: Hasher>(&self, state: &mut H) {
158        std::mem::discriminant(self).hash(state)
159    }
160}
161
162impl PartialEq for DataType {
163    fn eq(&self, other: &Self) -> bool {
164        use DataType::*;
165        {
166            match (self, other) {
167                #[cfg(feature = "dtype-categorical")]
168                (Categorical(cats_l, _), Categorical(cats_r, _)) => Arc::ptr_eq(cats_l, cats_r),
169                #[cfg(feature = "dtype-categorical")]
170                (Enum(fcats_l, _), Enum(fcats_r, _)) => Arc::ptr_eq(fcats_l, fcats_r),
171                (Datetime(tu_l, tz_l), Datetime(tu_r, tz_r)) => tu_l == tu_r && tz_l == tz_r,
172                (List(left_inner), List(right_inner)) => left_inner == right_inner,
173                #[cfg(feature = "dtype-duration")]
174                (Duration(tu_l), Duration(tu_r)) => tu_l == tu_r,
175                #[cfg(feature = "dtype-decimal")]
176                (Decimal(p1, s1), Decimal(p2, s2)) => (p1, s1) == (p2, s2),
177                #[cfg(feature = "object")]
178                (Object(lhs), Object(rhs)) => lhs == rhs,
179                #[cfg(feature = "dtype-struct")]
180                (Struct(lhs), Struct(rhs)) => {
181                    std::ptr::eq(Vec::as_ptr(lhs), Vec::as_ptr(rhs)) || lhs == rhs
182                },
183                #[cfg(feature = "dtype-array")]
184                (Array(left_inner, left_width), Array(right_inner, right_width)) => {
185                    left_width == right_width && left_inner == right_inner
186                },
187                (Unknown(l), Unknown(r)) => match (l, r) {
188                    (UnknownKind::Int(_), UnknownKind::Int(_)) => true,
189                    _ => l == r,
190                },
191                _ => std::mem::discriminant(self) == std::mem::discriminant(other),
192            }
193        }
194    }
195}
196
// Marker impl: the hand-written `PartialEq` above is treated as a full equivalence relation.
impl Eq for DataType {}
198
199impl DataType {
200    pub const IDX_DTYPE: Self = {
201        #[cfg(not(feature = "bigidx"))]
202        {
203            DataType::UInt32
204        }
205        #[cfg(feature = "bigidx")]
206        {
207            DataType::UInt64
208        }
209    };
210
211    pub fn value_within_range(&self, other: AnyValue) -> bool {
212        use DataType::*;
213        match self {
214            UInt8 => other.extract::<u8>().is_some(),
215            #[cfg(feature = "dtype-u16")]
216            UInt16 => other.extract::<u16>().is_some(),
217            UInt32 => other.extract::<u32>().is_some(),
218            UInt64 => other.extract::<u64>().is_some(),
219            #[cfg(feature = "dtype-u128")]
220            UInt128 => other.extract::<u128>().is_some(),
221            #[cfg(feature = "dtype-i8")]
222            Int8 => other.extract::<i8>().is_some(),
223            #[cfg(feature = "dtype-i16")]
224            Int16 => other.extract::<i16>().is_some(),
225            Int32 => other.extract::<i32>().is_some(),
226            Int64 => other.extract::<i64>().is_some(),
227            #[cfg(feature = "dtype-i128")]
228            Int128 => other.extract::<i128>().is_some(),
229            _ => false,
230        }
231    }
232
233    /// Struct representation of the arrow `month_day_nano_interval` type.
234    #[cfg(feature = "dtype-struct")]
235    pub fn _month_days_ns_struct_type() -> Self {
236        DataType::Struct(vec![
237            Field::new(PlSmallStr::from_static("months"), DataType::Int32),
238            Field::new(PlSmallStr::from_static("days"), DataType::Int32),
239            Field::new(
240                PlSmallStr::from_static("nanoseconds"),
241                DataType::Duration(TimeUnit::Nanoseconds),
242            ),
243        ])
244    }
245
246    /// Check if the whole dtype is known.
247    pub fn is_known(&self) -> bool {
248        match self {
249            DataType::List(inner) => inner.is_known(),
250            #[cfg(feature = "dtype-array")]
251            DataType::Array(inner, _) => inner.is_known(),
252            #[cfg(feature = "dtype-struct")]
253            DataType::Struct(fields) => fields.iter().all(|fld| fld.dtype.is_known()),
254            DataType::Unknown(_) => false,
255            _ => true,
256        }
257    }
258
259    /// Materialize this datatype if it is unknown. All other datatypes
260    /// are left unchanged.
261    pub fn materialize_unknown(self, allow_unknown: bool) -> PolarsResult<DataType> {
262        match self {
263            DataType::Unknown(u) => match u.materialize() {
264                Some(known) => Ok(known),
265                None => {
266                    if allow_unknown {
267                        Ok(DataType::Unknown(u))
268                    } else {
269                        polars_bail!(SchemaMismatch: "failed to materialize unknown type")
270                    }
271                },
272            },
273            DataType::List(inner) => Ok(DataType::List(Box::new(
274                inner.materialize_unknown(allow_unknown)?,
275            ))),
276            #[cfg(feature = "dtype-array")]
277            DataType::Array(inner, size) => Ok(DataType::Array(
278                Box::new(inner.materialize_unknown(allow_unknown)?),
279                size,
280            )),
281            #[cfg(feature = "dtype-struct")]
282            DataType::Struct(fields) => Ok(DataType::Struct(
283                fields
284                    .into_iter()
285                    .map(|f| {
286                        PolarsResult::Ok(Field::new(
287                            f.name,
288                            f.dtype.materialize_unknown(allow_unknown)?,
289                        ))
290                    })
291                    .try_collect_vec()?,
292            )),
293            _ => Ok(self),
294        }
295    }
296
297    #[cfg(feature = "dtype-array")]
298    /// Get the full shape of a multidimensional array.
299    pub fn get_shape(&self) -> Option<Vec<usize>> {
300        fn get_shape_impl(dt: &DataType, shape: &mut Vec<usize>) {
301            if let DataType::Array(inner, size) = dt {
302                shape.push(*size);
303                get_shape_impl(inner, shape);
304            }
305        }
306
307        if let DataType::Array(inner, size) = self {
308            let mut shape = vec![*size];
309            get_shape_impl(inner, &mut shape);
310            Some(shape)
311        } else {
312            None
313        }
314    }
315
316    /// Get the inner data type of a nested type.
317    pub fn inner_dtype(&self) -> Option<&DataType> {
318        match self {
319            DataType::List(inner) => Some(inner),
320            #[cfg(feature = "dtype-array")]
321            DataType::Array(inner, _) => Some(inner),
322            _ => None,
323        }
324    }
325
326    /// Get the inner data type of a nested type.
327    pub fn into_inner_dtype(self) -> Option<DataType> {
328        match self {
329            DataType::List(inner) => Some(*inner),
330            #[cfg(feature = "dtype-array")]
331            DataType::Array(inner, _) => Some(*inner),
332            _ => None,
333        }
334    }
335
336    /// Get the inner data type of a nested type.
337    pub fn try_into_inner_dtype(self) -> PolarsResult<DataType> {
338        match self {
339            DataType::List(inner) => Ok(*inner),
340            #[cfg(feature = "dtype-array")]
341            DataType::Array(inner, _) => Ok(*inner),
342            dt => polars_bail!(InvalidOperation: "cannot get inner datatype of `{dt}`"),
343        }
344    }
345
346    /// Get the absolute inner data type of a nested type.
347    pub fn leaf_dtype(&self) -> &DataType {
348        let mut prev = self;
349        while let Some(dtype) = prev.inner_dtype() {
350            prev = dtype
351        }
352        prev
353    }
354
355    #[cfg(feature = "dtype-array")]
356    /// Get the inner data type of a multidimensional array.
357    pub fn array_leaf_dtype(&self) -> Option<&DataType> {
358        let mut prev = self;
359        match prev {
360            DataType::Array(_, _) => {
361                while let DataType::Array(inner, _) = &prev {
362                    prev = inner;
363                }
364                Some(prev)
365            },
366            _ => None,
367        }
368    }
369
370    /// Cast the leaf types of Lists/Arrays and keep the nesting.
371    pub fn cast_leaf(&self, to: DataType) -> DataType {
372        use DataType::*;
373        match self {
374            List(inner) => List(Box::new(inner.cast_leaf(to))),
375            #[cfg(feature = "dtype-array")]
376            Array(inner, size) => Array(Box::new(inner.cast_leaf(to)), *size),
377            _ => to,
378        }
379    }
380
381    /// Map all leaf types of nested dtypes (list, array, struct) using the
382    /// supplied function.
383    pub fn map_leaves<F: FnMut(DataType) -> DataType>(self, f: &mut F) -> DataType {
384        use DataType::*;
385        match self {
386            List(inner) => List(Box::new(inner.map_leaves(f))),
387            #[cfg(feature = "dtype-array")]
388            Array(inner, size) => Array(Box::new(inner.map_leaves(f)), size),
389            #[cfg(feature = "dtype-struct")]
390            Struct(fields) => {
391                let new_fields = fields
392                    .into_iter()
393                    .map(|fld| Field::new(fld.name, fld.dtype.map_leaves(f)))
394                    .collect();
395                Struct(new_fields)
396            },
397            #[cfg(feature = "dtype-extension")]
398            Extension(ext, storage) => Extension(ext, Box::new(storage.map_leaves(f))),
399            _ => f(self),
400        }
401    }
402
403    /// Return whether the cast to `to` makes sense.
404    ///
405    /// If it `None`, we are not sure.
406    pub fn can_cast_to(&self, to: &DataType) -> Option<bool> {
407        if self == to {
408            return Some(true);
409        }
410        if self.is_primitive_numeric() && to.is_primitive_numeric() {
411            return Some(true);
412        }
413
414        if self.is_null() {
415            return Some(true);
416        }
417
418        use DataType as D;
419        Some(match (self, to) {
420            #[cfg(feature = "dtype-categorical")]
421            (D::Categorical(_, _) | D::Enum(_, _), D::Binary)
422            | (D::Binary, D::Categorical(_, _) | D::Enum(_, _)) => false, // TODO @ cat-rework: why can we not cast to Binary?
423
424            #[cfg(feature = "object")]
425            (D::Object(_), D::Object(_)) => true,
426            #[cfg(feature = "object")]
427            (D::Object(_), _) | (_, D::Object(_)) => false,
428
429            (D::Boolean, dt) | (dt, D::Boolean) => match dt {
430                dt if dt.is_primitive_numeric() => true,
431                #[cfg(feature = "dtype-decimal")]
432                D::Decimal(_, _) => true,
433                D::String | D::Binary => true,
434                _ => false,
435            },
436
437            (D::List(from), D::List(to)) => from.can_cast_to(to)?,
438            #[cfg(feature = "dtype-array")]
439            (D::Array(from, l_width), D::Array(to, r_width)) => {
440                l_width == r_width && from.can_cast_to(to)?
441            },
442            #[cfg(feature = "dtype-struct")]
443            (D::Struct(l_fields), D::Struct(r_fields)) => {
444                if l_fields.is_empty() {
445                    return Some(true);
446                }
447
448                if l_fields.len() != r_fields.len() {
449                    return Some(false);
450                }
451
452                for (l, r) in l_fields.iter().zip(r_fields) {
453                    if !l.dtype().can_cast_to(r.dtype())? {
454                        return Some(false);
455                    }
456                }
457
458                true
459            },
460
461            // @NOTE: we are being conversative
462            _ => return None,
463        })
464    }
465
466    pub fn implode(self) -> DataType {
467        DataType::List(Box::new(self))
468    }
469
470    /// Convert to the physical data type
471    #[must_use]
472    pub fn to_physical(&self) -> DataType {
473        use DataType::*;
474        match self {
475            Date => Int32,
476            Datetime(_, _) => Int64,
477            Duration(_) => Int64,
478            Time => Int64,
479            #[cfg(feature = "dtype-decimal")]
480            Decimal(_, _) => Int128,
481            #[cfg(feature = "dtype-categorical")]
482            Categorical(cats, _) => cats.physical().dtype(),
483            #[cfg(feature = "dtype-categorical")]
484            Enum(fcats, _) => fcats.physical().dtype(),
485            #[cfg(feature = "dtype-array")]
486            Array(dt, width) => Array(Box::new(dt.to_physical()), *width),
487            List(dt) => List(Box::new(dt.to_physical())),
488            #[cfg(feature = "dtype-struct")]
489            Struct(fields) => {
490                let new_fields = fields
491                    .iter()
492                    .map(|s| Field::new(s.name().clone(), s.dtype().to_physical()))
493                    .collect();
494                Struct(new_fields)
495            },
496            #[cfg(feature = "dtype-extension")]
497            Extension(_, storage) => storage.to_physical(),
498            _ => self.clone(),
499        }
500    }
501
502    #[must_use]
503    pub fn to_storage(&self) -> DataType {
504        use DataType::*;
505        match self {
506            #[cfg(feature = "dtype-extension")]
507            Extension(_, storage) => storage.to_storage(),
508            _ => self.clone(),
509        }
510    }
511
512    pub fn is_supported_list_arithmetic_input(&self) -> bool {
513        self.is_primitive_numeric() || self.is_bool() || self.is_null()
514    }
515
516    /// Check if this [`DataType`] is a logical type
517    pub fn is_logical(&self) -> bool {
518        self != &self.to_physical()
519    }
520
521    /// Check if this [`DataType`] is a temporal type
522    pub fn is_temporal(&self) -> bool {
523        use DataType::*;
524        matches!(self, Date | Datetime(_, _) | Duration(_) | Time)
525    }
526
527    /// Check if datatype is a primitive type. By that we mean that
528    /// it is not a nested or logical type.
529    pub fn is_primitive(&self) -> bool {
530        self.is_primitive_numeric()
531            | matches!(
532                self,
533                DataType::Boolean | DataType::String | DataType::Binary
534            )
535    }
536
537    /// Check if this [`DataType`] is a primitive numeric type (excludes Decimal).
538    pub fn is_primitive_numeric(&self) -> bool {
539        self.is_float() || self.is_integer()
540    }
541
542    /// Check if this [`DataType`] is a boolean.
543    pub fn is_bool(&self) -> bool {
544        matches!(self, DataType::Boolean)
545    }
546
547    /// Check if this [`DataType`] is a list.
548    pub fn is_list(&self) -> bool {
549        matches!(self, DataType::List(_))
550    }
551
552    /// Check if this [`DataType`] is an array.
553    pub fn is_array(&self) -> bool {
554        #[cfg(feature = "dtype-array")]
555        {
556            matches!(self, DataType::Array(_, _))
557        }
558        #[cfg(not(feature = "dtype-array"))]
559        {
560            false
561        }
562    }
563
564    pub fn is_nested(&self) -> bool {
565        match self {
566            DataType::List(_) => true,
567            #[cfg(feature = "dtype-array")]
568            DataType::Array(_, _) => true,
569            #[cfg(feature = "dtype-struct")]
570            DataType::Struct(_) => true,
571            #[cfg(feature = "dtype-extension")]
572            DataType::Extension(_, storage) => storage.is_nested(),
573            _ => false,
574        }
575    }
576
577    /// Check if this [`DataType`] is a struct
578    pub fn is_struct(&self) -> bool {
579        #[cfg(feature = "dtype-struct")]
580        {
581            matches!(self, DataType::Struct(_))
582        }
583        #[cfg(not(feature = "dtype-struct"))]
584        {
585            false
586        }
587    }
588
589    pub fn is_binary(&self) -> bool {
590        matches!(self, DataType::Binary)
591    }
592
593    pub fn is_date(&self) -> bool {
594        matches!(self, DataType::Date)
595    }
596    pub fn is_datetime(&self) -> bool {
597        matches!(self, DataType::Datetime(..))
598    }
599
600    pub fn is_duration(&self) -> bool {
601        matches!(self, DataType::Duration(..))
602    }
603
604    pub fn is_object(&self) -> bool {
605        #[cfg(feature = "object")]
606        {
607            matches!(self, DataType::Object(_))
608        }
609        #[cfg(not(feature = "object"))]
610        {
611            false
612        }
613    }
614
615    pub fn is_null(&self) -> bool {
616        matches!(self, DataType::Null)
617    }
618
619    pub fn contains_views(&self) -> bool {
620        use DataType::*;
621        match self {
622            Binary | String => true,
623            List(inner) => inner.contains_views(),
624            #[cfg(feature = "dtype-array")]
625            Array(inner, _) => inner.contains_views(),
626            #[cfg(feature = "dtype-struct")]
627            Struct(fields) => fields.iter().any(|field| field.dtype.contains_views()),
628            _ => false,
629        }
630    }
631
632    pub fn contains_categoricals(&self) -> bool {
633        use DataType::*;
634        match self {
635            #[cfg(feature = "dtype-categorical")]
636            Categorical(_, _) | Enum(_, _) => true,
637            List(inner) => inner.contains_categoricals(),
638            #[cfg(feature = "dtype-array")]
639            Array(inner, _) => inner.contains_categoricals(),
640            #[cfg(feature = "dtype-struct")]
641            Struct(fields) => fields
642                .iter()
643                .any(|field| field.dtype.contains_categoricals()),
644            _ => false,
645        }
646    }
647
648    pub fn contains_objects(&self) -> bool {
649        use DataType::*;
650        match self {
651            #[cfg(feature = "object")]
652            Object(_) => true,
653            List(inner) => inner.contains_objects(),
654            #[cfg(feature = "dtype-array")]
655            Array(inner, _) => inner.contains_objects(),
656            #[cfg(feature = "dtype-struct")]
657            Struct(fields) => fields.iter().any(|field| field.dtype.contains_objects()),
658            _ => false,
659        }
660    }
661
662    pub fn contains_list_recursive(&self) -> bool {
663        use DataType as D;
664        match self {
665            D::List(_) => true,
666            #[cfg(feature = "dtype-array")]
667            D::Array(inner, _) => inner.contains_list_recursive(),
668            #[cfg(feature = "dtype-struct")]
669            D::Struct(fields) => fields
670                .iter()
671                .any(|field| field.dtype.contains_list_recursive()),
672            _ => false,
673        }
674    }
675
676    pub fn contains_unknown(&self) -> bool {
677        use DataType as D;
678        match self {
679            D::Unknown(_) => true,
680            D::List(inner) => inner.contains_unknown(),
681            #[cfg(feature = "dtype-array")]
682            D::Array(inner, _) => inner.contains_unknown(),
683            #[cfg(feature = "dtype-struct")]
684            D::Struct(fields) => fields.iter().any(|field| field.dtype.contains_unknown()),
685            _ => false,
686        }
687    }
688
689    /// Check if type is sortable
690    pub fn is_ord(&self) -> bool {
691        let phys = self.to_physical();
692        phys.is_primitive_numeric()
693            || self.is_decimal()
694            || matches!(
695                phys,
696                DataType::Binary | DataType::String | DataType::Boolean
697            )
698    }
699
700    /// Check if this [`DataType`] is a Decimal type (of any scale/precision).
701    pub fn is_decimal(&self) -> bool {
702        match self {
703            #[cfg(feature = "dtype-decimal")]
704            DataType::Decimal(_, _) => true,
705            _ => false,
706        }
707    }
708
709    /// Check if this [`DataType`] is a basic floating point type (excludes Decimal).
710    /// Note, this also includes `Unknown(UnknownKind::Float)`.
711    pub fn is_float(&self) -> bool {
712        matches!(
713            self,
714            DataType::Float16
715                | DataType::Float32
716                | DataType::Float64
717                | DataType::Unknown(UnknownKind::Float)
718        )
719    }
720
721    /// Check if this [`DataType`] is an integer. Note, this also includes `Unknown(UnknownKind::Int)`.
722    pub fn is_integer(&self) -> bool {
723        matches!(
724            self,
725            DataType::Int8
726                | DataType::Int16
727                | DataType::Int32
728                | DataType::Int64
729                | DataType::Int128
730                | DataType::UInt8
731                | DataType::UInt16
732                | DataType::UInt32
733                | DataType::UInt64
734                | DataType::UInt128
735                | DataType::Unknown(UnknownKind::Int(_))
736        )
737    }
738
739    pub fn is_signed_integer(&self) -> bool {
740        // allow because it cannot be replaced when object feature is activated
741        matches!(
742            self,
743            DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 | DataType::Int128
744        )
745    }
746
747    pub fn is_unsigned_integer(&self) -> bool {
748        matches!(
749            self,
750            DataType::UInt8
751                | DataType::UInt16
752                | DataType::UInt32
753                | DataType::UInt64
754                | DataType::UInt128,
755        )
756    }
757
758    pub fn is_string(&self) -> bool {
759        matches!(self, DataType::String | DataType::Unknown(UnknownKind::Str))
760    }
761
762    pub fn is_categorical(&self) -> bool {
763        #[cfg(feature = "dtype-categorical")]
764        {
765            matches!(self, DataType::Categorical(_, _))
766        }
767        #[cfg(not(feature = "dtype-categorical"))]
768        {
769            false
770        }
771    }
772
773    pub fn is_enum(&self) -> bool {
774        #[cfg(feature = "dtype-categorical")]
775        {
776            matches!(self, DataType::Enum(_, _))
777        }
778        #[cfg(not(feature = "dtype-categorical"))]
779        {
780            false
781        }
782    }
783
784    pub fn is_extension(&self) -> bool {
785        #[cfg(feature = "dtype-extension")]
786        {
787            matches!(self, DataType::Extension(_, _))
788        }
789        #[cfg(not(feature = "dtype-extension"))]
790        {
791            false
792        }
793    }
794
795    /// Convert to an Arrow Field.
796    pub fn to_arrow_field(&self, name: PlSmallStr, compat_level: CompatLevel) -> ArrowField {
797        let metadata = match self {
798            #[cfg(feature = "dtype-categorical")]
799            DataType::Enum(fcats, _map) => {
800                let cats = fcats.categories();
801                let strings_size: usize = cats
802                    .values_iter()
803                    .map(|s| (s.len() + 1).ilog10() as usize + 1 + s.len())
804                    .sum();
805                let mut encoded = String::with_capacity(strings_size);
806                for cat in cats.values_iter() {
807                    encoded.push_str(itoa::Buffer::new().format(cat.len()));
808                    encoded.push(';');
809                    encoded.push_str(cat);
810                }
811                Some(BTreeMap::from([(
812                    PlSmallStr::from_static(DTYPE_ENUM_VALUES_NEW),
813                    PlSmallStr::from_string(encoded),
814                )]))
815            },
816            #[cfg(feature = "dtype-categorical")]
817            DataType::Categorical(cats, _) => {
818                let mut encoded = String::new();
819                encoded.push_str(itoa::Buffer::new().format(cats.name().len()));
820                encoded.push(';');
821                encoded.push_str(cats.name());
822                encoded.push_str(itoa::Buffer::new().format(cats.namespace().len()));
823                encoded.push(';');
824                encoded.push_str(cats.namespace());
825                encoded.push_str(cats.physical().as_str());
826                encoded.push(';');
827
828                Some(BTreeMap::from([(
829                    PlSmallStr::from_static(DTYPE_CATEGORICAL_NEW),
830                    PlSmallStr::from_string(encoded),
831                )]))
832            },
833            DataType::BinaryOffset => Some(BTreeMap::from([(
834                PlSmallStr::from_static(PL_KEY),
835                PlSmallStr::from_static(MAINTAIN_PL_TYPE),
836            )])),
837            _ => None,
838        };
839
840        let field = ArrowField::new(name, self.to_arrow(compat_level), true);
841
842        if let Some(metadata) = metadata {
843            field.with_metadata(metadata)
844        } else {
845            field
846        }
847    }
848
849    /// Try to get the maximum value for this datatype.
850    pub fn max(&self) -> PolarsResult<Scalar> {
851        use DataType::*;
852        let v = match self {
853            Int8 => Scalar::from(i8::MAX),
854            Int16 => Scalar::from(i16::MAX),
855            Int32 => Scalar::from(i32::MAX),
856            Int64 => Scalar::from(i64::MAX),
857            Int128 => Scalar::from(i128::MAX),
858            UInt8 => Scalar::from(u8::MAX),
859            UInt16 => Scalar::from(u16::MAX),
860            UInt32 => Scalar::from(u32::MAX),
861            UInt64 => Scalar::from(u64::MAX),
862            UInt128 => Scalar::from(u128::MAX),
863            Float16 => Scalar::from(pf16::INFINITY),
864            Float32 => Scalar::from(f32::INFINITY),
865            Float64 => Scalar::from(f64::INFINITY),
866            #[cfg(feature = "dtype-time")]
867            Time => Scalar::new(Time, AnyValue::Time(NS_IN_DAY - 1)),
868            dt => polars_bail!(ComputeError: "cannot determine upper bound for dtype `{}`", dt),
869        };
870        Ok(v)
871    }
872
873    /// Try to get the minimum value for this datatype.
874    pub fn min(&self) -> PolarsResult<Scalar> {
875        use DataType::*;
876        let v = match self {
877            Int8 => Scalar::from(i8::MIN),
878            Int16 => Scalar::from(i16::MIN),
879            Int32 => Scalar::from(i32::MIN),
880            Int64 => Scalar::from(i64::MIN),
881            Int128 => Scalar::from(i128::MIN),
882            UInt8 => Scalar::from(u8::MIN),
883            UInt16 => Scalar::from(u16::MIN),
884            UInt32 => Scalar::from(u32::MIN),
885            UInt64 => Scalar::from(u64::MIN),
886            UInt128 => Scalar::from(u128::MIN),
887            Float16 => Scalar::from(pf16::NEG_INFINITY),
888            Float32 => Scalar::from(f32::NEG_INFINITY),
889            Float64 => Scalar::from(f64::NEG_INFINITY),
890            #[cfg(feature = "dtype-time")]
891            Time => Scalar::new(Time, AnyValue::Time(0)),
892            dt => polars_bail!(ComputeError: "cannot determine lower bound for dtype `{}`", dt),
893        };
894        Ok(v)
895    }
896
897    /// Convert to an Arrow data type.
898    #[inline]
899    pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowDataType {
900        self.try_to_arrow(compat_level).unwrap()
901    }
902
903    #[inline]
904    pub fn try_to_arrow(&self, compat_level: CompatLevel) -> PolarsResult<ArrowDataType> {
905        use DataType::*;
906        match self {
907            Boolean => Ok(ArrowDataType::Boolean),
908            UInt8 => Ok(ArrowDataType::UInt8),
909            UInt16 => Ok(ArrowDataType::UInt16),
910            UInt32 => Ok(ArrowDataType::UInt32),
911            UInt64 => Ok(ArrowDataType::UInt64),
912            UInt128 => Ok(ArrowDataType::UInt128),
913            Int8 => Ok(ArrowDataType::Int8),
914            Int16 => Ok(ArrowDataType::Int16),
915            Int32 => Ok(ArrowDataType::Int32),
916            Int64 => Ok(ArrowDataType::Int64),
917            Int128 => Ok(ArrowDataType::Int128),
918            Float16 => Ok(ArrowDataType::Float16),
919            Float32 => Ok(ArrowDataType::Float32),
920            Float64 => Ok(ArrowDataType::Float64),
921            #[cfg(feature = "dtype-decimal")]
922            Decimal(precision, scale) => {
923                assert!(*precision >= 1 && *precision <= 38);
924                Ok(ArrowDataType::Decimal(*precision, *scale))
925            },
926            String => {
927                let dt = if compat_level.0 >= 1 {
928                    ArrowDataType::Utf8View
929                } else {
930                    ArrowDataType::LargeUtf8
931                };
932                Ok(dt)
933            },
934            Binary => {
935                let dt = if compat_level.0 >= 1 {
936                    ArrowDataType::BinaryView
937                } else {
938                    ArrowDataType::LargeBinary
939                };
940                Ok(dt)
941            },
942            Date => Ok(ArrowDataType::Date32),
943            Datetime(unit, tz) => Ok(ArrowDataType::Timestamp(
944                unit.to_arrow(),
945                tz.as_deref().cloned(),
946            )),
947            Duration(unit) => Ok(ArrowDataType::Duration(unit.to_arrow())),
948            Time => Ok(ArrowDataType::Time64(ArrowTimeUnit::Nanosecond)),
949            #[cfg(feature = "dtype-array")]
950            Array(dt, width) => Ok(ArrowDataType::FixedSizeList(
951                Box::new(dt.to_arrow_field(LIST_VALUES_NAME, compat_level)),
952                *width,
953            )),
954            List(dt) => Ok(ArrowDataType::LargeList(Box::new(
955                dt.to_arrow_field(LIST_VALUES_NAME, compat_level),
956            ))),
957            Null => Ok(ArrowDataType::Null),
958            #[cfg(feature = "object")]
959            Object(_) => Ok(get_object_physical_type()),
960            #[cfg(feature = "dtype-categorical")]
961            Categorical(_, _) | Enum(_, _) => {
962                let arrow_phys = match self.cat_physical().unwrap() {
963                    CategoricalPhysical::U8 => IntegerType::UInt8,
964                    CategoricalPhysical::U16 => IntegerType::UInt16,
965                    CategoricalPhysical::U32 => IntegerType::UInt32,
966                };
967
968                let values = if compat_level.0 >= 1 {
969                    ArrowDataType::Utf8View
970                } else {
971                    ArrowDataType::LargeUtf8
972                };
973
974                Ok(ArrowDataType::Dictionary(
975                    arrow_phys,
976                    Box::new(values),
977                    false,
978                ))
979            },
980            #[cfg(feature = "dtype-struct")]
981            Struct(fields) => {
982                let fields = fields
983                    .iter()
984                    .map(|fld| fld.to_arrow(compat_level))
985                    .collect();
986                Ok(ArrowDataType::Struct(fields))
987            },
988            BinaryOffset => Ok(ArrowDataType::LargeBinary),
989            #[cfg(feature = "dtype-extension")]
990            Extension(typ, inner) => Ok(ArrowDataType::Extension(Box::new(
991                arrow::datatypes::ExtensionType {
992                    name: typ.name().into(),
993                    inner: inner.try_to_arrow(compat_level)?,
994                    metadata: typ.serialize_metadata().map(|m| m.into()),
995                },
996            ))),
997            Unknown(kind) => {
998                let dt = match kind {
999                    UnknownKind::Any => ArrowDataType::Unknown,
1000                    UnknownKind::Float => ArrowDataType::Float64,
1001                    UnknownKind::Str => ArrowDataType::Utf8View,
1002                    UnknownKind::Int(v) => {
1003                        return materialize_dyn_int(*v).dtype().try_to_arrow(compat_level);
1004                    },
1005                };
1006                Ok(dt)
1007            },
1008        }
1009    }
1010
1011    pub fn is_nested_null(&self) -> bool {
1012        use DataType::*;
1013        match self {
1014            Null => true,
1015            List(field) => field.is_nested_null(),
1016            #[cfg(feature = "dtype-array")]
1017            Array(field, _) => field.is_nested_null(),
1018            #[cfg(feature = "dtype-struct")]
1019            Struct(fields) => fields.iter().all(|fld| fld.dtype.is_nested_null()),
1020            _ => false,
1021        }
1022    }
1023
1024    /// Answers if this type matches the given type of a schema.
1025    ///
1026    /// Allows (nested) Null types in this type to match any type in the schema,
1027    /// but not vice versa. In such a case Ok(true) is returned, because a cast
1028    /// is necessary. If no cast is necessary Ok(false) is returned, and an
1029    /// error is returned if the types are incompatible.
1030    pub fn matches_schema_type(&self, schema_type: &DataType) -> PolarsResult<bool> {
1031        match (self, schema_type) {
1032            (DataType::List(l), DataType::List(r)) => l.matches_schema_type(r),
1033            #[cfg(feature = "dtype-array")]
1034            (DataType::Array(l, sl), DataType::Array(r, sr)) => {
1035                Ok(l.matches_schema_type(r)? && sl == sr)
1036            },
1037            #[cfg(feature = "dtype-struct")]
1038            (DataType::Struct(l), DataType::Struct(r)) => {
1039                if l.len() != r.len() {
1040                    polars_bail!(SchemaMismatch: "structs have different number of fields: {} vs {}", l.len(), r.len());
1041                }
1042                let mut must_cast = false;
1043                for (l, r) in l.iter().zip(r.iter()) {
1044                    must_cast |= l.dtype.matches_schema_type(&r.dtype)?;
1045                }
1046                Ok(must_cast)
1047            },
1048            (DataType::Null, DataType::Null) => Ok(false),
1049            #[cfg(feature = "dtype-decimal")]
1050            (DataType::Decimal(p1, s1), DataType::Decimal(p2, s2)) => Ok((p1, s1) != (p2, s2)),
1051            // We don't allow the other way around, only if our current type is
1052            // null and the schema isn't we allow it.
1053            (DataType::Null, _) => Ok(true),
1054            #[cfg(feature = "dtype-categorical")]
1055            (DataType::Categorical(l, _), DataType::Categorical(r, _)) => {
1056                ensure_same_categories(l, r)?;
1057                Ok(false)
1058            },
1059            #[cfg(feature = "dtype-categorical")]
1060            (DataType::Enum(l, _), DataType::Enum(r, _)) => {
1061                ensure_same_frozen_categories(l, r)?;
1062                Ok(false)
1063            },
1064
1065            (l, r) if l == r => Ok(false),
1066            (l, r) => {
1067                polars_bail!(SchemaMismatch: "type {:?} is incompatible with expected type {:?}", l, r)
1068            },
1069        }
1070    }
1071
1072    #[inline]
1073    pub fn is_unknown(&self) -> bool {
1074        matches!(self, DataType::Unknown(_))
1075    }
1076
1077    pub fn nesting_level(&self) -> usize {
1078        let mut level = 0;
1079        let mut slf = self;
1080        while let Some(inner_dtype) = slf.inner_dtype() {
1081            level += 1;
1082            slf = inner_dtype;
1083        }
1084        level
1085    }
1086
1087    /// If this dtype is a Categorical or Enum, returns the physical backing type.
1088    #[cfg(feature = "dtype-categorical")]
1089    pub fn cat_physical(&self) -> PolarsResult<CategoricalPhysical> {
1090        match self {
1091            DataType::Categorical(cats, _) => Ok(cats.physical()),
1092            DataType::Enum(fcats, _) => Ok(fcats.physical()),
1093            _ => {
1094                polars_bail!(SchemaMismatch: "invalid dtype: expected an Enum or Categorical type, received '{:?}'", self)
1095            },
1096        }
1097    }
1098
1099    /// If this dtype is a Categorical or Enum, returns the underlying mapping.
1100    #[cfg(feature = "dtype-categorical")]
1101    pub fn cat_mapping(&self) -> PolarsResult<&Arc<CategoricalMapping>> {
1102        match self {
1103            DataType::Categorical(_, mapping) | DataType::Enum(_, mapping) => Ok(mapping),
1104            _ => {
1105                polars_bail!(SchemaMismatch: "invalid dtype: expected an Enum or Categorical type, received '{:?}'", self)
1106            },
1107        }
1108    }
1109
1110    #[cfg(feature = "dtype-categorical")]
1111    pub fn from_categories(cats: Arc<Categories>) -> Self {
1112        let mapping = cats.mapping();
1113        Self::Categorical(cats, mapping)
1114    }
1115
1116    #[cfg(feature = "dtype-categorical")]
1117    pub fn from_frozen_categories(fcats: Arc<FrozenCategories>) -> Self {
1118        let mapping = fcats.mapping().clone();
1119        Self::Enum(fcats, mapping)
1120    }
1121
1122    pub fn is_numeric(&self) -> bool {
1123        self.is_integer() || self.is_float() || self.is_decimal()
1124    }
1125}
1126
1127impl Display for DataType {
1128    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1129        let s = match self {
1130            DataType::Null => "null",
1131            DataType::Boolean => "bool",
1132            DataType::UInt8 => "u8",
1133            DataType::UInt16 => "u16",
1134            DataType::UInt32 => "u32",
1135            DataType::UInt64 => "u64",
1136            DataType::UInt128 => "u128",
1137            DataType::Int8 => "i8",
1138            DataType::Int16 => "i16",
1139            DataType::Int32 => "i32",
1140            DataType::Int64 => "i64",
1141            DataType::Int128 => "i128",
1142            DataType::Float16 => "f16",
1143            DataType::Float32 => "f32",
1144            DataType::Float64 => "f64",
1145            #[cfg(feature = "dtype-decimal")]
1146            DataType::Decimal(p, s) => return write!(f, "decimal[{p},{s}]"),
1147            DataType::String => "str",
1148            DataType::Binary => "binary",
1149            DataType::BinaryOffset => "binary[offset]",
1150            DataType::Date => "date",
1151            DataType::Datetime(tu, None) => return write!(f, "datetime[{tu}]"),
1152            DataType::Datetime(tu, Some(tz)) => return write!(f, "datetime[{tu}, {tz}]"),
1153            DataType::Duration(tu) => return write!(f, "duration[{tu}]"),
1154            DataType::Time => "time",
1155            #[cfg(feature = "dtype-array")]
1156            DataType::Array(_, _) => {
1157                let tp = self.array_leaf_dtype().unwrap();
1158
1159                let dims = self.get_shape().unwrap();
1160                let shape = if dims.len() == 1 {
1161                    format!("{}", dims[0])
1162                } else {
1163                    format_tuple!(dims)
1164                };
1165                return write!(f, "array[{tp}, {shape}]");
1166            },
1167            DataType::List(tp) => return write!(f, "list[{tp}]"),
1168            #[cfg(feature = "object")]
1169            DataType::Object(s) => s,
1170            #[cfg(feature = "dtype-categorical")]
1171            DataType::Categorical(_, _) => "cat",
1172            #[cfg(feature = "dtype-categorical")]
1173            DataType::Enum(_, _) => "enum",
1174            #[cfg(feature = "dtype-struct")]
1175            DataType::Struct(fields) => return write!(f, "struct[{}]", fields.len()),
1176            #[cfg(feature = "dtype-extension")]
1177            DataType::Extension(typ, _) => return write!(f, "ext[{}]", typ.0.dyn_display()),
1178            DataType::Unknown(kind) => match kind {
1179                UnknownKind::Any => "unknown",
1180                UnknownKind::Int(_) => "dyn int",
1181                UnknownKind::Float => "dyn float",
1182                UnknownKind::Str => "dyn str",
1183            },
1184        };
1185        f.write_str(s)
1186    }
1187}
1188
1189impl std::fmt::Debug for DataType {
1190    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1191        use DataType::*;
1192        match self {
1193            Boolean => write!(f, "Boolean"),
1194            UInt8 => write!(f, "UInt8"),
1195            UInt16 => write!(f, "UInt16"),
1196            UInt32 => write!(f, "UInt32"),
1197            UInt64 => write!(f, "UInt64"),
1198            UInt128 => write!(f, "UInt128"),
1199            Int8 => write!(f, "Int8"),
1200            Int16 => write!(f, "Int16"),
1201            Int32 => write!(f, "Int32"),
1202            Int64 => write!(f, "Int64"),
1203            Int128 => write!(f, "Int128"),
1204            Float16 => write!(f, "Float16"),
1205            Float32 => write!(f, "Float32"),
1206            Float64 => write!(f, "Float64"),
1207            String => write!(f, "String"),
1208            Binary => write!(f, "Binary"),
1209            BinaryOffset => write!(f, "BinaryOffset"),
1210            Date => write!(f, "Date"),
1211            Time => write!(f, "Time"),
1212            Duration(unit) => write!(f, "Duration('{unit}')"),
1213            Datetime(unit, opt_tz) => {
1214                if let Some(tz) = opt_tz {
1215                    write!(f, "Datetime('{unit}', '{tz}')")
1216                } else {
1217                    write!(f, "Datetime('{unit}')")
1218                }
1219            },
1220            #[cfg(feature = "dtype-decimal")]
1221            Decimal(p, s) => write!(f, "Decimal({p}, {s})"),
1222            #[cfg(feature = "dtype-array")]
1223            Array(inner, size) => write!(f, "Array({inner:?}, {size})"),
1224            List(inner) => write!(f, "List({inner:?})"),
1225            #[cfg(feature = "dtype-struct")]
1226            Struct(fields) => {
1227                let mut first = true;
1228                write!(f, "Struct({{")?;
1229                for field in fields {
1230                    if !first {
1231                        write!(f, ", ")?;
1232                    }
1233                    write!(f, "'{}': {:?}", field.name(), field.dtype())?;
1234                    first = false;
1235                }
1236                write!(f, "}})")
1237            },
1238            #[cfg(feature = "dtype-categorical")]
1239            Categorical(cats, _) => {
1240                if cats.is_global() {
1241                    write!(f, "Categorical")
1242                } else if cats.namespace().is_empty() && cats.physical() == CategoricalPhysical::U32
1243                {
1244                    write!(f, "Categorical('{}')", cats.name())
1245                } else {
1246                    write!(
1247                        f,
1248                        "Categorical('{}', '{}', {:?})",
1249                        cats.name(),
1250                        cats.namespace(),
1251                        cats.physical()
1252                    )
1253                }
1254            },
1255            #[cfg(feature = "dtype-categorical")]
1256            Enum(_, _) => write!(f, "Enum([...])"),
1257            #[cfg(feature = "object")]
1258            Object(_) => write!(f, "Object"),
1259            Null => write!(f, "Null"),
1260            #[cfg(feature = "dtype-extension")]
1261            Extension(typ, inner) => write!(f, "Extension({}, {inner:?})", typ.0.dyn_debug()),
1262            Unknown(kind) => write!(f, "Unknown({kind:?})"),
1263        }
1264    }
1265}
1266
1267pub fn merge_dtypes(left: &DataType, right: &DataType) -> PolarsResult<DataType> {
1268    use DataType::*;
1269    Ok(match (left, right) {
1270        #[cfg(feature = "dtype-categorical")]
1271        (Categorical(cats_l, map), Categorical(cats_r, _)) => {
1272            ensure_same_categories(cats_l, cats_r)?;
1273            Categorical(cats_l.clone(), map.clone())
1274        },
1275        #[cfg(feature = "dtype-categorical")]
1276        (Enum(fcats_l, map), Enum(fcats_r, _)) => {
1277            ensure_same_frozen_categories(fcats_l, fcats_r)?;
1278            Enum(fcats_l.clone(), map.clone())
1279        },
1280        (List(inner_l), List(inner_r)) => {
1281            let merged = merge_dtypes(inner_l, inner_r)?;
1282            List(Box::new(merged))
1283        },
1284        #[cfg(feature = "dtype-struct")]
1285        (Struct(inner_l), Struct(inner_r)) => {
1286            polars_ensure!(inner_l.len() == inner_r.len(), ComputeError: "cannot combine structs with differing amounts of fields ({} != {})", inner_l.len(), inner_r.len());
1287            let fields = inner_l.iter().zip(inner_r.iter()).map(|(l, r)| {
1288                polars_ensure!(l.name() == r.name(), ComputeError: "cannot combine structs with different fields ({} != {})", l.name(), r.name());
1289                let merged = merge_dtypes(l.dtype(), r.dtype())?;
1290                Ok(Field::new(l.name().clone(), merged))
1291            }).collect::<PolarsResult<Vec<_>>>()?;
1292            Struct(fields)
1293        },
1294        #[cfg(feature = "dtype-array")]
1295        (Array(inner_l, width_l), Array(inner_r, width_r)) => {
1296            polars_ensure!(width_l == width_r, ComputeError: "widths of FixedSizeWidth Series are not equal");
1297            let merged = merge_dtypes(inner_l, inner_r)?;
1298            Array(Box::new(merged), *width_l)
1299        },
1300        (left, right) if left == right => left.clone(),
1301        _ => polars_bail!(ComputeError: "unable to merge datatypes"),
1302    })
1303}
1304
1305fn collect_nested_types(
1306    dtype: &DataType,
1307    result: &mut PlHashSet<DataType>,
1308    include_compound_types: bool,
1309) {
1310    match dtype {
1311        DataType::List(inner) => {
1312            if include_compound_types {
1313                result.insert(dtype.clone());
1314            }
1315            collect_nested_types(inner, result, include_compound_types);
1316        },
1317        #[cfg(feature = "dtype-array")]
1318        DataType::Array(inner, _) => {
1319            if include_compound_types {
1320                result.insert(dtype.clone());
1321            }
1322            collect_nested_types(inner, result, include_compound_types);
1323        },
1324        #[cfg(feature = "dtype-struct")]
1325        DataType::Struct(fields) => {
1326            if include_compound_types {
1327                result.insert(dtype.clone());
1328            }
1329            for field in fields {
1330                collect_nested_types(field.dtype(), result, include_compound_types);
1331            }
1332        },
1333        _ => {
1334            result.insert(dtype.clone());
1335        },
1336    }
1337}
1338
1339pub fn unpack_dtypes(dtype: &DataType, include_compound_types: bool) -> PlHashSet<DataType> {
1340    let mut result = PlHashSet::new();
1341    collect_nested_types(dtype, &mut result, include_compound_types);
1342    result
1343}
1344
/// Compatibility level used when converting datatypes to Arrow.
///
/// Wraps a `u16`. In this file, `DataType::try_to_arrow` emits the Arrow view
/// types (`Utf8View` / `BinaryView`) for levels `>= 1` and the large-offset
/// types (`LargeUtf8` / `LargeBinary`) otherwise.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct CompatLevel(pub(crate) u16);
1349
1350impl CompatLevel {
1351    pub const fn newest() -> CompatLevel {
1352        CompatLevel(1)
1353    }
1354
1355    pub const fn oldest() -> CompatLevel {
1356        CompatLevel(0)
1357    }
1358
1359    // The following methods are only used internally
1360
1361    #[doc(hidden)]
1362    pub fn with_level(level: u16) -> PolarsResult<CompatLevel> {
1363        if level > CompatLevel::newest().0 {
1364            polars_bail!(InvalidOperation: "invalid compat level");
1365        }
1366        Ok(CompatLevel(level))
1367    }
1368
1369    #[doc(hidden)]
1370    pub fn get_level(&self) -> u16 {
1371        self.0
1372    }
1373}
1374
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds `List(Array(Float64, 10))` and returns it together with its
    /// inner `Array(Float64, 10)`.
    #[cfg(feature = "dtype-array")]
    fn nested_fixture() -> (DataType, DataType) {
        let array_type = DataType::Array(Box::new(DataType::Float64), 10);
        let list_type = DataType::List(Box::new(array_type.clone()));
        (list_type, array_type)
    }

    /// Without compound types only the leaf dtype is reported.
    #[cfg(feature = "dtype-array")]
    #[test]
    fn test_unpack_primitive_dtypes() {
        let (list_type, _) = nested_fixture();

        let expected: PlHashSet<DataType> = [DataType::Float64].into_iter().collect();

        assert_eq!(unpack_dtypes(&list_type, false), expected)
    }

    /// With compound types every level of the nesting is reported.
    #[cfg(feature = "dtype-array")]
    #[test]
    fn test_unpack_compound_dtypes() {
        let (list_type, array_type) = nested_fixture();

        let unpacked = unpack_dtypes(&list_type, true);

        let expected: PlHashSet<DataType> = [list_type, array_type, DataType::Float64]
            .into_iter()
            .collect();

        assert_eq!(unpacked, expected)
    }
}