polars_core/datatypes/
dtype.rs

1use std::collections::BTreeMap;
2
3use arrow::datatypes::{
4    DTYPE_CATEGORICAL_NEW, DTYPE_ENUM_VALUES_LEGACY, DTYPE_ENUM_VALUES_NEW, MAINTAIN_PL_TYPE,
5    Metadata, PL_KEY,
6};
7#[cfg(feature = "dtype-array")]
8use polars_utils::format_tuple;
9use polars_utils::itertools::Itertools;
10#[cfg(any(feature = "serde-lazy", feature = "serde"))]
11use serde::{Deserialize, Serialize};
12pub use temporal::time_zone::TimeZone;
13
14use super::*;
15#[cfg(feature = "object")]
16use crate::chunked_array::object::registry::get_object_physical_type;
17#[cfg(feature = "dtype-extension")]
18pub use crate::datatypes::extension::ExtensionTypeInstance;
19use crate::utils::materialize_dyn_int;
20
21pub trait MetaDataExt: IntoMetadata {
22    fn pl_enum_metadata(&self) -> Option<&str> {
23        let md = self.into_metadata_ref();
24        let values = md
25            .get(DTYPE_ENUM_VALUES_NEW)
26            .or_else(|| md.get(DTYPE_ENUM_VALUES_LEGACY));
27        Some(values?.as_str())
28    }
29
30    fn pl_categorical_metadata(&self) -> Option<&str> {
31        // We ignore DTYPE_CATEGORICAL_LEGACY here, as we already map all
32        // string-typed arrow dictionaries to the global Categories, and the
33        // legacy metadata format only specifies the now-removed physical
34        // ordering parameter.
35        Some(
36            self.into_metadata_ref()
37                .get(DTYPE_CATEGORICAL_NEW)?
38                .as_str(),
39        )
40    }
41
42    fn maintain_type(&self) -> bool {
43        let metadata = self.into_metadata_ref();
44        metadata.get(PL_KEY).map(|s| s.as_str()) == Some(MAINTAIN_PL_TYPE)
45    }
46}
47
// `Metadata` gets every `MetaDataExt` accessor through the trait's default methods.
impl MetaDataExt for Metadata {}
/// Types that can hand out a borrowed [`Metadata`] map.
pub trait IntoMetadata {
    /// Borrow the underlying metadata.
    // The `into_` prefix on a `&self` method trips clippy's naming lint; the
    // name is kept as-is, hence the allow.
    #[allow(clippy::wrong_self_convention)]
    fn into_metadata_ref(&self) -> &Metadata;
}
53
impl IntoMetadata for Metadata {
    /// A `Metadata` is its own metadata: simply return `self`.
    fn into_metadata_ref(&self) -> &Metadata {
        self
    }
}
59
/// The flavour of a not-yet-resolved [`DataType::Unknown`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
#[cfg_attr(
    any(feature = "serde", feature = "serde-lazy"),
    derive(Serialize, Deserialize)
)]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum UnknownKind {
    // Hold the value to determine the concrete size.
    /// An integer whose concrete width is still to be chosen.
    Int(i128),
    /// A float; materializes to `Float64`.
    Float,
    // Can be Categorical or String
    /// A string-like value; materializes to `String`.
    Str,
    /// Nothing is known about the type; cannot be materialized.
    #[default]
    Any,
}
75
76impl UnknownKind {
77    pub fn materialize(&self) -> Option<DataType> {
78        let dtype = match self {
79            UnknownKind::Int(v) => materialize_dyn_int(*v).dtype(),
80            UnknownKind::Float => DataType::Float64,
81            UnknownKind::Str => DataType::String,
82            UnknownKind::Any => return None,
83        };
84        Some(dtype)
85    }
86}
87
/// The logical data types known to this crate.
#[derive(Clone)]
pub enum DataType {
    Boolean,
    UInt8,
    UInt16,
    UInt32,
    UInt64,
    UInt128,
    Int8,
    Int16,
    Int32,
    Int64,
    Int128,
    Float16,
    Float32,
    Float64,
    /// Fixed point decimal type with a precision and a non-negative scale.
    /// This is backed by a signed 128-bit integer which allows for up to 38 significant digits.
    /// Meaning max precision is 38.
    #[cfg(feature = "dtype-decimal")]
    Decimal(usize, usize), // (precision, scale), invariant: 1 <= precision <= 38.
    /// String data
    String,
    Binary,
    BinaryOffset,
    /// A 32-bit date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in days (32 bits).
    Date,
    /// A 64-bit date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in the given timeunit (64 bits).
    Datetime(TimeUnit, Option<TimeZone>),
    /// 64-bit integer representing the difference between times in the given timeunit.
    Duration(TimeUnit),
    /// A 64-bit time representing the elapsed time since midnight in nanoseconds
    Time,
    /// A nested list with a fixed size in each row
    #[cfg(feature = "dtype-array")]
    Array(Box<DataType>, usize),
    /// A nested list with a variable size in each row
    List(Box<DataType>),
    /// A generic type that can be used in a `Series`
    /// &'static str can be used to determine/set inner type
    #[cfg(feature = "object")]
    Object(&'static str),
    Null,
    /// Categorical data: the categories plus their mapping.
    #[cfg(feature = "dtype-categorical")]
    Categorical(Arc<Categories>, Arc<CategoricalMapping>),
    // NOTE(review): an earlier comment here described the payload as an
    // `Option`; neither field is optional any more. The shape mirrors
    // `Categorical`, so both variants can be matched with the same guards.
    /// Enum data: a frozen set of categories plus their mapping.
    #[cfg(feature = "dtype-categorical")]
    Enum(Arc<FrozenCategories>, Arc<CategoricalMapping>),
    #[cfg(feature = "dtype-struct")]
    Struct(Vec<Field>),
    /// An extension type together with its storage dtype.
    #[cfg(feature = "dtype-extension")]
    Extension(ExtensionTypeInstance, Box<DataType>),
    // some logical types we cannot know statically, e.g. Datetime
    Unknown(UnknownKind),
}
145
146impl Default for DataType {
147    fn default() -> Self {
148        DataType::Unknown(UnknownKind::Any)
149    }
150}
151
/// Types that can expose a borrowed [`DataType`].
pub trait AsRefDataType {
    /// Borrow the data type.
    fn as_ref_dtype(&self) -> &DataType;
}
155
impl Hash for DataType {
    /// Hashes only the variant discriminant; payloads (time units, inner
    /// types, categories, ...) do not contribute to the hash.
    ///
    /// Values that compare equal always share a discriminant, so this stays
    /// consistent with `PartialEq`, at the cost of collisions between
    /// different parametrizations of the same variant.
    fn hash<H: Hasher>(&self, state: &mut H) {
        std::mem::discriminant(self).hash(state)
    }
}
161
162impl PartialEq for DataType {
163    fn eq(&self, other: &Self) -> bool {
164        use DataType::*;
165        {
166            match (self, other) {
167                #[cfg(feature = "dtype-categorical")]
168                (Categorical(cats_l, _), Categorical(cats_r, _)) => Arc::ptr_eq(cats_l, cats_r),
169                #[cfg(feature = "dtype-categorical")]
170                (Enum(fcats_l, _), Enum(fcats_r, _)) => Arc::ptr_eq(fcats_l, fcats_r),
171                (Datetime(tu_l, tz_l), Datetime(tu_r, tz_r)) => tu_l == tu_r && tz_l == tz_r,
172                (List(left_inner), List(right_inner)) => left_inner == right_inner,
173                #[cfg(feature = "dtype-duration")]
174                (Duration(tu_l), Duration(tu_r)) => tu_l == tu_r,
175                #[cfg(feature = "dtype-decimal")]
176                (Decimal(p1, s1), Decimal(p2, s2)) => (p1, s1) == (p2, s2),
177                #[cfg(feature = "object")]
178                (Object(lhs), Object(rhs)) => lhs == rhs,
179                #[cfg(feature = "dtype-struct")]
180                (Struct(lhs), Struct(rhs)) => {
181                    std::ptr::eq(Vec::as_ptr(lhs), Vec::as_ptr(rhs)) || lhs == rhs
182                },
183                #[cfg(feature = "dtype-array")]
184                (Array(left_inner, left_width), Array(right_inner, right_width)) => {
185                    left_width == right_width && left_inner == right_inner
186                },
187                (Unknown(l), Unknown(r)) => match (l, r) {
188                    (UnknownKind::Int(_), UnknownKind::Int(_)) => true,
189                    _ => l == r,
190                },
191                _ => std::mem::discriminant(self) == std::mem::discriminant(other),
192            }
193        }
194    }
195}
196
// Marker impl: declares the hand-written `PartialEq` above to be a full equivalence relation.
impl Eq for DataType {}
198
199impl DataType {
200    pub const IDX_DTYPE: Self = {
201        #[cfg(not(feature = "bigidx"))]
202        {
203            DataType::UInt32
204        }
205        #[cfg(feature = "bigidx")]
206        {
207            DataType::UInt64
208        }
209    };
210
211    pub fn value_within_range(&self, other: AnyValue) -> bool {
212        use DataType::*;
213        match self {
214            UInt8 => other.extract::<u8>().is_some(),
215            #[cfg(feature = "dtype-u16")]
216            UInt16 => other.extract::<u16>().is_some(),
217            UInt32 => other.extract::<u32>().is_some(),
218            UInt64 => other.extract::<u64>().is_some(),
219            #[cfg(feature = "dtype-u128")]
220            UInt128 => other.extract::<u128>().is_some(),
221            #[cfg(feature = "dtype-i8")]
222            Int8 => other.extract::<i8>().is_some(),
223            #[cfg(feature = "dtype-i16")]
224            Int16 => other.extract::<i16>().is_some(),
225            Int32 => other.extract::<i32>().is_some(),
226            Int64 => other.extract::<i64>().is_some(),
227            #[cfg(feature = "dtype-i128")]
228            Int128 => other.extract::<i128>().is_some(),
229            _ => false,
230        }
231    }
232
233    /// Struct representation of the arrow `month_day_nano_interval` type.
234    #[cfg(feature = "dtype-struct")]
235    pub fn _month_days_ns_struct_type() -> Self {
236        DataType::Struct(vec![
237            Field::new(PlSmallStr::from_static("months"), DataType::Int32),
238            Field::new(PlSmallStr::from_static("days"), DataType::Int32),
239            Field::new(
240                PlSmallStr::from_static("nanoseconds"),
241                DataType::Duration(TimeUnit::Nanoseconds),
242            ),
243        ])
244    }
245
246    /// Check if the whole dtype is known.
247    pub fn is_known(&self) -> bool {
248        match self {
249            DataType::List(inner) => inner.is_known(),
250            #[cfg(feature = "dtype-array")]
251            DataType::Array(inner, _) => inner.is_known(),
252            #[cfg(feature = "dtype-struct")]
253            DataType::Struct(fields) => fields.iter().all(|fld| fld.dtype.is_known()),
254            DataType::Unknown(_) => false,
255            _ => true,
256        }
257    }
258
259    /// Materialize this datatype if it is unknown. All other datatypes
260    /// are left unchanged.
261    pub fn materialize_unknown(self, allow_unknown: bool) -> PolarsResult<DataType> {
262        match self {
263            DataType::Unknown(u) => match u.materialize() {
264                Some(known) => Ok(known),
265                None => {
266                    if allow_unknown {
267                        Ok(DataType::Unknown(u))
268                    } else {
269                        polars_bail!(SchemaMismatch: "failed to materialize unknown type")
270                    }
271                },
272            },
273            DataType::List(inner) => Ok(DataType::List(Box::new(
274                inner.materialize_unknown(allow_unknown)?,
275            ))),
276            #[cfg(feature = "dtype-array")]
277            DataType::Array(inner, size) => Ok(DataType::Array(
278                Box::new(inner.materialize_unknown(allow_unknown)?),
279                size,
280            )),
281            #[cfg(feature = "dtype-struct")]
282            DataType::Struct(fields) => Ok(DataType::Struct(
283                fields
284                    .into_iter()
285                    .map(|f| {
286                        PolarsResult::Ok(Field::new(
287                            f.name,
288                            f.dtype.materialize_unknown(allow_unknown)?,
289                        ))
290                    })
291                    .try_collect_vec()?,
292            )),
293            _ => Ok(self),
294        }
295    }
296
297    #[cfg(feature = "dtype-array")]
298    /// Get the full shape of a multidimensional array.
299    pub fn get_shape(&self) -> Option<Vec<usize>> {
300        fn get_shape_impl(dt: &DataType, shape: &mut Vec<usize>) {
301            if let DataType::Array(inner, size) = dt {
302                shape.push(*size);
303                get_shape_impl(inner, shape);
304            }
305        }
306
307        if let DataType::Array(inner, size) = self {
308            let mut shape = vec![*size];
309            get_shape_impl(inner, &mut shape);
310            Some(shape)
311        } else {
312            None
313        }
314    }
315
316    /// Get the inner data type of a nested type.
317    pub fn inner_dtype(&self) -> Option<&DataType> {
318        match self {
319            DataType::List(inner) => Some(inner),
320            #[cfg(feature = "dtype-array")]
321            DataType::Array(inner, _) => Some(inner),
322            _ => None,
323        }
324    }
325
326    /// Get the inner data type of a nested type.
327    pub fn into_inner_dtype(self) -> Option<DataType> {
328        match self {
329            DataType::List(inner) => Some(*inner),
330            #[cfg(feature = "dtype-array")]
331            DataType::Array(inner, _) => Some(*inner),
332            _ => None,
333        }
334    }
335
336    /// Get the inner data type of a nested type.
337    pub fn try_into_inner_dtype(self) -> PolarsResult<DataType> {
338        match self {
339            DataType::List(inner) => Ok(*inner),
340            #[cfg(feature = "dtype-array")]
341            DataType::Array(inner, _) => Ok(*inner),
342            dt => polars_bail!(InvalidOperation: "cannot get inner datatype of `{dt}`"),
343        }
344    }
345
346    /// Get the absolute inner data type of a nested type.
347    pub fn leaf_dtype(&self) -> &DataType {
348        let mut prev = self;
349        while let Some(dtype) = prev.inner_dtype() {
350            prev = dtype
351        }
352        prev
353    }
354
355    #[cfg(feature = "dtype-array")]
356    /// Get the inner data type of a multidimensional array.
357    pub fn array_leaf_dtype(&self) -> Option<&DataType> {
358        let mut prev = self;
359        match prev {
360            DataType::Array(_, _) => {
361                while let DataType::Array(inner, _) = &prev {
362                    prev = inner;
363                }
364                Some(prev)
365            },
366            _ => None,
367        }
368    }
369
370    /// Cast the leaf types of Lists/Arrays and keep the nesting.
371    pub fn cast_leaf(&self, to: DataType) -> DataType {
372        use DataType::*;
373        match self {
374            List(inner) => List(Box::new(inner.cast_leaf(to))),
375            #[cfg(feature = "dtype-array")]
376            Array(inner, size) => Array(Box::new(inner.cast_leaf(to)), *size),
377            _ => to,
378        }
379    }
380
381    /// Return whether the cast to `to` makes sense.
382    ///
383    /// If it `None`, we are not sure.
384    pub fn can_cast_to(&self, to: &DataType) -> Option<bool> {
385        if self == to {
386            return Some(true);
387        }
388        if self.is_primitive_numeric() && to.is_primitive_numeric() {
389            return Some(true);
390        }
391
392        if self.is_null() {
393            return Some(true);
394        }
395
396        use DataType as D;
397        Some(match (self, to) {
398            #[cfg(feature = "dtype-categorical")]
399            (D::Categorical(_, _) | D::Enum(_, _), D::Binary)
400            | (D::Binary, D::Categorical(_, _) | D::Enum(_, _)) => false, // TODO @ cat-rework: why can we not cast to Binary?
401
402            #[cfg(feature = "object")]
403            (D::Object(_), D::Object(_)) => true,
404            #[cfg(feature = "object")]
405            (D::Object(_), _) | (_, D::Object(_)) => false,
406
407            (D::Boolean, dt) | (dt, D::Boolean) => match dt {
408                dt if dt.is_primitive_numeric() => true,
409                #[cfg(feature = "dtype-decimal")]
410                D::Decimal(_, _) => true,
411                D::String | D::Binary => true,
412                _ => false,
413            },
414
415            (D::List(from), D::List(to)) => from.can_cast_to(to)?,
416            #[cfg(feature = "dtype-array")]
417            (D::Array(from, l_width), D::Array(to, r_width)) => {
418                l_width == r_width && from.can_cast_to(to)?
419            },
420            #[cfg(feature = "dtype-struct")]
421            (D::Struct(l_fields), D::Struct(r_fields)) => {
422                if l_fields.is_empty() {
423                    return Some(true);
424                }
425
426                if l_fields.len() != r_fields.len() {
427                    return Some(false);
428                }
429
430                for (l, r) in l_fields.iter().zip(r_fields) {
431                    if !l.dtype().can_cast_to(r.dtype())? {
432                        return Some(false);
433                    }
434                }
435
436                true
437            },
438
439            // @NOTE: we are being conversative
440            _ => return None,
441        })
442    }
443
444    pub fn implode(self) -> DataType {
445        DataType::List(Box::new(self))
446    }
447
448    /// Convert to the physical data type
449    #[must_use]
450    pub fn to_physical(&self) -> DataType {
451        use DataType::*;
452        match self {
453            Date => Int32,
454            Datetime(_, _) => Int64,
455            Duration(_) => Int64,
456            Time => Int64,
457            #[cfg(feature = "dtype-decimal")]
458            Decimal(_, _) => Int128,
459            #[cfg(feature = "dtype-categorical")]
460            Categorical(cats, _) => cats.physical().dtype(),
461            #[cfg(feature = "dtype-categorical")]
462            Enum(fcats, _) => fcats.physical().dtype(),
463            #[cfg(feature = "dtype-array")]
464            Array(dt, width) => Array(Box::new(dt.to_physical()), *width),
465            List(dt) => List(Box::new(dt.to_physical())),
466            #[cfg(feature = "dtype-struct")]
467            Struct(fields) => {
468                let new_fields = fields
469                    .iter()
470                    .map(|s| Field::new(s.name().clone(), s.dtype().to_physical()))
471                    .collect();
472                Struct(new_fields)
473            },
474            #[cfg(feature = "dtype-extension")]
475            Extension(_, storage) => storage.to_physical(),
476            _ => self.clone(),
477        }
478    }
479
480    #[must_use]
481    pub fn to_storage(&self) -> DataType {
482        use DataType::*;
483        match self {
484            #[cfg(feature = "dtype-extension")]
485            Extension(_, storage) => storage.to_storage(),
486            _ => self.clone(),
487        }
488    }
489
490    pub fn is_supported_list_arithmetic_input(&self) -> bool {
491        self.is_primitive_numeric() || self.is_bool() || self.is_null()
492    }
493
494    /// Check if this [`DataType`] is a logical type
495    pub fn is_logical(&self) -> bool {
496        self != &self.to_physical()
497    }
498
499    /// Check if this [`DataType`] is a temporal type
500    pub fn is_temporal(&self) -> bool {
501        use DataType::*;
502        matches!(self, Date | Datetime(_, _) | Duration(_) | Time)
503    }
504
505    /// Check if datatype is a primitive type. By that we mean that
506    /// it is not a nested or logical type.
507    pub fn is_primitive(&self) -> bool {
508        self.is_primitive_numeric()
509            | matches!(
510                self,
511                DataType::Boolean | DataType::String | DataType::Binary
512            )
513    }
514
515    /// Check if this [`DataType`] is a primitive numeric type (excludes Decimal).
516    pub fn is_primitive_numeric(&self) -> bool {
517        self.is_float() || self.is_integer()
518    }
519
520    /// Check if this [`DataType`] is a boolean.
521    pub fn is_bool(&self) -> bool {
522        matches!(self, DataType::Boolean)
523    }
524
525    /// Check if this [`DataType`] is a list.
526    pub fn is_list(&self) -> bool {
527        matches!(self, DataType::List(_))
528    }
529
530    /// Check if this [`DataType`] is an array.
531    pub fn is_array(&self) -> bool {
532        #[cfg(feature = "dtype-array")]
533        {
534            matches!(self, DataType::Array(_, _))
535        }
536        #[cfg(not(feature = "dtype-array"))]
537        {
538            false
539        }
540    }
541
542    pub fn is_nested(&self) -> bool {
543        match self {
544            DataType::List(_) => true,
545            #[cfg(feature = "dtype-array")]
546            DataType::Array(_, _) => true,
547            #[cfg(feature = "dtype-struct")]
548            DataType::Struct(_) => true,
549            #[cfg(feature = "dtype-extension")]
550            DataType::Extension(_, storage) => storage.is_nested(),
551            _ => false,
552        }
553    }
554
555    /// Check if this [`DataType`] is a struct
556    pub fn is_struct(&self) -> bool {
557        #[cfg(feature = "dtype-struct")]
558        {
559            matches!(self, DataType::Struct(_))
560        }
561        #[cfg(not(feature = "dtype-struct"))]
562        {
563            false
564        }
565    }
566
567    pub fn is_binary(&self) -> bool {
568        matches!(self, DataType::Binary)
569    }
570
571    pub fn is_date(&self) -> bool {
572        matches!(self, DataType::Date)
573    }
574    pub fn is_datetime(&self) -> bool {
575        matches!(self, DataType::Datetime(..))
576    }
577
578    pub fn is_duration(&self) -> bool {
579        matches!(self, DataType::Duration(..))
580    }
581
582    pub fn is_object(&self) -> bool {
583        #[cfg(feature = "object")]
584        {
585            matches!(self, DataType::Object(_))
586        }
587        #[cfg(not(feature = "object"))]
588        {
589            false
590        }
591    }
592
593    pub fn is_null(&self) -> bool {
594        matches!(self, DataType::Null)
595    }
596
597    pub fn contains_views(&self) -> bool {
598        use DataType::*;
599        match self {
600            Binary | String => true,
601            List(inner) => inner.contains_views(),
602            #[cfg(feature = "dtype-array")]
603            Array(inner, _) => inner.contains_views(),
604            #[cfg(feature = "dtype-struct")]
605            Struct(fields) => fields.iter().any(|field| field.dtype.contains_views()),
606            _ => false,
607        }
608    }
609
610    pub fn contains_categoricals(&self) -> bool {
611        use DataType::*;
612        match self {
613            #[cfg(feature = "dtype-categorical")]
614            Categorical(_, _) | Enum(_, _) => true,
615            List(inner) => inner.contains_categoricals(),
616            #[cfg(feature = "dtype-array")]
617            Array(inner, _) => inner.contains_categoricals(),
618            #[cfg(feature = "dtype-struct")]
619            Struct(fields) => fields
620                .iter()
621                .any(|field| field.dtype.contains_categoricals()),
622            _ => false,
623        }
624    }
625
626    pub fn contains_objects(&self) -> bool {
627        use DataType::*;
628        match self {
629            #[cfg(feature = "object")]
630            Object(_) => true,
631            List(inner) => inner.contains_objects(),
632            #[cfg(feature = "dtype-array")]
633            Array(inner, _) => inner.contains_objects(),
634            #[cfg(feature = "dtype-struct")]
635            Struct(fields) => fields.iter().any(|field| field.dtype.contains_objects()),
636            _ => false,
637        }
638    }
639
640    pub fn contains_list_recursive(&self) -> bool {
641        use DataType as D;
642        match self {
643            D::List(_) => true,
644            #[cfg(feature = "dtype-array")]
645            D::Array(inner, _) => inner.contains_list_recursive(),
646            #[cfg(feature = "dtype-struct")]
647            D::Struct(fields) => fields
648                .iter()
649                .any(|field| field.dtype.contains_list_recursive()),
650            _ => false,
651        }
652    }
653
654    pub fn contains_unknown(&self) -> bool {
655        use DataType as D;
656        match self {
657            D::Unknown(_) => true,
658            D::List(inner) => inner.contains_unknown(),
659            #[cfg(feature = "dtype-array")]
660            D::Array(inner, _) => inner.contains_unknown(),
661            #[cfg(feature = "dtype-struct")]
662            D::Struct(fields) => fields.iter().any(|field| field.dtype.contains_unknown()),
663            _ => false,
664        }
665    }
666
667    /// Check if type is sortable
668    pub fn is_ord(&self) -> bool {
669        let phys = self.to_physical();
670        phys.is_primitive_numeric()
671            || self.is_decimal()
672            || matches!(
673                phys,
674                DataType::Binary | DataType::String | DataType::Boolean
675            )
676    }
677
678    /// Check if this [`DataType`] is a Decimal type (of any scale/precision).
679    pub fn is_decimal(&self) -> bool {
680        match self {
681            #[cfg(feature = "dtype-decimal")]
682            DataType::Decimal(_, _) => true,
683            _ => false,
684        }
685    }
686
687    /// Check if this [`DataType`] is a basic floating point type (excludes Decimal).
688    /// Note, this also includes `Unknown(UnknownKind::Float)`.
689    pub fn is_float(&self) -> bool {
690        matches!(
691            self,
692            DataType::Float16
693                | DataType::Float32
694                | DataType::Float64
695                | DataType::Unknown(UnknownKind::Float)
696        )
697    }
698
699    /// Check if this [`DataType`] is an integer. Note, this also includes `Unknown(UnknownKind::Int)`.
700    pub fn is_integer(&self) -> bool {
701        matches!(
702            self,
703            DataType::Int8
704                | DataType::Int16
705                | DataType::Int32
706                | DataType::Int64
707                | DataType::Int128
708                | DataType::UInt8
709                | DataType::UInt16
710                | DataType::UInt32
711                | DataType::UInt64
712                | DataType::UInt128
713                | DataType::Unknown(UnknownKind::Int(_))
714        )
715    }
716
717    pub fn is_signed_integer(&self) -> bool {
718        // allow because it cannot be replaced when object feature is activated
719        matches!(
720            self,
721            DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 | DataType::Int128
722        )
723    }
724
725    pub fn is_unsigned_integer(&self) -> bool {
726        matches!(
727            self,
728            DataType::UInt8
729                | DataType::UInt16
730                | DataType::UInt32
731                | DataType::UInt64
732                | DataType::UInt128,
733        )
734    }
735
736    pub fn is_string(&self) -> bool {
737        matches!(self, DataType::String | DataType::Unknown(UnknownKind::Str))
738    }
739
740    pub fn is_categorical(&self) -> bool {
741        #[cfg(feature = "dtype-categorical")]
742        {
743            matches!(self, DataType::Categorical(_, _))
744        }
745        #[cfg(not(feature = "dtype-categorical"))]
746        {
747            false
748        }
749    }
750
751    pub fn is_enum(&self) -> bool {
752        #[cfg(feature = "dtype-categorical")]
753        {
754            matches!(self, DataType::Enum(_, _))
755        }
756        #[cfg(not(feature = "dtype-categorical"))]
757        {
758            false
759        }
760    }
761
762    pub fn is_extension(&self) -> bool {
763        #[cfg(feature = "dtype-extension")]
764        {
765            matches!(self, DataType::Extension(_, _))
766        }
767        #[cfg(not(feature = "dtype-extension"))]
768        {
769            false
770        }
771    }
772
773    /// Convert to an Arrow Field.
774    pub fn to_arrow_field(&self, name: PlSmallStr, compat_level: CompatLevel) -> ArrowField {
775        let metadata = match self {
776            #[cfg(feature = "dtype-categorical")]
777            DataType::Enum(fcats, _map) => {
778                let cats = fcats.categories();
779                let strings_size: usize = cats
780                    .values_iter()
781                    .map(|s| (s.len() + 1).ilog10() as usize + 1 + s.len())
782                    .sum();
783                let mut encoded = String::with_capacity(strings_size);
784                for cat in cats.values_iter() {
785                    encoded.push_str(itoa::Buffer::new().format(cat.len()));
786                    encoded.push(';');
787                    encoded.push_str(cat);
788                }
789                Some(BTreeMap::from([(
790                    PlSmallStr::from_static(DTYPE_ENUM_VALUES_NEW),
791                    PlSmallStr::from_string(encoded),
792                )]))
793            },
794            #[cfg(feature = "dtype-categorical")]
795            DataType::Categorical(cats, _) => {
796                let mut encoded = String::new();
797                encoded.push_str(itoa::Buffer::new().format(cats.name().len()));
798                encoded.push(';');
799                encoded.push_str(cats.name());
800                encoded.push_str(itoa::Buffer::new().format(cats.namespace().len()));
801                encoded.push(';');
802                encoded.push_str(cats.namespace());
803                encoded.push_str(cats.physical().as_str());
804                encoded.push(';');
805
806                Some(BTreeMap::from([(
807                    PlSmallStr::from_static(DTYPE_CATEGORICAL_NEW),
808                    PlSmallStr::from_string(encoded),
809                )]))
810            },
811            DataType::BinaryOffset => Some(BTreeMap::from([(
812                PlSmallStr::from_static(PL_KEY),
813                PlSmallStr::from_static(MAINTAIN_PL_TYPE),
814            )])),
815            _ => None,
816        };
817
818        let field = ArrowField::new(name, self.to_arrow(compat_level), true);
819
820        if let Some(metadata) = metadata {
821            field.with_metadata(metadata)
822        } else {
823            field
824        }
825    }
826
827    /// Try to get the maximum value for this datatype.
828    pub fn max(&self) -> PolarsResult<Scalar> {
829        use DataType::*;
830        let v = match self {
831            Int8 => Scalar::from(i8::MAX),
832            Int16 => Scalar::from(i16::MAX),
833            Int32 => Scalar::from(i32::MAX),
834            Int64 => Scalar::from(i64::MAX),
835            Int128 => Scalar::from(i128::MAX),
836            UInt8 => Scalar::from(u8::MAX),
837            UInt16 => Scalar::from(u16::MAX),
838            UInt32 => Scalar::from(u32::MAX),
839            UInt64 => Scalar::from(u64::MAX),
840            UInt128 => Scalar::from(u128::MAX),
841            Float16 => Scalar::from(pf16::INFINITY),
842            Float32 => Scalar::from(f32::INFINITY),
843            Float64 => Scalar::from(f64::INFINITY),
844            #[cfg(feature = "dtype-time")]
845            Time => Scalar::new(Time, AnyValue::Time(NS_IN_DAY - 1)),
846            dt => polars_bail!(ComputeError: "cannot determine upper bound for dtype `{}`", dt),
847        };
848        Ok(v)
849    }
850
851    /// Try to get the minimum value for this datatype.
852    pub fn min(&self) -> PolarsResult<Scalar> {
853        use DataType::*;
854        let v = match self {
855            Int8 => Scalar::from(i8::MIN),
856            Int16 => Scalar::from(i16::MIN),
857            Int32 => Scalar::from(i32::MIN),
858            Int64 => Scalar::from(i64::MIN),
859            Int128 => Scalar::from(i128::MIN),
860            UInt8 => Scalar::from(u8::MIN),
861            UInt16 => Scalar::from(u16::MIN),
862            UInt32 => Scalar::from(u32::MIN),
863            UInt64 => Scalar::from(u64::MIN),
864            UInt128 => Scalar::from(u128::MIN),
865            Float16 => Scalar::from(pf16::NEG_INFINITY),
866            Float32 => Scalar::from(f32::NEG_INFINITY),
867            Float64 => Scalar::from(f64::NEG_INFINITY),
868            #[cfg(feature = "dtype-time")]
869            Time => Scalar::new(Time, AnyValue::Time(0)),
870            dt => polars_bail!(ComputeError: "cannot determine lower bound for dtype `{}`", dt),
871        };
872        Ok(v)
873    }
874
    /// Convert to an Arrow data type.
    ///
    /// # Panics
    ///
    /// Panics if [`try_to_arrow`](Self::try_to_arrow) returns an error for
    /// this dtype; use that method to handle the failure instead.
    #[inline]
    pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowDataType {
        self.try_to_arrow(compat_level).unwrap()
    }
880
881    #[inline]
882    pub fn try_to_arrow(&self, compat_level: CompatLevel) -> PolarsResult<ArrowDataType> {
883        use DataType::*;
884        match self {
885            Boolean => Ok(ArrowDataType::Boolean),
886            UInt8 => Ok(ArrowDataType::UInt8),
887            UInt16 => Ok(ArrowDataType::UInt16),
888            UInt32 => Ok(ArrowDataType::UInt32),
889            UInt64 => Ok(ArrowDataType::UInt64),
890            UInt128 => Ok(ArrowDataType::UInt128),
891            Int8 => Ok(ArrowDataType::Int8),
892            Int16 => Ok(ArrowDataType::Int16),
893            Int32 => Ok(ArrowDataType::Int32),
894            Int64 => Ok(ArrowDataType::Int64),
895            Int128 => Ok(ArrowDataType::Int128),
896            Float16 => Ok(ArrowDataType::Float16),
897            Float32 => Ok(ArrowDataType::Float32),
898            Float64 => Ok(ArrowDataType::Float64),
899            #[cfg(feature = "dtype-decimal")]
900            Decimal(precision, scale) => {
901                assert!(*precision >= 1 && *precision <= 38);
902                Ok(ArrowDataType::Decimal(*precision, *scale))
903            },
904            String => {
905                let dt = if compat_level.0 >= 1 {
906                    ArrowDataType::Utf8View
907                } else {
908                    ArrowDataType::LargeUtf8
909                };
910                Ok(dt)
911            },
912            Binary => {
913                let dt = if compat_level.0 >= 1 {
914                    ArrowDataType::BinaryView
915                } else {
916                    ArrowDataType::LargeBinary
917                };
918                Ok(dt)
919            },
920            Date => Ok(ArrowDataType::Date32),
921            Datetime(unit, tz) => Ok(ArrowDataType::Timestamp(
922                unit.to_arrow(),
923                tz.as_deref().cloned(),
924            )),
925            Duration(unit) => Ok(ArrowDataType::Duration(unit.to_arrow())),
926            Time => Ok(ArrowDataType::Time64(ArrowTimeUnit::Nanosecond)),
927            #[cfg(feature = "dtype-array")]
928            Array(dt, width) => Ok(ArrowDataType::FixedSizeList(
929                Box::new(dt.to_arrow_field(LIST_VALUES_NAME, compat_level)),
930                *width,
931            )),
932            List(dt) => Ok(ArrowDataType::LargeList(Box::new(
933                dt.to_arrow_field(LIST_VALUES_NAME, compat_level),
934            ))),
935            Null => Ok(ArrowDataType::Null),
936            #[cfg(feature = "object")]
937            Object(_) => Ok(get_object_physical_type()),
938            #[cfg(feature = "dtype-categorical")]
939            Categorical(_, _) | Enum(_, _) => {
940                let arrow_phys = match self.cat_physical().unwrap() {
941                    CategoricalPhysical::U8 => IntegerType::UInt8,
942                    CategoricalPhysical::U16 => IntegerType::UInt16,
943                    CategoricalPhysical::U32 => IntegerType::UInt32,
944                };
945
946                let values = if compat_level.0 >= 1 {
947                    ArrowDataType::Utf8View
948                } else {
949                    ArrowDataType::LargeUtf8
950                };
951
952                Ok(ArrowDataType::Dictionary(
953                    arrow_phys,
954                    Box::new(values),
955                    false,
956                ))
957            },
958            #[cfg(feature = "dtype-struct")]
959            Struct(fields) => {
960                let fields = fields
961                    .iter()
962                    .map(|fld| fld.to_arrow(compat_level))
963                    .collect();
964                Ok(ArrowDataType::Struct(fields))
965            },
966            BinaryOffset => Ok(ArrowDataType::LargeBinary),
967            #[cfg(feature = "dtype-extension")]
968            Extension(typ, inner) => Ok(ArrowDataType::Extension(Box::new(
969                arrow::datatypes::ExtensionType {
970                    name: typ.name().into(),
971                    inner: inner.try_to_arrow(compat_level)?,
972                    metadata: typ.serialize_metadata().map(|m| m.into()),
973                },
974            ))),
975            Unknown(kind) => {
976                let dt = match kind {
977                    UnknownKind::Any => ArrowDataType::Unknown,
978                    UnknownKind::Float => ArrowDataType::Float64,
979                    UnknownKind::Str => ArrowDataType::Utf8View,
980                    UnknownKind::Int(v) => {
981                        return materialize_dyn_int(*v).dtype().try_to_arrow(compat_level);
982                    },
983                };
984                Ok(dt)
985            },
986        }
987    }
988
989    pub fn is_nested_null(&self) -> bool {
990        use DataType::*;
991        match self {
992            Null => true,
993            List(field) => field.is_nested_null(),
994            #[cfg(feature = "dtype-array")]
995            Array(field, _) => field.is_nested_null(),
996            #[cfg(feature = "dtype-struct")]
997            Struct(fields) => fields.iter().all(|fld| fld.dtype.is_nested_null()),
998            _ => false,
999        }
1000    }
1001
1002    /// Answers if this type matches the given type of a schema.
1003    ///
1004    /// Allows (nested) Null types in this type to match any type in the schema,
1005    /// but not vice versa. In such a case Ok(true) is returned, because a cast
1006    /// is necessary. If no cast is necessary Ok(false) is returned, and an
1007    /// error is returned if the types are incompatible.
1008    pub fn matches_schema_type(&self, schema_type: &DataType) -> PolarsResult<bool> {
1009        match (self, schema_type) {
1010            (DataType::List(l), DataType::List(r)) => l.matches_schema_type(r),
1011            #[cfg(feature = "dtype-array")]
1012            (DataType::Array(l, sl), DataType::Array(r, sr)) => {
1013                Ok(l.matches_schema_type(r)? && sl == sr)
1014            },
1015            #[cfg(feature = "dtype-struct")]
1016            (DataType::Struct(l), DataType::Struct(r)) => {
1017                if l.len() != r.len() {
1018                    polars_bail!(SchemaMismatch: "structs have different number of fields: {} vs {}", l.len(), r.len());
1019                }
1020                let mut must_cast = false;
1021                for (l, r) in l.iter().zip(r.iter()) {
1022                    must_cast |= l.dtype.matches_schema_type(&r.dtype)?;
1023                }
1024                Ok(must_cast)
1025            },
1026            (DataType::Null, DataType::Null) => Ok(false),
1027            #[cfg(feature = "dtype-decimal")]
1028            (DataType::Decimal(p1, s1), DataType::Decimal(p2, s2)) => Ok((p1, s1) != (p2, s2)),
1029            // We don't allow the other way around, only if our current type is
1030            // null and the schema isn't we allow it.
1031            (DataType::Null, _) => Ok(true),
1032            #[cfg(feature = "dtype-categorical")]
1033            (DataType::Categorical(l, _), DataType::Categorical(r, _)) => {
1034                ensure_same_categories(l, r)?;
1035                Ok(false)
1036            },
1037            #[cfg(feature = "dtype-categorical")]
1038            (DataType::Enum(l, _), DataType::Enum(r, _)) => {
1039                ensure_same_frozen_categories(l, r)?;
1040                Ok(false)
1041            },
1042
1043            (l, r) if l == r => Ok(false),
1044            (l, r) => {
1045                polars_bail!(SchemaMismatch: "type {:?} is incompatible with expected type {:?}", l, r)
1046            },
1047        }
1048    }
1049
1050    #[inline]
1051    pub fn is_unknown(&self) -> bool {
1052        matches!(self, DataType::Unknown(_))
1053    }
1054
1055    pub fn nesting_level(&self) -> usize {
1056        let mut level = 0;
1057        let mut slf = self;
1058        while let Some(inner_dtype) = slf.inner_dtype() {
1059            level += 1;
1060            slf = inner_dtype;
1061        }
1062        level
1063    }
1064
1065    /// If this dtype is a Categorical or Enum, returns the physical backing type.
1066    #[cfg(feature = "dtype-categorical")]
1067    pub fn cat_physical(&self) -> PolarsResult<CategoricalPhysical> {
1068        match self {
1069            DataType::Categorical(cats, _) => Ok(cats.physical()),
1070            DataType::Enum(fcats, _) => Ok(fcats.physical()),
1071            _ => {
1072                polars_bail!(SchemaMismatch: "invalid dtype: expected an Enum or Categorical type, received '{:?}'", self)
1073            },
1074        }
1075    }
1076
1077    /// If this dtype is a Categorical or Enum, returns the underlying mapping.
1078    #[cfg(feature = "dtype-categorical")]
1079    pub fn cat_mapping(&self) -> PolarsResult<&Arc<CategoricalMapping>> {
1080        match self {
1081            DataType::Categorical(_, mapping) | DataType::Enum(_, mapping) => Ok(mapping),
1082            _ => {
1083                polars_bail!(SchemaMismatch: "invalid dtype: expected an Enum or Categorical type, received '{:?}'", self)
1084            },
1085        }
1086    }
1087
1088    #[cfg(feature = "dtype-categorical")]
1089    pub fn from_categories(cats: Arc<Categories>) -> Self {
1090        let mapping = cats.mapping();
1091        Self::Categorical(cats, mapping)
1092    }
1093
1094    #[cfg(feature = "dtype-categorical")]
1095    pub fn from_frozen_categories(fcats: Arc<FrozenCategories>) -> Self {
1096        let mapping = fcats.mapping().clone();
1097        Self::Enum(fcats, mapping)
1098    }
1099
1100    pub fn is_numeric(&self) -> bool {
1101        self.is_integer() || self.is_float() || self.is_decimal()
1102    }
1103}
1104
1105impl Display for DataType {
1106    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1107        let s = match self {
1108            DataType::Null => "null",
1109            DataType::Boolean => "bool",
1110            DataType::UInt8 => "u8",
1111            DataType::UInt16 => "u16",
1112            DataType::UInt32 => "u32",
1113            DataType::UInt64 => "u64",
1114            DataType::UInt128 => "u128",
1115            DataType::Int8 => "i8",
1116            DataType::Int16 => "i16",
1117            DataType::Int32 => "i32",
1118            DataType::Int64 => "i64",
1119            DataType::Int128 => "i128",
1120            DataType::Float16 => "f16",
1121            DataType::Float32 => "f32",
1122            DataType::Float64 => "f64",
1123            #[cfg(feature = "dtype-decimal")]
1124            DataType::Decimal(p, s) => return write!(f, "decimal[{p},{s}]"),
1125            DataType::String => "str",
1126            DataType::Binary => "binary",
1127            DataType::BinaryOffset => "binary[offset]",
1128            DataType::Date => "date",
1129            DataType::Datetime(tu, None) => return write!(f, "datetime[{tu}]"),
1130            DataType::Datetime(tu, Some(tz)) => return write!(f, "datetime[{tu}, {tz}]"),
1131            DataType::Duration(tu) => return write!(f, "duration[{tu}]"),
1132            DataType::Time => "time",
1133            #[cfg(feature = "dtype-array")]
1134            DataType::Array(_, _) => {
1135                let tp = self.array_leaf_dtype().unwrap();
1136
1137                let dims = self.get_shape().unwrap();
1138                let shape = if dims.len() == 1 {
1139                    format!("{}", dims[0])
1140                } else {
1141                    format_tuple!(dims)
1142                };
1143                return write!(f, "array[{tp}, {shape}]");
1144            },
1145            DataType::List(tp) => return write!(f, "list[{tp}]"),
1146            #[cfg(feature = "object")]
1147            DataType::Object(s) => s,
1148            #[cfg(feature = "dtype-categorical")]
1149            DataType::Categorical(_, _) => "cat",
1150            #[cfg(feature = "dtype-categorical")]
1151            DataType::Enum(_, _) => "enum",
1152            #[cfg(feature = "dtype-struct")]
1153            DataType::Struct(fields) => return write!(f, "struct[{}]", fields.len()),
1154            #[cfg(feature = "dtype-extension")]
1155            DataType::Extension(typ, _) => return write!(f, "ext[{}]", typ.0.dyn_display()),
1156            DataType::Unknown(kind) => match kind {
1157                UnknownKind::Any => "unknown",
1158                UnknownKind::Int(_) => "dyn int",
1159                UnknownKind::Float => "dyn float",
1160                UnknownKind::Str => "dyn str",
1161            },
1162        };
1163        f.write_str(s)
1164    }
1165}
1166
1167impl std::fmt::Debug for DataType {
1168    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1169        use DataType::*;
1170        match self {
1171            Boolean => write!(f, "Boolean"),
1172            UInt8 => write!(f, "UInt8"),
1173            UInt16 => write!(f, "UInt16"),
1174            UInt32 => write!(f, "UInt32"),
1175            UInt64 => write!(f, "UInt64"),
1176            UInt128 => write!(f, "UInt128"),
1177            Int8 => write!(f, "Int8"),
1178            Int16 => write!(f, "Int16"),
1179            Int32 => write!(f, "Int32"),
1180            Int64 => write!(f, "Int64"),
1181            Int128 => write!(f, "Int128"),
1182            Float16 => write!(f, "Float16"),
1183            Float32 => write!(f, "Float32"),
1184            Float64 => write!(f, "Float64"),
1185            String => write!(f, "String"),
1186            Binary => write!(f, "Binary"),
1187            BinaryOffset => write!(f, "BinaryOffset"),
1188            Date => write!(f, "Date"),
1189            Time => write!(f, "Time"),
1190            Duration(unit) => write!(f, "Duration('{unit}')"),
1191            Datetime(unit, opt_tz) => {
1192                if let Some(tz) = opt_tz {
1193                    write!(f, "Datetime('{unit}', '{tz}')")
1194                } else {
1195                    write!(f, "Datetime('{unit}')")
1196                }
1197            },
1198            #[cfg(feature = "dtype-decimal")]
1199            Decimal(p, s) => write!(f, "Decimal({p}, {s})"),
1200            #[cfg(feature = "dtype-array")]
1201            Array(inner, size) => write!(f, "Array({inner:?}, {size})"),
1202            List(inner) => write!(f, "List({inner:?})"),
1203            #[cfg(feature = "dtype-struct")]
1204            Struct(fields) => {
1205                let mut first = true;
1206                write!(f, "Struct({{")?;
1207                for field in fields {
1208                    if !first {
1209                        write!(f, ", ")?;
1210                    }
1211                    write!(f, "'{}': {:?}", field.name(), field.dtype())?;
1212                    first = false;
1213                }
1214                write!(f, "}})")
1215            },
1216            #[cfg(feature = "dtype-categorical")]
1217            Categorical(cats, _) => {
1218                if cats.is_global() {
1219                    write!(f, "Categorical")
1220                } else if cats.namespace().is_empty() && cats.physical() == CategoricalPhysical::U32
1221                {
1222                    write!(f, "Categorical('{}')", cats.name())
1223                } else {
1224                    write!(
1225                        f,
1226                        "Categorical('{}', '{}', {:?})",
1227                        cats.name(),
1228                        cats.namespace(),
1229                        cats.physical()
1230                    )
1231                }
1232            },
1233            #[cfg(feature = "dtype-categorical")]
1234            Enum(_, _) => write!(f, "Enum([...])"),
1235            #[cfg(feature = "object")]
1236            Object(_) => write!(f, "Object"),
1237            Null => write!(f, "Null"),
1238            #[cfg(feature = "dtype-extension")]
1239            Extension(typ, inner) => write!(f, "Extension({}, {inner:?})", typ.0.dyn_debug()),
1240            Unknown(kind) => write!(f, "Unknown({kind:?})"),
1241        }
1242    }
1243}
1244
1245pub fn merge_dtypes(left: &DataType, right: &DataType) -> PolarsResult<DataType> {
1246    use DataType::*;
1247    Ok(match (left, right) {
1248        #[cfg(feature = "dtype-categorical")]
1249        (Categorical(cats_l, map), Categorical(cats_r, _)) => {
1250            ensure_same_categories(cats_l, cats_r)?;
1251            Categorical(cats_l.clone(), map.clone())
1252        },
1253        #[cfg(feature = "dtype-categorical")]
1254        (Enum(fcats_l, map), Enum(fcats_r, _)) => {
1255            ensure_same_frozen_categories(fcats_l, fcats_r)?;
1256            Enum(fcats_l.clone(), map.clone())
1257        },
1258        (List(inner_l), List(inner_r)) => {
1259            let merged = merge_dtypes(inner_l, inner_r)?;
1260            List(Box::new(merged))
1261        },
1262        #[cfg(feature = "dtype-struct")]
1263        (Struct(inner_l), Struct(inner_r)) => {
1264            polars_ensure!(inner_l.len() == inner_r.len(), ComputeError: "cannot combine structs with differing amounts of fields ({} != {})", inner_l.len(), inner_r.len());
1265            let fields = inner_l.iter().zip(inner_r.iter()).map(|(l, r)| {
1266                polars_ensure!(l.name() == r.name(), ComputeError: "cannot combine structs with different fields ({} != {})", l.name(), r.name());
1267                let merged = merge_dtypes(l.dtype(), r.dtype())?;
1268                Ok(Field::new(l.name().clone(), merged))
1269            }).collect::<PolarsResult<Vec<_>>>()?;
1270            Struct(fields)
1271        },
1272        #[cfg(feature = "dtype-array")]
1273        (Array(inner_l, width_l), Array(inner_r, width_r)) => {
1274            polars_ensure!(width_l == width_r, ComputeError: "widths of FixedSizeWidth Series are not equal");
1275            let merged = merge_dtypes(inner_l, inner_r)?;
1276            Array(Box::new(merged), *width_l)
1277        },
1278        (left, right) if left == right => left.clone(),
1279        _ => polars_bail!(ComputeError: "unable to merge datatypes"),
1280    })
1281}
1282
1283fn collect_nested_types(
1284    dtype: &DataType,
1285    result: &mut PlHashSet<DataType>,
1286    include_compound_types: bool,
1287) {
1288    match dtype {
1289        DataType::List(inner) => {
1290            if include_compound_types {
1291                result.insert(dtype.clone());
1292            }
1293            collect_nested_types(inner, result, include_compound_types);
1294        },
1295        #[cfg(feature = "dtype-array")]
1296        DataType::Array(inner, _) => {
1297            if include_compound_types {
1298                result.insert(dtype.clone());
1299            }
1300            collect_nested_types(inner, result, include_compound_types);
1301        },
1302        #[cfg(feature = "dtype-struct")]
1303        DataType::Struct(fields) => {
1304            if include_compound_types {
1305                result.insert(dtype.clone());
1306            }
1307            for field in fields {
1308                collect_nested_types(field.dtype(), result, include_compound_types);
1309            }
1310        },
1311        _ => {
1312            result.insert(dtype.clone());
1313        },
1314    }
1315}
1316
1317pub fn unpack_dtypes(dtype: &DataType, include_compound_types: bool) -> PlHashSet<DataType> {
1318    let mut result = PlHashSet::new();
1319    collect_nested_types(dtype, &mut result, include_compound_types);
1320    result
1321}
1322
1323#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
1324#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1325#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
1326pub struct CompatLevel(pub(crate) u16);
1327
1328impl CompatLevel {
1329    pub const fn newest() -> CompatLevel {
1330        CompatLevel(1)
1331    }
1332
1333    pub const fn oldest() -> CompatLevel {
1334        CompatLevel(0)
1335    }
1336
1337    // The following methods are only used internally
1338
1339    #[doc(hidden)]
1340    pub fn with_level(level: u16) -> PolarsResult<CompatLevel> {
1341        if level > CompatLevel::newest().0 {
1342            polars_bail!(InvalidOperation: "invalid compat level");
1343        }
1344        Ok(CompatLevel(level))
1345    }
1346
1347    #[doc(hidden)]
1348    pub fn get_level(&self) -> u16 {
1349        self.0
1350    }
1351}
1352
#[cfg(test)]
mod tests {
    use super::*;

    /// Without compound types only the leaf dtype is reported.
    #[cfg(feature = "dtype-array")]
    #[test]
    fn test_unpack_primitive_dtypes() {
        let list_of_arrays =
            DataType::List(Box::new(DataType::Array(Box::new(DataType::Float64), 10)));

        let result = unpack_dtypes(&list_of_arrays, false);

        let mut expected = PlHashSet::new();
        expected.insert(DataType::Float64);

        assert_eq!(result, expected)
    }

    /// With compound types every nesting layer is reported next to the leaf.
    #[cfg(feature = "dtype-array")]
    #[test]
    fn test_unpack_compound_dtypes() {
        let array_type = DataType::Array(Box::new(DataType::Float64), 10);
        let list_type = DataType::List(Box::new(array_type.clone()));

        let result = unpack_dtypes(&list_type, true);

        let mut expected = PlHashSet::new();
        for dtype in [list_type, array_type, DataType::Float64] {
            expected.insert(dtype);
        }

        assert_eq!(result, expected)
    }
}
1388}