Skip to main content

polars_core/datatypes/
dtype.rs

1use std::borrow::Cow;
2use std::collections::BTreeMap;
3
4use arrow::datatypes::{
5    DTYPE_CATEGORICAL_NEW, DTYPE_ENUM_VALUES_LEGACY, DTYPE_ENUM_VALUES_NEW, MAINTAIN_PL_TYPE,
6    Metadata, PL_KEY,
7};
8#[cfg(feature = "dtype-array")]
9use polars_utils::format_tuple;
10use polars_utils::itertools::Itertools;
11#[cfg(any(feature = "serde-lazy", feature = "serde"))]
12use serde::{Deserialize, Serialize};
13pub use temporal::time_zone::TimeZone;
14
15use super::*;
16#[cfg(feature = "object")]
17use crate::chunked_array::object::registry::get_object_physical_type;
18#[cfg(feature = "dtype-extension")]
19pub use crate::datatypes::extension::ExtensionTypeInstance;
20use crate::utils::materialize_dyn_int;
21
22pub trait MetaDataExt: IntoMetadata {
23    fn pl_enum_metadata(&self) -> Option<&str> {
24        let md = self.into_metadata_ref();
25        let values = md
26            .get(DTYPE_ENUM_VALUES_NEW)
27            .or_else(|| md.get(DTYPE_ENUM_VALUES_LEGACY));
28        Some(values?.as_str())
29    }
30
31    fn pl_categorical_metadata(&self) -> Option<&str> {
32        // We ignore DTYPE_CATEGORICAL_LEGACY here, as we already map all
33        // string-typed arrow dictionaries to the global Categories, and the
34        // legacy metadata format only specifies the now-removed physical
35        // ordering parameter.
36        Some(
37            self.into_metadata_ref()
38                .get(DTYPE_CATEGORICAL_NEW)?
39                .as_str(),
40        )
41    }
42
43    fn maintain_type(&self) -> bool {
44        let metadata = self.into_metadata_ref();
45        metadata.get(PL_KEY).map(|s| s.as_str()) == Some(MAINTAIN_PL_TYPE)
46    }
47}
48
// `Metadata` uses every provided `MetaDataExt` method as-is; nothing is overridden.
impl MetaDataExt for Metadata {}
/// Types that can hand out a borrowed [`Metadata`] map.
pub trait IntoMetadata {
    /// Borrow the metadata map.
    // The `into_` prefix on a `&self` method triggers clippy's
    // `wrong_self_convention` lint, hence the explicit allow.
    #[allow(clippy::wrong_self_convention)]
    fn into_metadata_ref(&self) -> &Metadata;
}
54
impl IntoMetadata for Metadata {
    /// A `Metadata` is its own metadata map, so this returns `self`.
    fn into_metadata_ref(&self) -> &Metadata {
        self
    }
}
60
/// The flavour of a not-yet-determined data type carried by `DataType::Unknown`.
///
/// Defaults to [`UnknownKind::Any`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
#[cfg_attr(
    any(feature = "serde", feature = "serde-lazy"),
    derive(Serialize, Deserialize)
)]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum UnknownKind {
    /// An integer of undetermined width.
    // Hold the value to determine the concrete size.
    Int(i128),
    /// A float of undetermined width.
    Float,
    /// A string-like value.
    // Can be Categorical or String
    Str,
    /// Nothing is known about the type.
    #[default]
    Any,
}
76
77impl UnknownKind {
78    pub fn materialize(&self) -> Option<DataType> {
79        let dtype = match self {
80            UnknownKind::Int(v) => materialize_dyn_int(*v).dtype(),
81            UnknownKind::Float => DataType::Float64,
82            UnknownKind::Str => DataType::String,
83            UnknownKind::Any => return None,
84        };
85        Some(dtype)
86    }
87}
88
/// The data types known to this crate.
///
/// Variants carrying a `#[cfg(feature = "...")]` attribute only exist when the
/// corresponding crate feature is enabled.
#[derive(Clone)]
pub enum DataType {
    Boolean,
    UInt8,
    UInt16,
    UInt32,
    UInt64,
    UInt128,
    Int8,
    Int16,
    Int32,
    Int64,
    Int128,
    Float16,
    Float32,
    Float64,
    /// Fixed point decimal type with a precision and a non-negative scale.
    /// This is backed by a signed 128-bit integer which allows for up to 38 significant digits.
    /// Meaning max precision is 38.
    #[cfg(feature = "dtype-decimal")]
    Decimal(usize, usize), // (precision, scale), invariant: 1 <= precision <= 38.
    /// String data
    String,
    Binary,
    BinaryOffset,
    /// A 32-bit date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in days (32 bits).
    Date,
    /// A 64-bit date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in the given timeunit (64 bits).
    Datetime(TimeUnit, Option<TimeZone>),
    /// 64-bit integer representing the difference between times in the given time unit.
    Duration(TimeUnit),
    /// A 64-bit time representing the elapsed time since midnight in nanoseconds
    Time,
    /// A nested list with a fixed size in each row
    #[cfg(feature = "dtype-array")]
    Array(Box<DataType>, usize),
    /// A nested list with a variable size in each row
    List(Box<DataType>),
    /// A generic type that can be used in a `Series`
    /// &'static str can be used to determine/set inner type
    #[cfg(feature = "object")]
    Object(&'static str),
    Null,
    /// Categorical data: the shared categories plus their mapping.
    #[cfg(feature = "dtype-categorical")]
    Categorical(Arc<Categories>, Arc<CategoricalMapping>),
    /// Enum data: a frozen set of categories plus their mapping.
    // The payload has the same two-field shape as `Categorical`, so that matching
    // Enum/Categoricals can take the same guards.
    // NOTE(review): this comment used to describe the payload as an `Option`;
    // neither field is an `Option` in the current definition.
    #[cfg(feature = "dtype-categorical")]
    Enum(Arc<FrozenCategories>, Arc<CategoricalMapping>),
    /// A struct made of named, typed fields.
    #[cfg(feature = "dtype-struct")]
    Struct(Vec<Field>),
    /// An extension type together with its storage dtype.
    #[cfg(feature = "dtype-extension")]
    Extension(ExtensionTypeInstance, Box<DataType>),
    /// A type that is not (fully) determined yet; see [`UnknownKind`].
    // some logical types we cannot know statically, e.g. Datetime
    Unknown(UnknownKind),
}
146
/// Types that can hand out a borrowed [`DataType`].
pub trait AsRefDataType {
    /// Borrow the data type.
    fn as_ref_dtype(&self) -> &DataType;
}
150
151impl Hash for DataType {
152    fn hash<H: Hasher>(&self, state: &mut H) {
153        std::mem::discriminant(self).hash(state)
154    }
155}
156
157impl PartialEq for DataType {
158    fn eq(&self, other: &Self) -> bool {
159        use DataType::*;
160        {
161            match (self, other) {
162                #[cfg(feature = "dtype-categorical")]
163                (Categorical(cats_l, _), Categorical(cats_r, _)) => Arc::ptr_eq(cats_l, cats_r),
164                #[cfg(feature = "dtype-categorical")]
165                (Enum(fcats_l, _), Enum(fcats_r, _)) => Arc::ptr_eq(fcats_l, fcats_r),
166                (Datetime(tu_l, tz_l), Datetime(tu_r, tz_r)) => tu_l == tu_r && tz_l == tz_r,
167                (List(left_inner), List(right_inner)) => left_inner == right_inner,
168                #[cfg(feature = "dtype-duration")]
169                (Duration(tu_l), Duration(tu_r)) => tu_l == tu_r,
170                #[cfg(feature = "dtype-decimal")]
171                (Decimal(p1, s1), Decimal(p2, s2)) => (p1, s1) == (p2, s2),
172                #[cfg(feature = "object")]
173                (Object(lhs), Object(rhs)) => lhs == rhs,
174                #[cfg(feature = "dtype-struct")]
175                (Struct(lhs), Struct(rhs)) => {
176                    std::ptr::eq(Vec::as_ptr(lhs), Vec::as_ptr(rhs)) || lhs == rhs
177                },
178                #[cfg(feature = "dtype-array")]
179                (Array(left_inner, left_width), Array(right_inner, right_width)) => {
180                    left_width == right_width && left_inner == right_inner
181                },
182                #[cfg(feature = "dtype-extension")]
183                (Extension(ext_l, storage_l), Extension(ext_r, storage_r)) => {
184                    ext_l == ext_r && storage_l == storage_r
185                },
186                (Unknown(l), Unknown(r)) => match (l, r) {
187                    (UnknownKind::Int(_), UnknownKind::Int(_)) => true,
188                    _ => l == r,
189                },
190                _ => std::mem::discriminant(self) == std::mem::discriminant(other),
191            }
192        }
193    }
194}
195
// Marker impl: declares the hand-written `PartialEq` above to be a full equivalence relation.
impl Eq for DataType {}
197
198impl DataType {
199    pub const IDX_DTYPE: Self = {
200        #[cfg(not(feature = "bigidx"))]
201        {
202            DataType::UInt32
203        }
204        #[cfg(feature = "bigidx")]
205        {
206            DataType::UInt64
207        }
208    };
209
210    pub fn pretty_format(&self) -> String {
211        match self {
212            #[cfg(feature = "dtype-struct")]
213            Self::Struct(fields) => {
214                let formatted_fields = fields
215                    .iter()
216                    .map(|field| format!("{}: {}", field.name, field.dtype.pretty_format()))
217                    .collect::<Vec<String>>()
218                    .join(", ");
219                format!("struct {{{}}}", formatted_fields)
220            },
221            Self::List(inner_dtype) => {
222                let formatted_dtype = inner_dtype.pretty_format();
223                format!("list[{}]", formatted_dtype)
224            },
225            #[cfg(feature = "dtype-array")]
226            Self::Array(inner_dtype, size) => {
227                let formatted_dtype = inner_dtype.pretty_format();
228                format!("array[{}, {}]", formatted_dtype, size)
229            },
230            _ => {
231                format!("{}", self)
232            },
233        }
234    }
235
236    pub fn value_within_range(&self, other: AnyValue) -> bool {
237        use DataType::*;
238        match self {
239            UInt8 => other.extract::<u8>().is_some(),
240            #[cfg(feature = "dtype-u16")]
241            UInt16 => other.extract::<u16>().is_some(),
242            UInt32 => other.extract::<u32>().is_some(),
243            UInt64 => other.extract::<u64>().is_some(),
244            #[cfg(feature = "dtype-u128")]
245            UInt128 => other.extract::<u128>().is_some(),
246            #[cfg(feature = "dtype-i8")]
247            Int8 => other.extract::<i8>().is_some(),
248            #[cfg(feature = "dtype-i16")]
249            Int16 => other.extract::<i16>().is_some(),
250            Int32 => other.extract::<i32>().is_some(),
251            Int64 => other.extract::<i64>().is_some(),
252            #[cfg(feature = "dtype-i128")]
253            Int128 => other.extract::<i128>().is_some(),
254            _ => false,
255        }
256    }
257
258    /// Struct representation of the arrow `month_day_nano_interval` type.
259    #[cfg(feature = "dtype-struct")]
260    pub fn _month_days_ns_struct_type() -> Self {
261        DataType::Struct(vec![
262            Field::new(PlSmallStr::from_static("months"), DataType::Int32),
263            Field::new(PlSmallStr::from_static("days"), DataType::Int32),
264            Field::new(
265                PlSmallStr::from_static("nanoseconds"),
266                DataType::Duration(TimeUnit::Nanoseconds),
267            ),
268        ])
269    }
270
271    /// Check if the whole dtype is known.
272    pub fn is_known(&self) -> bool {
273        match self {
274            DataType::List(inner) => inner.is_known(),
275            #[cfg(feature = "dtype-array")]
276            DataType::Array(inner, _) => inner.is_known(),
277            #[cfg(feature = "dtype-struct")]
278            DataType::Struct(fields) => fields.iter().all(|fld| fld.dtype.is_known()),
279            DataType::Unknown(_) => false,
280            _ => true,
281        }
282    }
283
284    /// Materialize this datatype if it is unknown. All other datatypes
285    /// are left unchanged.
286    pub fn materialize_unknown(self, allow_unknown: bool) -> PolarsResult<DataType> {
287        match self {
288            DataType::Unknown(u) => match u.materialize() {
289                Some(known) => Ok(known),
290                None => {
291                    if allow_unknown {
292                        Ok(DataType::Unknown(u))
293                    } else {
294                        polars_bail!(SchemaMismatch: "failed to materialize unknown type")
295                    }
296                },
297            },
298            DataType::List(inner) => Ok(DataType::List(Box::new(
299                inner.materialize_unknown(allow_unknown)?,
300            ))),
301            #[cfg(feature = "dtype-array")]
302            DataType::Array(inner, size) => Ok(DataType::Array(
303                Box::new(inner.materialize_unknown(allow_unknown)?),
304                size,
305            )),
306            #[cfg(feature = "dtype-struct")]
307            DataType::Struct(fields) => Ok(DataType::Struct(
308                fields
309                    .into_iter()
310                    .map(|f| {
311                        PolarsResult::Ok(Field::new(
312                            f.name,
313                            f.dtype.materialize_unknown(allow_unknown)?,
314                        ))
315                    })
316                    .try_collect_vec()?,
317            )),
318            _ => Ok(self),
319        }
320    }
321
322    #[cfg(feature = "dtype-array")]
323    /// Get the full shape of a multidimensional array.
324    pub fn get_shape(&self) -> Option<Vec<usize>> {
325        fn get_shape_impl(dt: &DataType, shape: &mut Vec<usize>) {
326            if let DataType::Array(inner, size) = dt {
327                shape.push(*size);
328                get_shape_impl(inner, shape);
329            }
330        }
331
332        if let DataType::Array(inner, size) = self {
333            let mut shape = vec![*size];
334            get_shape_impl(inner, &mut shape);
335            Some(shape)
336        } else {
337            None
338        }
339    }
340
341    /// Get the inner data type of a nested type.
342    pub fn inner_dtype(&self) -> Option<&DataType> {
343        match self {
344            DataType::List(inner) => Some(inner),
345            #[cfg(feature = "dtype-array")]
346            DataType::Array(inner, _) => Some(inner),
347            _ => None,
348        }
349    }
350
351    /// Get the inner data type of a nested type.
352    pub fn into_inner_dtype(self) -> Option<DataType> {
353        match self {
354            DataType::List(inner) => Some(*inner),
355            #[cfg(feature = "dtype-array")]
356            DataType::Array(inner, _) => Some(*inner),
357            _ => None,
358        }
359    }
360
361    /// Get the inner data type of a nested type.
362    pub fn try_into_inner_dtype(self) -> PolarsResult<DataType> {
363        match self {
364            DataType::List(inner) => Ok(*inner),
365            #[cfg(feature = "dtype-array")]
366            DataType::Array(inner, _) => Ok(*inner),
367            dt => polars_bail!(InvalidOperation: "cannot get inner datatype of `{dt}`"),
368        }
369    }
370
371    /// Get the absolute inner data type of a nested type.
372    pub fn leaf_dtype(&self) -> &DataType {
373        let mut prev = self;
374        while let Some(dtype) = prev.inner_dtype() {
375            prev = dtype
376        }
377        prev
378    }
379
380    #[cfg(feature = "dtype-array")]
381    /// Get the inner data type of a multidimensional array.
382    pub fn array_leaf_dtype(&self) -> Option<&DataType> {
383        let mut prev = self;
384        match prev {
385            DataType::Array(_, _) => {
386                while let DataType::Array(inner, _) = &prev {
387                    prev = inner;
388                }
389                Some(prev)
390            },
391            _ => None,
392        }
393    }
394
395    /// Cast the leaf types of Lists/Arrays and keep the nesting.
396    pub fn cast_leaf(&self, to: DataType) -> DataType {
397        use DataType::*;
398        match self {
399            List(inner) => List(Box::new(inner.cast_leaf(to))),
400            #[cfg(feature = "dtype-array")]
401            Array(inner, size) => Array(Box::new(inner.cast_leaf(to)), *size),
402            _ => to,
403        }
404    }
405
406    /// Map all leaf types of nested dtypes (list, array, struct) using the
407    /// supplied function.
408    pub fn map_leaves<F: FnMut(DataType) -> DataType>(self, f: &mut F) -> DataType {
409        use DataType::*;
410        match self {
411            List(inner) => List(Box::new(inner.map_leaves(f))),
412            #[cfg(feature = "dtype-array")]
413            Array(inner, size) => Array(Box::new(inner.map_leaves(f)), size),
414            #[cfg(feature = "dtype-struct")]
415            Struct(fields) => {
416                let new_fields = fields
417                    .into_iter()
418                    .map(|fld| Field::new(fld.name, fld.dtype.map_leaves(f)))
419                    .collect();
420                Struct(new_fields)
421            },
422            #[cfg(feature = "dtype-extension")]
423            Extension(ext, storage) => Extension(ext, Box::new(storage.map_leaves(f))),
424            _ => f(self),
425        }
426    }
427
428    /// Return whether the cast to `to` makes sense.
429    ///
430    /// If it `None`, we are not sure.
431    pub fn can_cast_to(&self, to: &DataType) -> Option<bool> {
432        if self == to {
433            return Some(true);
434        }
435        if self.is_primitive_numeric() && to.is_primitive_numeric() {
436            return Some(true);
437        }
438
439        if self.is_null() {
440            return Some(true);
441        }
442
443        use DataType as D;
444        Some(match (self, to) {
445            #[cfg(feature = "dtype-categorical")]
446            (D::Categorical(_, _) | D::Enum(_, _), D::Binary)
447            | (D::Binary, D::Categorical(_, _) | D::Enum(_, _)) => false, // TODO @ cat-rework: why can we not cast to Binary?
448
449            #[cfg(feature = "dtype-categorical")]
450            (D::Categorical(_, _) | D::Enum(_, _), D::String)
451            | (D::String, D::Categorical(_, _) | D::Enum(_, _)) => true,
452
453            #[cfg(feature = "object")]
454            (D::Object(_), D::Object(_)) => true,
455            #[cfg(feature = "object")]
456            (D::Object(_), _) | (_, D::Object(_)) => false,
457
458            (D::Boolean, dt) | (dt, D::Boolean) => match dt {
459                dt if dt.is_primitive_numeric() => true,
460                #[cfg(feature = "dtype-decimal")]
461                D::Decimal(_, _) => true,
462                D::String | D::Binary => true,
463                _ => false,
464            },
465
466            (D::List(from), D::List(to)) => from.can_cast_to(to)?,
467            #[cfg(feature = "dtype-array")]
468            (D::Array(from, l_width), D::Array(to, r_width)) => {
469                l_width == r_width && from.can_cast_to(to)?
470            },
471            #[cfg(feature = "dtype-struct")]
472            (D::Struct(l_fields), D::Struct(r_fields)) => {
473                if l_fields.is_empty() {
474                    return Some(true);
475                }
476
477                if l_fields.len() != r_fields.len() {
478                    return Some(false);
479                }
480
481                for (l, r) in l_fields.iter().zip(r_fields) {
482                    if !l.dtype().can_cast_to(r.dtype())? {
483                        return Some(false);
484                    }
485                }
486
487                true
488            },
489
490            // @NOTE: we are being conversative
491            _ => return None,
492        })
493    }
494
495    pub fn implode(self) -> DataType {
496        DataType::List(Box::new(self))
497    }
498
499    /// Convert to the physical data type
500    #[must_use]
501    pub fn to_physical(&self) -> DataType {
502        use DataType::*;
503        match self {
504            Date => Int32,
505            Datetime(_, _) => Int64,
506            Duration(_) => Int64,
507            Time => Int64,
508            #[cfg(feature = "dtype-decimal")]
509            Decimal(_, _) => Int128,
510            #[cfg(feature = "dtype-categorical")]
511            Categorical(cats, _) => cats.physical().dtype(),
512            #[cfg(feature = "dtype-categorical")]
513            Enum(fcats, _) => fcats.physical().dtype(),
514            #[cfg(feature = "dtype-array")]
515            Array(dt, width) => Array(Box::new(dt.to_physical()), *width),
516            List(dt) => List(Box::new(dt.to_physical())),
517            #[cfg(feature = "dtype-struct")]
518            Struct(fields) => {
519                let new_fields = fields
520                    .iter()
521                    .map(|s| Field::new(s.name().clone(), s.dtype().to_physical()))
522                    .collect();
523                Struct(new_fields)
524            },
525            #[cfg(feature = "dtype-extension")]
526            Extension(_, storage) => storage.to_physical(),
527            _ => self.clone(),
528        }
529    }
530
531    #[must_use]
532    pub fn to_storage(&self) -> DataType {
533        use DataType::*;
534        match self {
535            #[cfg(feature = "dtype-extension")]
536            Extension(_, storage) => storage.to_storage(),
537            _ => self.clone(),
538        }
539    }
540
541    pub fn is_supported_list_arithmetic_input(&self) -> bool {
542        self.is_primitive_numeric() || self.is_bool() || self.is_null()
543    }
544
545    /// Check if this [`DataType`] is a logical type
546    pub fn is_logical(&self) -> bool {
547        self != &self.to_physical()
548    }
549
550    /// Check if this [`DataType`] is a temporal type
551    pub fn is_temporal(&self) -> bool {
552        use DataType::*;
553        matches!(self, Date | Datetime(_, _) | Duration(_) | Time)
554    }
555
556    /// Check if datatype is a primitive type. By that we mean that
557    /// it is not a nested or logical type.
558    pub fn is_primitive(&self) -> bool {
559        self.is_primitive_numeric()
560            | matches!(
561                self,
562                DataType::Boolean | DataType::String | DataType::Binary
563            )
564    }
565
566    /// Check if this [`DataType`] is a primitive numeric type (excludes Decimal).
567    pub fn is_primitive_numeric(&self) -> bool {
568        self.is_float() || self.is_integer()
569    }
570
571    /// Check if this [`DataType`] is a boolean.
572    pub fn is_bool(&self) -> bool {
573        matches!(self, DataType::Boolean)
574    }
575
576    /// Check if this [`DataType`] is a list.
577    pub fn is_list(&self) -> bool {
578        matches!(self, DataType::List(_))
579    }
580
581    /// Check if this [`DataType`] is an array.
582    pub fn is_array(&self) -> bool {
583        #[cfg(feature = "dtype-array")]
584        {
585            matches!(self, DataType::Array(_, _))
586        }
587        #[cfg(not(feature = "dtype-array"))]
588        {
589            false
590        }
591    }
592
593    pub fn is_nested(&self) -> bool {
594        match self {
595            DataType::List(_) => true,
596            #[cfg(feature = "dtype-array")]
597            DataType::Array(_, _) => true,
598            #[cfg(feature = "dtype-struct")]
599            DataType::Struct(_) => true,
600            #[cfg(feature = "dtype-extension")]
601            DataType::Extension(_, storage) => storage.is_nested(),
602            _ => false,
603        }
604    }
605
606    /// Check if this [`DataType`] is a struct
607    pub fn is_struct(&self) -> bool {
608        #[cfg(feature = "dtype-struct")]
609        {
610            matches!(self, DataType::Struct(_))
611        }
612        #[cfg(not(feature = "dtype-struct"))]
613        {
614            false
615        }
616    }
617
618    pub fn is_binary(&self) -> bool {
619        matches!(self, DataType::Binary)
620    }
621
622    pub fn is_date(&self) -> bool {
623        matches!(self, DataType::Date)
624    }
625    pub fn is_datetime(&self) -> bool {
626        matches!(self, DataType::Datetime(..))
627    }
628
629    pub fn is_duration(&self) -> bool {
630        matches!(self, DataType::Duration(..))
631    }
632
633    pub fn is_object(&self) -> bool {
634        #[cfg(feature = "object")]
635        {
636            matches!(self, DataType::Object(_))
637        }
638        #[cfg(not(feature = "object"))]
639        {
640            false
641        }
642    }
643
644    pub fn is_null(&self) -> bool {
645        matches!(self, DataType::Null)
646    }
647
648    pub fn contains_views(&self) -> bool {
649        use DataType::*;
650        match self {
651            Binary | String => true,
652            List(inner) => inner.contains_views(),
653            #[cfg(feature = "dtype-array")]
654            Array(inner, _) => inner.contains_views(),
655            #[cfg(feature = "dtype-struct")]
656            Struct(fields) => fields.iter().any(|field| field.dtype.contains_views()),
657            _ => false,
658        }
659    }
660
661    pub fn contains_categoricals(&self) -> bool {
662        use DataType::*;
663        match self {
664            #[cfg(feature = "dtype-categorical")]
665            Categorical(_, _) => true,
666            List(inner) => inner.contains_categoricals(),
667            #[cfg(feature = "dtype-array")]
668            Array(inner, _) => inner.contains_categoricals(),
669            #[cfg(feature = "dtype-struct")]
670            Struct(fields) => fields
671                .iter()
672                .any(|field| field.dtype.contains_categoricals()),
673            _ => false,
674        }
675    }
676
677    pub fn contains_enums(&self) -> bool {
678        use DataType::*;
679        match self {
680            #[cfg(feature = "dtype-categorical")]
681            Enum(_, _) => true,
682            List(inner) => inner.contains_enums(),
683            #[cfg(feature = "dtype-array")]
684            Array(inner, _) => inner.contains_enums(),
685            #[cfg(feature = "dtype-struct")]
686            Struct(fields) => fields.iter().any(|field| field.dtype.contains_enums()),
687            _ => false,
688        }
689    }
690
691    pub fn contains_objects(&self) -> bool {
692        use DataType::*;
693        match self {
694            #[cfg(feature = "object")]
695            Object(_) => true,
696            List(inner) => inner.contains_objects(),
697            #[cfg(feature = "dtype-array")]
698            Array(inner, _) => inner.contains_objects(),
699            #[cfg(feature = "dtype-struct")]
700            Struct(fields) => fields.iter().any(|field| field.dtype.contains_objects()),
701            _ => false,
702        }
703    }
704
705    pub fn contains_list_recursive(&self) -> bool {
706        use DataType as D;
707        match self {
708            D::List(_) => true,
709            #[cfg(feature = "dtype-array")]
710            D::Array(inner, _) => inner.contains_list_recursive(),
711            #[cfg(feature = "dtype-struct")]
712            D::Struct(fields) => fields
713                .iter()
714                .any(|field| field.dtype.contains_list_recursive()),
715            _ => false,
716        }
717    }
718
719    pub fn contains_unknown(&self) -> bool {
720        use DataType as D;
721        match self {
722            D::Unknown(_) => true,
723            D::List(inner) => inner.contains_unknown(),
724            #[cfg(feature = "dtype-array")]
725            D::Array(inner, _) => inner.contains_unknown(),
726            #[cfg(feature = "dtype-struct")]
727            D::Struct(fields) => fields.iter().any(|field| field.dtype.contains_unknown()),
728            _ => false,
729        }
730    }
731
732    pub fn contains_dtype_recursive(&self, dtype: &DataType) -> bool {
733        if self == dtype {
734            return true;
735        }
736        use DataType as D;
737        match self {
738            D::List(inner) => inner.contains_dtype_recursive(dtype),
739            #[cfg(feature = "dtype-array")]
740            D::Array(inner, _) => inner.contains_dtype_recursive(dtype),
741            #[cfg(feature = "dtype-struct")]
742            D::Struct(fields) => fields
743                .iter()
744                .any(|field| field.dtype.contains_dtype_recursive(dtype)),
745            _ => false,
746        }
747    }
748
749    /// Check if type is sortable
750    pub fn is_ord(&self) -> bool {
751        let phys = self.to_physical();
752        phys.is_primitive_numeric()
753            || self.is_decimal()
754            || matches!(
755                phys,
756                DataType::Binary | DataType::String | DataType::Boolean
757            )
758    }
759
760    /// Check if this [`DataType`] is a Decimal type (of any scale/precision).
761    pub fn is_decimal(&self) -> bool {
762        match self {
763            #[cfg(feature = "dtype-decimal")]
764            DataType::Decimal(_, _) => true,
765            _ => false,
766        }
767    }
768
769    /// Check if this [`DataType`] is a basic floating point type (excludes Decimal).
770    /// Note, this also includes `Unknown(UnknownKind::Float)`.
771    pub fn is_float(&self) -> bool {
772        matches!(
773            self,
774            DataType::Float16
775                | DataType::Float32
776                | DataType::Float64
777                | DataType::Unknown(UnknownKind::Float)
778        )
779    }
780
781    /// Check if this [`DataType`] is an integer. Note, this also includes `Unknown(UnknownKind::Int)`.
782    pub fn is_integer(&self) -> bool {
783        matches!(
784            self,
785            DataType::Int8
786                | DataType::Int16
787                | DataType::Int32
788                | DataType::Int64
789                | DataType::Int128
790                | DataType::UInt8
791                | DataType::UInt16
792                | DataType::UInt32
793                | DataType::UInt64
794                | DataType::UInt128
795                | DataType::Unknown(UnknownKind::Int(_))
796        )
797    }
798
799    pub fn is_signed_integer(&self) -> bool {
800        // allow because it cannot be replaced when object feature is activated
801        matches!(
802            self,
803            DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 | DataType::Int128
804        )
805    }
806
807    pub fn is_unsigned_integer(&self) -> bool {
808        matches!(
809            self,
810            DataType::UInt8
811                | DataType::UInt16
812                | DataType::UInt32
813                | DataType::UInt64
814                | DataType::UInt128,
815        )
816    }
817
818    pub fn is_string(&self) -> bool {
819        matches!(self, DataType::String | DataType::Unknown(UnknownKind::Str))
820    }
821
822    pub fn is_categorical(&self) -> bool {
823        #[cfg(feature = "dtype-categorical")]
824        {
825            matches!(self, DataType::Categorical(_, _))
826        }
827        #[cfg(not(feature = "dtype-categorical"))]
828        {
829            false
830        }
831    }
832
833    pub fn is_enum(&self) -> bool {
834        #[cfg(feature = "dtype-categorical")]
835        {
836            matches!(self, DataType::Enum(_, _))
837        }
838        #[cfg(not(feature = "dtype-categorical"))]
839        {
840            false
841        }
842    }
843
844    pub fn is_extension(&self) -> bool {
845        #[cfg(feature = "dtype-extension")]
846        {
847            matches!(self, DataType::Extension(_, _))
848        }
849        #[cfg(not(feature = "dtype-extension"))]
850        {
851            false
852        }
853    }
854
855    /// Convert to an Arrow Field.
856    pub fn to_arrow_field(&self, name: PlSmallStr, compat_level: CompatLevel) -> ArrowField {
857        let field = ArrowField::new(name, self.to_arrow(compat_level), true);
858
859        if let Some(metadata) = self.to_arrow_field_metadata() {
860            field.with_metadata(metadata)
861        } else {
862            field
863        }
864    }
865
866    pub fn to_arrow_field_metadata(&self) -> Option<Metadata> {
867        match self {
868            #[cfg(feature = "dtype-categorical")]
869            DataType::Enum(fcats, _map) => {
870                let cats = fcats.categories();
871                let strings_size: usize = cats
872                    .values_iter()
873                    .map(|s| (s.len() + 1).ilog10() as usize + 1 + s.len())
874                    .sum();
875                let mut encoded = String::with_capacity(strings_size);
876                for cat in cats.values_iter() {
877                    encoded.push_str(itoa::Buffer::new().format(cat.len()));
878                    encoded.push(';');
879                    encoded.push_str(cat);
880                }
881                Some(BTreeMap::from([(
882                    PlSmallStr::from_static(DTYPE_ENUM_VALUES_NEW),
883                    PlSmallStr::from_string(encoded),
884                )]))
885            },
886            #[cfg(feature = "dtype-categorical")]
887            DataType::Categorical(cats, _) => {
888                let mut encoded = String::new();
889                encoded.push_str(itoa::Buffer::new().format(cats.name().len()));
890                encoded.push(';');
891                encoded.push_str(cats.name());
892                encoded.push_str(itoa::Buffer::new().format(cats.namespace().len()));
893                encoded.push(';');
894                encoded.push_str(cats.namespace());
895                encoded.push_str(cats.physical().as_str());
896                encoded.push(';');
897
898                Some(BTreeMap::from([(
899                    PlSmallStr::from_static(DTYPE_CATEGORICAL_NEW),
900                    PlSmallStr::from_string(encoded),
901                )]))
902            },
903            DataType::BinaryOffset => Some(BTreeMap::from([(
904                PlSmallStr::from_static(PL_KEY),
905                PlSmallStr::from_static(MAINTAIN_PL_TYPE),
906            )])),
907            #[cfg(feature = "dtype-extension")]
908            DataType::Extension(_ext, storage) => storage.to_arrow_field_metadata(),
909            _ => None,
910        }
911    }
912
913    /// Try to get the maximum value for this datatype.
914    pub fn max(&self) -> PolarsResult<Scalar> {
915        use DataType::*;
916        let v = match self {
917            Int8 => Scalar::from(i8::MAX),
918            Int16 => Scalar::from(i16::MAX),
919            Int32 => Scalar::from(i32::MAX),
920            Int64 => Scalar::from(i64::MAX),
921            Int128 => Scalar::from(i128::MAX),
922            UInt8 => Scalar::from(u8::MAX),
923            UInt16 => Scalar::from(u16::MAX),
924            UInt32 => Scalar::from(u32::MAX),
925            UInt64 => Scalar::from(u64::MAX),
926            UInt128 => Scalar::from(u128::MAX),
927            Float16 => Scalar::from(pf16::INFINITY),
928            Float32 => Scalar::from(f32::INFINITY),
929            Float64 => Scalar::from(f64::INFINITY),
930            #[cfg(feature = "dtype-time")]
931            Time => Scalar::new(Time, AnyValue::Time(NS_IN_DAY - 1)),
932            dt => polars_bail!(ComputeError: "cannot determine upper bound for dtype `{dt}`"),
933        };
934        Ok(v)
935    }
936
937    /// Try to get the minimum value for this datatype.
938    pub fn min(&self) -> PolarsResult<Scalar> {
939        use DataType::*;
940        let v = match self {
941            Int8 => Scalar::from(i8::MIN),
942            Int16 => Scalar::from(i16::MIN),
943            Int32 => Scalar::from(i32::MIN),
944            Int64 => Scalar::from(i64::MIN),
945            Int128 => Scalar::from(i128::MIN),
946            UInt8 => Scalar::from(u8::MIN),
947            UInt16 => Scalar::from(u16::MIN),
948            UInt32 => Scalar::from(u32::MIN),
949            UInt64 => Scalar::from(u64::MIN),
950            UInt128 => Scalar::from(u128::MIN),
951            Float16 => Scalar::from(pf16::NEG_INFINITY),
952            Float32 => Scalar::from(f32::NEG_INFINITY),
953            Float64 => Scalar::from(f64::NEG_INFINITY),
954            #[cfg(feature = "dtype-time")]
955            Time => Scalar::new(Time, AnyValue::Time(0)),
956            dt => polars_bail!(ComputeError: "cannot determine lower bound for dtype `{}`", dt),
957        };
958        Ok(v)
959    }
960
961    /// Convert to an Arrow data type.
962    #[inline]
963    pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowDataType {
964        self.try_to_arrow(compat_level).unwrap()
965    }
966
967    #[inline]
968    pub fn try_to_arrow(&self, compat_level: CompatLevel) -> PolarsResult<ArrowDataType> {
969        use DataType::*;
970        match self {
971            Boolean => Ok(ArrowDataType::Boolean),
972            UInt8 => Ok(ArrowDataType::UInt8),
973            UInt16 => Ok(ArrowDataType::UInt16),
974            UInt32 => Ok(ArrowDataType::UInt32),
975            UInt64 => Ok(ArrowDataType::UInt64),
976            UInt128 => Ok(ArrowDataType::UInt128),
977            Int8 => Ok(ArrowDataType::Int8),
978            Int16 => Ok(ArrowDataType::Int16),
979            Int32 => Ok(ArrowDataType::Int32),
980            Int64 => Ok(ArrowDataType::Int64),
981            Int128 => Ok(ArrowDataType::Int128),
982            Float16 => Ok(ArrowDataType::Float16),
983            Float32 => Ok(ArrowDataType::Float32),
984            Float64 => Ok(ArrowDataType::Float64),
985            #[cfg(feature = "dtype-decimal")]
986            Decimal(precision, scale) => {
987                assert!(*precision >= 1 && *precision <= 38);
988                Ok(ArrowDataType::Decimal(*precision, *scale))
989            },
990            String => {
991                let dt = if compat_level.0 >= 1 {
992                    ArrowDataType::Utf8View
993                } else {
994                    ArrowDataType::LargeUtf8
995                };
996                Ok(dt)
997            },
998            Binary => {
999                let dt = if compat_level.0 >= 1 {
1000                    ArrowDataType::BinaryView
1001                } else {
1002                    ArrowDataType::LargeBinary
1003                };
1004                Ok(dt)
1005            },
1006            Date => Ok(ArrowDataType::Date32),
1007            Datetime(unit, tz) => Ok(ArrowDataType::Timestamp(
1008                unit.to_arrow(),
1009                tz.as_deref().cloned(),
1010            )),
1011            Duration(unit) => Ok(ArrowDataType::Duration(unit.to_arrow())),
1012            Time => Ok(ArrowDataType::Time64(ArrowTimeUnit::Nanosecond)),
1013            #[cfg(feature = "dtype-array")]
1014            Array(dt, width) => Ok(ArrowDataType::FixedSizeList(
1015                Box::new(dt.to_arrow_field(LIST_VALUES_NAME, compat_level)),
1016                *width,
1017            )),
1018            List(dt) => Ok(ArrowDataType::LargeList(Box::new(
1019                dt.to_arrow_field(LIST_VALUES_NAME, compat_level),
1020            ))),
1021            Null => Ok(ArrowDataType::Null),
1022            #[cfg(feature = "object")]
1023            Object(_) => Ok(get_object_physical_type()),
1024            #[cfg(feature = "dtype-categorical")]
1025            Categorical(_, _) | Enum(_, _) => {
1026                let arrow_phys = match self.cat_physical().unwrap() {
1027                    CategoricalPhysical::U8 => IntegerType::UInt8,
1028                    CategoricalPhysical::U16 => IntegerType::UInt16,
1029                    CategoricalPhysical::U32 => IntegerType::UInt32,
1030                };
1031
1032                let values = if compat_level.0 >= 1 {
1033                    ArrowDataType::Utf8View
1034                } else {
1035                    ArrowDataType::LargeUtf8
1036                };
1037
1038                Ok(ArrowDataType::Dictionary(
1039                    arrow_phys,
1040                    Box::new(values),
1041                    matches!(self, Enum(_, _)),
1042                ))
1043            },
1044            #[cfg(feature = "dtype-struct")]
1045            Struct(fields) => {
1046                let fields = fields
1047                    .iter()
1048                    .map(|fld| fld.to_arrow(compat_level))
1049                    .collect();
1050                Ok(ArrowDataType::Struct(fields))
1051            },
1052            BinaryOffset => Ok(ArrowDataType::LargeBinary),
1053            #[cfg(feature = "dtype-extension")]
1054            Extension(typ, inner) => Ok(ArrowDataType::Extension(Box::new(
1055                arrow::datatypes::ExtensionType {
1056                    name: typ.name().into(),
1057                    inner: inner.try_to_arrow(compat_level)?,
1058                    metadata: typ.serialize_metadata().map(|m| m.into()),
1059                },
1060            ))),
1061            Unknown(kind) => {
1062                let dt = match kind {
1063                    UnknownKind::Any => ArrowDataType::Unknown,
1064                    UnknownKind::Float => ArrowDataType::Float64,
1065                    UnknownKind::Str => ArrowDataType::Utf8View,
1066                    UnknownKind::Int(v) => {
1067                        return materialize_dyn_int(*v).dtype().try_to_arrow(compat_level);
1068                    },
1069                };
1070                Ok(dt)
1071            },
1072        }
1073    }
1074
1075    pub fn is_nested_null(&self) -> bool {
1076        use DataType::*;
1077        match self {
1078            Null => true,
1079            List(field) => field.is_nested_null(),
1080            #[cfg(feature = "dtype-array")]
1081            Array(field, _) => field.is_nested_null(),
1082            #[cfg(feature = "dtype-struct")]
1083            Struct(fields) => fields.iter().all(|fld| fld.dtype.is_nested_null()),
1084            _ => false,
1085        }
1086    }
1087
1088    /// Answers if this type matches the given type of a schema.
1089    ///
1090    /// Allows (nested) Null types in this type to match any type in the schema,
1091    /// but not vice versa. In such a case Ok(true) is returned, because a cast
1092    /// is necessary. If no cast is necessary Ok(false) is returned, and an
1093    /// error is returned if the types are incompatible.
1094    pub fn matches_schema_type(&self, schema_type: &DataType) -> PolarsResult<bool> {
1095        match (self, schema_type) {
1096            (DataType::List(l), DataType::List(r)) => l.matches_schema_type(r),
1097            #[cfg(feature = "dtype-array")]
1098            (DataType::Array(l, sl), DataType::Array(r, sr)) => {
1099                Ok(l.matches_schema_type(r)? && sl == sr)
1100            },
1101            #[cfg(feature = "dtype-struct")]
1102            (DataType::Struct(l), DataType::Struct(r)) => {
1103                if l.len() != r.len() {
1104                    polars_bail!(SchemaMismatch: "structs have different number of fields: {} vs {}", l.len(), r.len());
1105                }
1106                let mut must_cast = false;
1107                for (l, r) in l.iter().zip(r.iter()) {
1108                    must_cast |= l.dtype.matches_schema_type(&r.dtype)?;
1109                }
1110                Ok(must_cast)
1111            },
1112            (DataType::Null, DataType::Null) => Ok(false),
1113            #[cfg(feature = "dtype-decimal")]
1114            (DataType::Decimal(p1, s1), DataType::Decimal(p2, s2)) => Ok((p1, s1) != (p2, s2)),
1115            // We don't allow the other way around, only if our current type is
1116            // null and the schema isn't we allow it.
1117            (DataType::Null, _) => Ok(true),
1118            #[cfg(feature = "dtype-categorical")]
1119            (DataType::Categorical(l, _), DataType::Categorical(r, _)) => {
1120                ensure_same_categories(l, r)?;
1121                Ok(false)
1122            },
1123            #[cfg(feature = "dtype-categorical")]
1124            (DataType::Enum(l, _), DataType::Enum(r, _)) => {
1125                ensure_same_frozen_categories(l, r)?;
1126                Ok(false)
1127            },
1128
1129            (l, r) if l == r => Ok(false),
1130            (l, r) => {
1131                polars_bail!(SchemaMismatch: "type {:?} is incompatible with expected type {:?}", l, r)
1132            },
1133        }
1134    }
1135
1136    #[inline]
1137    pub fn is_unknown(&self) -> bool {
1138        matches!(self, DataType::Unknown(_))
1139    }
1140
1141    pub fn nesting_level(&self) -> usize {
1142        let mut level = 0;
1143        let mut slf = self;
1144        while let Some(inner_dtype) = slf.inner_dtype() {
1145            level += 1;
1146            slf = inner_dtype;
1147        }
1148        level
1149    }
1150
/// If this dtype is a Categorical or Enum, returns the physical backing type.
///
/// # Errors
///
/// Returns `SchemaMismatch` for any other dtype.
#[cfg(feature = "dtype-categorical")]
pub fn cat_physical(&self) -> PolarsResult<CategoricalPhysical> {
    let physical = match self {
        DataType::Categorical(cats, _) => cats.physical(),
        DataType::Enum(fcats, _) => fcats.physical(),
        other => {
            polars_bail!(SchemaMismatch: "invalid dtype: expected an Enum or Categorical type, received '{:?}'", other)
        },
    };
    Ok(physical)
}
1162
/// If this dtype is a Categorical or Enum, returns the underlying mapping.
///
/// # Errors
///
/// Returns `SchemaMismatch` for any other dtype.
#[cfg(feature = "dtype-categorical")]
pub fn cat_mapping(&self) -> PolarsResult<&Arc<CategoricalMapping>> {
    match self {
        DataType::Categorical(_, mapping) => Ok(mapping),
        DataType::Enum(_, mapping) => Ok(mapping),
        other => {
            polars_bail!(SchemaMismatch: "invalid dtype: expected an Enum or Categorical type, received '{:?}'", other)
        },
    }
}
1173
/// Builds a `Categorical` dtype from `cats`, paired with the mapping obtained
/// from `cats.mapping()`.
#[cfg(feature = "dtype-categorical")]
pub fn from_categories(cats: Arc<Categories>) -> Self {
    // Fetch the mapping first: `cats` is moved into the variant below.
    let cat_mapping = cats.mapping();
    DataType::Categorical(cats, cat_mapping)
}
1179
/// Builds an `Enum` dtype from `fcats`, paired with a clone of its mapping.
#[cfg(feature = "dtype-categorical")]
pub fn from_frozen_categories(fcats: Arc<FrozenCategories>) -> Self {
    // Clone the mapping first: `fcats` is moved into the variant below.
    let enum_mapping = fcats.mapping().clone();
    DataType::Enum(fcats, enum_mapping)
}
1185
1186    pub fn is_numeric(&self) -> bool {
1187        self.is_integer() || self.is_float() || self.is_decimal()
1188    }
1189
1190    pub fn numeric_to_unsigned_bit_repr(&self) -> Option<DataType> {
1191        use DataType::*;
1192
1193        Some(match self {
1194            Int8 | UInt8 => UInt8,
1195            Int16 | UInt16 | Float16 => UInt16,
1196            Int32 | UInt32 | Float32 => UInt32,
1197            Int64 | UInt64 | Float64 => UInt64,
1198            Int128 | UInt128 => UInt128,
1199            _ => return None,
1200        })
1201    }
1202}
1203
1204impl Display for DataType {
1205    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1206        let s = match self {
1207            DataType::Null => "null",
1208            DataType::Boolean => "bool",
1209            DataType::UInt8 => "u8",
1210            DataType::UInt16 => "u16",
1211            DataType::UInt32 => "u32",
1212            DataType::UInt64 => "u64",
1213            DataType::UInt128 => "u128",
1214            DataType::Int8 => "i8",
1215            DataType::Int16 => "i16",
1216            DataType::Int32 => "i32",
1217            DataType::Int64 => "i64",
1218            DataType::Int128 => "i128",
1219            DataType::Float16 => "f16",
1220            DataType::Float32 => "f32",
1221            DataType::Float64 => "f64",
1222            #[cfg(feature = "dtype-decimal")]
1223            DataType::Decimal(p, s) => return write!(f, "decimal[{p},{s}]"),
1224            DataType::String => "str",
1225            DataType::Binary => "binary",
1226            DataType::BinaryOffset => "binary[offset]",
1227            DataType::Date => "date",
1228            DataType::Datetime(tu, None) => return write!(f, "datetime[{tu}]"),
1229            DataType::Datetime(tu, Some(tz)) => return write!(f, "datetime[{tu}, {tz}]"),
1230            DataType::Duration(tu) => return write!(f, "duration[{tu}]"),
1231            DataType::Time => "time",
1232            #[cfg(feature = "dtype-array")]
1233            DataType::Array(_, _) => {
1234                let tp = self.array_leaf_dtype().unwrap();
1235
1236                let dims = self.get_shape().unwrap();
1237                let shape = if dims.len() == 1 {
1238                    format!("{}", dims[0])
1239                } else {
1240                    format_tuple!(dims)
1241                };
1242                return write!(f, "array[{tp}, {shape}]");
1243            },
1244            DataType::List(tp) => return write!(f, "list[{tp}]"),
1245            #[cfg(feature = "object")]
1246            DataType::Object(s) => s,
1247            #[cfg(feature = "dtype-categorical")]
1248            DataType::Categorical(_, _) => "cat",
1249            #[cfg(feature = "dtype-categorical")]
1250            DataType::Enum(_, _) => "enum",
1251            #[cfg(feature = "dtype-struct")]
1252            DataType::Struct(fields) => return write!(f, "struct[{}]", fields.len()),
1253            #[cfg(feature = "dtype-extension")]
1254            DataType::Extension(typ, _) => return write!(f, "ext[{}]", typ.0.dyn_display()),
1255            DataType::Unknown(kind) => match kind {
1256                UnknownKind::Any => "unknown",
1257                UnknownKind::Int(_) => "dyn int",
1258                UnknownKind::Float => "dyn float",
1259                UnknownKind::Str => "dyn str",
1260            },
1261        };
1262        f.write_str(s)
1263    }
1264}
1265
1266impl std::fmt::Debug for DataType {
1267    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1268        use DataType::*;
1269        match self {
1270            Boolean => write!(f, "Boolean"),
1271            UInt8 => write!(f, "UInt8"),
1272            UInt16 => write!(f, "UInt16"),
1273            UInt32 => write!(f, "UInt32"),
1274            UInt64 => write!(f, "UInt64"),
1275            UInt128 => write!(f, "UInt128"),
1276            Int8 => write!(f, "Int8"),
1277            Int16 => write!(f, "Int16"),
1278            Int32 => write!(f, "Int32"),
1279            Int64 => write!(f, "Int64"),
1280            Int128 => write!(f, "Int128"),
1281            Float16 => write!(f, "Float16"),
1282            Float32 => write!(f, "Float32"),
1283            Float64 => write!(f, "Float64"),
1284            String => write!(f, "String"),
1285            Binary => write!(f, "Binary"),
1286            BinaryOffset => write!(f, "BinaryOffset"),
1287            Date => write!(f, "Date"),
1288            Time => write!(f, "Time"),
1289            Duration(unit) => write!(f, "Duration('{unit}')"),
1290            Datetime(unit, opt_tz) => {
1291                if let Some(tz) = opt_tz {
1292                    write!(f, "Datetime('{unit}', '{tz}')")
1293                } else {
1294                    write!(f, "Datetime('{unit}')")
1295                }
1296            },
1297            #[cfg(feature = "dtype-decimal")]
1298            Decimal(p, s) => write!(f, "Decimal({p}, {s})"),
1299            #[cfg(feature = "dtype-array")]
1300            Array(inner, size) => write!(f, "Array({inner:?}, {size})"),
1301            List(inner) => write!(f, "List({inner:?})"),
1302            #[cfg(feature = "dtype-struct")]
1303            Struct(fields) => {
1304                let mut first = true;
1305                write!(f, "Struct({{")?;
1306                for field in fields {
1307                    if !first {
1308                        write!(f, ", ")?;
1309                    }
1310                    write!(f, "'{}': {:?}", field.name(), field.dtype())?;
1311                    first = false;
1312                }
1313                write!(f, "}})")
1314            },
1315            #[cfg(feature = "dtype-categorical")]
1316            Categorical(cats, _) => {
1317                if cats.is_global() {
1318                    write!(f, "Categorical")
1319                } else if cats.namespace().is_empty() && cats.physical() == CategoricalPhysical::U32
1320                {
1321                    write!(f, "Categorical('{}')", cats.name())
1322                } else {
1323                    write!(
1324                        f,
1325                        "Categorical('{}', '{}', {:?})",
1326                        cats.name(),
1327                        cats.namespace(),
1328                        cats.physical()
1329                    )
1330                }
1331            },
1332            #[cfg(feature = "dtype-categorical")]
1333            Enum(_, _) => write!(f, "Enum([...])"),
1334            #[cfg(feature = "object")]
1335            Object(_) => write!(f, "Object"),
1336            Null => write!(f, "Null"),
1337            #[cfg(feature = "dtype-extension")]
1338            Extension(typ, inner) => write!(f, "Extension({}, {inner:?})", typ.0.dyn_debug()),
1339            Unknown(kind) => write!(f, "Unknown({kind:?})"),
1340        }
1341    }
1342}
1343
1344pub fn merge_dtypes(left: &DataType, right: &DataType) -> PolarsResult<DataType> {
1345    use DataType::*;
1346    Ok(match (left, right) {
1347        #[cfg(feature = "dtype-categorical")]
1348        (Categorical(cats_l, map), Categorical(cats_r, _)) => {
1349            ensure_same_categories(cats_l, cats_r)?;
1350            Categorical(cats_l.clone(), map.clone())
1351        },
1352        #[cfg(feature = "dtype-categorical")]
1353        (Enum(fcats_l, map), Enum(fcats_r, _)) => {
1354            ensure_same_frozen_categories(fcats_l, fcats_r)?;
1355            Enum(fcats_l.clone(), map.clone())
1356        },
1357        (List(inner_l), List(inner_r)) => {
1358            let merged = merge_dtypes(inner_l, inner_r)?;
1359            List(Box::new(merged))
1360        },
1361        #[cfg(feature = "dtype-struct")]
1362        (Struct(inner_l), Struct(inner_r)) => {
1363            polars_ensure!(inner_l.len() == inner_r.len(), ComputeError: "cannot combine structs with differing amounts of fields ({} != {})", inner_l.len(), inner_r.len());
1364            let fields = inner_l.iter().zip(inner_r.iter()).map(|(l, r)| {
1365                polars_ensure!(l.name() == r.name(), ComputeError: "cannot combine structs with different fields ({} != {})", l.name(), r.name());
1366                let merged = merge_dtypes(l.dtype(), r.dtype())?;
1367                Ok(Field::new(l.name().clone(), merged))
1368            }).collect::<PolarsResult<Vec<_>>>()?;
1369            Struct(fields)
1370        },
1371        #[cfg(feature = "dtype-array")]
1372        (Array(inner_l, width_l), Array(inner_r, width_r)) => {
1373            polars_ensure!(width_l == width_r, ComputeError: "widths of FixedSizeWidth Series are not equal");
1374            let merged = merge_dtypes(inner_l, inner_r)?;
1375            Array(Box::new(merged), *width_l)
1376        },
1377        (left, right) if left == right => left.clone(),
1378        _ => polars_bail!(ComputeError: "unable to merge datatypes"),
1379    })
1380}
1381
1382fn collect_nested_types(
1383    dtype: &DataType,
1384    result: &mut PlHashSet<DataType>,
1385    include_compound_types: bool,
1386) {
1387    match dtype {
1388        DataType::List(inner) => {
1389            if include_compound_types {
1390                result.insert(dtype.clone());
1391            }
1392            collect_nested_types(inner, result, include_compound_types);
1393        },
1394        #[cfg(feature = "dtype-array")]
1395        DataType::Array(inner, _) => {
1396            if include_compound_types {
1397                result.insert(dtype.clone());
1398            }
1399            collect_nested_types(inner, result, include_compound_types);
1400        },
1401        #[cfg(feature = "dtype-struct")]
1402        DataType::Struct(fields) => {
1403            if include_compound_types {
1404                result.insert(dtype.clone());
1405            }
1406            for field in fields {
1407                collect_nested_types(field.dtype(), result, include_compound_types);
1408            }
1409        },
1410        _ => {
1411            result.insert(dtype.clone());
1412        },
1413    }
1414}
1415
1416pub fn unpack_dtypes(dtype: &DataType, include_compound_types: bool) -> PlHashSet<DataType> {
1417    let mut result = PlHashSet::new();
1418    collect_nested_types(dtype, &mut result, include_compound_types);
1419    result
1420}
1421
1422#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
1423#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1424#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
1425pub struct CompatLevel(pub(crate) u16);
1426
1427impl CompatLevel {
1428    pub const fn newest() -> CompatLevel {
1429        CompatLevel(1)
1430    }
1431
1432    pub const fn oldest() -> CompatLevel {
1433        CompatLevel(0)
1434    }
1435
1436    // The following methods are only used internally
1437
1438    #[doc(hidden)]
1439    pub fn with_level(level: u16) -> PolarsResult<CompatLevel> {
1440        if level > CompatLevel::newest().0 {
1441            polars_bail!(InvalidOperation: "invalid compat level");
1442        }
1443        Ok(CompatLevel(level))
1444    }
1445
1446    #[doc(hidden)]
1447    pub fn get_level(&self) -> u16 {
1448        self.0
1449    }
1450
1451    /// Whether this compat level uses Utf8View/BinaryView types.
1452    pub fn uses_binview_types(&self) -> bool {
1453        *self != CompatLevel::oldest()
1454    }
1455}
1456
1457impl DataType {
1458    pub fn visit_with(&self, mut visitor_fn: impl FnMut(&DataType)) {
1459        self.try_visit_with(|dtype| {
1460            visitor_fn(dtype);
1461            Ok(())
1462        })
1463        .unwrap();
1464    }
1465
1466    pub fn try_visit_with(
1467        &self,
1468        mut visitor_fn: impl FnMut(&DataType) -> PolarsResult<()>,
1469    ) -> PolarsResult<()> {
1470        DataType::try_mutate_with(Cow::Borrowed(self), |dtype| {
1471            visitor_fn(dtype.as_ref()).map(|_| dtype)
1472        })
1473        .map(|_| ())
1474    }
1475
1476    pub fn try_mutate_with<'d>(
1477        dtype: Cow<'d, DataType>,
1478        mut visitor_fn: impl FnMut(Cow<'d, DataType>) -> PolarsResult<Cow<'d, DataType>>,
1479    ) -> PolarsResult<Cow<'d, DataType>> {
1480        DtypeVisitor {
1481            visitor_fn: &mut visitor_fn,
1482        }
1483        .visit_rec(dtype)
1484    }
1485}
1486
1487struct DtypeVisitor<'d, 'f> {
1488    visitor_fn: &'f mut dyn FnMut(Cow<'d, DataType>) -> PolarsResult<Cow<'d, DataType>>,
1489}
1490
1491impl<'d, 'f> DtypeVisitor<'d, 'f> {
1492    fn visit_rec(&mut self, dtype: Cow<'d, DataType>) -> PolarsResult<Cow<'d, DataType>> {
1493        let dtype = match dtype.as_ref() {
1494            DataType::List(_) => match dtype {
1495                Cow::Owned(DataType::List(mut inner)) => {
1496                    self.visit_ref_mut(inner.as_mut())?;
1497                    Cow::Owned(DataType::List(inner))
1498                },
1499                Cow::Borrowed(DataType::List(inner)) => {
1500                    let ret = self.visit_rec(Cow::Borrowed(inner.as_ref()))?;
1501
1502                    if std::ptr::eq(ret.as_ref(), inner.as_ref()) {
1503                        dtype
1504                    } else {
1505                        Cow::Owned(DataType::List(Box::new(ret.into_owned())))
1506                    }
1507                },
1508                _ => unreachable!(),
1509            },
1510            #[cfg(feature = "dtype-array")]
1511            DataType::Array(..) => match dtype {
1512                Cow::Owned(DataType::Array(mut inner, width)) => {
1513                    self.visit_ref_mut(inner.as_mut())?;
1514                    Cow::Owned(DataType::Array(inner, width))
1515                },
1516                Cow::Borrowed(DataType::Array(inner, width)) => {
1517                    let ret = self.visit_rec(Cow::Borrowed(inner.as_ref()))?;
1518
1519                    if std::ptr::eq(ret.as_ref(), inner.as_ref()) {
1520                        dtype
1521                    } else {
1522                        Cow::Owned(DataType::Array(Box::new(ret.into_owned()), *width))
1523                    }
1524                },
1525                _ => unreachable!(),
1526            },
1527            #[cfg(feature = "dtype-struct")]
1528            DataType::Struct(_) => match dtype {
1529                Cow::Owned(DataType::Struct(mut fields)) => {
1530                    for f in &mut fields {
1531                        self.visit_ref_mut(&mut f.dtype)?;
1532                    }
1533
1534                    Cow::Owned(DataType::Struct(fields))
1535                },
1536                Cow::Borrowed(DataType::Struct(fields)) => {
1537                    let mut new_fields = vec![];
1538
1539                    for (i, f) in fields.iter().enumerate() {
1540                        let ret = self.visit_rec(Cow::Borrowed(f.dtype()))?;
1541
1542                        if std::ptr::eq(ret.as_ref(), f.dtype()) && new_fields.is_empty() {
1543                            continue;
1544                        }
1545
1546                        if new_fields.is_empty() {
1547                            new_fields.reserve_exact(fields.len());
1548                            new_fields.extend(fields.iter().take(i).cloned());
1549                        }
1550
1551                        new_fields.push(Field::new(f.name().clone(), ret.into_owned()));
1552                    }
1553
1554                    if new_fields.is_empty() {
1555                        dtype
1556                    } else {
1557                        assert_eq!(new_fields.len(), fields.len());
1558                        Cow::Owned(DataType::Struct(new_fields))
1559                    }
1560                },
1561                _ => unreachable!(),
1562            },
1563            #[cfg(feature = "dtype-extension")]
1564            DataType::Extension(..) => match dtype {
1565                Cow::Owned(DataType::Extension(ext, mut storage)) => {
1566                    self.visit_ref_mut(storage.as_mut())?;
1567                    Cow::Owned(DataType::Extension(ext, storage))
1568                },
1569                Cow::Borrowed(DataType::Extension(ext, storage)) => {
1570                    let ret = self.visit_rec(Cow::Borrowed(storage.as_ref()))?;
1571
1572                    if std::ptr::eq(ret.as_ref(), storage.as_ref()) {
1573                        dtype
1574                    } else {
1575                        Cow::Owned(DataType::Extension(ext.clone(), Box::new(ret.into_owned())))
1576                    }
1577                },
1578                _ => unreachable!(),
1579            },
1580            _ => {
1581                debug_assert!(!dtype.is_nested());
1582                dtype
1583            },
1584        };
1585
1586        (self.visitor_fn)(dtype)
1587    }
1588
1589    /// `dtype` will be set to an unspecified value if this returns an error.
1590    fn visit_ref_mut(&mut self, dtype: &mut DataType) -> PolarsResult<()> {
1591        *dtype = self
1592            .visit_rec(Cow::Owned(std::mem::replace(dtype, DataType::Null)))?
1593            .into_owned();
1594
1595        Ok(())
1596    }
1597}
1598
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds `List(Array(Float64, 10))` and returns `(list, array)`.
    #[cfg(feature = "dtype-array")]
    fn nested_float_list() -> (DataType, DataType) {
        let array_type = DataType::Array(Box::new(DataType::Float64), 10);
        let list_type = DataType::List(Box::new(array_type.clone()));
        (list_type, array_type)
    }

    /// Without compound types only the leaf dtype is reported.
    #[cfg(feature = "dtype-array")]
    #[test]
    fn test_unpack_primitive_dtypes() {
        let (list_type, _array_type) = nested_float_list();

        let mut expected = PlHashSet::new();
        expected.extend([DataType::Float64]);

        assert_eq!(unpack_dtypes(&list_type, false), expected)
    }

    /// With compound types every nesting level is reported as well.
    #[cfg(feature = "dtype-array")]
    #[test]
    fn test_unpack_compound_dtypes() {
        let (list_type, array_type) = nested_float_list();

        let result = unpack_dtypes(&list_type, true);

        let mut expected = PlHashSet::new();
        expected.extend([list_type, array_type, DataType::Float64]);

        assert_eq!(result, expected)
    }
}