1use std::borrow::Cow;
2use std::collections::BTreeMap;
3
4use arrow::datatypes::{
5 DTYPE_CATEGORICAL_NEW, DTYPE_ENUM_VALUES_LEGACY, DTYPE_ENUM_VALUES_NEW, MAINTAIN_PL_TYPE,
6 Metadata, PL_KEY,
7};
8#[cfg(feature = "dtype-array")]
9use polars_utils::format_tuple;
10use polars_utils::itertools::Itertools;
11#[cfg(any(feature = "serde-lazy", feature = "serde"))]
12use serde::{Deserialize, Serialize};
13pub use temporal::time_zone::TimeZone;
14
15use super::*;
16#[cfg(feature = "object")]
17use crate::chunked_array::object::registry::get_object_physical_type;
18#[cfg(feature = "dtype-extension")]
19pub use crate::datatypes::extension::ExtensionTypeInstance;
20use crate::utils::materialize_dyn_int;
21
22pub trait MetaDataExt: IntoMetadata {
23 fn pl_enum_metadata(&self) -> Option<&str> {
24 let md = self.into_metadata_ref();
25 let values = md
26 .get(DTYPE_ENUM_VALUES_NEW)
27 .or_else(|| md.get(DTYPE_ENUM_VALUES_LEGACY));
28 Some(values?.as_str())
29 }
30
31 fn pl_categorical_metadata(&self) -> Option<&str> {
32 Some(
37 self.into_metadata_ref()
38 .get(DTYPE_CATEGORICAL_NEW)?
39 .as_str(),
40 )
41 }
42
43 fn maintain_type(&self) -> bool {
44 let metadata = self.into_metadata_ref();
45 metadata.get(PL_KEY).map(|s| s.as_str()) == Some(MAINTAIN_PL_TYPE)
46 }
47}
48
// `Metadata` uses the default method bodies provided by `MetaDataExt`.
impl MetaDataExt for Metadata {}
/// Types that can hand out a reference to Arrow field [`Metadata`].
pub trait IntoMetadata {
    /// Borrows the metadata map.
    // The `into_` prefix on a `&self` method trips clippy's naming lint, hence the allow.
    #[allow(clippy::wrong_self_convention)]
    fn into_metadata_ref(&self) -> &Metadata;
}
54
impl IntoMetadata for Metadata {
    /// A `Metadata` value is its own metadata; returns `self` unchanged.
    fn into_metadata_ref(&self) -> &Metadata {
        self
    }
}
60
/// The flavour of a [`DataType::Unknown`], i.e. a dtype that is not (yet) concrete.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
#[cfg_attr(
    any(feature = "serde", feature = "serde-lazy"),
    derive(Serialize, Deserialize)
)]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum UnknownKind {
    /// A dynamic integer. The carried value is passed to `materialize_dyn_int`
    /// to pick the concrete integer dtype (see [`UnknownKind::materialize`]).
    Int(i128),
    /// A dynamic float; materializes to `Float64`.
    Float,
    /// A dynamic string; materializes to `String`.
    Str,
    /// Nothing is known about the type; it cannot be materialized.
    #[default]
    Any,
}
76
77impl UnknownKind {
78 pub fn materialize(&self) -> Option<DataType> {
79 let dtype = match self {
80 UnknownKind::Int(v) => materialize_dyn_int(*v).dtype(),
81 UnknownKind::Float => DataType::Float64,
82 UnknownKind::Str => DataType::String,
83 UnknownKind::Any => return None,
84 };
85 Some(dtype)
86 }
87}
88
/// The logical data types supported by this crate.
///
/// Several variants only exist when the corresponding cargo feature is enabled.
#[derive(Clone)]
pub enum DataType {
    Boolean,
    UInt8,
    UInt16,
    UInt32,
    UInt64,
    UInt128,
    Int8,
    Int16,
    Int32,
    Int64,
    Int128,
    Float16,
    Float32,
    Float64,
    /// Decimal with `(precision, scale)`. `try_to_arrow` asserts `1 <= precision <= 38`;
    /// `to_physical` maps it to `Int128`.
    #[cfg(feature = "dtype-decimal")]
    Decimal(usize, usize),
    /// String data.
    String,
    Binary,
    BinaryOffset,
    /// Calendar date; `to_physical` maps it to `Int32`.
    Date,
    /// Timestamp in the given time unit with an optional time zone;
    /// `to_physical` maps it to `Int64`.
    Datetime(TimeUnit, Option<TimeZone>),
    /// Duration in the given time unit; `to_physical` maps it to `Int64`.
    Duration(TimeUnit),
    /// Time of day; `to_physical` maps it to `Int64`.
    Time,
    /// Nested type with an inner dtype and a fixed width.
    #[cfg(feature = "dtype-array")]
    Array(Box<DataType>, usize),
    /// Nested type with an inner dtype.
    List(Box<DataType>),
    /// Opaque object type; the string is what `Display` prints for it.
    #[cfg(feature = "object")]
    Object(&'static str),
    Null,
    /// Categorical backed by a `Categories` instance and its mapping.
    #[cfg(feature = "dtype-categorical")]
    Categorical(Arc<Categories>, Arc<CategoricalMapping>),
    /// Enum backed by a `FrozenCategories` instance and its mapping.
    #[cfg(feature = "dtype-categorical")]
    Enum(Arc<FrozenCategories>, Arc<CategoricalMapping>),
    /// Struct with the given fields.
    #[cfg(feature = "dtype-struct")]
    Struct(Vec<Field>),
    /// Extension type instance together with its storage dtype.
    #[cfg(feature = "dtype-extension")]
    Extension(ExtensionTypeInstance, Box<DataType>),
    /// A dtype that is not (fully) known; see [`UnknownKind`].
    Unknown(UnknownKind),
}
146
/// Types that can expose a borrowed [`DataType`].
pub trait AsRefDataType {
    /// Borrows the data type.
    fn as_ref_dtype(&self) -> &DataType;
}
150
151impl Hash for DataType {
152 fn hash<H: Hasher>(&self, state: &mut H) {
153 std::mem::discriminant(self).hash(state)
154 }
155}
156
157impl PartialEq for DataType {
158 fn eq(&self, other: &Self) -> bool {
159 use DataType::*;
160 {
161 match (self, other) {
162 #[cfg(feature = "dtype-categorical")]
163 (Categorical(cats_l, _), Categorical(cats_r, _)) => Arc::ptr_eq(cats_l, cats_r),
164 #[cfg(feature = "dtype-categorical")]
165 (Enum(fcats_l, _), Enum(fcats_r, _)) => Arc::ptr_eq(fcats_l, fcats_r),
166 (Datetime(tu_l, tz_l), Datetime(tu_r, tz_r)) => tu_l == tu_r && tz_l == tz_r,
167 (List(left_inner), List(right_inner)) => left_inner == right_inner,
168 #[cfg(feature = "dtype-duration")]
169 (Duration(tu_l), Duration(tu_r)) => tu_l == tu_r,
170 #[cfg(feature = "dtype-decimal")]
171 (Decimal(p1, s1), Decimal(p2, s2)) => (p1, s1) == (p2, s2),
172 #[cfg(feature = "object")]
173 (Object(lhs), Object(rhs)) => lhs == rhs,
174 #[cfg(feature = "dtype-struct")]
175 (Struct(lhs), Struct(rhs)) => {
176 std::ptr::eq(Vec::as_ptr(lhs), Vec::as_ptr(rhs)) || lhs == rhs
177 },
178 #[cfg(feature = "dtype-array")]
179 (Array(left_inner, left_width), Array(right_inner, right_width)) => {
180 left_width == right_width && left_inner == right_inner
181 },
182 #[cfg(feature = "dtype-extension")]
183 (Extension(ext_l, storage_l), Extension(ext_r, storage_r)) => {
184 ext_l == ext_r && storage_l == storage_r
185 },
186 (Unknown(l), Unknown(r)) => match (l, r) {
187 (UnknownKind::Int(_), UnknownKind::Int(_)) => true,
188 _ => l == r,
189 },
190 _ => std::mem::discriminant(self) == std::mem::discriminant(other),
191 }
192 }
193 }
194}
195
// Marker impl: declares the `PartialEq` implementation for `DataType` a full equivalence relation.
impl Eq for DataType {}
197
198impl DataType {
199 pub const IDX_DTYPE: Self = {
200 #[cfg(not(feature = "bigidx"))]
201 {
202 DataType::UInt32
203 }
204 #[cfg(feature = "bigidx")]
205 {
206 DataType::UInt64
207 }
208 };
209
210 pub fn pretty_format(&self) -> String {
211 match self {
212 #[cfg(feature = "dtype-struct")]
213 Self::Struct(fields) => {
214 let formatted_fields = fields
215 .iter()
216 .map(|field| format!("{}: {}", field.name, field.dtype.pretty_format()))
217 .collect::<Vec<String>>()
218 .join(", ");
219 format!("struct {{{}}}", formatted_fields)
220 },
221 Self::List(inner_dtype) => {
222 let formatted_dtype = inner_dtype.pretty_format();
223 format!("list[{}]", formatted_dtype)
224 },
225 #[cfg(feature = "dtype-array")]
226 Self::Array(inner_dtype, size) => {
227 let formatted_dtype = inner_dtype.pretty_format();
228 format!("array[{}, {}]", formatted_dtype, size)
229 },
230 _ => {
231 format!("{}", self)
232 },
233 }
234 }
235
236 pub fn value_within_range(&self, other: AnyValue) -> bool {
237 use DataType::*;
238 match self {
239 UInt8 => other.extract::<u8>().is_some(),
240 #[cfg(feature = "dtype-u16")]
241 UInt16 => other.extract::<u16>().is_some(),
242 UInt32 => other.extract::<u32>().is_some(),
243 UInt64 => other.extract::<u64>().is_some(),
244 #[cfg(feature = "dtype-u128")]
245 UInt128 => other.extract::<u128>().is_some(),
246 #[cfg(feature = "dtype-i8")]
247 Int8 => other.extract::<i8>().is_some(),
248 #[cfg(feature = "dtype-i16")]
249 Int16 => other.extract::<i16>().is_some(),
250 Int32 => other.extract::<i32>().is_some(),
251 Int64 => other.extract::<i64>().is_some(),
252 #[cfg(feature = "dtype-i128")]
253 Int128 => other.extract::<i128>().is_some(),
254 _ => false,
255 }
256 }
257
258 #[cfg(feature = "dtype-struct")]
260 pub fn _month_days_ns_struct_type() -> Self {
261 DataType::Struct(vec![
262 Field::new(PlSmallStr::from_static("months"), DataType::Int32),
263 Field::new(PlSmallStr::from_static("days"), DataType::Int32),
264 Field::new(
265 PlSmallStr::from_static("nanoseconds"),
266 DataType::Duration(TimeUnit::Nanoseconds),
267 ),
268 ])
269 }
270
271 pub fn is_known(&self) -> bool {
273 match self {
274 DataType::List(inner) => inner.is_known(),
275 #[cfg(feature = "dtype-array")]
276 DataType::Array(inner, _) => inner.is_known(),
277 #[cfg(feature = "dtype-struct")]
278 DataType::Struct(fields) => fields.iter().all(|fld| fld.dtype.is_known()),
279 DataType::Unknown(_) => false,
280 _ => true,
281 }
282 }
283
284 pub fn materialize_unknown(self, allow_unknown: bool) -> PolarsResult<DataType> {
287 match self {
288 DataType::Unknown(u) => match u.materialize() {
289 Some(known) => Ok(known),
290 None => {
291 if allow_unknown {
292 Ok(DataType::Unknown(u))
293 } else {
294 polars_bail!(SchemaMismatch: "failed to materialize unknown type")
295 }
296 },
297 },
298 DataType::List(inner) => Ok(DataType::List(Box::new(
299 inner.materialize_unknown(allow_unknown)?,
300 ))),
301 #[cfg(feature = "dtype-array")]
302 DataType::Array(inner, size) => Ok(DataType::Array(
303 Box::new(inner.materialize_unknown(allow_unknown)?),
304 size,
305 )),
306 #[cfg(feature = "dtype-struct")]
307 DataType::Struct(fields) => Ok(DataType::Struct(
308 fields
309 .into_iter()
310 .map(|f| {
311 PolarsResult::Ok(Field::new(
312 f.name,
313 f.dtype.materialize_unknown(allow_unknown)?,
314 ))
315 })
316 .try_collect_vec()?,
317 )),
318 _ => Ok(self),
319 }
320 }
321
322 #[cfg(feature = "dtype-array")]
323 pub fn get_shape(&self) -> Option<Vec<usize>> {
325 fn get_shape_impl(dt: &DataType, shape: &mut Vec<usize>) {
326 if let DataType::Array(inner, size) = dt {
327 shape.push(*size);
328 get_shape_impl(inner, shape);
329 }
330 }
331
332 if let DataType::Array(inner, size) = self {
333 let mut shape = vec![*size];
334 get_shape_impl(inner, &mut shape);
335 Some(shape)
336 } else {
337 None
338 }
339 }
340
341 pub fn inner_dtype(&self) -> Option<&DataType> {
343 match self {
344 DataType::List(inner) => Some(inner),
345 #[cfg(feature = "dtype-array")]
346 DataType::Array(inner, _) => Some(inner),
347 _ => None,
348 }
349 }
350
351 pub fn into_inner_dtype(self) -> Option<DataType> {
353 match self {
354 DataType::List(inner) => Some(*inner),
355 #[cfg(feature = "dtype-array")]
356 DataType::Array(inner, _) => Some(*inner),
357 _ => None,
358 }
359 }
360
361 pub fn try_into_inner_dtype(self) -> PolarsResult<DataType> {
363 match self {
364 DataType::List(inner) => Ok(*inner),
365 #[cfg(feature = "dtype-array")]
366 DataType::Array(inner, _) => Ok(*inner),
367 dt => polars_bail!(InvalidOperation: "cannot get inner datatype of `{dt}`"),
368 }
369 }
370
371 pub fn leaf_dtype(&self) -> &DataType {
373 let mut prev = self;
374 while let Some(dtype) = prev.inner_dtype() {
375 prev = dtype
376 }
377 prev
378 }
379
380 #[cfg(feature = "dtype-array")]
381 pub fn array_leaf_dtype(&self) -> Option<&DataType> {
383 let mut prev = self;
384 match prev {
385 DataType::Array(_, _) => {
386 while let DataType::Array(inner, _) = &prev {
387 prev = inner;
388 }
389 Some(prev)
390 },
391 _ => None,
392 }
393 }
394
395 pub fn cast_leaf(&self, to: DataType) -> DataType {
397 use DataType::*;
398 match self {
399 List(inner) => List(Box::new(inner.cast_leaf(to))),
400 #[cfg(feature = "dtype-array")]
401 Array(inner, size) => Array(Box::new(inner.cast_leaf(to)), *size),
402 _ => to,
403 }
404 }
405
406 pub fn map_leaves<F: FnMut(DataType) -> DataType>(self, f: &mut F) -> DataType {
409 use DataType::*;
410 match self {
411 List(inner) => List(Box::new(inner.map_leaves(f))),
412 #[cfg(feature = "dtype-array")]
413 Array(inner, size) => Array(Box::new(inner.map_leaves(f)), size),
414 #[cfg(feature = "dtype-struct")]
415 Struct(fields) => {
416 let new_fields = fields
417 .into_iter()
418 .map(|fld| Field::new(fld.name, fld.dtype.map_leaves(f)))
419 .collect();
420 Struct(new_fields)
421 },
422 #[cfg(feature = "dtype-extension")]
423 Extension(ext, storage) => Extension(ext, Box::new(storage.map_leaves(f))),
424 _ => f(self),
425 }
426 }
427
    /// Reports whether a cast from `self` to `to` is known to be possible.
    ///
    /// Returns `Some(true)` / `Some(false)` when this function can decide, and `None`
    /// for combinations it makes no statement about. Note that arm order matters: the
    /// categorical and object arms are checked before the more general ones.
    pub fn can_cast_to(&self, to: &DataType) -> Option<bool> {
        // Identity casts are always possible.
        if self == to {
            return Some(true);
        }
        if self.is_primitive_numeric() && to.is_primitive_numeric() {
            return Some(true);
        }

        // A null dtype is castable to any target.
        if self.is_null() {
            return Some(true);
        }

        use DataType as D;
        Some(match (self, to) {
            // Categorical/Enum <-> Binary is rejected.
            #[cfg(feature = "dtype-categorical")]
            (D::Categorical(_, _) | D::Enum(_, _), D::Binary)
            | (D::Binary, D::Categorical(_, _) | D::Enum(_, _)) => false,
            // Categorical/Enum <-> String is allowed.
            #[cfg(feature = "dtype-categorical")]
            (D::Categorical(_, _) | D::Enum(_, _), D::String)
            | (D::String, D::Categorical(_, _) | D::Enum(_, _)) => true,

            // Objects only cast to objects.
            #[cfg(feature = "object")]
            (D::Object(_), D::Object(_)) => true,
            #[cfg(feature = "object")]
            (D::Object(_), _) | (_, D::Object(_)) => false,

            // Boolean on either side: decided by the other dtype.
            (D::Boolean, dt) | (dt, D::Boolean) => match dt {
                dt if dt.is_primitive_numeric() => true,
                #[cfg(feature = "dtype-decimal")]
                D::Decimal(_, _) => true,
                D::String | D::Binary => true,
                _ => false,
            },

            // Nested types defer to their inner dtypes; `?` propagates an undecided `None`.
            (D::List(from), D::List(to)) => from.can_cast_to(to)?,
            #[cfg(feature = "dtype-array")]
            (D::Array(from, l_width), D::Array(to, r_width)) => {
                l_width == r_width && from.can_cast_to(to)?
            },
            #[cfg(feature = "dtype-struct")]
            (D::Struct(l_fields), D::Struct(r_fields)) => {
                // An empty source struct is accepted for any target struct.
                if l_fields.is_empty() {
                    return Some(true);
                }

                if l_fields.len() != r_fields.len() {
                    return Some(false);
                }

                // Fields are compared by position; names are not looked at.
                for (l, r) in l_fields.iter().zip(r_fields) {
                    if !l.dtype().can_cast_to(r.dtype())? {
                        return Some(false);
                    }
                }

                true
            },

            // No statement is made about any other combination.
            _ => return None,
        })
    }
494
495 pub fn implode(self) -> DataType {
496 DataType::List(Box::new(self))
497 }
498
499 #[must_use]
501 pub fn to_physical(&self) -> DataType {
502 use DataType::*;
503 match self {
504 Date => Int32,
505 Datetime(_, _) => Int64,
506 Duration(_) => Int64,
507 Time => Int64,
508 #[cfg(feature = "dtype-decimal")]
509 Decimal(_, _) => Int128,
510 #[cfg(feature = "dtype-categorical")]
511 Categorical(cats, _) => cats.physical().dtype(),
512 #[cfg(feature = "dtype-categorical")]
513 Enum(fcats, _) => fcats.physical().dtype(),
514 #[cfg(feature = "dtype-array")]
515 Array(dt, width) => Array(Box::new(dt.to_physical()), *width),
516 List(dt) => List(Box::new(dt.to_physical())),
517 #[cfg(feature = "dtype-struct")]
518 Struct(fields) => {
519 let new_fields = fields
520 .iter()
521 .map(|s| Field::new(s.name().clone(), s.dtype().to_physical()))
522 .collect();
523 Struct(new_fields)
524 },
525 #[cfg(feature = "dtype-extension")]
526 Extension(_, storage) => storage.to_physical(),
527 _ => self.clone(),
528 }
529 }
530
531 #[must_use]
532 pub fn to_storage(&self) -> DataType {
533 use DataType::*;
534 match self {
535 #[cfg(feature = "dtype-extension")]
536 Extension(_, storage) => storage.to_storage(),
537 _ => self.clone(),
538 }
539 }
540
541 pub fn is_supported_list_arithmetic_input(&self) -> bool {
542 self.is_primitive_numeric() || self.is_bool() || self.is_null()
543 }
544
545 pub fn is_logical(&self) -> bool {
547 self != &self.to_physical()
548 }
549
550 pub fn is_temporal(&self) -> bool {
552 use DataType::*;
553 matches!(self, Date | Datetime(_, _) | Duration(_) | Time)
554 }
555
556 pub fn is_primitive(&self) -> bool {
559 self.is_primitive_numeric()
560 | matches!(
561 self,
562 DataType::Boolean | DataType::String | DataType::Binary
563 )
564 }
565
566 pub fn is_primitive_numeric(&self) -> bool {
568 self.is_float() || self.is_integer()
569 }
570
571 pub fn is_bool(&self) -> bool {
573 matches!(self, DataType::Boolean)
574 }
575
576 pub fn is_list(&self) -> bool {
578 matches!(self, DataType::List(_))
579 }
580
581 pub fn is_array(&self) -> bool {
583 #[cfg(feature = "dtype-array")]
584 {
585 matches!(self, DataType::Array(_, _))
586 }
587 #[cfg(not(feature = "dtype-array"))]
588 {
589 false
590 }
591 }
592
593 pub fn is_nested(&self) -> bool {
594 match self {
595 DataType::List(_) => true,
596 #[cfg(feature = "dtype-array")]
597 DataType::Array(_, _) => true,
598 #[cfg(feature = "dtype-struct")]
599 DataType::Struct(_) => true,
600 #[cfg(feature = "dtype-extension")]
601 DataType::Extension(_, storage) => storage.is_nested(),
602 _ => false,
603 }
604 }
605
606 pub fn is_struct(&self) -> bool {
608 #[cfg(feature = "dtype-struct")]
609 {
610 matches!(self, DataType::Struct(_))
611 }
612 #[cfg(not(feature = "dtype-struct"))]
613 {
614 false
615 }
616 }
617
618 pub fn is_binary(&self) -> bool {
619 matches!(self, DataType::Binary)
620 }
621
622 pub fn is_date(&self) -> bool {
623 matches!(self, DataType::Date)
624 }
625 pub fn is_datetime(&self) -> bool {
626 matches!(self, DataType::Datetime(..))
627 }
628
629 pub fn is_duration(&self) -> bool {
630 matches!(self, DataType::Duration(..))
631 }
632
633 pub fn is_object(&self) -> bool {
634 #[cfg(feature = "object")]
635 {
636 matches!(self, DataType::Object(_))
637 }
638 #[cfg(not(feature = "object"))]
639 {
640 false
641 }
642 }
643
644 pub fn is_null(&self) -> bool {
645 matches!(self, DataType::Null)
646 }
647
648 pub fn contains_views(&self) -> bool {
649 use DataType::*;
650 match self {
651 Binary | String => true,
652 List(inner) => inner.contains_views(),
653 #[cfg(feature = "dtype-array")]
654 Array(inner, _) => inner.contains_views(),
655 #[cfg(feature = "dtype-struct")]
656 Struct(fields) => fields.iter().any(|field| field.dtype.contains_views()),
657 _ => false,
658 }
659 }
660
661 pub fn contains_categoricals(&self) -> bool {
662 use DataType::*;
663 match self {
664 #[cfg(feature = "dtype-categorical")]
665 Categorical(_, _) => true,
666 List(inner) => inner.contains_categoricals(),
667 #[cfg(feature = "dtype-array")]
668 Array(inner, _) => inner.contains_categoricals(),
669 #[cfg(feature = "dtype-struct")]
670 Struct(fields) => fields
671 .iter()
672 .any(|field| field.dtype.contains_categoricals()),
673 _ => false,
674 }
675 }
676
677 pub fn contains_enums(&self) -> bool {
678 use DataType::*;
679 match self {
680 #[cfg(feature = "dtype-categorical")]
681 Enum(_, _) => true,
682 List(inner) => inner.contains_enums(),
683 #[cfg(feature = "dtype-array")]
684 Array(inner, _) => inner.contains_enums(),
685 #[cfg(feature = "dtype-struct")]
686 Struct(fields) => fields.iter().any(|field| field.dtype.contains_enums()),
687 _ => false,
688 }
689 }
690
691 pub fn contains_objects(&self) -> bool {
692 use DataType::*;
693 match self {
694 #[cfg(feature = "object")]
695 Object(_) => true,
696 List(inner) => inner.contains_objects(),
697 #[cfg(feature = "dtype-array")]
698 Array(inner, _) => inner.contains_objects(),
699 #[cfg(feature = "dtype-struct")]
700 Struct(fields) => fields.iter().any(|field| field.dtype.contains_objects()),
701 _ => false,
702 }
703 }
704
705 pub fn contains_list_recursive(&self) -> bool {
706 use DataType as D;
707 match self {
708 D::List(_) => true,
709 #[cfg(feature = "dtype-array")]
710 D::Array(inner, _) => inner.contains_list_recursive(),
711 #[cfg(feature = "dtype-struct")]
712 D::Struct(fields) => fields
713 .iter()
714 .any(|field| field.dtype.contains_list_recursive()),
715 _ => false,
716 }
717 }
718
719 pub fn contains_unknown(&self) -> bool {
720 use DataType as D;
721 match self {
722 D::Unknown(_) => true,
723 D::List(inner) => inner.contains_unknown(),
724 #[cfg(feature = "dtype-array")]
725 D::Array(inner, _) => inner.contains_unknown(),
726 #[cfg(feature = "dtype-struct")]
727 D::Struct(fields) => fields.iter().any(|field| field.dtype.contains_unknown()),
728 _ => false,
729 }
730 }
731
732 pub fn contains_dtype_recursive(&self, dtype: &DataType) -> bool {
733 if self == dtype {
734 return true;
735 }
736 use DataType as D;
737 match self {
738 D::List(inner) => inner.contains_dtype_recursive(dtype),
739 #[cfg(feature = "dtype-array")]
740 D::Array(inner, _) => inner.contains_dtype_recursive(dtype),
741 #[cfg(feature = "dtype-struct")]
742 D::Struct(fields) => fields
743 .iter()
744 .any(|field| field.dtype.contains_dtype_recursive(dtype)),
745 _ => false,
746 }
747 }
748
749 pub fn is_ord(&self) -> bool {
751 let phys = self.to_physical();
752 phys.is_primitive_numeric()
753 || self.is_decimal()
754 || matches!(
755 phys,
756 DataType::Binary | DataType::String | DataType::Boolean
757 )
758 }
759
760 pub fn is_decimal(&self) -> bool {
762 match self {
763 #[cfg(feature = "dtype-decimal")]
764 DataType::Decimal(_, _) => true,
765 _ => false,
766 }
767 }
768
769 pub fn is_float(&self) -> bool {
772 matches!(
773 self,
774 DataType::Float16
775 | DataType::Float32
776 | DataType::Float64
777 | DataType::Unknown(UnknownKind::Float)
778 )
779 }
780
781 pub fn is_integer(&self) -> bool {
783 matches!(
784 self,
785 DataType::Int8
786 | DataType::Int16
787 | DataType::Int32
788 | DataType::Int64
789 | DataType::Int128
790 | DataType::UInt8
791 | DataType::UInt16
792 | DataType::UInt32
793 | DataType::UInt64
794 | DataType::UInt128
795 | DataType::Unknown(UnknownKind::Int(_))
796 )
797 }
798
799 pub fn is_signed_integer(&self) -> bool {
800 matches!(
802 self,
803 DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 | DataType::Int128
804 )
805 }
806
807 pub fn is_unsigned_integer(&self) -> bool {
808 matches!(
809 self,
810 DataType::UInt8
811 | DataType::UInt16
812 | DataType::UInt32
813 | DataType::UInt64
814 | DataType::UInt128,
815 )
816 }
817
818 pub fn is_string(&self) -> bool {
819 matches!(self, DataType::String | DataType::Unknown(UnknownKind::Str))
820 }
821
822 pub fn is_categorical(&self) -> bool {
823 #[cfg(feature = "dtype-categorical")]
824 {
825 matches!(self, DataType::Categorical(_, _))
826 }
827 #[cfg(not(feature = "dtype-categorical"))]
828 {
829 false
830 }
831 }
832
833 pub fn is_enum(&self) -> bool {
834 #[cfg(feature = "dtype-categorical")]
835 {
836 matches!(self, DataType::Enum(_, _))
837 }
838 #[cfg(not(feature = "dtype-categorical"))]
839 {
840 false
841 }
842 }
843
844 pub fn is_extension(&self) -> bool {
845 #[cfg(feature = "dtype-extension")]
846 {
847 matches!(self, DataType::Extension(_, _))
848 }
849 #[cfg(not(feature = "dtype-extension"))]
850 {
851 false
852 }
853 }
854
855 pub fn to_arrow_field(&self, name: PlSmallStr, compat_level: CompatLevel) -> ArrowField {
857 let field = ArrowField::new(name, self.to_arrow(compat_level), true);
858
859 if let Some(metadata) = self.to_arrow_field_metadata() {
860 field.with_metadata(metadata)
861 } else {
862 field
863 }
864 }
865
866 pub fn to_arrow_field_metadata(&self) -> Option<Metadata> {
867 match self {
868 #[cfg(feature = "dtype-categorical")]
869 DataType::Enum(fcats, _map) => {
870 let cats = fcats.categories();
871 let strings_size: usize = cats
872 .values_iter()
873 .map(|s| (s.len() + 1).ilog10() as usize + 1 + s.len())
874 .sum();
875 let mut encoded = String::with_capacity(strings_size);
876 for cat in cats.values_iter() {
877 encoded.push_str(itoa::Buffer::new().format(cat.len()));
878 encoded.push(';');
879 encoded.push_str(cat);
880 }
881 Some(BTreeMap::from([(
882 PlSmallStr::from_static(DTYPE_ENUM_VALUES_NEW),
883 PlSmallStr::from_string(encoded),
884 )]))
885 },
886 #[cfg(feature = "dtype-categorical")]
887 DataType::Categorical(cats, _) => {
888 let mut encoded = String::new();
889 encoded.push_str(itoa::Buffer::new().format(cats.name().len()));
890 encoded.push(';');
891 encoded.push_str(cats.name());
892 encoded.push_str(itoa::Buffer::new().format(cats.namespace().len()));
893 encoded.push(';');
894 encoded.push_str(cats.namespace());
895 encoded.push_str(cats.physical().as_str());
896 encoded.push(';');
897
898 Some(BTreeMap::from([(
899 PlSmallStr::from_static(DTYPE_CATEGORICAL_NEW),
900 PlSmallStr::from_string(encoded),
901 )]))
902 },
903 DataType::BinaryOffset => Some(BTreeMap::from([(
904 PlSmallStr::from_static(PL_KEY),
905 PlSmallStr::from_static(MAINTAIN_PL_TYPE),
906 )])),
907 #[cfg(feature = "dtype-extension")]
908 DataType::Extension(_ext, storage) => storage.to_arrow_field_metadata(),
909 _ => None,
910 }
911 }
912
913 pub fn max(&self) -> PolarsResult<Scalar> {
915 use DataType::*;
916 let v = match self {
917 Int8 => Scalar::from(i8::MAX),
918 Int16 => Scalar::from(i16::MAX),
919 Int32 => Scalar::from(i32::MAX),
920 Int64 => Scalar::from(i64::MAX),
921 Int128 => Scalar::from(i128::MAX),
922 UInt8 => Scalar::from(u8::MAX),
923 UInt16 => Scalar::from(u16::MAX),
924 UInt32 => Scalar::from(u32::MAX),
925 UInt64 => Scalar::from(u64::MAX),
926 UInt128 => Scalar::from(u128::MAX),
927 Float16 => Scalar::from(pf16::INFINITY),
928 Float32 => Scalar::from(f32::INFINITY),
929 Float64 => Scalar::from(f64::INFINITY),
930 #[cfg(feature = "dtype-time")]
931 Time => Scalar::new(Time, AnyValue::Time(NS_IN_DAY - 1)),
932 dt => polars_bail!(ComputeError: "cannot determine upper bound for dtype `{dt}`"),
933 };
934 Ok(v)
935 }
936
937 pub fn min(&self) -> PolarsResult<Scalar> {
939 use DataType::*;
940 let v = match self {
941 Int8 => Scalar::from(i8::MIN),
942 Int16 => Scalar::from(i16::MIN),
943 Int32 => Scalar::from(i32::MIN),
944 Int64 => Scalar::from(i64::MIN),
945 Int128 => Scalar::from(i128::MIN),
946 UInt8 => Scalar::from(u8::MIN),
947 UInt16 => Scalar::from(u16::MIN),
948 UInt32 => Scalar::from(u32::MIN),
949 UInt64 => Scalar::from(u64::MIN),
950 UInt128 => Scalar::from(u128::MIN),
951 Float16 => Scalar::from(pf16::NEG_INFINITY),
952 Float32 => Scalar::from(f32::NEG_INFINITY),
953 Float64 => Scalar::from(f64::NEG_INFINITY),
954 #[cfg(feature = "dtype-time")]
955 Time => Scalar::new(Time, AnyValue::Time(0)),
956 dt => polars_bail!(ComputeError: "cannot determine lower bound for dtype `{}`", dt),
957 };
958 Ok(v)
959 }
960
    /// Converts this dtype to an [`ArrowDataType`].
    ///
    /// # Panics
    /// Panics when `try_to_arrow` returns an error; use that method to handle the
    /// failure instead.
    #[inline]
    pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowDataType {
        self.try_to_arrow(compat_level).unwrap()
    }
966
967 #[inline]
968 pub fn try_to_arrow(&self, compat_level: CompatLevel) -> PolarsResult<ArrowDataType> {
969 use DataType::*;
970 match self {
971 Boolean => Ok(ArrowDataType::Boolean),
972 UInt8 => Ok(ArrowDataType::UInt8),
973 UInt16 => Ok(ArrowDataType::UInt16),
974 UInt32 => Ok(ArrowDataType::UInt32),
975 UInt64 => Ok(ArrowDataType::UInt64),
976 UInt128 => Ok(ArrowDataType::UInt128),
977 Int8 => Ok(ArrowDataType::Int8),
978 Int16 => Ok(ArrowDataType::Int16),
979 Int32 => Ok(ArrowDataType::Int32),
980 Int64 => Ok(ArrowDataType::Int64),
981 Int128 => Ok(ArrowDataType::Int128),
982 Float16 => Ok(ArrowDataType::Float16),
983 Float32 => Ok(ArrowDataType::Float32),
984 Float64 => Ok(ArrowDataType::Float64),
985 #[cfg(feature = "dtype-decimal")]
986 Decimal(precision, scale) => {
987 assert!(*precision >= 1 && *precision <= 38);
988 Ok(ArrowDataType::Decimal(*precision, *scale))
989 },
990 String => {
991 let dt = if compat_level.0 >= 1 {
992 ArrowDataType::Utf8View
993 } else {
994 ArrowDataType::LargeUtf8
995 };
996 Ok(dt)
997 },
998 Binary => {
999 let dt = if compat_level.0 >= 1 {
1000 ArrowDataType::BinaryView
1001 } else {
1002 ArrowDataType::LargeBinary
1003 };
1004 Ok(dt)
1005 },
1006 Date => Ok(ArrowDataType::Date32),
1007 Datetime(unit, tz) => Ok(ArrowDataType::Timestamp(
1008 unit.to_arrow(),
1009 tz.as_deref().cloned(),
1010 )),
1011 Duration(unit) => Ok(ArrowDataType::Duration(unit.to_arrow())),
1012 Time => Ok(ArrowDataType::Time64(ArrowTimeUnit::Nanosecond)),
1013 #[cfg(feature = "dtype-array")]
1014 Array(dt, width) => Ok(ArrowDataType::FixedSizeList(
1015 Box::new(dt.to_arrow_field(LIST_VALUES_NAME, compat_level)),
1016 *width,
1017 )),
1018 List(dt) => Ok(ArrowDataType::LargeList(Box::new(
1019 dt.to_arrow_field(LIST_VALUES_NAME, compat_level),
1020 ))),
1021 Null => Ok(ArrowDataType::Null),
1022 #[cfg(feature = "object")]
1023 Object(_) => Ok(get_object_physical_type()),
1024 #[cfg(feature = "dtype-categorical")]
1025 Categorical(_, _) | Enum(_, _) => {
1026 let arrow_phys = match self.cat_physical().unwrap() {
1027 CategoricalPhysical::U8 => IntegerType::UInt8,
1028 CategoricalPhysical::U16 => IntegerType::UInt16,
1029 CategoricalPhysical::U32 => IntegerType::UInt32,
1030 };
1031
1032 let values = if compat_level.0 >= 1 {
1033 ArrowDataType::Utf8View
1034 } else {
1035 ArrowDataType::LargeUtf8
1036 };
1037
1038 Ok(ArrowDataType::Dictionary(
1039 arrow_phys,
1040 Box::new(values),
1041 matches!(self, Enum(_, _)),
1042 ))
1043 },
1044 #[cfg(feature = "dtype-struct")]
1045 Struct(fields) => {
1046 let fields = fields
1047 .iter()
1048 .map(|fld| fld.to_arrow(compat_level))
1049 .collect();
1050 Ok(ArrowDataType::Struct(fields))
1051 },
1052 BinaryOffset => Ok(ArrowDataType::LargeBinary),
1053 #[cfg(feature = "dtype-extension")]
1054 Extension(typ, inner) => Ok(ArrowDataType::Extension(Box::new(
1055 arrow::datatypes::ExtensionType {
1056 name: typ.name().into(),
1057 inner: inner.try_to_arrow(compat_level)?,
1058 metadata: typ.serialize_metadata().map(|m| m.into()),
1059 },
1060 ))),
1061 Unknown(kind) => {
1062 let dt = match kind {
1063 UnknownKind::Any => ArrowDataType::Unknown,
1064 UnknownKind::Float => ArrowDataType::Float64,
1065 UnknownKind::Str => ArrowDataType::Utf8View,
1066 UnknownKind::Int(v) => {
1067 return materialize_dyn_int(*v).dtype().try_to_arrow(compat_level);
1068 },
1069 };
1070 Ok(dt)
1071 },
1072 }
1073 }
1074
1075 pub fn is_nested_null(&self) -> bool {
1076 use DataType::*;
1077 match self {
1078 Null => true,
1079 List(field) => field.is_nested_null(),
1080 #[cfg(feature = "dtype-array")]
1081 Array(field, _) => field.is_nested_null(),
1082 #[cfg(feature = "dtype-struct")]
1083 Struct(fields) => fields.iter().all(|fld| fld.dtype.is_nested_null()),
1084 _ => false,
1085 }
1086 }
1087
1088 pub fn matches_schema_type(&self, schema_type: &DataType) -> PolarsResult<bool> {
1095 match (self, schema_type) {
1096 (DataType::List(l), DataType::List(r)) => l.matches_schema_type(r),
1097 #[cfg(feature = "dtype-array")]
1098 (DataType::Array(l, sl), DataType::Array(r, sr)) => {
1099 Ok(l.matches_schema_type(r)? && sl == sr)
1100 },
1101 #[cfg(feature = "dtype-struct")]
1102 (DataType::Struct(l), DataType::Struct(r)) => {
1103 if l.len() != r.len() {
1104 polars_bail!(SchemaMismatch: "structs have different number of fields: {} vs {}", l.len(), r.len());
1105 }
1106 let mut must_cast = false;
1107 for (l, r) in l.iter().zip(r.iter()) {
1108 must_cast |= l.dtype.matches_schema_type(&r.dtype)?;
1109 }
1110 Ok(must_cast)
1111 },
1112 (DataType::Null, DataType::Null) => Ok(false),
1113 #[cfg(feature = "dtype-decimal")]
1114 (DataType::Decimal(p1, s1), DataType::Decimal(p2, s2)) => Ok((p1, s1) != (p2, s2)),
1115 (DataType::Null, _) => Ok(true),
1118 #[cfg(feature = "dtype-categorical")]
1119 (DataType::Categorical(l, _), DataType::Categorical(r, _)) => {
1120 ensure_same_categories(l, r)?;
1121 Ok(false)
1122 },
1123 #[cfg(feature = "dtype-categorical")]
1124 (DataType::Enum(l, _), DataType::Enum(r, _)) => {
1125 ensure_same_frozen_categories(l, r)?;
1126 Ok(false)
1127 },
1128
1129 (l, r) if l == r => Ok(false),
1130 (l, r) => {
1131 polars_bail!(SchemaMismatch: "type {:?} is incompatible with expected type {:?}", l, r)
1132 },
1133 }
1134 }
1135
1136 #[inline]
1137 pub fn is_unknown(&self) -> bool {
1138 matches!(self, DataType::Unknown(_))
1139 }
1140
1141 pub fn nesting_level(&self) -> usize {
1142 let mut level = 0;
1143 let mut slf = self;
1144 while let Some(inner_dtype) = slf.inner_dtype() {
1145 level += 1;
1146 slf = inner_dtype;
1147 }
1148 level
1149 }
1150
1151 #[cfg(feature = "dtype-categorical")]
1153 pub fn cat_physical(&self) -> PolarsResult<CategoricalPhysical> {
1154 match self {
1155 DataType::Categorical(cats, _) => Ok(cats.physical()),
1156 DataType::Enum(fcats, _) => Ok(fcats.physical()),
1157 _ => {
1158 polars_bail!(SchemaMismatch: "invalid dtype: expected an Enum or Categorical type, received '{:?}'", self)
1159 },
1160 }
1161 }
1162
1163 #[cfg(feature = "dtype-categorical")]
1165 pub fn cat_mapping(&self) -> PolarsResult<&Arc<CategoricalMapping>> {
1166 match self {
1167 DataType::Categorical(_, mapping) | DataType::Enum(_, mapping) => Ok(mapping),
1168 _ => {
1169 polars_bail!(SchemaMismatch: "invalid dtype: expected an Enum or Categorical type, received '{:?}'", self)
1170 },
1171 }
1172 }
1173
1174 #[cfg(feature = "dtype-categorical")]
1175 pub fn from_categories(cats: Arc<Categories>) -> Self {
1176 let mapping = cats.mapping();
1177 Self::Categorical(cats, mapping)
1178 }
1179
1180 #[cfg(feature = "dtype-categorical")]
1181 pub fn from_frozen_categories(fcats: Arc<FrozenCategories>) -> Self {
1182 let mapping = fcats.mapping().clone();
1183 Self::Enum(fcats, mapping)
1184 }
1185
1186 pub fn is_numeric(&self) -> bool {
1187 self.is_integer() || self.is_float() || self.is_decimal()
1188 }
1189
1190 pub fn numeric_to_unsigned_bit_repr(&self) -> Option<DataType> {
1191 use DataType::*;
1192
1193 Some(match self {
1194 Int8 | UInt8 => UInt8,
1195 Int16 | UInt16 | Float16 => UInt16,
1196 Int32 | UInt32 | Float32 => UInt32,
1197 Int64 | UInt64 | Float64 => UInt64,
1198 Int128 | UInt128 => UInt128,
1199 _ => return None,
1200 })
1201 }
1202}
1203
1204impl Display for DataType {
1205 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1206 let s = match self {
1207 DataType::Null => "null",
1208 DataType::Boolean => "bool",
1209 DataType::UInt8 => "u8",
1210 DataType::UInt16 => "u16",
1211 DataType::UInt32 => "u32",
1212 DataType::UInt64 => "u64",
1213 DataType::UInt128 => "u128",
1214 DataType::Int8 => "i8",
1215 DataType::Int16 => "i16",
1216 DataType::Int32 => "i32",
1217 DataType::Int64 => "i64",
1218 DataType::Int128 => "i128",
1219 DataType::Float16 => "f16",
1220 DataType::Float32 => "f32",
1221 DataType::Float64 => "f64",
1222 #[cfg(feature = "dtype-decimal")]
1223 DataType::Decimal(p, s) => return write!(f, "decimal[{p},{s}]"),
1224 DataType::String => "str",
1225 DataType::Binary => "binary",
1226 DataType::BinaryOffset => "binary[offset]",
1227 DataType::Date => "date",
1228 DataType::Datetime(tu, None) => return write!(f, "datetime[{tu}]"),
1229 DataType::Datetime(tu, Some(tz)) => return write!(f, "datetime[{tu}, {tz}]"),
1230 DataType::Duration(tu) => return write!(f, "duration[{tu}]"),
1231 DataType::Time => "time",
1232 #[cfg(feature = "dtype-array")]
1233 DataType::Array(_, _) => {
1234 let tp = self.array_leaf_dtype().unwrap();
1235
1236 let dims = self.get_shape().unwrap();
1237 let shape = if dims.len() == 1 {
1238 format!("{}", dims[0])
1239 } else {
1240 format_tuple!(dims)
1241 };
1242 return write!(f, "array[{tp}, {shape}]");
1243 },
1244 DataType::List(tp) => return write!(f, "list[{tp}]"),
1245 #[cfg(feature = "object")]
1246 DataType::Object(s) => s,
1247 #[cfg(feature = "dtype-categorical")]
1248 DataType::Categorical(_, _) => "cat",
1249 #[cfg(feature = "dtype-categorical")]
1250 DataType::Enum(_, _) => "enum",
1251 #[cfg(feature = "dtype-struct")]
1252 DataType::Struct(fields) => return write!(f, "struct[{}]", fields.len()),
1253 #[cfg(feature = "dtype-extension")]
1254 DataType::Extension(typ, _) => return write!(f, "ext[{}]", typ.0.dyn_display()),
1255 DataType::Unknown(kind) => match kind {
1256 UnknownKind::Any => "unknown",
1257 UnknownKind::Int(_) => "dyn int",
1258 UnknownKind::Float => "dyn float",
1259 UnknownKind::Str => "dyn str",
1260 },
1261 };
1262 f.write_str(s)
1263 }
1264}
1265
1266impl std::fmt::Debug for DataType {
1267 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1268 use DataType::*;
1269 match self {
1270 Boolean => write!(f, "Boolean"),
1271 UInt8 => write!(f, "UInt8"),
1272 UInt16 => write!(f, "UInt16"),
1273 UInt32 => write!(f, "UInt32"),
1274 UInt64 => write!(f, "UInt64"),
1275 UInt128 => write!(f, "UInt128"),
1276 Int8 => write!(f, "Int8"),
1277 Int16 => write!(f, "Int16"),
1278 Int32 => write!(f, "Int32"),
1279 Int64 => write!(f, "Int64"),
1280 Int128 => write!(f, "Int128"),
1281 Float16 => write!(f, "Float16"),
1282 Float32 => write!(f, "Float32"),
1283 Float64 => write!(f, "Float64"),
1284 String => write!(f, "String"),
1285 Binary => write!(f, "Binary"),
1286 BinaryOffset => write!(f, "BinaryOffset"),
1287 Date => write!(f, "Date"),
1288 Time => write!(f, "Time"),
1289 Duration(unit) => write!(f, "Duration('{unit}')"),
1290 Datetime(unit, opt_tz) => {
1291 if let Some(tz) = opt_tz {
1292 write!(f, "Datetime('{unit}', '{tz}')")
1293 } else {
1294 write!(f, "Datetime('{unit}')")
1295 }
1296 },
1297 #[cfg(feature = "dtype-decimal")]
1298 Decimal(p, s) => write!(f, "Decimal({p}, {s})"),
1299 #[cfg(feature = "dtype-array")]
1300 Array(inner, size) => write!(f, "Array({inner:?}, {size})"),
1301 List(inner) => write!(f, "List({inner:?})"),
1302 #[cfg(feature = "dtype-struct")]
1303 Struct(fields) => {
1304 let mut first = true;
1305 write!(f, "Struct({{")?;
1306 for field in fields {
1307 if !first {
1308 write!(f, ", ")?;
1309 }
1310 write!(f, "'{}': {:?}", field.name(), field.dtype())?;
1311 first = false;
1312 }
1313 write!(f, "}})")
1314 },
1315 #[cfg(feature = "dtype-categorical")]
1316 Categorical(cats, _) => {
1317 if cats.is_global() {
1318 write!(f, "Categorical")
1319 } else if cats.namespace().is_empty() && cats.physical() == CategoricalPhysical::U32
1320 {
1321 write!(f, "Categorical('{}')", cats.name())
1322 } else {
1323 write!(
1324 f,
1325 "Categorical('{}', '{}', {:?})",
1326 cats.name(),
1327 cats.namespace(),
1328 cats.physical()
1329 )
1330 }
1331 },
1332 #[cfg(feature = "dtype-categorical")]
1333 Enum(_, _) => write!(f, "Enum([...])"),
1334 #[cfg(feature = "object")]
1335 Object(_) => write!(f, "Object"),
1336 Null => write!(f, "Null"),
1337 #[cfg(feature = "dtype-extension")]
1338 Extension(typ, inner) => write!(f, "Extension({}, {inner:?})", typ.0.dyn_debug()),
1339 Unknown(kind) => write!(f, "Unknown({kind:?})"),
1340 }
1341 }
1342}
1343
1344pub fn merge_dtypes(left: &DataType, right: &DataType) -> PolarsResult<DataType> {
1345 use DataType::*;
1346 Ok(match (left, right) {
1347 #[cfg(feature = "dtype-categorical")]
1348 (Categorical(cats_l, map), Categorical(cats_r, _)) => {
1349 ensure_same_categories(cats_l, cats_r)?;
1350 Categorical(cats_l.clone(), map.clone())
1351 },
1352 #[cfg(feature = "dtype-categorical")]
1353 (Enum(fcats_l, map), Enum(fcats_r, _)) => {
1354 ensure_same_frozen_categories(fcats_l, fcats_r)?;
1355 Enum(fcats_l.clone(), map.clone())
1356 },
1357 (List(inner_l), List(inner_r)) => {
1358 let merged = merge_dtypes(inner_l, inner_r)?;
1359 List(Box::new(merged))
1360 },
1361 #[cfg(feature = "dtype-struct")]
1362 (Struct(inner_l), Struct(inner_r)) => {
1363 polars_ensure!(inner_l.len() == inner_r.len(), ComputeError: "cannot combine structs with differing amounts of fields ({} != {})", inner_l.len(), inner_r.len());
1364 let fields = inner_l.iter().zip(inner_r.iter()).map(|(l, r)| {
1365 polars_ensure!(l.name() == r.name(), ComputeError: "cannot combine structs with different fields ({} != {})", l.name(), r.name());
1366 let merged = merge_dtypes(l.dtype(), r.dtype())?;
1367 Ok(Field::new(l.name().clone(), merged))
1368 }).collect::<PolarsResult<Vec<_>>>()?;
1369 Struct(fields)
1370 },
1371 #[cfg(feature = "dtype-array")]
1372 (Array(inner_l, width_l), Array(inner_r, width_r)) => {
1373 polars_ensure!(width_l == width_r, ComputeError: "widths of FixedSizeWidth Series are not equal");
1374 let merged = merge_dtypes(inner_l, inner_r)?;
1375 Array(Box::new(merged), *width_l)
1376 },
1377 (left, right) if left == right => left.clone(),
1378 _ => polars_bail!(ComputeError: "unable to merge datatypes"),
1379 })
1380}
1381
1382fn collect_nested_types(
1383 dtype: &DataType,
1384 result: &mut PlHashSet<DataType>,
1385 include_compound_types: bool,
1386) {
1387 match dtype {
1388 DataType::List(inner) => {
1389 if include_compound_types {
1390 result.insert(dtype.clone());
1391 }
1392 collect_nested_types(inner, result, include_compound_types);
1393 },
1394 #[cfg(feature = "dtype-array")]
1395 DataType::Array(inner, _) => {
1396 if include_compound_types {
1397 result.insert(dtype.clone());
1398 }
1399 collect_nested_types(inner, result, include_compound_types);
1400 },
1401 #[cfg(feature = "dtype-struct")]
1402 DataType::Struct(fields) => {
1403 if include_compound_types {
1404 result.insert(dtype.clone());
1405 }
1406 for field in fields {
1407 collect_nested_types(field.dtype(), result, include_compound_types);
1408 }
1409 },
1410 _ => {
1411 result.insert(dtype.clone());
1412 },
1413 }
1414}
1415
1416pub fn unpack_dtypes(dtype: &DataType, include_compound_types: bool) -> PlHashSet<DataType> {
1417 let mut result = PlHashSet::new();
1418 collect_nested_types(dtype, &mut result, include_compound_types);
1419 result
1420}
1421
/// A compatibility level, stored as a raw `u16`.
///
/// `CompatLevel::oldest()` is level `0` and `CompatLevel::newest()` is level `1`.
// NOTE(review): what exactly each level is compatible with is not visible here — confirm against callers.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct CompatLevel(pub(crate) u16);
1426
1427impl CompatLevel {
1428 pub const fn newest() -> CompatLevel {
1429 CompatLevel(1)
1430 }
1431
1432 pub const fn oldest() -> CompatLevel {
1433 CompatLevel(0)
1434 }
1435
1436 #[doc(hidden)]
1439 pub fn with_level(level: u16) -> PolarsResult<CompatLevel> {
1440 if level > CompatLevel::newest().0 {
1441 polars_bail!(InvalidOperation: "invalid compat level");
1442 }
1443 Ok(CompatLevel(level))
1444 }
1445
1446 #[doc(hidden)]
1447 pub fn get_level(&self) -> u16 {
1448 self.0
1449 }
1450
1451 pub fn uses_binview_types(&self) -> bool {
1453 *self != CompatLevel::oldest()
1454 }
1455}
1456
1457impl DataType {
1458 pub fn visit_with(&self, mut visitor_fn: impl FnMut(&DataType)) {
1459 self.try_visit_with(|dtype| {
1460 visitor_fn(dtype);
1461 Ok(())
1462 })
1463 .unwrap();
1464 }
1465
1466 pub fn try_visit_with(
1467 &self,
1468 mut visitor_fn: impl FnMut(&DataType) -> PolarsResult<()>,
1469 ) -> PolarsResult<()> {
1470 DataType::try_mutate_with(Cow::Borrowed(self), |dtype| {
1471 visitor_fn(dtype.as_ref()).map(|_| dtype)
1472 })
1473 .map(|_| ())
1474 }
1475
1476 pub fn try_mutate_with<'d>(
1477 dtype: Cow<'d, DataType>,
1478 mut visitor_fn: impl FnMut(Cow<'d, DataType>) -> PolarsResult<Cow<'d, DataType>>,
1479 ) -> PolarsResult<Cow<'d, DataType>> {
1480 DtypeVisitor {
1481 visitor_fn: &mut visitor_fn,
1482 }
1483 .visit_rec(dtype)
1484 }
1485}
1486
/// Recursion state for `DataType::try_mutate_with`.
///
/// `'d` is the lifetime of borrowed dtypes flowing through the traversal; `'f` is the lifetime of
/// the borrow of the callback.
struct DtypeVisitor<'d, 'f> {
    /// Callback applied to each dtype after its nested dtypes have been processed.
    visitor_fn: &'f mut dyn FnMut(Cow<'d, DataType>) -> PolarsResult<Cow<'d, DataType>>,
}
1490
impl<'d, 'f> DtypeVisitor<'d, 'f> {
    /// Processes the dtypes nested inside `dtype`, then passes the result to `visitor_fn`.
    ///
    /// Owned inputs are updated in place. Borrowed inputs stay borrowed as long as every nested
    /// dtype comes back as the very same reference; otherwise a new owned dtype is built.
    ///
    /// # Errors
    /// Propagates the first error returned by `visitor_fn`.
    fn visit_rec(&mut self, dtype: Cow<'d, DataType>) -> PolarsResult<Cow<'d, DataType>> {
        // The outer match only inspects the variant; the inner match splits on ownership.
        let dtype = match dtype.as_ref() {
            DataType::List(_) => match dtype {
                Cow::Owned(DataType::List(mut inner)) => {
                    // Owned: rewrite the boxed inner dtype in place.
                    self.visit_ref_mut(inner.as_mut())?;
                    Cow::Owned(DataType::List(inner))
                },
                Cow::Borrowed(DataType::List(inner)) => {
                    let ret = self.visit_rec(Cow::Borrowed(inner.as_ref()))?;

                    // Same address means the inner dtype was handed back untouched, so the
                    // borrowed parent can be reused without allocating.
                    if std::ptr::eq(ret.as_ref(), inner.as_ref()) {
                        dtype
                    } else {
                        Cow::Owned(DataType::List(Box::new(ret.into_owned())))
                    }
                },
                // The outer match already established the variant.
                _ => unreachable!(),
            },
            #[cfg(feature = "dtype-array")]
            DataType::Array(..) => match dtype {
                Cow::Owned(DataType::Array(mut inner, width)) => {
                    self.visit_ref_mut(inner.as_mut())?;
                    Cow::Owned(DataType::Array(inner, width))
                },
                Cow::Borrowed(DataType::Array(inner, width)) => {
                    let ret = self.visit_rec(Cow::Borrowed(inner.as_ref()))?;

                    // Reuse the borrowed parent when the inner dtype is unchanged (same address).
                    if std::ptr::eq(ret.as_ref(), inner.as_ref()) {
                        dtype
                    } else {
                        Cow::Owned(DataType::Array(Box::new(ret.into_owned()), *width))
                    }
                },
                _ => unreachable!(),
            },
            #[cfg(feature = "dtype-struct")]
            DataType::Struct(_) => match dtype {
                Cow::Owned(DataType::Struct(mut fields)) => {
                    for f in &mut fields {
                        self.visit_ref_mut(&mut f.dtype)?;
                    }

                    Cow::Owned(DataType::Struct(fields))
                },
                Cow::Borrowed(DataType::Struct(fields)) => {
                    // Stays empty until the first field whose dtype changes.
                    let mut new_fields = vec![];

                    for (i, f) in fields.iter().enumerate() {
                        let ret = self.visit_rec(Cow::Borrowed(f.dtype()))?;

                        // Nothing has changed so far, including this field: keep scanning.
                        if std::ptr::eq(ret.as_ref(), f.dtype()) && new_fields.is_empty() {
                            continue;
                        }

                        // First change: copy over the unchanged fields that precede it.
                        if new_fields.is_empty() {
                            new_fields.reserve_exact(fields.len());
                            new_fields.extend(fields.iter().take(i).cloned());
                        }

                        // From the first change onwards every field is pushed, changed or not.
                        new_fields.push(Field::new(f.name().clone(), ret.into_owned()));
                    }

                    if new_fields.is_empty() {
                        dtype
                    } else {
                        assert_eq!(new_fields.len(), fields.len());
                        Cow::Owned(DataType::Struct(new_fields))
                    }
                },
                _ => unreachable!(),
            },
            #[cfg(feature = "dtype-extension")]
            DataType::Extension(..) => match dtype {
                Cow::Owned(DataType::Extension(ext, mut storage)) => {
                    self.visit_ref_mut(storage.as_mut())?;
                    Cow::Owned(DataType::Extension(ext, storage))
                },
                Cow::Borrowed(DataType::Extension(ext, storage)) => {
                    let ret = self.visit_rec(Cow::Borrowed(storage.as_ref()))?;

                    // Reuse the borrowed parent when the storage dtype is unchanged.
                    if std::ptr::eq(ret.as_ref(), storage.as_ref()) {
                        dtype
                    } else {
                        Cow::Owned(DataType::Extension(ext.clone(), Box::new(ret.into_owned())))
                    }
                },
                _ => unreachable!(),
            },
            _ => {
                // Every nested variant is expected to be handled by an arm above.
                debug_assert!(!dtype.is_nested());
                dtype
            },
        };

        // Children are done; now let the callback see (and possibly replace) this dtype.
        (self.visitor_fn)(dtype)
    }

    /// Runs `visit_rec` on the dtype behind `dtype` and stores the result back in place.
    ///
    /// If the visit fails, `*dtype` is left holding the `DataType::Null` placeholder.
    fn visit_ref_mut(&mut self, dtype: &mut DataType) -> PolarsResult<()> {
        // Swap in a placeholder so the current value can be moved into an owned `Cow`.
        *dtype = self
            .visit_rec(Cow::Owned(std::mem::replace(dtype, DataType::Null)))?
            .into_owned();

        Ok(())
    }
}
1598
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `(List(Array(Float64, 10)), Array(Float64, 10))`.
    #[cfg(feature = "dtype-array")]
    fn nested_fixture() -> (DataType, DataType) {
        let array_type = DataType::Array(Box::new(DataType::Float64), 10);
        let list_type = DataType::List(Box::new(array_type.clone()));
        (list_type, array_type)
    }

    /// Without compound types, only the leaf dtype is reported.
    #[cfg(feature = "dtype-array")]
    #[test]
    fn test_unpack_primitive_dtypes() {
        let (list_type, _) = nested_fixture();

        let mut expected = PlHashSet::new();
        expected.extend([DataType::Float64]);

        assert_eq!(unpack_dtypes(&list_type, false), expected);
    }

    /// With compound types, every level of the nesting is reported.
    #[cfg(feature = "dtype-array")]
    #[test]
    fn test_unpack_compound_dtypes() {
        let (list_type, array_type) = nested_fixture();

        let result = unpack_dtypes(&list_type, true);

        let mut expected = PlHashSet::new();
        expected.extend([list_type, array_type, DataType::Float64]);

        assert_eq!(result, expected);
    }
}