1use std::borrow::Cow;
2use std::collections::BTreeMap;
3
4use arrow::datatypes::{
5 DTYPE_CATEGORICAL_NEW, DTYPE_ENUM_VALUES_LEGACY, DTYPE_ENUM_VALUES_NEW, MAINTAIN_PL_TYPE,
6 Metadata, PL_KEY,
7};
8#[cfg(feature = "dtype-array")]
9use polars_utils::format_tuple;
10use polars_utils::itertools::Itertools;
11#[cfg(any(feature = "serde-lazy", feature = "serde"))]
12use serde::{Deserialize, Serialize};
13pub use temporal::time_zone::TimeZone;
14
15use super::*;
16#[cfg(feature = "object")]
17use crate::chunked_array::object::registry::get_object_physical_type;
18#[cfg(feature = "dtype-extension")]
19pub use crate::datatypes::extension::ExtensionTypeInstance;
20use crate::utils::materialize_dyn_int;
21
22pub trait MetaDataExt: IntoMetadata {
23 fn pl_enum_metadata(&self) -> Option<&str> {
24 let md = self.into_metadata_ref();
25 let values = md
26 .get(DTYPE_ENUM_VALUES_NEW)
27 .or_else(|| md.get(DTYPE_ENUM_VALUES_LEGACY));
28 Some(values?.as_str())
29 }
30
31 fn pl_categorical_metadata(&self) -> Option<&str> {
32 Some(
37 self.into_metadata_ref()
38 .get(DTYPE_CATEGORICAL_NEW)?
39 .as_str(),
40 )
41 }
42
43 fn maintain_type(&self) -> bool {
44 let metadata = self.into_metadata_ref();
45 metadata.get(PL_KEY).map(|s| s.as_str()) == Some(MAINTAIN_PL_TYPE)
46 }
47}
48
// `Metadata` uses the default method bodies of `MetaDataExt`; nothing is overridden.
impl MetaDataExt for Metadata {}
/// Types that can hand out a borrowed [`Metadata`] map.
pub trait IntoMetadata {
    /// Borrows the underlying [`Metadata`].
    // The `into_` prefix on a `&self` method triggers clippy's naming lint, hence the allow.
    #[allow(clippy::wrong_self_convention)]
    fn into_metadata_ref(&self) -> &Metadata;
}
54
/// `Metadata` is its own metadata source.
impl IntoMetadata for Metadata {
    /// Returns `self` unchanged.
    fn into_metadata_ref(&self) -> &Metadata {
        self
    }
}
60
/// The flavour of a not-yet-resolved ("dynamic") data type.
///
/// See [`UnknownKind::materialize`] for the concrete dtype each kind resolves to.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
#[cfg_attr(
    any(feature = "serde", feature = "serde-lazy"),
    derive(Serialize, Deserialize)
)]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum UnknownKind {
    /// A dynamic integer; the carried value is handed to `materialize_dyn_int`
    /// to select the concrete integer dtype.
    Int(i128),
    /// A dynamic float; materializes to `Float64`.
    Float,
    /// A dynamic string; materializes to `String`.
    Str,
    /// Nothing is known about the type; it cannot be materialized.
    #[default]
    Any,
}
76
77impl UnknownKind {
78 pub fn materialize(&self) -> Option<DataType> {
79 let dtype = match self {
80 UnknownKind::Int(v) => materialize_dyn_int(*v).dtype(),
81 UnknownKind::Float => DataType::Float64,
82 UnknownKind::Str => DataType::String,
83 UnknownKind::Any => return None,
84 };
85 Some(dtype)
86 }
87}
88
/// The logical data types supported by this crate.
///
/// Several variants only exist when the matching cargo feature is enabled.
#[derive(Clone)]
pub enum DataType {
    Boolean,
    UInt8,
    UInt16,
    UInt32,
    UInt64,
    UInt128,
    Int8,
    Int16,
    Int32,
    Int64,
    Int128,
    Float16,
    Float32,
    Float64,
    /// Decimal with `(precision, scale)`; `try_to_arrow` asserts `1 <= precision <= 38`.
    #[cfg(feature = "dtype-decimal")]
    Decimal(usize, usize),
    String,
    Binary,
    BinaryOffset,
    Date,
    /// Datetime with a time unit and an optional time zone.
    Datetime(TimeUnit, Option<TimeZone>),
    /// Duration expressed in the given time unit.
    Duration(TimeUnit),
    Time,
    /// Fixed-width array: inner dtype and width.
    #[cfg(feature = "dtype-array")]
    Array(Box<DataType>, usize),
    /// Variable-length list of the inner dtype.
    List(Box<DataType>),
    /// Opaque object type; the string is what `Display` prints for it.
    #[cfg(feature = "object")]
    Object(&'static str),
    Null,
    /// Categorical backed by shared `Categories` and their mapping.
    #[cfg(feature = "dtype-categorical")]
    Categorical(Arc<Categories>, Arc<CategoricalMapping>),
    /// Enum backed by `FrozenCategories` and their mapping.
    #[cfg(feature = "dtype-categorical")]
    Enum(Arc<FrozenCategories>, Arc<CategoricalMapping>),
    /// Struct made of named, typed fields.
    #[cfg(feature = "dtype-struct")]
    Struct(Vec<Field>),
    /// Extension type instance together with its storage dtype.
    #[cfg(feature = "dtype-extension")]
    Extension(ExtensionTypeInstance, Box<DataType>),
    /// A type that has not been resolved yet; see [`UnknownKind`].
    Unknown(UnknownKind),
}
146
/// Implemented by values that can expose a borrowed [`DataType`].
pub trait AsRefDataType {
    /// Borrows the data type.
    fn as_ref_dtype(&self) -> &DataType;
}
150
151impl Hash for DataType {
152 fn hash<H: Hasher>(&self, state: &mut H) {
153 std::mem::discriminant(self).hash(state)
154 }
155}
156
157impl PartialEq for DataType {
158 fn eq(&self, other: &Self) -> bool {
159 use DataType::*;
160 {
161 match (self, other) {
162 #[cfg(feature = "dtype-categorical")]
163 (Categorical(cats_l, _), Categorical(cats_r, _)) => Arc::ptr_eq(cats_l, cats_r),
164 #[cfg(feature = "dtype-categorical")]
165 (Enum(fcats_l, _), Enum(fcats_r, _)) => Arc::ptr_eq(fcats_l, fcats_r),
166 (Datetime(tu_l, tz_l), Datetime(tu_r, tz_r)) => tu_l == tu_r && tz_l == tz_r,
167 (List(left_inner), List(right_inner)) => left_inner == right_inner,
168 #[cfg(feature = "dtype-duration")]
169 (Duration(tu_l), Duration(tu_r)) => tu_l == tu_r,
170 #[cfg(feature = "dtype-decimal")]
171 (Decimal(p1, s1), Decimal(p2, s2)) => (p1, s1) == (p2, s2),
172 #[cfg(feature = "object")]
173 (Object(lhs), Object(rhs)) => lhs == rhs,
174 #[cfg(feature = "dtype-struct")]
175 (Struct(lhs), Struct(rhs)) => {
176 std::ptr::eq(Vec::as_ptr(lhs), Vec::as_ptr(rhs)) || lhs == rhs
177 },
178 #[cfg(feature = "dtype-array")]
179 (Array(left_inner, left_width), Array(right_inner, right_width)) => {
180 left_width == right_width && left_inner == right_inner
181 },
182 (Unknown(l), Unknown(r)) => match (l, r) {
183 (UnknownKind::Int(_), UnknownKind::Int(_)) => true,
184 _ => l == r,
185 },
186 _ => std::mem::discriminant(self) == std::mem::discriminant(other),
187 }
188 }
189 }
190}
191
// Marker impl: declares the `PartialEq` relation on `DataType` to be a full equivalence.
impl Eq for DataType {}
193
194impl DataType {
195 pub const IDX_DTYPE: Self = {
196 #[cfg(not(feature = "bigidx"))]
197 {
198 DataType::UInt32
199 }
200 #[cfg(feature = "bigidx")]
201 {
202 DataType::UInt64
203 }
204 };
205
206 pub fn pretty_format(&self) -> String {
207 match self {
208 #[cfg(feature = "dtype-struct")]
209 Self::Struct(fields) => {
210 let formatted_fields = fields
211 .iter()
212 .map(|field| format!("{}: {}", field.name, field.dtype.pretty_format()))
213 .collect::<Vec<String>>()
214 .join(", ");
215 format!("struct {{{}}}", formatted_fields)
216 },
217 Self::List(inner_dtype) => {
218 let formatted_dtype = inner_dtype.pretty_format();
219 format!("list[{}]", formatted_dtype)
220 },
221 #[cfg(feature = "dtype-array")]
222 Self::Array(inner_dtype, size) => {
223 let formatted_dtype = inner_dtype.pretty_format();
224 format!("array[{}, {}]", formatted_dtype, size)
225 },
226 _ => {
227 format!("{}", self)
228 },
229 }
230 }
231
232 pub fn value_within_range(&self, other: AnyValue) -> bool {
233 use DataType::*;
234 match self {
235 UInt8 => other.extract::<u8>().is_some(),
236 #[cfg(feature = "dtype-u16")]
237 UInt16 => other.extract::<u16>().is_some(),
238 UInt32 => other.extract::<u32>().is_some(),
239 UInt64 => other.extract::<u64>().is_some(),
240 #[cfg(feature = "dtype-u128")]
241 UInt128 => other.extract::<u128>().is_some(),
242 #[cfg(feature = "dtype-i8")]
243 Int8 => other.extract::<i8>().is_some(),
244 #[cfg(feature = "dtype-i16")]
245 Int16 => other.extract::<i16>().is_some(),
246 Int32 => other.extract::<i32>().is_some(),
247 Int64 => other.extract::<i64>().is_some(),
248 #[cfg(feature = "dtype-i128")]
249 Int128 => other.extract::<i128>().is_some(),
250 _ => false,
251 }
252 }
253
254 #[cfg(feature = "dtype-struct")]
256 pub fn _month_days_ns_struct_type() -> Self {
257 DataType::Struct(vec![
258 Field::new(PlSmallStr::from_static("months"), DataType::Int32),
259 Field::new(PlSmallStr::from_static("days"), DataType::Int32),
260 Field::new(
261 PlSmallStr::from_static("nanoseconds"),
262 DataType::Duration(TimeUnit::Nanoseconds),
263 ),
264 ])
265 }
266
267 pub fn is_known(&self) -> bool {
269 match self {
270 DataType::List(inner) => inner.is_known(),
271 #[cfg(feature = "dtype-array")]
272 DataType::Array(inner, _) => inner.is_known(),
273 #[cfg(feature = "dtype-struct")]
274 DataType::Struct(fields) => fields.iter().all(|fld| fld.dtype.is_known()),
275 DataType::Unknown(_) => false,
276 _ => true,
277 }
278 }
279
280 pub fn materialize_unknown(self, allow_unknown: bool) -> PolarsResult<DataType> {
283 match self {
284 DataType::Unknown(u) => match u.materialize() {
285 Some(known) => Ok(known),
286 None => {
287 if allow_unknown {
288 Ok(DataType::Unknown(u))
289 } else {
290 polars_bail!(SchemaMismatch: "failed to materialize unknown type")
291 }
292 },
293 },
294 DataType::List(inner) => Ok(DataType::List(Box::new(
295 inner.materialize_unknown(allow_unknown)?,
296 ))),
297 #[cfg(feature = "dtype-array")]
298 DataType::Array(inner, size) => Ok(DataType::Array(
299 Box::new(inner.materialize_unknown(allow_unknown)?),
300 size,
301 )),
302 #[cfg(feature = "dtype-struct")]
303 DataType::Struct(fields) => Ok(DataType::Struct(
304 fields
305 .into_iter()
306 .map(|f| {
307 PolarsResult::Ok(Field::new(
308 f.name,
309 f.dtype.materialize_unknown(allow_unknown)?,
310 ))
311 })
312 .try_collect_vec()?,
313 )),
314 _ => Ok(self),
315 }
316 }
317
318 #[cfg(feature = "dtype-array")]
319 pub fn get_shape(&self) -> Option<Vec<usize>> {
321 fn get_shape_impl(dt: &DataType, shape: &mut Vec<usize>) {
322 if let DataType::Array(inner, size) = dt {
323 shape.push(*size);
324 get_shape_impl(inner, shape);
325 }
326 }
327
328 if let DataType::Array(inner, size) = self {
329 let mut shape = vec![*size];
330 get_shape_impl(inner, &mut shape);
331 Some(shape)
332 } else {
333 None
334 }
335 }
336
337 pub fn inner_dtype(&self) -> Option<&DataType> {
339 match self {
340 DataType::List(inner) => Some(inner),
341 #[cfg(feature = "dtype-array")]
342 DataType::Array(inner, _) => Some(inner),
343 _ => None,
344 }
345 }
346
347 pub fn into_inner_dtype(self) -> Option<DataType> {
349 match self {
350 DataType::List(inner) => Some(*inner),
351 #[cfg(feature = "dtype-array")]
352 DataType::Array(inner, _) => Some(*inner),
353 _ => None,
354 }
355 }
356
357 pub fn try_into_inner_dtype(self) -> PolarsResult<DataType> {
359 match self {
360 DataType::List(inner) => Ok(*inner),
361 #[cfg(feature = "dtype-array")]
362 DataType::Array(inner, _) => Ok(*inner),
363 dt => polars_bail!(InvalidOperation: "cannot get inner datatype of `{dt}`"),
364 }
365 }
366
367 pub fn leaf_dtype(&self) -> &DataType {
369 let mut prev = self;
370 while let Some(dtype) = prev.inner_dtype() {
371 prev = dtype
372 }
373 prev
374 }
375
376 #[cfg(feature = "dtype-array")]
377 pub fn array_leaf_dtype(&self) -> Option<&DataType> {
379 let mut prev = self;
380 match prev {
381 DataType::Array(_, _) => {
382 while let DataType::Array(inner, _) = &prev {
383 prev = inner;
384 }
385 Some(prev)
386 },
387 _ => None,
388 }
389 }
390
391 pub fn cast_leaf(&self, to: DataType) -> DataType {
393 use DataType::*;
394 match self {
395 List(inner) => List(Box::new(inner.cast_leaf(to))),
396 #[cfg(feature = "dtype-array")]
397 Array(inner, size) => Array(Box::new(inner.cast_leaf(to)), *size),
398 _ => to,
399 }
400 }
401
402 pub fn map_leaves<F: FnMut(DataType) -> DataType>(self, f: &mut F) -> DataType {
405 use DataType::*;
406 match self {
407 List(inner) => List(Box::new(inner.map_leaves(f))),
408 #[cfg(feature = "dtype-array")]
409 Array(inner, size) => Array(Box::new(inner.map_leaves(f)), size),
410 #[cfg(feature = "dtype-struct")]
411 Struct(fields) => {
412 let new_fields = fields
413 .into_iter()
414 .map(|fld| Field::new(fld.name, fld.dtype.map_leaves(f)))
415 .collect();
416 Struct(new_fields)
417 },
418 #[cfg(feature = "dtype-extension")]
419 Extension(ext, storage) => Extension(ext, Box::new(storage.map_leaves(f))),
420 _ => f(self),
421 }
422 }
423
424 pub fn can_cast_to(&self, to: &DataType) -> Option<bool> {
428 if self == to {
429 return Some(true);
430 }
431 if self.is_primitive_numeric() && to.is_primitive_numeric() {
432 return Some(true);
433 }
434
435 if self.is_null() {
436 return Some(true);
437 }
438
439 use DataType as D;
440 Some(match (self, to) {
441 #[cfg(feature = "dtype-categorical")]
442 (D::Categorical(_, _) | D::Enum(_, _), D::Binary)
443 | (D::Binary, D::Categorical(_, _) | D::Enum(_, _)) => false, #[cfg(feature = "dtype-categorical")]
446 (D::Categorical(_, _) | D::Enum(_, _), D::String)
447 | (D::String, D::Categorical(_, _) | D::Enum(_, _)) => true,
448
449 #[cfg(feature = "object")]
450 (D::Object(_), D::Object(_)) => true,
451 #[cfg(feature = "object")]
452 (D::Object(_), _) | (_, D::Object(_)) => false,
453
454 (D::Boolean, dt) | (dt, D::Boolean) => match dt {
455 dt if dt.is_primitive_numeric() => true,
456 #[cfg(feature = "dtype-decimal")]
457 D::Decimal(_, _) => true,
458 D::String | D::Binary => true,
459 _ => false,
460 },
461
462 (D::List(from), D::List(to)) => from.can_cast_to(to)?,
463 #[cfg(feature = "dtype-array")]
464 (D::Array(from, l_width), D::Array(to, r_width)) => {
465 l_width == r_width && from.can_cast_to(to)?
466 },
467 #[cfg(feature = "dtype-struct")]
468 (D::Struct(l_fields), D::Struct(r_fields)) => {
469 if l_fields.is_empty() {
470 return Some(true);
471 }
472
473 if l_fields.len() != r_fields.len() {
474 return Some(false);
475 }
476
477 for (l, r) in l_fields.iter().zip(r_fields) {
478 if !l.dtype().can_cast_to(r.dtype())? {
479 return Some(false);
480 }
481 }
482
483 true
484 },
485
486 _ => return None,
488 })
489 }
490
491 pub fn implode(self) -> DataType {
492 DataType::List(Box::new(self))
493 }
494
495 #[must_use]
497 pub fn to_physical(&self) -> DataType {
498 use DataType::*;
499 match self {
500 Date => Int32,
501 Datetime(_, _) => Int64,
502 Duration(_) => Int64,
503 Time => Int64,
504 #[cfg(feature = "dtype-decimal")]
505 Decimal(_, _) => Int128,
506 #[cfg(feature = "dtype-categorical")]
507 Categorical(cats, _) => cats.physical().dtype(),
508 #[cfg(feature = "dtype-categorical")]
509 Enum(fcats, _) => fcats.physical().dtype(),
510 #[cfg(feature = "dtype-array")]
511 Array(dt, width) => Array(Box::new(dt.to_physical()), *width),
512 List(dt) => List(Box::new(dt.to_physical())),
513 #[cfg(feature = "dtype-struct")]
514 Struct(fields) => {
515 let new_fields = fields
516 .iter()
517 .map(|s| Field::new(s.name().clone(), s.dtype().to_physical()))
518 .collect();
519 Struct(new_fields)
520 },
521 #[cfg(feature = "dtype-extension")]
522 Extension(_, storage) => storage.to_physical(),
523 _ => self.clone(),
524 }
525 }
526
527 #[must_use]
528 pub fn to_storage(&self) -> DataType {
529 use DataType::*;
530 match self {
531 #[cfg(feature = "dtype-extension")]
532 Extension(_, storage) => storage.to_storage(),
533 _ => self.clone(),
534 }
535 }
536
537 pub fn is_supported_list_arithmetic_input(&self) -> bool {
538 self.is_primitive_numeric() || self.is_bool() || self.is_null()
539 }
540
541 pub fn is_logical(&self) -> bool {
543 self != &self.to_physical()
544 }
545
546 pub fn is_temporal(&self) -> bool {
548 use DataType::*;
549 matches!(self, Date | Datetime(_, _) | Duration(_) | Time)
550 }
551
552 pub fn is_primitive(&self) -> bool {
555 self.is_primitive_numeric()
556 | matches!(
557 self,
558 DataType::Boolean | DataType::String | DataType::Binary
559 )
560 }
561
562 pub fn is_primitive_numeric(&self) -> bool {
564 self.is_float() || self.is_integer()
565 }
566
567 pub fn is_bool(&self) -> bool {
569 matches!(self, DataType::Boolean)
570 }
571
572 pub fn is_list(&self) -> bool {
574 matches!(self, DataType::List(_))
575 }
576
577 pub fn is_array(&self) -> bool {
579 #[cfg(feature = "dtype-array")]
580 {
581 matches!(self, DataType::Array(_, _))
582 }
583 #[cfg(not(feature = "dtype-array"))]
584 {
585 false
586 }
587 }
588
589 pub fn is_nested(&self) -> bool {
590 match self {
591 DataType::List(_) => true,
592 #[cfg(feature = "dtype-array")]
593 DataType::Array(_, _) => true,
594 #[cfg(feature = "dtype-struct")]
595 DataType::Struct(_) => true,
596 #[cfg(feature = "dtype-extension")]
597 DataType::Extension(_, storage) => storage.is_nested(),
598 _ => false,
599 }
600 }
601
602 pub fn is_struct(&self) -> bool {
604 #[cfg(feature = "dtype-struct")]
605 {
606 matches!(self, DataType::Struct(_))
607 }
608 #[cfg(not(feature = "dtype-struct"))]
609 {
610 false
611 }
612 }
613
614 pub fn is_binary(&self) -> bool {
615 matches!(self, DataType::Binary)
616 }
617
618 pub fn is_date(&self) -> bool {
619 matches!(self, DataType::Date)
620 }
621 pub fn is_datetime(&self) -> bool {
622 matches!(self, DataType::Datetime(..))
623 }
624
625 pub fn is_duration(&self) -> bool {
626 matches!(self, DataType::Duration(..))
627 }
628
629 pub fn is_object(&self) -> bool {
630 #[cfg(feature = "object")]
631 {
632 matches!(self, DataType::Object(_))
633 }
634 #[cfg(not(feature = "object"))]
635 {
636 false
637 }
638 }
639
640 pub fn is_null(&self) -> bool {
641 matches!(self, DataType::Null)
642 }
643
644 pub fn contains_views(&self) -> bool {
645 use DataType::*;
646 match self {
647 Binary | String => true,
648 List(inner) => inner.contains_views(),
649 #[cfg(feature = "dtype-array")]
650 Array(inner, _) => inner.contains_views(),
651 #[cfg(feature = "dtype-struct")]
652 Struct(fields) => fields.iter().any(|field| field.dtype.contains_views()),
653 _ => false,
654 }
655 }
656
657 pub fn contains_categoricals(&self) -> bool {
658 use DataType::*;
659 match self {
660 #[cfg(feature = "dtype-categorical")]
661 Categorical(_, _) => true,
662 List(inner) => inner.contains_categoricals(),
663 #[cfg(feature = "dtype-array")]
664 Array(inner, _) => inner.contains_categoricals(),
665 #[cfg(feature = "dtype-struct")]
666 Struct(fields) => fields
667 .iter()
668 .any(|field| field.dtype.contains_categoricals()),
669 _ => false,
670 }
671 }
672
673 pub fn contains_enums(&self) -> bool {
674 use DataType::*;
675 match self {
676 #[cfg(feature = "dtype-categorical")]
677 Enum(_, _) => true,
678 List(inner) => inner.contains_enums(),
679 #[cfg(feature = "dtype-array")]
680 Array(inner, _) => inner.contains_enums(),
681 #[cfg(feature = "dtype-struct")]
682 Struct(fields) => fields.iter().any(|field| field.dtype.contains_enums()),
683 _ => false,
684 }
685 }
686
687 pub fn contains_objects(&self) -> bool {
688 use DataType::*;
689 match self {
690 #[cfg(feature = "object")]
691 Object(_) => true,
692 List(inner) => inner.contains_objects(),
693 #[cfg(feature = "dtype-array")]
694 Array(inner, _) => inner.contains_objects(),
695 #[cfg(feature = "dtype-struct")]
696 Struct(fields) => fields.iter().any(|field| field.dtype.contains_objects()),
697 _ => false,
698 }
699 }
700
701 pub fn contains_list_recursive(&self) -> bool {
702 use DataType as D;
703 match self {
704 D::List(_) => true,
705 #[cfg(feature = "dtype-array")]
706 D::Array(inner, _) => inner.contains_list_recursive(),
707 #[cfg(feature = "dtype-struct")]
708 D::Struct(fields) => fields
709 .iter()
710 .any(|field| field.dtype.contains_list_recursive()),
711 _ => false,
712 }
713 }
714
715 pub fn contains_unknown(&self) -> bool {
716 use DataType as D;
717 match self {
718 D::Unknown(_) => true,
719 D::List(inner) => inner.contains_unknown(),
720 #[cfg(feature = "dtype-array")]
721 D::Array(inner, _) => inner.contains_unknown(),
722 #[cfg(feature = "dtype-struct")]
723 D::Struct(fields) => fields.iter().any(|field| field.dtype.contains_unknown()),
724 _ => false,
725 }
726 }
727
728 pub fn contains_dtype_recursive(&self, dtype: &DataType) -> bool {
729 if self == dtype {
730 return true;
731 }
732 use DataType as D;
733 match self {
734 D::List(inner) => inner.contains_dtype_recursive(dtype),
735 #[cfg(feature = "dtype-array")]
736 D::Array(inner, _) => inner.contains_dtype_recursive(dtype),
737 #[cfg(feature = "dtype-struct")]
738 D::Struct(fields) => fields
739 .iter()
740 .any(|field| field.dtype.contains_dtype_recursive(dtype)),
741 _ => false,
742 }
743 }
744
745 pub fn is_ord(&self) -> bool {
747 let phys = self.to_physical();
748 phys.is_primitive_numeric()
749 || self.is_decimal()
750 || matches!(
751 phys,
752 DataType::Binary | DataType::String | DataType::Boolean
753 )
754 }
755
756 pub fn is_decimal(&self) -> bool {
758 match self {
759 #[cfg(feature = "dtype-decimal")]
760 DataType::Decimal(_, _) => true,
761 _ => false,
762 }
763 }
764
765 pub fn is_float(&self) -> bool {
768 matches!(
769 self,
770 DataType::Float16
771 | DataType::Float32
772 | DataType::Float64
773 | DataType::Unknown(UnknownKind::Float)
774 )
775 }
776
777 pub fn is_integer(&self) -> bool {
779 matches!(
780 self,
781 DataType::Int8
782 | DataType::Int16
783 | DataType::Int32
784 | DataType::Int64
785 | DataType::Int128
786 | DataType::UInt8
787 | DataType::UInt16
788 | DataType::UInt32
789 | DataType::UInt64
790 | DataType::UInt128
791 | DataType::Unknown(UnknownKind::Int(_))
792 )
793 }
794
795 pub fn is_signed_integer(&self) -> bool {
796 matches!(
798 self,
799 DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 | DataType::Int128
800 )
801 }
802
803 pub fn is_unsigned_integer(&self) -> bool {
804 matches!(
805 self,
806 DataType::UInt8
807 | DataType::UInt16
808 | DataType::UInt32
809 | DataType::UInt64
810 | DataType::UInt128,
811 )
812 }
813
814 pub fn is_string(&self) -> bool {
815 matches!(self, DataType::String | DataType::Unknown(UnknownKind::Str))
816 }
817
818 pub fn is_categorical(&self) -> bool {
819 #[cfg(feature = "dtype-categorical")]
820 {
821 matches!(self, DataType::Categorical(_, _))
822 }
823 #[cfg(not(feature = "dtype-categorical"))]
824 {
825 false
826 }
827 }
828
829 pub fn is_enum(&self) -> bool {
830 #[cfg(feature = "dtype-categorical")]
831 {
832 matches!(self, DataType::Enum(_, _))
833 }
834 #[cfg(not(feature = "dtype-categorical"))]
835 {
836 false
837 }
838 }
839
840 pub fn is_extension(&self) -> bool {
841 #[cfg(feature = "dtype-extension")]
842 {
843 matches!(self, DataType::Extension(_, _))
844 }
845 #[cfg(not(feature = "dtype-extension"))]
846 {
847 false
848 }
849 }
850
851 pub fn to_arrow_field(&self, name: PlSmallStr, compat_level: CompatLevel) -> ArrowField {
853 let field = ArrowField::new(name, self.to_arrow(compat_level), true);
854
855 if let Some(metadata) = self.to_arrow_field_metadata() {
856 field.with_metadata(metadata)
857 } else {
858 field
859 }
860 }
861
862 pub fn to_arrow_field_metadata(&self) -> Option<Metadata> {
863 match self {
864 #[cfg(feature = "dtype-categorical")]
865 DataType::Enum(fcats, _map) => {
866 let cats = fcats.categories();
867 let strings_size: usize = cats
868 .values_iter()
869 .map(|s| (s.len() + 1).ilog10() as usize + 1 + s.len())
870 .sum();
871 let mut encoded = String::with_capacity(strings_size);
872 for cat in cats.values_iter() {
873 encoded.push_str(itoa::Buffer::new().format(cat.len()));
874 encoded.push(';');
875 encoded.push_str(cat);
876 }
877 Some(BTreeMap::from([(
878 PlSmallStr::from_static(DTYPE_ENUM_VALUES_NEW),
879 PlSmallStr::from_string(encoded),
880 )]))
881 },
882 #[cfg(feature = "dtype-categorical")]
883 DataType::Categorical(cats, _) => {
884 let mut encoded = String::new();
885 encoded.push_str(itoa::Buffer::new().format(cats.name().len()));
886 encoded.push(';');
887 encoded.push_str(cats.name());
888 encoded.push_str(itoa::Buffer::new().format(cats.namespace().len()));
889 encoded.push(';');
890 encoded.push_str(cats.namespace());
891 encoded.push_str(cats.physical().as_str());
892 encoded.push(';');
893
894 Some(BTreeMap::from([(
895 PlSmallStr::from_static(DTYPE_CATEGORICAL_NEW),
896 PlSmallStr::from_string(encoded),
897 )]))
898 },
899 DataType::BinaryOffset => Some(BTreeMap::from([(
900 PlSmallStr::from_static(PL_KEY),
901 PlSmallStr::from_static(MAINTAIN_PL_TYPE),
902 )])),
903 _ => None,
904 }
905 }
906
907 pub fn max(&self) -> PolarsResult<Scalar> {
909 use DataType::*;
910 let v = match self {
911 Int8 => Scalar::from(i8::MAX),
912 Int16 => Scalar::from(i16::MAX),
913 Int32 => Scalar::from(i32::MAX),
914 Int64 => Scalar::from(i64::MAX),
915 Int128 => Scalar::from(i128::MAX),
916 UInt8 => Scalar::from(u8::MAX),
917 UInt16 => Scalar::from(u16::MAX),
918 UInt32 => Scalar::from(u32::MAX),
919 UInt64 => Scalar::from(u64::MAX),
920 UInt128 => Scalar::from(u128::MAX),
921 Float16 => Scalar::from(pf16::INFINITY),
922 Float32 => Scalar::from(f32::INFINITY),
923 Float64 => Scalar::from(f64::INFINITY),
924 #[cfg(feature = "dtype-time")]
925 Time => Scalar::new(Time, AnyValue::Time(NS_IN_DAY - 1)),
926 dt => polars_bail!(ComputeError: "cannot determine upper bound for dtype `{dt}`"),
927 };
928 Ok(v)
929 }
930
931 pub fn min(&self) -> PolarsResult<Scalar> {
933 use DataType::*;
934 let v = match self {
935 Int8 => Scalar::from(i8::MIN),
936 Int16 => Scalar::from(i16::MIN),
937 Int32 => Scalar::from(i32::MIN),
938 Int64 => Scalar::from(i64::MIN),
939 Int128 => Scalar::from(i128::MIN),
940 UInt8 => Scalar::from(u8::MIN),
941 UInt16 => Scalar::from(u16::MIN),
942 UInt32 => Scalar::from(u32::MIN),
943 UInt64 => Scalar::from(u64::MIN),
944 UInt128 => Scalar::from(u128::MIN),
945 Float16 => Scalar::from(pf16::NEG_INFINITY),
946 Float32 => Scalar::from(f32::NEG_INFINITY),
947 Float64 => Scalar::from(f64::NEG_INFINITY),
948 #[cfg(feature = "dtype-time")]
949 Time => Scalar::new(Time, AnyValue::Time(0)),
950 dt => polars_bail!(ComputeError: "cannot determine lower bound for dtype `{}`", dt),
951 };
952 Ok(v)
953 }
954
955 #[inline]
957 pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowDataType {
958 self.try_to_arrow(compat_level).unwrap()
959 }
960
961 #[inline]
962 pub fn try_to_arrow(&self, compat_level: CompatLevel) -> PolarsResult<ArrowDataType> {
963 use DataType::*;
964 match self {
965 Boolean => Ok(ArrowDataType::Boolean),
966 UInt8 => Ok(ArrowDataType::UInt8),
967 UInt16 => Ok(ArrowDataType::UInt16),
968 UInt32 => Ok(ArrowDataType::UInt32),
969 UInt64 => Ok(ArrowDataType::UInt64),
970 UInt128 => Ok(ArrowDataType::UInt128),
971 Int8 => Ok(ArrowDataType::Int8),
972 Int16 => Ok(ArrowDataType::Int16),
973 Int32 => Ok(ArrowDataType::Int32),
974 Int64 => Ok(ArrowDataType::Int64),
975 Int128 => Ok(ArrowDataType::Int128),
976 Float16 => Ok(ArrowDataType::Float16),
977 Float32 => Ok(ArrowDataType::Float32),
978 Float64 => Ok(ArrowDataType::Float64),
979 #[cfg(feature = "dtype-decimal")]
980 Decimal(precision, scale) => {
981 assert!(*precision >= 1 && *precision <= 38);
982 Ok(ArrowDataType::Decimal(*precision, *scale))
983 },
984 String => {
985 let dt = if compat_level.0 >= 1 {
986 ArrowDataType::Utf8View
987 } else {
988 ArrowDataType::LargeUtf8
989 };
990 Ok(dt)
991 },
992 Binary => {
993 let dt = if compat_level.0 >= 1 {
994 ArrowDataType::BinaryView
995 } else {
996 ArrowDataType::LargeBinary
997 };
998 Ok(dt)
999 },
1000 Date => Ok(ArrowDataType::Date32),
1001 Datetime(unit, tz) => Ok(ArrowDataType::Timestamp(
1002 unit.to_arrow(),
1003 tz.as_deref().cloned(),
1004 )),
1005 Duration(unit) => Ok(ArrowDataType::Duration(unit.to_arrow())),
1006 Time => Ok(ArrowDataType::Time64(ArrowTimeUnit::Nanosecond)),
1007 #[cfg(feature = "dtype-array")]
1008 Array(dt, width) => Ok(ArrowDataType::FixedSizeList(
1009 Box::new(dt.to_arrow_field(LIST_VALUES_NAME, compat_level)),
1010 *width,
1011 )),
1012 List(dt) => Ok(ArrowDataType::LargeList(Box::new(
1013 dt.to_arrow_field(LIST_VALUES_NAME, compat_level),
1014 ))),
1015 Null => Ok(ArrowDataType::Null),
1016 #[cfg(feature = "object")]
1017 Object(_) => Ok(get_object_physical_type()),
1018 #[cfg(feature = "dtype-categorical")]
1019 Categorical(_, _) | Enum(_, _) => {
1020 let arrow_phys = match self.cat_physical().unwrap() {
1021 CategoricalPhysical::U8 => IntegerType::UInt8,
1022 CategoricalPhysical::U16 => IntegerType::UInt16,
1023 CategoricalPhysical::U32 => IntegerType::UInt32,
1024 };
1025
1026 let values = if compat_level.0 >= 1 {
1027 ArrowDataType::Utf8View
1028 } else {
1029 ArrowDataType::LargeUtf8
1030 };
1031
1032 Ok(ArrowDataType::Dictionary(
1033 arrow_phys,
1034 Box::new(values),
1035 false,
1036 ))
1037 },
1038 #[cfg(feature = "dtype-struct")]
1039 Struct(fields) => {
1040 let fields = fields
1041 .iter()
1042 .map(|fld| fld.to_arrow(compat_level))
1043 .collect();
1044 Ok(ArrowDataType::Struct(fields))
1045 },
1046 BinaryOffset => Ok(ArrowDataType::LargeBinary),
1047 #[cfg(feature = "dtype-extension")]
1048 Extension(typ, inner) => Ok(ArrowDataType::Extension(Box::new(
1049 arrow::datatypes::ExtensionType {
1050 name: typ.name().into(),
1051 inner: inner.try_to_arrow(compat_level)?,
1052 metadata: typ.serialize_metadata().map(|m| m.into()),
1053 },
1054 ))),
1055 Unknown(kind) => {
1056 let dt = match kind {
1057 UnknownKind::Any => ArrowDataType::Unknown,
1058 UnknownKind::Float => ArrowDataType::Float64,
1059 UnknownKind::Str => ArrowDataType::Utf8View,
1060 UnknownKind::Int(v) => {
1061 return materialize_dyn_int(*v).dtype().try_to_arrow(compat_level);
1062 },
1063 };
1064 Ok(dt)
1065 },
1066 }
1067 }
1068
1069 pub fn is_nested_null(&self) -> bool {
1070 use DataType::*;
1071 match self {
1072 Null => true,
1073 List(field) => field.is_nested_null(),
1074 #[cfg(feature = "dtype-array")]
1075 Array(field, _) => field.is_nested_null(),
1076 #[cfg(feature = "dtype-struct")]
1077 Struct(fields) => fields.iter().all(|fld| fld.dtype.is_nested_null()),
1078 _ => false,
1079 }
1080 }
1081
1082 pub fn matches_schema_type(&self, schema_type: &DataType) -> PolarsResult<bool> {
1089 match (self, schema_type) {
1090 (DataType::List(l), DataType::List(r)) => l.matches_schema_type(r),
1091 #[cfg(feature = "dtype-array")]
1092 (DataType::Array(l, sl), DataType::Array(r, sr)) => {
1093 Ok(l.matches_schema_type(r)? && sl == sr)
1094 },
1095 #[cfg(feature = "dtype-struct")]
1096 (DataType::Struct(l), DataType::Struct(r)) => {
1097 if l.len() != r.len() {
1098 polars_bail!(SchemaMismatch: "structs have different number of fields: {} vs {}", l.len(), r.len());
1099 }
1100 let mut must_cast = false;
1101 for (l, r) in l.iter().zip(r.iter()) {
1102 must_cast |= l.dtype.matches_schema_type(&r.dtype)?;
1103 }
1104 Ok(must_cast)
1105 },
1106 (DataType::Null, DataType::Null) => Ok(false),
1107 #[cfg(feature = "dtype-decimal")]
1108 (DataType::Decimal(p1, s1), DataType::Decimal(p2, s2)) => Ok((p1, s1) != (p2, s2)),
1109 (DataType::Null, _) => Ok(true),
1112 #[cfg(feature = "dtype-categorical")]
1113 (DataType::Categorical(l, _), DataType::Categorical(r, _)) => {
1114 ensure_same_categories(l, r)?;
1115 Ok(false)
1116 },
1117 #[cfg(feature = "dtype-categorical")]
1118 (DataType::Enum(l, _), DataType::Enum(r, _)) => {
1119 ensure_same_frozen_categories(l, r)?;
1120 Ok(false)
1121 },
1122
1123 (l, r) if l == r => Ok(false),
1124 (l, r) => {
1125 polars_bail!(SchemaMismatch: "type {:?} is incompatible with expected type {:?}", l, r)
1126 },
1127 }
1128 }
1129
1130 #[inline]
1131 pub fn is_unknown(&self) -> bool {
1132 matches!(self, DataType::Unknown(_))
1133 }
1134
1135 pub fn nesting_level(&self) -> usize {
1136 let mut level = 0;
1137 let mut slf = self;
1138 while let Some(inner_dtype) = slf.inner_dtype() {
1139 level += 1;
1140 slf = inner_dtype;
1141 }
1142 level
1143 }
1144
1145 #[cfg(feature = "dtype-categorical")]
1147 pub fn cat_physical(&self) -> PolarsResult<CategoricalPhysical> {
1148 match self {
1149 DataType::Categorical(cats, _) => Ok(cats.physical()),
1150 DataType::Enum(fcats, _) => Ok(fcats.physical()),
1151 _ => {
1152 polars_bail!(SchemaMismatch: "invalid dtype: expected an Enum or Categorical type, received '{:?}'", self)
1153 },
1154 }
1155 }
1156
1157 #[cfg(feature = "dtype-categorical")]
1159 pub fn cat_mapping(&self) -> PolarsResult<&Arc<CategoricalMapping>> {
1160 match self {
1161 DataType::Categorical(_, mapping) | DataType::Enum(_, mapping) => Ok(mapping),
1162 _ => {
1163 polars_bail!(SchemaMismatch: "invalid dtype: expected an Enum or Categorical type, received '{:?}'", self)
1164 },
1165 }
1166 }
1167
1168 #[cfg(feature = "dtype-categorical")]
1169 pub fn from_categories(cats: Arc<Categories>) -> Self {
1170 let mapping = cats.mapping();
1171 Self::Categorical(cats, mapping)
1172 }
1173
1174 #[cfg(feature = "dtype-categorical")]
1175 pub fn from_frozen_categories(fcats: Arc<FrozenCategories>) -> Self {
1176 let mapping = fcats.mapping().clone();
1177 Self::Enum(fcats, mapping)
1178 }
1179
1180 pub fn is_numeric(&self) -> bool {
1181 self.is_integer() || self.is_float() || self.is_decimal()
1182 }
1183
1184 pub fn numeric_to_unsigned_bit_repr(&self) -> Option<DataType> {
1185 use DataType::*;
1186
1187 Some(match self {
1188 Int8 | UInt8 => UInt8,
1189 Int16 | UInt16 | Float16 => UInt16,
1190 Int32 | UInt32 | Float32 => UInt32,
1191 Int64 | UInt64 | Float64 => UInt64,
1192 Int128 | UInt128 => UInt128,
1193 _ => return None,
1194 })
1195 }
1196}
1197
1198impl Display for DataType {
1199 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1200 let s = match self {
1201 DataType::Null => "null",
1202 DataType::Boolean => "bool",
1203 DataType::UInt8 => "u8",
1204 DataType::UInt16 => "u16",
1205 DataType::UInt32 => "u32",
1206 DataType::UInt64 => "u64",
1207 DataType::UInt128 => "u128",
1208 DataType::Int8 => "i8",
1209 DataType::Int16 => "i16",
1210 DataType::Int32 => "i32",
1211 DataType::Int64 => "i64",
1212 DataType::Int128 => "i128",
1213 DataType::Float16 => "f16",
1214 DataType::Float32 => "f32",
1215 DataType::Float64 => "f64",
1216 #[cfg(feature = "dtype-decimal")]
1217 DataType::Decimal(p, s) => return write!(f, "decimal[{p},{s}]"),
1218 DataType::String => "str",
1219 DataType::Binary => "binary",
1220 DataType::BinaryOffset => "binary[offset]",
1221 DataType::Date => "date",
1222 DataType::Datetime(tu, None) => return write!(f, "datetime[{tu}]"),
1223 DataType::Datetime(tu, Some(tz)) => return write!(f, "datetime[{tu}, {tz}]"),
1224 DataType::Duration(tu) => return write!(f, "duration[{tu}]"),
1225 DataType::Time => "time",
1226 #[cfg(feature = "dtype-array")]
1227 DataType::Array(_, _) => {
1228 let tp = self.array_leaf_dtype().unwrap();
1229
1230 let dims = self.get_shape().unwrap();
1231 let shape = if dims.len() == 1 {
1232 format!("{}", dims[0])
1233 } else {
1234 format_tuple!(dims)
1235 };
1236 return write!(f, "array[{tp}, {shape}]");
1237 },
1238 DataType::List(tp) => return write!(f, "list[{tp}]"),
1239 #[cfg(feature = "object")]
1240 DataType::Object(s) => s,
1241 #[cfg(feature = "dtype-categorical")]
1242 DataType::Categorical(_, _) => "cat",
1243 #[cfg(feature = "dtype-categorical")]
1244 DataType::Enum(_, _) => "enum",
1245 #[cfg(feature = "dtype-struct")]
1246 DataType::Struct(fields) => return write!(f, "struct[{}]", fields.len()),
1247 #[cfg(feature = "dtype-extension")]
1248 DataType::Extension(typ, _) => return write!(f, "ext[{}]", typ.0.dyn_display()),
1249 DataType::Unknown(kind) => match kind {
1250 UnknownKind::Any => "unknown",
1251 UnknownKind::Int(_) => "dyn int",
1252 UnknownKind::Float => "dyn float",
1253 UnknownKind::Str => "dyn str",
1254 },
1255 };
1256 f.write_str(s)
1257 }
1258}
1259
1260impl std::fmt::Debug for DataType {
1261 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1262 use DataType::*;
1263 match self {
1264 Boolean => write!(f, "Boolean"),
1265 UInt8 => write!(f, "UInt8"),
1266 UInt16 => write!(f, "UInt16"),
1267 UInt32 => write!(f, "UInt32"),
1268 UInt64 => write!(f, "UInt64"),
1269 UInt128 => write!(f, "UInt128"),
1270 Int8 => write!(f, "Int8"),
1271 Int16 => write!(f, "Int16"),
1272 Int32 => write!(f, "Int32"),
1273 Int64 => write!(f, "Int64"),
1274 Int128 => write!(f, "Int128"),
1275 Float16 => write!(f, "Float16"),
1276 Float32 => write!(f, "Float32"),
1277 Float64 => write!(f, "Float64"),
1278 String => write!(f, "String"),
1279 Binary => write!(f, "Binary"),
1280 BinaryOffset => write!(f, "BinaryOffset"),
1281 Date => write!(f, "Date"),
1282 Time => write!(f, "Time"),
1283 Duration(unit) => write!(f, "Duration('{unit}')"),
1284 Datetime(unit, opt_tz) => {
1285 if let Some(tz) = opt_tz {
1286 write!(f, "Datetime('{unit}', '{tz}')")
1287 } else {
1288 write!(f, "Datetime('{unit}')")
1289 }
1290 },
1291 #[cfg(feature = "dtype-decimal")]
1292 Decimal(p, s) => write!(f, "Decimal({p}, {s})"),
1293 #[cfg(feature = "dtype-array")]
1294 Array(inner, size) => write!(f, "Array({inner:?}, {size})"),
1295 List(inner) => write!(f, "List({inner:?})"),
1296 #[cfg(feature = "dtype-struct")]
1297 Struct(fields) => {
1298 let mut first = true;
1299 write!(f, "Struct({{")?;
1300 for field in fields {
1301 if !first {
1302 write!(f, ", ")?;
1303 }
1304 write!(f, "'{}': {:?}", field.name(), field.dtype())?;
1305 first = false;
1306 }
1307 write!(f, "}})")
1308 },
1309 #[cfg(feature = "dtype-categorical")]
1310 Categorical(cats, _) => {
1311 if cats.is_global() {
1312 write!(f, "Categorical")
1313 } else if cats.namespace().is_empty() && cats.physical() == CategoricalPhysical::U32
1314 {
1315 write!(f, "Categorical('{}')", cats.name())
1316 } else {
1317 write!(
1318 f,
1319 "Categorical('{}', '{}', {:?})",
1320 cats.name(),
1321 cats.namespace(),
1322 cats.physical()
1323 )
1324 }
1325 },
1326 #[cfg(feature = "dtype-categorical")]
1327 Enum(_, _) => write!(f, "Enum([...])"),
1328 #[cfg(feature = "object")]
1329 Object(_) => write!(f, "Object"),
1330 Null => write!(f, "Null"),
1331 #[cfg(feature = "dtype-extension")]
1332 Extension(typ, inner) => write!(f, "Extension({}, {inner:?})", typ.0.dyn_debug()),
1333 Unknown(kind) => write!(f, "Unknown({kind:?})"),
1334 }
1335 }
1336}
1337
1338pub fn merge_dtypes(left: &DataType, right: &DataType) -> PolarsResult<DataType> {
1339 use DataType::*;
1340 Ok(match (left, right) {
1341 #[cfg(feature = "dtype-categorical")]
1342 (Categorical(cats_l, map), Categorical(cats_r, _)) => {
1343 ensure_same_categories(cats_l, cats_r)?;
1344 Categorical(cats_l.clone(), map.clone())
1345 },
1346 #[cfg(feature = "dtype-categorical")]
1347 (Enum(fcats_l, map), Enum(fcats_r, _)) => {
1348 ensure_same_frozen_categories(fcats_l, fcats_r)?;
1349 Enum(fcats_l.clone(), map.clone())
1350 },
1351 (List(inner_l), List(inner_r)) => {
1352 let merged = merge_dtypes(inner_l, inner_r)?;
1353 List(Box::new(merged))
1354 },
1355 #[cfg(feature = "dtype-struct")]
1356 (Struct(inner_l), Struct(inner_r)) => {
1357 polars_ensure!(inner_l.len() == inner_r.len(), ComputeError: "cannot combine structs with differing amounts of fields ({} != {})", inner_l.len(), inner_r.len());
1358 let fields = inner_l.iter().zip(inner_r.iter()).map(|(l, r)| {
1359 polars_ensure!(l.name() == r.name(), ComputeError: "cannot combine structs with different fields ({} != {})", l.name(), r.name());
1360 let merged = merge_dtypes(l.dtype(), r.dtype())?;
1361 Ok(Field::new(l.name().clone(), merged))
1362 }).collect::<PolarsResult<Vec<_>>>()?;
1363 Struct(fields)
1364 },
1365 #[cfg(feature = "dtype-array")]
1366 (Array(inner_l, width_l), Array(inner_r, width_r)) => {
1367 polars_ensure!(width_l == width_r, ComputeError: "widths of FixedSizeWidth Series are not equal");
1368 let merged = merge_dtypes(inner_l, inner_r)?;
1369 Array(Box::new(merged), *width_l)
1370 },
1371 (left, right) if left == right => left.clone(),
1372 _ => polars_bail!(ComputeError: "unable to merge datatypes"),
1373 })
1374}
1375
1376fn collect_nested_types(
1377 dtype: &DataType,
1378 result: &mut PlHashSet<DataType>,
1379 include_compound_types: bool,
1380) {
1381 match dtype {
1382 DataType::List(inner) => {
1383 if include_compound_types {
1384 result.insert(dtype.clone());
1385 }
1386 collect_nested_types(inner, result, include_compound_types);
1387 },
1388 #[cfg(feature = "dtype-array")]
1389 DataType::Array(inner, _) => {
1390 if include_compound_types {
1391 result.insert(dtype.clone());
1392 }
1393 collect_nested_types(inner, result, include_compound_types);
1394 },
1395 #[cfg(feature = "dtype-struct")]
1396 DataType::Struct(fields) => {
1397 if include_compound_types {
1398 result.insert(dtype.clone());
1399 }
1400 for field in fields {
1401 collect_nested_types(field.dtype(), result, include_compound_types);
1402 }
1403 },
1404 _ => {
1405 result.insert(dtype.clone());
1406 },
1407 }
1408}
1409
1410pub fn unpack_dtypes(dtype: &DataType, include_compound_types: bool) -> PlHashSet<DataType> {
1411 let mut result = PlHashSet::new();
1412 collect_nested_types(dtype, &mut result, include_compound_types);
1413 result
1414}
1415
1416#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
1417#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1418#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
1419pub struct CompatLevel(pub(crate) u16);
1420
1421impl CompatLevel {
1422 pub const fn newest() -> CompatLevel {
1423 CompatLevel(1)
1424 }
1425
1426 pub const fn oldest() -> CompatLevel {
1427 CompatLevel(0)
1428 }
1429
1430 #[doc(hidden)]
1433 pub fn with_level(level: u16) -> PolarsResult<CompatLevel> {
1434 if level > CompatLevel::newest().0 {
1435 polars_bail!(InvalidOperation: "invalid compat level");
1436 }
1437 Ok(CompatLevel(level))
1438 }
1439
1440 #[doc(hidden)]
1441 pub fn get_level(&self) -> u16 {
1442 self.0
1443 }
1444
1445 pub fn uses_binview_types(&self) -> bool {
1447 *self != CompatLevel::oldest()
1448 }
1449}
1450
1451impl DataType {
1452 pub fn visit_with(&self, mut visitor_fn: impl FnMut(&DataType)) {
1453 self.try_visit_with(|dtype| {
1454 visitor_fn(dtype);
1455 Ok(())
1456 })
1457 .unwrap();
1458 }
1459
1460 pub fn try_visit_with(
1461 &self,
1462 mut visitor_fn: impl FnMut(&DataType) -> PolarsResult<()>,
1463 ) -> PolarsResult<()> {
1464 DataType::try_mutate_with(Cow::Borrowed(self), |dtype| {
1465 visitor_fn(dtype.as_ref()).map(|_| dtype)
1466 })
1467 .map(|_| ())
1468 }
1469
1470 pub fn try_mutate_with<'d>(
1471 dtype: Cow<'d, DataType>,
1472 mut visitor_fn: impl FnMut(Cow<'d, DataType>) -> PolarsResult<Cow<'d, DataType>>,
1473 ) -> PolarsResult<Cow<'d, DataType>> {
1474 DtypeVisitor {
1475 visitor_fn: &mut visitor_fn,
1476 }
1477 .visit_rec(dtype)
1478 }
1479}
1480
1481struct DtypeVisitor<'d, 'f> {
1482 visitor_fn: &'f mut dyn FnMut(Cow<'d, DataType>) -> PolarsResult<Cow<'d, DataType>>,
1483}
1484
1485impl<'d, 'f> DtypeVisitor<'d, 'f> {
1486 fn visit_rec(&mut self, dtype: Cow<'d, DataType>) -> PolarsResult<Cow<'d, DataType>> {
1487 let dtype = match dtype.as_ref() {
1488 DataType::List(_) => match dtype {
1489 Cow::Owned(DataType::List(mut inner)) => {
1490 self.visit_ref_mut(inner.as_mut())?;
1491 Cow::Owned(DataType::List(inner))
1492 },
1493 Cow::Borrowed(DataType::List(inner)) => {
1494 let ret = self.visit_rec(Cow::Borrowed(inner.as_ref()))?;
1495
1496 if std::ptr::eq(ret.as_ref(), inner.as_ref()) {
1497 dtype
1498 } else {
1499 Cow::Owned(DataType::List(Box::new(ret.into_owned())))
1500 }
1501 },
1502 _ => unreachable!(),
1503 },
1504 #[cfg(feature = "dtype-array")]
1505 DataType::Array(..) => match dtype {
1506 Cow::Owned(DataType::Array(mut inner, width)) => {
1507 self.visit_ref_mut(inner.as_mut())?;
1508 Cow::Owned(DataType::Array(inner, width))
1509 },
1510 Cow::Borrowed(DataType::Array(inner, width)) => {
1511 let ret = self.visit_rec(Cow::Borrowed(inner.as_ref()))?;
1512
1513 if std::ptr::eq(ret.as_ref(), inner.as_ref()) {
1514 dtype
1515 } else {
1516 Cow::Owned(DataType::Array(Box::new(ret.into_owned()), *width))
1517 }
1518 },
1519 _ => unreachable!(),
1520 },
1521 #[cfg(feature = "dtype-struct")]
1522 DataType::Struct(_) => match dtype {
1523 Cow::Owned(DataType::Struct(mut fields)) => {
1524 for f in &mut fields {
1525 self.visit_ref_mut(&mut f.dtype)?;
1526 }
1527
1528 Cow::Owned(DataType::Struct(fields))
1529 },
1530 Cow::Borrowed(DataType::Struct(fields)) => {
1531 let mut new_fields = vec![];
1532
1533 for (i, f) in fields.iter().enumerate() {
1534 let ret = self.visit_rec(Cow::Borrowed(f.dtype()))?;
1535
1536 if std::ptr::eq(ret.as_ref(), f.dtype()) && new_fields.is_empty() {
1537 continue;
1538 }
1539
1540 if new_fields.is_empty() {
1541 new_fields.reserve_exact(fields.len());
1542 new_fields.extend(fields.iter().take(i).cloned());
1543 }
1544
1545 new_fields.push(Field::new(f.name().clone(), ret.into_owned()));
1546 }
1547
1548 if new_fields.is_empty() {
1549 dtype
1550 } else {
1551 assert_eq!(new_fields.len(), fields.len());
1552 Cow::Owned(DataType::Struct(new_fields))
1553 }
1554 },
1555 _ => unreachable!(),
1556 },
1557 #[cfg(feature = "dtype-extension")]
1558 DataType::Extension(..) => match dtype {
1559 Cow::Owned(DataType::Extension(ext, mut storage)) => {
1560 self.visit_ref_mut(storage.as_mut())?;
1561 Cow::Owned(DataType::Extension(ext, storage))
1562 },
1563 Cow::Borrowed(DataType::Extension(ext, storage)) => {
1564 let ret = self.visit_rec(Cow::Borrowed(storage.as_ref()))?;
1565
1566 if std::ptr::eq(ret.as_ref(), storage.as_ref()) {
1567 dtype
1568 } else {
1569 Cow::Owned(DataType::Extension(ext.clone(), Box::new(ret.into_owned())))
1570 }
1571 },
1572 _ => unreachable!(),
1573 },
1574 _ => {
1575 debug_assert!(!dtype.is_nested());
1576 dtype
1577 },
1578 };
1579
1580 (self.visitor_fn)(dtype)
1581 }
1582
1583 fn visit_ref_mut(&mut self, dtype: &mut DataType) -> PolarsResult<()> {
1585 *dtype = self
1586 .visit_rec(Cow::Owned(std::mem::replace(dtype, DataType::Null)))?
1587 .into_owned();
1588
1589 Ok(())
1590 }
1591}
1592
#[cfg(test)]
mod tests {
    use super::*;

    /// Without compound types only the innermost dtype is reported.
    #[cfg(feature = "dtype-array")]
    #[test]
    fn test_unpack_primitive_dtypes() {
        let nested = DataType::List(Box::new(DataType::Array(
            Box::new(DataType::Float64),
            10,
        )));

        let unpacked = unpack_dtypes(&nested, false);

        let expected: PlHashSet<DataType> = vec![DataType::Float64].into_iter().collect();
        assert_eq!(unpacked, expected);
    }

    /// With compound types every level of the nesting is reported.
    #[cfg(feature = "dtype-array")]
    #[test]
    fn test_unpack_compound_dtypes() {
        let array_type = DataType::Array(Box::new(DataType::Float64), 10);
        let list_type = DataType::List(Box::new(array_type.clone()));

        let unpacked = unpack_dtypes(&list_type, true);

        let expected: PlHashSet<DataType> = vec![list_type, array_type, DataType::Float64]
            .into_iter()
            .collect();
        assert_eq!(unpacked, expected);
    }
}