polars_core/datatypes/
field.rs

1use arrow::datatypes::{IntervalUnit, Metadata};
2use polars_dtype::categorical::CategoricalPhysical;
3use polars_error::feature_gated;
4use polars_utils::check_allow_importing_interval_as_struct;
5use polars_utils::pl_str::PlSmallStr;
6
7use super::*;
/// Name of the Arrow extension type that [`DataType::from_arrow`] maps to the
/// Polars `Object` dtype (or panics on when the `object` feature is disabled).
pub static POLARS_OBJECT_EXTENSION_NAME: &str = "_POLARS_PYTHON_OBJECT";
9
/// Characterizes the name and the [`DataType`] of a column.
// NOTE: the field notes below are plain `//` comments on purpose; `///` text can be
// picked up by derive macros (e.g. the optional `schemars::JsonSchema` derive), so
// doc-comment wording is left untouched here.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(
    any(feature = "serde", feature = "serde-lazy"),
    derive(Serialize, Deserialize)
)]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct Field {
    // Column name.
    pub name: PlSmallStr,
    // Logical data type of the column.
    pub dtype: DataType,
}
21
22impl From<Field> for (PlSmallStr, DataType) {
23    fn from(value: Field) -> Self {
24        (value.name, value.dtype)
25    }
26}
27
/// A [`Field`] wrapped in an `Arc`.
pub type FieldRef = Arc<Field>;
29
30impl Field {
31    /// Creates a new `Field`.
32    ///
33    /// # Example
34    ///
35    /// ```rust
36    /// # use polars_core::prelude::*;
37    /// let f1 = Field::new("Fruit name".into(), DataType::String);
38    /// let f2 = Field::new("Lawful".into(), DataType::Boolean);
39    /// let f2 = Field::new("Departure".into(), DataType::Time);
40    /// ```
41    #[inline]
42    pub fn new(name: PlSmallStr, dtype: DataType) -> Self {
43        Field { name, dtype }
44    }
45
46    /// Returns a reference to the `Field` name.
47    ///
48    /// # Example
49    ///
50    /// ```rust
51    /// # use polars_core::prelude::*;
52    /// let f = Field::new("Year".into(), DataType::Int32);
53    ///
54    /// assert_eq!(f.name(), "Year");
55    /// ```
56    #[inline]
57    pub fn name(&self) -> &PlSmallStr {
58        &self.name
59    }
60
61    /// Returns a reference to the `Field` datatype.
62    ///
63    /// # Example
64    ///
65    /// ```rust
66    /// # use polars_core::prelude::*;
67    /// let f = Field::new("Birthday".into(), DataType::Date);
68    ///
69    /// assert_eq!(f.dtype(), &DataType::Date);
70    /// ```
71    #[inline]
72    pub fn dtype(&self) -> &DataType {
73        &self.dtype
74    }
75
76    /// Sets the `Field` datatype.
77    ///
78    /// # Example
79    ///
80    /// ```rust
81    /// # use polars_core::prelude::*;
82    /// let mut f = Field::new("Temperature".into(), DataType::Int32);
83    /// f.coerce(DataType::Float32);
84    ///
85    /// assert_eq!(f, Field::new("Temperature".into(), DataType::Float32));
86    /// ```
87    pub fn coerce(&mut self, dtype: DataType) {
88        self.dtype = dtype;
89    }
90
91    /// Sets the `Field` name.
92    ///
93    /// # Example
94    ///
95    /// ```rust
96    /// # use polars_core::prelude::*;
97    /// let mut f = Field::new("Atomic number".into(), DataType::UInt32);
98    /// f.set_name("Proton".into());
99    ///
100    /// assert_eq!(f, Field::new("Proton".into(), DataType::UInt32));
101    /// ```
102    pub fn set_name(&mut self, name: PlSmallStr) {
103        self.name = name;
104    }
105
106    /// Returns this `Field`, renamed.
107    pub fn with_name(mut self, name: PlSmallStr) -> Self {
108        self.name = name;
109        self
110    }
111
112    /// Converts the `Field` to an `arrow::datatypes::Field`.
113    ///
114    /// # Example
115    ///
116    /// ```rust
117    /// # use polars_core::prelude::*;
118    /// let f = Field::new("Value".into(), DataType::Int64);
119    /// let af = arrow::datatypes::Field::new("Value".into(), arrow::datatypes::ArrowDataType::Int64, true);
120    ///
121    /// assert_eq!(f.to_arrow(CompatLevel::newest()), af);
122    /// ```
123    pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowField {
124        self.dtype.to_arrow_field(self.name.clone(), compat_level)
125    }
126
127    pub fn to_physical(&self) -> Field {
128        Self {
129            name: self.name.clone(),
130            dtype: self.dtype().to_physical(),
131        }
132    }
133}
134
135impl AsRef<DataType> for Field {
136    fn as_ref(&self) -> &DataType {
137        &self.dtype
138    }
139}
140
141impl AsRef<DataType> for DataType {
142    fn as_ref(&self) -> &DataType {
143        self
144    }
145}
146
147impl DataType {
148    pub fn boxed(self) -> Box<DataType> {
149        Box::new(self)
150    }
151
152    pub fn from_arrow_field(field: &ArrowField) -> DataType {
153        Self::from_arrow(&field.dtype, field.metadata.as_deref())
154    }
155
156    pub fn from_arrow_dtype(dt: &ArrowDataType) -> DataType {
157        Self::from_arrow(dt, None)
158    }
159
160    pub fn from_arrow(dt: &ArrowDataType, md: Option<&Metadata>) -> DataType {
161        match dt {
162            ArrowDataType::Null => DataType::Null,
163            ArrowDataType::UInt8 => DataType::UInt8,
164            ArrowDataType::UInt16 => DataType::UInt16,
165            ArrowDataType::UInt32 => DataType::UInt32,
166            ArrowDataType::UInt64 => DataType::UInt64,
167            #[cfg(feature = "dtype-u128")]
168            ArrowDataType::UInt128 => DataType::UInt128,
169            ArrowDataType::Int8 => DataType::Int8,
170            ArrowDataType::Int16 => DataType::Int16,
171            ArrowDataType::Int32 => DataType::Int32,
172            ArrowDataType::Int64 => DataType::Int64,
173            #[cfg(feature = "dtype-i128")]
174            ArrowDataType::Int128 => DataType::Int128,
175            ArrowDataType::Boolean => DataType::Boolean,
176            #[cfg(feature = "dtype-f16")]
177            ArrowDataType::Float16 => DataType::Float16,
178            ArrowDataType::Float32 => DataType::Float32,
179            ArrowDataType::Float64 => DataType::Float64,
180            #[cfg(feature = "dtype-array")]
181            ArrowDataType::FixedSizeList(f, size) => {
182                DataType::Array(DataType::from_arrow_field(f).boxed(), *size)
183            },
184            ArrowDataType::LargeList(f) | ArrowDataType::List(f) => {
185                DataType::List(DataType::from_arrow_field(f).boxed())
186            },
187            ArrowDataType::Date32 => DataType::Date,
188            ArrowDataType::Timestamp(tu, tz) => {
189                DataType::Datetime(tu.into(), TimeZone::opt_try_new(tz.clone()).unwrap())
190            },
191            ArrowDataType::Duration(tu) => DataType::Duration(tu.into()),
192            ArrowDataType::Date64 => DataType::Datetime(TimeUnit::Milliseconds, None),
193            ArrowDataType::Time64(_) | ArrowDataType::Time32(_) => DataType::Time,
194
195            #[cfg(feature = "dtype-categorical")]
196            ArrowDataType::Dictionary(_, value_type, _) => {
197                // The metadata encoding here must match DataType::to_arrow_field.
198                if let Some(mut enum_md) = md.and_then(|md| md.pl_enum_metadata()) {
199                    let cats = move || {
200                        if enum_md.is_empty() {
201                            return None;
202                        }
203
204                        let len;
205                        (len, enum_md) = enum_md.split_once(';').unwrap();
206                        let len = len.parse::<usize>().unwrap();
207                        let cat;
208                        (cat, enum_md) = enum_md.split_at(len);
209                        Some(cat)
210                    };
211
212                    let fcats = FrozenCategories::new(std::iter::from_fn(cats)).unwrap();
213                    DataType::from_frozen_categories(fcats)
214                } else if let Some(mut cat_md) = md.and_then(|md| md.pl_categorical_metadata()) {
215                    let name_len;
216                    (name_len, cat_md) = cat_md.split_once(';').unwrap();
217                    let name_len = name_len.parse::<usize>().unwrap();
218                    let name;
219                    (name, cat_md) = cat_md.split_at(name_len);
220
221                    let namespace_len;
222                    (namespace_len, cat_md) = cat_md.split_once(';').unwrap();
223                    let namespace_len = namespace_len.parse::<usize>().unwrap();
224                    let namespace;
225                    (namespace, cat_md) = cat_md.split_at(namespace_len);
226
227                    let (physical, _rest) = cat_md.split_once(';').unwrap();
228
229                    let physical: CategoricalPhysical = physical.parse().ok().unwrap();
230                    let cats = Categories::new(
231                        PlSmallStr::from_str(name),
232                        PlSmallStr::from_str(namespace),
233                        physical,
234                    );
235                    DataType::from_categories(cats)
236                } else if matches!(
237                    value_type.as_ref(),
238                    ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View
239                ) {
240                    DataType::from_categories(Categories::global())
241                } else {
242                    Self::from_arrow(value_type, None)
243                }
244            },
245
246            #[cfg(feature = "dtype-struct")]
247            ArrowDataType::Struct(fields) => {
248                DataType::Struct(fields.iter().map(|fld| fld.into()).collect())
249            },
250            #[cfg(not(feature = "dtype-struct"))]
251            ArrowDataType::Struct(_) => {
252                panic!("activate the 'dtype-struct' feature to handle struct data types")
253            },
254            ArrowDataType::Extension(ext) if ext.name.as_str() == POLARS_OBJECT_EXTENSION_NAME => {
255                #[cfg(feature = "object")]
256                {
257                    DataType::Object("object")
258                }
259                #[cfg(not(feature = "object"))]
260                {
261                    panic!("activate the 'object' feature to be able to load POLARS_EXTENSION_TYPE")
262                }
263            },
264            #[cfg(feature = "dtype-extension")]
265            ArrowDataType::Extension(ext) => {
266                use crate::prelude::extension::get_extension_type_or_storage;
267                let storage = DataType::from_arrow(&ext.inner, md);
268                match get_extension_type_or_storage(&ext.name, &storage, ext.metadata.as_deref()) {
269                    Some(typ) => DataType::Extension(typ, Box::new(storage)),
270                    None => storage,
271                }
272            },
273            #[cfg(feature = "dtype-decimal")]
274            ArrowDataType::Decimal(precision, scale) => DataType::Decimal(*precision, *scale),
275            ArrowDataType::Utf8View | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8 => {
276                DataType::String
277            },
278            ArrowDataType::BinaryView => DataType::Binary,
279            ArrowDataType::LargeBinary if md.is_some() => {
280                let md = md.unwrap();
281                if md.maintain_type() {
282                    DataType::BinaryOffset
283                } else {
284                    DataType::Binary
285                }
286            },
287            ArrowDataType::LargeBinary | ArrowDataType::Binary => DataType::Binary,
288            ArrowDataType::FixedSizeBinary(_) => DataType::Binary,
289            ArrowDataType::Map(inner, _is_sorted) => {
290                DataType::List(Self::from_arrow_field(inner).boxed())
291            },
292            ArrowDataType::Interval(IntervalUnit::MonthDayNano) => {
293                check_allow_importing_interval_as_struct("month_day_nano_interval").unwrap();
294                feature_gated!("dtype-struct", DataType::_month_days_ns_struct_type())
295            },
296            ArrowDataType::Interval(IntervalUnit::MonthDayMillis) => {
297                check_allow_importing_interval_as_struct("month_day_millisecond_interval").unwrap();
298                feature_gated!("dtype-struct", DataType::_month_days_ns_struct_type())
299            },
300            dt => panic!(
301                "Arrow datatype {dt:?} not supported by Polars. \
302                You probably need to activate that data-type feature."
303            ),
304        }
305    }
306}
307
308impl From<&ArrowField> for Field {
309    fn from(f: &ArrowField) -> Self {
310        Field::new(f.name.clone(), DataType::from_arrow_field(f))
311    }
312}