// polars_core/datatypes/field.rs

1use arrow::datatypes::{IntervalUnit, Metadata};
2use polars_dtype::categorical::CategoricalPhysical;
3use polars_error::feature_gated;
4use polars_utils::pl_str::PlSmallStr;
5
6use super::*;
7use crate::config::check_allow_importing_interval_as_struct;
/// Arrow extension name that [`DataType::from_arrow`] compares against to detect
/// Polars object columns.
pub static POLARS_OBJECT_EXTENSION_NAME: &str = "_POLARS_PYTHON_OBJECT";
/// Extension name `arrow.uuid`.
// NOTE(review): presumably the canonical Arrow UUID extension type — confirm at the use sites.
pub static ARROW_UUID_EXTENSION_NAME: &str = "arrow.uuid";
10
/// Characterizes the name and the [`DataType`] of a column.
// Serde support is derived only when the `serde` or `serde-lazy` feature is enabled,
// and the JSON schema only with `dsl-schema`.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(
    any(feature = "serde", feature = "serde-lazy"),
    derive(Serialize, Deserialize)
)]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct Field {
    // Name of the column.
    pub name: PlSmallStr,
    // Data type of the column.
    pub dtype: DataType,
}
22
23impl From<Field> for (PlSmallStr, DataType) {
24    fn from(value: Field) -> Self {
25        (value.name, value.dtype)
26    }
27}
28
/// An `Arc`-wrapped [`Field`].
pub type FieldRef = Arc<Field>;
30
31impl Field {
32    /// Creates a new `Field`.
33    ///
34    /// # Example
35    ///
36    /// ```rust
37    /// # use polars_core::prelude::*;
38    /// let f1 = Field::new("Fruit name".into(), DataType::String);
39    /// let f2 = Field::new("Lawful".into(), DataType::Boolean);
40    /// let f2 = Field::new("Departure".into(), DataType::Time);
41    /// ```
42    #[inline]
43    pub fn new(name: PlSmallStr, dtype: DataType) -> Self {
44        Field { name, dtype }
45    }
46
47    /// Returns a reference to the `Field` name.
48    ///
49    /// # Example
50    ///
51    /// ```rust
52    /// # use polars_core::prelude::*;
53    /// let f = Field::new("Year".into(), DataType::Int32);
54    ///
55    /// assert_eq!(f.name(), "Year");
56    /// ```
57    #[inline]
58    pub fn name(&self) -> &PlSmallStr {
59        &self.name
60    }
61
62    /// Returns a reference to the `Field` datatype.
63    ///
64    /// # Example
65    ///
66    /// ```rust
67    /// # use polars_core::prelude::*;
68    /// let f = Field::new("Birthday".into(), DataType::Date);
69    ///
70    /// assert_eq!(f.dtype(), &DataType::Date);
71    /// ```
72    #[inline]
73    pub fn dtype(&self) -> &DataType {
74        &self.dtype
75    }
76
77    /// Sets the `Field` datatype.
78    ///
79    /// # Example
80    ///
81    /// ```rust
82    /// # use polars_core::prelude::*;
83    /// let mut f = Field::new("Temperature".into(), DataType::Int32);
84    /// f.coerce(DataType::Float32);
85    ///
86    /// assert_eq!(f, Field::new("Temperature".into(), DataType::Float32));
87    /// ```
88    pub fn coerce(&mut self, dtype: DataType) {
89        self.dtype = dtype;
90    }
91
92    /// Sets the `Field` name.
93    ///
94    /// # Example
95    ///
96    /// ```rust
97    /// # use polars_core::prelude::*;
98    /// let mut f = Field::new("Atomic number".into(), DataType::UInt32);
99    /// f.set_name("Proton".into());
100    ///
101    /// assert_eq!(f, Field::new("Proton".into(), DataType::UInt32));
102    /// ```
103    pub fn set_name(&mut self, name: PlSmallStr) {
104        self.name = name;
105    }
106
107    /// Returns this `Field`, renamed.
108    pub fn with_name(mut self, name: PlSmallStr) -> Self {
109        self.name = name;
110        self
111    }
112
113    // Returns this `Field`, with a different datatype.
114    pub fn with_dtype(mut self, dtype: DataType) -> Self {
115        self.dtype = dtype;
116        self
117    }
118
119    /// Converts the `Field` to an `arrow::datatypes::Field`.
120    ///
121    /// # Example
122    ///
123    /// ```rust
124    /// # use polars_core::prelude::*;
125    /// let f = Field::new("Value".into(), DataType::Int64);
126    /// let af = arrow::datatypes::Field::new("Value".into(), arrow::datatypes::ArrowDataType::Int64, true);
127    ///
128    /// assert_eq!(f.to_arrow(CompatLevel::newest()), af);
129    /// ```
130    pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowField {
131        self.dtype.to_arrow_field(self.name.clone(), compat_level)
132    }
133
134    pub fn to_physical(&self) -> Field {
135        Self {
136            name: self.name.clone(),
137            dtype: self.dtype().to_physical(),
138        }
139    }
140}
141
142impl AsRef<DataType> for Field {
143    fn as_ref(&self) -> &DataType {
144        &self.dtype
145    }
146}
147
148impl AsRef<DataType> for DataType {
149    fn as_ref(&self) -> &DataType {
150        self
151    }
152}
153
154impl DataType {
155    pub fn boxed(self) -> Box<DataType> {
156        Box::new(self)
157    }
158
159    pub fn from_arrow_field(field: &ArrowField) -> DataType {
160        Self::from_arrow(&field.dtype, field.metadata.as_deref())
161    }
162
163    pub fn from_arrow_dtype(dt: &ArrowDataType) -> DataType {
164        Self::from_arrow(dt, None)
165    }
166
167    pub fn from_arrow(dt: &ArrowDataType, md: Option<&Metadata>) -> DataType {
168        match dt {
169            ArrowDataType::Null => DataType::Null,
170            ArrowDataType::UInt8 => DataType::UInt8,
171            ArrowDataType::UInt16 => DataType::UInt16,
172            ArrowDataType::UInt32 => DataType::UInt32,
173            ArrowDataType::UInt64 => DataType::UInt64,
174            #[cfg(feature = "dtype-u128")]
175            ArrowDataType::UInt128 => DataType::UInt128,
176            ArrowDataType::Int8 => DataType::Int8,
177            ArrowDataType::Int16 => DataType::Int16,
178            ArrowDataType::Int32 => DataType::Int32,
179            ArrowDataType::Int64 => DataType::Int64,
180            #[cfg(feature = "dtype-i128")]
181            ArrowDataType::Int128 => DataType::Int128,
182            ArrowDataType::Boolean => DataType::Boolean,
183            #[cfg(feature = "dtype-f16")]
184            ArrowDataType::Float16 => DataType::Float16,
185            ArrowDataType::Float32 => DataType::Float32,
186            ArrowDataType::Float64 => DataType::Float64,
187            #[cfg(feature = "dtype-array")]
188            ArrowDataType::FixedSizeList(f, size) => {
189                DataType::Array(DataType::from_arrow_field(f).boxed(), *size)
190            },
191            ArrowDataType::LargeList(f) | ArrowDataType::List(f) => {
192                DataType::List(DataType::from_arrow_field(f).boxed())
193            },
194            ArrowDataType::Date32 => DataType::Date,
195            ArrowDataType::Timestamp(tu, tz) => {
196                DataType::Datetime(tu.into(), TimeZone::opt_try_new(tz.clone()).unwrap())
197            },
198            ArrowDataType::Duration(tu) => DataType::Duration(tu.into()),
199            ArrowDataType::Date64 => DataType::Datetime(TimeUnit::Milliseconds, None),
200            ArrowDataType::Time64(_) | ArrowDataType::Time32(_) => DataType::Time,
201
202            #[cfg(feature = "dtype-categorical")]
203            ArrowDataType::Dictionary(_, value_type, _) => {
204                // The metadata encoding here must match DataType::to_arrow_field.
205                if let Some(mut enum_md) = md.and_then(|md| md.pl_enum_metadata()) {
206                    let cats = move || {
207                        if enum_md.is_empty() {
208                            return None;
209                        }
210
211                        let len;
212                        (len, enum_md) = enum_md.split_once(';').unwrap();
213                        let len = len.parse::<usize>().unwrap();
214                        let cat;
215                        (cat, enum_md) = enum_md.split_at(len);
216                        Some(cat)
217                    };
218
219                    let fcats = FrozenCategories::new(std::iter::from_fn(cats)).unwrap();
220                    DataType::from_frozen_categories(fcats)
221                } else if let Some(mut cat_md) = md.and_then(|md| md.pl_categorical_metadata()) {
222                    let name_len;
223                    (name_len, cat_md) = cat_md.split_once(';').unwrap();
224                    let name_len = name_len.parse::<usize>().unwrap();
225                    let name;
226                    (name, cat_md) = cat_md.split_at(name_len);
227
228                    let namespace_len;
229                    (namespace_len, cat_md) = cat_md.split_once(';').unwrap();
230                    let namespace_len = namespace_len.parse::<usize>().unwrap();
231                    let namespace;
232                    (namespace, cat_md) = cat_md.split_at(namespace_len);
233
234                    let (physical, _rest) = cat_md.split_once(';').unwrap();
235
236                    let physical: CategoricalPhysical = physical.parse().ok().unwrap();
237                    let cats = Categories::new(
238                        PlSmallStr::from_str(name),
239                        PlSmallStr::from_str(namespace),
240                        physical,
241                    );
242                    DataType::from_categories(cats)
243                } else if matches!(
244                    value_type.as_ref(),
245                    ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View
246                ) {
247                    DataType::from_categories(Categories::global())
248                } else {
249                    Self::from_arrow(value_type, None)
250                }
251            },
252
253            #[cfg(feature = "dtype-struct")]
254            ArrowDataType::Struct(fields) => {
255                DataType::Struct(fields.iter().map(|fld| fld.into()).collect())
256            },
257            #[cfg(not(feature = "dtype-struct"))]
258            ArrowDataType::Struct(_) => {
259                panic!("activate the 'dtype-struct' feature to handle struct data types")
260            },
261            ArrowDataType::Extension(ext) if ext.name.as_str() == POLARS_OBJECT_EXTENSION_NAME => {
262                #[cfg(feature = "object")]
263                {
264                    DataType::Object("object")
265                }
266                #[cfg(not(feature = "object"))]
267                {
268                    panic!("activate the 'object' feature to be able to load POLARS_EXTENSION_TYPE")
269                }
270            },
271            #[cfg(feature = "dtype-extension")]
272            ArrowDataType::Extension(ext) => {
273                use crate::prelude::extension::get_extension_type_or_storage;
274                let storage = DataType::from_arrow(&ext.inner, md);
275                match get_extension_type_or_storage(&ext.name, &storage, ext.metadata.as_deref()) {
276                    Some(typ) => DataType::Extension(typ, Box::new(storage)),
277                    None => storage,
278                }
279            },
280            #[cfg(feature = "dtype-decimal")]
281            ArrowDataType::Decimal(precision, scale)
282            | ArrowDataType::Decimal32(precision, scale)
283            | ArrowDataType::Decimal64(precision, scale)
284            | ArrowDataType::Decimal256(precision, scale) => DataType::Decimal(*precision, *scale),
285            ArrowDataType::Utf8View | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8 => {
286                DataType::String
287            },
288            ArrowDataType::BinaryView => DataType::Binary,
289            ArrowDataType::LargeBinary if md.is_some() => {
290                let md = md.unwrap();
291                if md.maintain_type() {
292                    DataType::BinaryOffset
293                } else {
294                    DataType::Binary
295                }
296            },
297            ArrowDataType::LargeBinary | ArrowDataType::Binary => DataType::Binary,
298            ArrowDataType::FixedSizeBinary(_) => DataType::Binary,
299            ArrowDataType::Map(inner, _is_sorted) => {
300                DataType::List(Self::from_arrow_field(inner).boxed())
301            },
302            ArrowDataType::Interval(IntervalUnit::MonthDayNano) => {
303                check_allow_importing_interval_as_struct("month_day_nano_interval").unwrap();
304                feature_gated!("dtype-struct", DataType::_month_days_ns_struct_type())
305            },
306            ArrowDataType::Interval(IntervalUnit::MonthDayMillis) => {
307                check_allow_importing_interval_as_struct("month_day_millisecond_interval").unwrap();
308                feature_gated!("dtype-struct", DataType::_month_days_ns_struct_type())
309            },
310            dt => panic!(
311                "Arrow datatype {dt:?} not supported by Polars. \
312                You probably need to activate that data-type feature."
313            ),
314        }
315    }
316}
317
318impl From<&ArrowField> for Field {
319    fn from(f: &ArrowField) -> Self {
320        Field::new(f.name.clone(), DataType::from_arrow_field(f))
321    }
322}