polars_core/datatypes/
field.rs

1use arrow::datatypes::Metadata;
2use polars_dtype::categorical::CategoricalPhysical;
3use polars_utils::pl_str::PlSmallStr;
4
5use super::*;
/// Name that an Arrow extension type must carry for [`DataType::from_arrow`] to treat it as
/// the Polars extension type.
pub static EXTENSION_NAME: &str = "POLARS_EXTENSION_TYPE";
7
/// Characterizes the name and the [`DataType`] of a column.
///
/// Both members are public; the accessor and mutator methods on `Field` read and write
/// exactly these two values.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
// (De)serialization support is only derived when one of the serde features is enabled.
#[cfg_attr(
    any(feature = "serde", feature = "serde-lazy"),
    derive(Serialize, Deserialize)
)]
// JSON schema generation is only derived when the `dsl-schema` feature is enabled.
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct Field {
    // The column name.
    pub name: PlSmallStr,
    // The column data type.
    pub dtype: DataType,
}
19
20impl From<Field> for (PlSmallStr, DataType) {
21    fn from(value: Field) -> Self {
22        (value.name, value.dtype)
23    }
24}
25
/// A [`Field`] wrapped in an [`Arc`].
pub type FieldRef = Arc<Field>;
27
28impl Field {
29    /// Creates a new `Field`.
30    ///
31    /// # Example
32    ///
33    /// ```rust
34    /// # use polars_core::prelude::*;
35    /// let f1 = Field::new("Fruit name".into(), DataType::String);
36    /// let f2 = Field::new("Lawful".into(), DataType::Boolean);
37    /// let f2 = Field::new("Departure".into(), DataType::Time);
38    /// ```
39    #[inline]
40    pub fn new(name: PlSmallStr, dtype: DataType) -> Self {
41        Field { name, dtype }
42    }
43
44    /// Returns a reference to the `Field` name.
45    ///
46    /// # Example
47    ///
48    /// ```rust
49    /// # use polars_core::prelude::*;
50    /// let f = Field::new("Year".into(), DataType::Int32);
51    ///
52    /// assert_eq!(f.name(), "Year");
53    /// ```
54    #[inline]
55    pub fn name(&self) -> &PlSmallStr {
56        &self.name
57    }
58
59    /// Returns a reference to the `Field` datatype.
60    ///
61    /// # Example
62    ///
63    /// ```rust
64    /// # use polars_core::prelude::*;
65    /// let f = Field::new("Birthday".into(), DataType::Date);
66    ///
67    /// assert_eq!(f.dtype(), &DataType::Date);
68    /// ```
69    #[inline]
70    pub fn dtype(&self) -> &DataType {
71        &self.dtype
72    }
73
74    /// Sets the `Field` datatype.
75    ///
76    /// # Example
77    ///
78    /// ```rust
79    /// # use polars_core::prelude::*;
80    /// let mut f = Field::new("Temperature".into(), DataType::Int32);
81    /// f.coerce(DataType::Float32);
82    ///
83    /// assert_eq!(f, Field::new("Temperature".into(), DataType::Float32));
84    /// ```
85    pub fn coerce(&mut self, dtype: DataType) {
86        self.dtype = dtype;
87    }
88
89    /// Sets the `Field` name.
90    ///
91    /// # Example
92    ///
93    /// ```rust
94    /// # use polars_core::prelude::*;
95    /// let mut f = Field::new("Atomic number".into(), DataType::UInt32);
96    /// f.set_name("Proton".into());
97    ///
98    /// assert_eq!(f, Field::new("Proton".into(), DataType::UInt32));
99    /// ```
100    pub fn set_name(&mut self, name: PlSmallStr) {
101        self.name = name;
102    }
103
104    /// Returns this `Field`, renamed.
105    pub fn with_name(mut self, name: PlSmallStr) -> Self {
106        self.name = name;
107        self
108    }
109
110    /// Converts the `Field` to an `arrow::datatypes::Field`.
111    ///
112    /// # Example
113    ///
114    /// ```rust
115    /// # use polars_core::prelude::*;
116    /// let f = Field::new("Value".into(), DataType::Int64);
117    /// let af = arrow::datatypes::Field::new("Value".into(), arrow::datatypes::ArrowDataType::Int64, true);
118    ///
119    /// assert_eq!(f.to_arrow(CompatLevel::newest()), af);
120    /// ```
121    pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowField {
122        self.dtype.to_arrow_field(self.name.clone(), compat_level)
123    }
124}
125
126impl AsRef<DataType> for Field {
127    fn as_ref(&self) -> &DataType {
128        &self.dtype
129    }
130}
131
/// Identity conversion: a [`DataType`] can be borrowed as itself, so code generic over
/// `AsRef<DataType>` accepts a plain `DataType` as well as a [`Field`].
impl AsRef<DataType> for DataType {
    fn as_ref(&self) -> &DataType {
        self
    }
}
137
138impl DataType {
139    pub fn boxed(self) -> Box<DataType> {
140        Box::new(self)
141    }
142
143    pub fn from_arrow_field(field: &ArrowField) -> DataType {
144        Self::from_arrow(&field.dtype, field.metadata.as_deref())
145    }
146
147    pub fn from_arrow_dtype(dt: &ArrowDataType) -> DataType {
148        Self::from_arrow(dt, None)
149    }
150
151    pub fn from_arrow(dt: &ArrowDataType, md: Option<&Metadata>) -> DataType {
152        match dt {
153            ArrowDataType::Null => DataType::Null,
154            ArrowDataType::UInt8 => DataType::UInt8,
155            ArrowDataType::UInt16 => DataType::UInt16,
156            ArrowDataType::UInt32 => DataType::UInt32,
157            ArrowDataType::UInt64 => DataType::UInt64,
158            ArrowDataType::Int8 => DataType::Int8,
159            ArrowDataType::Int16 => DataType::Int16,
160            ArrowDataType::Int32 => DataType::Int32,
161            ArrowDataType::Int64 => DataType::Int64,
162            #[cfg(feature = "dtype-i128")]
163            ArrowDataType::Int128 => DataType::Int128,
164            ArrowDataType::Boolean => DataType::Boolean,
165            ArrowDataType::Float16 => DataType::Float32,
166            ArrowDataType::Float32 => DataType::Float32,
167            ArrowDataType::Float64 => DataType::Float64,
168            #[cfg(feature = "dtype-array")]
169            ArrowDataType::FixedSizeList(f, size) => {
170                DataType::Array(DataType::from_arrow_field(f).boxed(), *size)
171            },
172            ArrowDataType::LargeList(f) | ArrowDataType::List(f) => {
173                DataType::List(DataType::from_arrow_field(f).boxed())
174            },
175            ArrowDataType::Date32 => DataType::Date,
176            ArrowDataType::Timestamp(tu, tz) => {
177                DataType::Datetime(tu.into(), TimeZone::opt_try_new(tz.clone()).unwrap())
178            },
179            ArrowDataType::Duration(tu) => DataType::Duration(tu.into()),
180            ArrowDataType::Date64 => DataType::Datetime(TimeUnit::Milliseconds, None),
181            ArrowDataType::Time64(_) | ArrowDataType::Time32(_) => DataType::Time,
182
183            #[cfg(feature = "dtype-categorical")]
184            ArrowDataType::Dictionary(_, value_type, _) => {
185                // The metadata encoding here must match DataType::to_arrow_field.
186                if let Some(mut enum_md) = md.and_then(|md| md.pl_enum_metadata()) {
187                    let cats = move || {
188                        if enum_md.is_empty() {
189                            return None;
190                        }
191
192                        let len;
193                        (len, enum_md) = enum_md.split_once(';').unwrap();
194                        let len = len.parse::<usize>().unwrap();
195                        let cat;
196                        (cat, enum_md) = enum_md.split_at(len);
197                        Some(cat)
198                    };
199
200                    let fcats = FrozenCategories::new(std::iter::from_fn(cats)).unwrap();
201                    DataType::from_frozen_categories(fcats)
202                } else if let Some(mut cat_md) = md.and_then(|md| md.pl_categorical_metadata()) {
203                    let name_len;
204                    (name_len, cat_md) = cat_md.split_once(';').unwrap();
205                    let name_len = name_len.parse::<usize>().unwrap();
206                    let name;
207                    (name, cat_md) = cat_md.split_at(name_len);
208
209                    let namespace_len;
210                    (namespace_len, cat_md) = cat_md.split_once(';').unwrap();
211                    let namespace_len = namespace_len.parse::<usize>().unwrap();
212                    let namespace;
213                    (namespace, cat_md) = cat_md.split_at(namespace_len);
214
215                    let (physical, _rest) = cat_md.split_once(';').unwrap();
216
217                    let physical: CategoricalPhysical = physical.parse().ok().unwrap();
218                    let cats = Categories::new(
219                        PlSmallStr::from_str(name),
220                        PlSmallStr::from_str(namespace),
221                        physical,
222                    );
223                    DataType::from_categories(cats)
224                } else if matches!(
225                    value_type.as_ref(),
226                    ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View
227                ) {
228                    DataType::from_categories(Categories::global())
229                } else {
230                    Self::from_arrow(value_type, None)
231                }
232            },
233
234            #[cfg(feature = "dtype-struct")]
235            ArrowDataType::Struct(fields) => {
236                DataType::Struct(fields.iter().map(|fld| fld.into()).collect())
237            },
238            #[cfg(not(feature = "dtype-struct"))]
239            ArrowDataType::Struct(_) => {
240                panic!("activate the 'dtype-struct' feature to handle struct data types")
241            },
242            ArrowDataType::Extension(ext) if ext.name.as_str() == EXTENSION_NAME => {
243                #[cfg(feature = "object")]
244                {
245                    DataType::Object("object")
246                }
247                #[cfg(not(feature = "object"))]
248                {
249                    panic!("activate the 'object' feature to be able to load POLARS_EXTENSION_TYPE")
250                }
251            },
252            #[cfg(feature = "dtype-decimal")]
253            ArrowDataType::Decimal(precision, scale) => {
254                DataType::Decimal(Some(*precision), Some(*scale))
255            },
256            ArrowDataType::Utf8View | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8 => {
257                DataType::String
258            },
259            ArrowDataType::BinaryView => DataType::Binary,
260            ArrowDataType::LargeBinary if md.is_some() => {
261                let md = md.unwrap();
262                if md.maintain_type() {
263                    DataType::BinaryOffset
264                } else {
265                    DataType::Binary
266                }
267            },
268            ArrowDataType::LargeBinary | ArrowDataType::Binary => DataType::Binary,
269            ArrowDataType::FixedSizeBinary(_) => DataType::Binary,
270            ArrowDataType::Map(inner, _is_sorted) => {
271                DataType::List(Self::from_arrow_field(inner).boxed())
272            },
273            dt => panic!(
274                "Arrow datatype {dt:?} not supported by Polars. \
275                You probably need to activate that data-type feature."
276            ),
277        }
278    }
279}
280
281impl From<&ArrowField> for Field {
282    fn from(f: &ArrowField) -> Self {
283        Field::new(f.name.clone(), DataType::from_arrow_field(f))
284    }
285}