1use arrow::datatypes::{IntervalUnit, Metadata};
2use polars_dtype::categorical::CategoricalPhysical;
3use polars_error::feature_gated;
4use polars_utils::check_allow_importing_interval_as_struct;
5use polars_utils::pl_str::PlSmallStr;
6
7use super::*;
/// Arrow extension-type name that `DataType::from_arrow` compares against to
/// recognise a Polars object column.
pub static POLARS_OBJECT_EXTENSION_NAME: &str = "_POLARS_PYTHON_OBJECT";
9
/// Pairs a column name with the Polars [`DataType`] of that column.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(
    any(feature = "serde", feature = "serde-lazy"),
    derive(Serialize, Deserialize)
)]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct Field {
    // Name of the field.
    pub name: PlSmallStr,
    // Data type of the field.
    pub dtype: DataType,
}
21
22impl From<Field> for (PlSmallStr, DataType) {
23 fn from(value: Field) -> Self {
24 (value.name, value.dtype)
25 }
26}
27
/// A reference-counted (`Arc`) handle to a [`Field`].
pub type FieldRef = Arc<Field>;
29
30impl Field {
31 #[inline]
42 pub fn new(name: PlSmallStr, dtype: DataType) -> Self {
43 Field { name, dtype }
44 }
45
46 #[inline]
57 pub fn name(&self) -> &PlSmallStr {
58 &self.name
59 }
60
61 #[inline]
72 pub fn dtype(&self) -> &DataType {
73 &self.dtype
74 }
75
76 pub fn coerce(&mut self, dtype: DataType) {
88 self.dtype = dtype;
89 }
90
91 pub fn set_name(&mut self, name: PlSmallStr) {
103 self.name = name;
104 }
105
106 pub fn with_name(mut self, name: PlSmallStr) -> Self {
108 self.name = name;
109 self
110 }
111
112 pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowField {
124 self.dtype.to_arrow_field(self.name.clone(), compat_level)
125 }
126
127 pub fn to_physical(&self) -> Field {
128 Self {
129 name: self.name.clone(),
130 dtype: self.dtype().to_physical(),
131 }
132 }
133}
134
135impl AsRef<DataType> for Field {
136 fn as_ref(&self) -> &DataType {
137 &self.dtype
138 }
139}
140
/// Identity conversion: a [`DataType`] borrowed as a [`DataType`] is itself.
impl AsRef<DataType> for DataType {
    fn as_ref(&self) -> &DataType {
        self
    }
}
146
147impl DataType {
148 pub fn boxed(self) -> Box<DataType> {
149 Box::new(self)
150 }
151
152 pub fn from_arrow_field(field: &ArrowField) -> DataType {
153 Self::from_arrow(&field.dtype, field.metadata.as_deref())
154 }
155
156 pub fn from_arrow_dtype(dt: &ArrowDataType) -> DataType {
157 Self::from_arrow(dt, None)
158 }
159
160 pub fn from_arrow(dt: &ArrowDataType, md: Option<&Metadata>) -> DataType {
161 match dt {
162 ArrowDataType::Null => DataType::Null,
163 ArrowDataType::UInt8 => DataType::UInt8,
164 ArrowDataType::UInt16 => DataType::UInt16,
165 ArrowDataType::UInt32 => DataType::UInt32,
166 ArrowDataType::UInt64 => DataType::UInt64,
167 #[cfg(feature = "dtype-u128")]
168 ArrowDataType::UInt128 => DataType::UInt128,
169 ArrowDataType::Int8 => DataType::Int8,
170 ArrowDataType::Int16 => DataType::Int16,
171 ArrowDataType::Int32 => DataType::Int32,
172 ArrowDataType::Int64 => DataType::Int64,
173 #[cfg(feature = "dtype-i128")]
174 ArrowDataType::Int128 => DataType::Int128,
175 ArrowDataType::Boolean => DataType::Boolean,
176 #[cfg(feature = "dtype-f16")]
177 ArrowDataType::Float16 => DataType::Float16,
178 ArrowDataType::Float32 => DataType::Float32,
179 ArrowDataType::Float64 => DataType::Float64,
180 #[cfg(feature = "dtype-array")]
181 ArrowDataType::FixedSizeList(f, size) => {
182 DataType::Array(DataType::from_arrow_field(f).boxed(), *size)
183 },
184 ArrowDataType::LargeList(f) | ArrowDataType::List(f) => {
185 DataType::List(DataType::from_arrow_field(f).boxed())
186 },
187 ArrowDataType::Date32 => DataType::Date,
188 ArrowDataType::Timestamp(tu, tz) => {
189 DataType::Datetime(tu.into(), TimeZone::opt_try_new(tz.clone()).unwrap())
190 },
191 ArrowDataType::Duration(tu) => DataType::Duration(tu.into()),
192 ArrowDataType::Date64 => DataType::Datetime(TimeUnit::Milliseconds, None),
193 ArrowDataType::Time64(_) | ArrowDataType::Time32(_) => DataType::Time,
194
195 #[cfg(feature = "dtype-categorical")]
196 ArrowDataType::Dictionary(_, value_type, _) => {
197 if let Some(mut enum_md) = md.and_then(|md| md.pl_enum_metadata()) {
199 let cats = move || {
200 if enum_md.is_empty() {
201 return None;
202 }
203
204 let len;
205 (len, enum_md) = enum_md.split_once(';').unwrap();
206 let len = len.parse::<usize>().unwrap();
207 let cat;
208 (cat, enum_md) = enum_md.split_at(len);
209 Some(cat)
210 };
211
212 let fcats = FrozenCategories::new(std::iter::from_fn(cats)).unwrap();
213 DataType::from_frozen_categories(fcats)
214 } else if let Some(mut cat_md) = md.and_then(|md| md.pl_categorical_metadata()) {
215 let name_len;
216 (name_len, cat_md) = cat_md.split_once(';').unwrap();
217 let name_len = name_len.parse::<usize>().unwrap();
218 let name;
219 (name, cat_md) = cat_md.split_at(name_len);
220
221 let namespace_len;
222 (namespace_len, cat_md) = cat_md.split_once(';').unwrap();
223 let namespace_len = namespace_len.parse::<usize>().unwrap();
224 let namespace;
225 (namespace, cat_md) = cat_md.split_at(namespace_len);
226
227 let (physical, _rest) = cat_md.split_once(';').unwrap();
228
229 let physical: CategoricalPhysical = physical.parse().ok().unwrap();
230 let cats = Categories::new(
231 PlSmallStr::from_str(name),
232 PlSmallStr::from_str(namespace),
233 physical,
234 );
235 DataType::from_categories(cats)
236 } else if matches!(
237 value_type.as_ref(),
238 ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View
239 ) {
240 DataType::from_categories(Categories::global())
241 } else {
242 Self::from_arrow(value_type, None)
243 }
244 },
245
246 #[cfg(feature = "dtype-struct")]
247 ArrowDataType::Struct(fields) => {
248 DataType::Struct(fields.iter().map(|fld| fld.into()).collect())
249 },
250 #[cfg(not(feature = "dtype-struct"))]
251 ArrowDataType::Struct(_) => {
252 panic!("activate the 'dtype-struct' feature to handle struct data types")
253 },
254 ArrowDataType::Extension(ext) if ext.name.as_str() == POLARS_OBJECT_EXTENSION_NAME => {
255 #[cfg(feature = "object")]
256 {
257 DataType::Object("object")
258 }
259 #[cfg(not(feature = "object"))]
260 {
261 panic!("activate the 'object' feature to be able to load POLARS_EXTENSION_TYPE")
262 }
263 },
264 #[cfg(feature = "dtype-extension")]
265 ArrowDataType::Extension(ext) => {
266 use crate::prelude::extension::get_extension_type_or_storage;
267 let storage = DataType::from_arrow(&ext.inner, md);
268 match get_extension_type_or_storage(&ext.name, &storage, ext.metadata.as_deref()) {
269 Some(typ) => DataType::Extension(typ, Box::new(storage)),
270 None => storage,
271 }
272 },
273 #[cfg(feature = "dtype-decimal")]
274 ArrowDataType::Decimal(precision, scale) => DataType::Decimal(*precision, *scale),
275 ArrowDataType::Utf8View | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8 => {
276 DataType::String
277 },
278 ArrowDataType::BinaryView => DataType::Binary,
279 ArrowDataType::LargeBinary if md.is_some() => {
280 let md = md.unwrap();
281 if md.maintain_type() {
282 DataType::BinaryOffset
283 } else {
284 DataType::Binary
285 }
286 },
287 ArrowDataType::LargeBinary | ArrowDataType::Binary => DataType::Binary,
288 ArrowDataType::FixedSizeBinary(_) => DataType::Binary,
289 ArrowDataType::Map(inner, _is_sorted) => {
290 DataType::List(Self::from_arrow_field(inner).boxed())
291 },
292 ArrowDataType::Interval(IntervalUnit::MonthDayNano) => {
293 check_allow_importing_interval_as_struct("month_day_nano_interval").unwrap();
294 feature_gated!("dtype-struct", DataType::_month_days_ns_struct_type())
295 },
296 ArrowDataType::Interval(IntervalUnit::MonthDayMillis) => {
297 check_allow_importing_interval_as_struct("month_day_millisecond_interval").unwrap();
298 feature_gated!("dtype-struct", DataType::_month_days_ns_struct_type())
299 },
300 dt => panic!(
301 "Arrow datatype {dt:?} not supported by Polars. \
302 You probably need to activate that data-type feature."
303 ),
304 }
305 }
306}
307
308impl From<&ArrowField> for Field {
309 fn from(f: &ArrowField) -> Self {
310 Field::new(f.name.clone(), DataType::from_arrow_field(f))
311 }
312}