1use arrow::datatypes::{IntervalUnit, Metadata};
2use polars_dtype::categorical::CategoricalPhysical;
3use polars_error::feature_gated;
4use polars_utils::pl_str::PlSmallStr;
5
6use super::*;
7use crate::config::check_allow_importing_interval_as_struct;
/// Arrow extension-type name that `DataType::from_arrow` compares against to
/// recognise an extension array that should be loaded as `DataType::Object`.
pub static POLARS_OBJECT_EXTENSION_NAME: &str = "_POLARS_PYTHON_OBJECT";

/// Extension-type name string `"arrow.uuid"`.
///
/// NOTE(review): not referenced in this part of the file; presumably it names
/// Arrow's canonical UUID extension type. Confirm against the users of this
/// constant.
pub static ARROW_UUID_EXTENSION_NAME: &str = "arrow.uuid";
10
11#[derive(Clone, Debug, PartialEq, Eq, Hash)]
13#[cfg_attr(
14 any(feature = "serde", feature = "serde-lazy"),
15 derive(Serialize, Deserialize)
16)]
17#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
18pub struct Field {
19 pub name: PlSmallStr,
20 pub dtype: DataType,
21}
22
23impl From<Field> for (PlSmallStr, DataType) {
24 fn from(value: Field) -> Self {
25 (value.name, value.dtype)
26 }
27}
28
29pub type FieldRef = Arc<Field>;
30
31impl Field {
32 #[inline]
43 pub fn new(name: PlSmallStr, dtype: DataType) -> Self {
44 Field { name, dtype }
45 }
46
47 #[inline]
58 pub fn name(&self) -> &PlSmallStr {
59 &self.name
60 }
61
62 #[inline]
73 pub fn dtype(&self) -> &DataType {
74 &self.dtype
75 }
76
77 pub fn coerce(&mut self, dtype: DataType) {
89 self.dtype = dtype;
90 }
91
92 pub fn set_name(&mut self, name: PlSmallStr) {
104 self.name = name;
105 }
106
107 pub fn with_name(mut self, name: PlSmallStr) -> Self {
109 self.name = name;
110 self
111 }
112
113 pub fn with_dtype(mut self, dtype: DataType) -> Self {
115 self.dtype = dtype;
116 self
117 }
118
119 pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowField {
131 self.dtype.to_arrow_field(self.name.clone(), compat_level)
132 }
133
134 pub fn to_physical(&self) -> Field {
135 Self {
136 name: self.name.clone(),
137 dtype: self.dtype().to_physical(),
138 }
139 }
140}
141
142impl AsRef<DataType> for Field {
143 fn as_ref(&self) -> &DataType {
144 &self.dtype
145 }
146}
147
148impl AsRef<DataType> for DataType {
149 fn as_ref(&self) -> &DataType {
150 self
151 }
152}
153
154impl DataType {
155 pub fn boxed(self) -> Box<DataType> {
156 Box::new(self)
157 }
158
159 pub fn from_arrow_field(field: &ArrowField) -> DataType {
160 Self::from_arrow(&field.dtype, field.metadata.as_deref())
161 }
162
163 pub fn from_arrow_dtype(dt: &ArrowDataType) -> DataType {
164 Self::from_arrow(dt, None)
165 }
166
167 pub fn from_arrow(dt: &ArrowDataType, md: Option<&Metadata>) -> DataType {
168 match dt {
169 ArrowDataType::Null => DataType::Null,
170 ArrowDataType::UInt8 => DataType::UInt8,
171 ArrowDataType::UInt16 => DataType::UInt16,
172 ArrowDataType::UInt32 => DataType::UInt32,
173 ArrowDataType::UInt64 => DataType::UInt64,
174 #[cfg(feature = "dtype-u128")]
175 ArrowDataType::UInt128 => DataType::UInt128,
176 ArrowDataType::Int8 => DataType::Int8,
177 ArrowDataType::Int16 => DataType::Int16,
178 ArrowDataType::Int32 => DataType::Int32,
179 ArrowDataType::Int64 => DataType::Int64,
180 #[cfg(feature = "dtype-i128")]
181 ArrowDataType::Int128 => DataType::Int128,
182 ArrowDataType::Boolean => DataType::Boolean,
183 #[cfg(feature = "dtype-f16")]
184 ArrowDataType::Float16 => DataType::Float16,
185 ArrowDataType::Float32 => DataType::Float32,
186 ArrowDataType::Float64 => DataType::Float64,
187 #[cfg(feature = "dtype-array")]
188 ArrowDataType::FixedSizeList(f, size) => {
189 DataType::Array(DataType::from_arrow_field(f).boxed(), *size)
190 },
191 ArrowDataType::LargeList(f) | ArrowDataType::List(f) => {
192 DataType::List(DataType::from_arrow_field(f).boxed())
193 },
194 ArrowDataType::Date32 => DataType::Date,
195 ArrowDataType::Timestamp(tu, tz) => {
196 DataType::Datetime(tu.into(), TimeZone::opt_try_new(tz.clone()).unwrap())
197 },
198 ArrowDataType::Duration(tu) => DataType::Duration(tu.into()),
199 ArrowDataType::Date64 => DataType::Datetime(TimeUnit::Milliseconds, None),
200 ArrowDataType::Time64(_) | ArrowDataType::Time32(_) => DataType::Time,
201
202 #[cfg(feature = "dtype-categorical")]
203 ArrowDataType::Dictionary(_, value_type, _) => {
204 if let Some(mut enum_md) = md.and_then(|md| md.pl_enum_metadata()) {
206 let cats = move || {
207 if enum_md.is_empty() {
208 return None;
209 }
210
211 let len;
212 (len, enum_md) = enum_md.split_once(';').unwrap();
213 let len = len.parse::<usize>().unwrap();
214 let cat;
215 (cat, enum_md) = enum_md.split_at(len);
216 Some(cat)
217 };
218
219 let fcats = FrozenCategories::new(std::iter::from_fn(cats)).unwrap();
220 DataType::from_frozen_categories(fcats)
221 } else if let Some(mut cat_md) = md.and_then(|md| md.pl_categorical_metadata()) {
222 let name_len;
223 (name_len, cat_md) = cat_md.split_once(';').unwrap();
224 let name_len = name_len.parse::<usize>().unwrap();
225 let name;
226 (name, cat_md) = cat_md.split_at(name_len);
227
228 let namespace_len;
229 (namespace_len, cat_md) = cat_md.split_once(';').unwrap();
230 let namespace_len = namespace_len.parse::<usize>().unwrap();
231 let namespace;
232 (namespace, cat_md) = cat_md.split_at(namespace_len);
233
234 let (physical, _rest) = cat_md.split_once(';').unwrap();
235
236 let physical: CategoricalPhysical = physical.parse().ok().unwrap();
237 let cats = Categories::new(
238 PlSmallStr::from_str(name),
239 PlSmallStr::from_str(namespace),
240 physical,
241 );
242 DataType::from_categories(cats)
243 } else if matches!(
244 value_type.as_ref(),
245 ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View
246 ) {
247 DataType::from_categories(Categories::global())
248 } else {
249 Self::from_arrow(value_type, None)
250 }
251 },
252
253 #[cfg(feature = "dtype-struct")]
254 ArrowDataType::Struct(fields) => {
255 DataType::Struct(fields.iter().map(|fld| fld.into()).collect())
256 },
257 #[cfg(not(feature = "dtype-struct"))]
258 ArrowDataType::Struct(_) => {
259 panic!("activate the 'dtype-struct' feature to handle struct data types")
260 },
261 ArrowDataType::Extension(ext) if ext.name.as_str() == POLARS_OBJECT_EXTENSION_NAME => {
262 #[cfg(feature = "object")]
263 {
264 DataType::Object("object")
265 }
266 #[cfg(not(feature = "object"))]
267 {
268 panic!("activate the 'object' feature to be able to load POLARS_EXTENSION_TYPE")
269 }
270 },
271 #[cfg(feature = "dtype-extension")]
272 ArrowDataType::Extension(ext) => {
273 use crate::prelude::extension::get_extension_type_or_storage;
274 let storage = DataType::from_arrow(&ext.inner, md);
275 match get_extension_type_or_storage(&ext.name, &storage, ext.metadata.as_deref()) {
276 Some(typ) => DataType::Extension(typ, Box::new(storage)),
277 None => storage,
278 }
279 },
280 #[cfg(feature = "dtype-decimal")]
281 ArrowDataType::Decimal(precision, scale)
282 | ArrowDataType::Decimal32(precision, scale)
283 | ArrowDataType::Decimal64(precision, scale)
284 | ArrowDataType::Decimal256(precision, scale) => DataType::Decimal(*precision, *scale),
285 ArrowDataType::Utf8View | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8 => {
286 DataType::String
287 },
288 ArrowDataType::BinaryView => DataType::Binary,
289 ArrowDataType::LargeBinary if md.is_some() => {
290 let md = md.unwrap();
291 if md.maintain_type() {
292 DataType::BinaryOffset
293 } else {
294 DataType::Binary
295 }
296 },
297 ArrowDataType::LargeBinary | ArrowDataType::Binary => DataType::Binary,
298 ArrowDataType::FixedSizeBinary(_) => DataType::Binary,
299 ArrowDataType::Map(inner, _is_sorted) => {
300 DataType::List(Self::from_arrow_field(inner).boxed())
301 },
302 ArrowDataType::Interval(IntervalUnit::MonthDayNano) => {
303 check_allow_importing_interval_as_struct("month_day_nano_interval").unwrap();
304 feature_gated!("dtype-struct", DataType::_month_days_ns_struct_type())
305 },
306 ArrowDataType::Interval(IntervalUnit::MonthDayMillis) => {
307 check_allow_importing_interval_as_struct("month_day_millisecond_interval").unwrap();
308 feature_gated!("dtype-struct", DataType::_month_days_ns_struct_type())
309 },
310 dt => panic!(
311 "Arrow datatype {dt:?} not supported by Polars. \
312 You probably need to activate that data-type feature."
313 ),
314 }
315 }
316}
317
318impl From<&ArrowField> for Field {
319 fn from(f: &ArrowField) -> Self {
320 Field::new(f.name.clone(), DataType::from_arrow_field(f))
321 }
322}