polars_core/datatypes/
field.rs1use arrow::datatypes::Metadata;
2use polars_dtype::categorical::CategoricalPhysical;
3use polars_utils::pl_str::PlSmallStr;
4
5use super::*;
/// Name of the Arrow extension type that [`DataType::from_arrow`] recognises
/// and maps to the Polars object data type.
pub static EXTENSION_NAME: &str = "POLARS_EXTENSION_TYPE";
7
8#[derive(Clone, Debug, PartialEq, Eq, Hash)]
10#[cfg_attr(
11 any(feature = "serde", feature = "serde-lazy"),
12 derive(Serialize, Deserialize)
13)]
14#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
15pub struct Field {
16 pub name: PlSmallStr,
17 pub dtype: DataType,
18}
19
20impl From<Field> for (PlSmallStr, DataType) {
21 fn from(value: Field) -> Self {
22 (value.name, value.dtype)
23 }
24}
25
26pub type FieldRef = Arc<Field>;
27
28impl Field {
29 #[inline]
40 pub fn new(name: PlSmallStr, dtype: DataType) -> Self {
41 Field { name, dtype }
42 }
43
44 #[inline]
55 pub fn name(&self) -> &PlSmallStr {
56 &self.name
57 }
58
59 #[inline]
70 pub fn dtype(&self) -> &DataType {
71 &self.dtype
72 }
73
74 pub fn coerce(&mut self, dtype: DataType) {
86 self.dtype = dtype;
87 }
88
89 pub fn set_name(&mut self, name: PlSmallStr) {
101 self.name = name;
102 }
103
104 pub fn with_name(mut self, name: PlSmallStr) -> Self {
106 self.name = name;
107 self
108 }
109
110 pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowField {
122 self.dtype.to_arrow_field(self.name.clone(), compat_level)
123 }
124}
125
126impl AsRef<DataType> for Field {
127 fn as_ref(&self) -> &DataType {
128 &self.dtype
129 }
130}
131
132impl AsRef<DataType> for DataType {
133 fn as_ref(&self) -> &DataType {
134 self
135 }
136}
137
138impl DataType {
139 pub fn boxed(self) -> Box<DataType> {
140 Box::new(self)
141 }
142
143 pub fn from_arrow_field(field: &ArrowField) -> DataType {
144 Self::from_arrow(&field.dtype, field.metadata.as_deref())
145 }
146
147 pub fn from_arrow_dtype(dt: &ArrowDataType) -> DataType {
148 Self::from_arrow(dt, None)
149 }
150
151 pub fn from_arrow(dt: &ArrowDataType, md: Option<&Metadata>) -> DataType {
152 match dt {
153 ArrowDataType::Null => DataType::Null,
154 ArrowDataType::UInt8 => DataType::UInt8,
155 ArrowDataType::UInt16 => DataType::UInt16,
156 ArrowDataType::UInt32 => DataType::UInt32,
157 ArrowDataType::UInt64 => DataType::UInt64,
158 ArrowDataType::Int8 => DataType::Int8,
159 ArrowDataType::Int16 => DataType::Int16,
160 ArrowDataType::Int32 => DataType::Int32,
161 ArrowDataType::Int64 => DataType::Int64,
162 #[cfg(feature = "dtype-i128")]
163 ArrowDataType::Int128 => DataType::Int128,
164 ArrowDataType::Boolean => DataType::Boolean,
165 ArrowDataType::Float16 => DataType::Float32,
166 ArrowDataType::Float32 => DataType::Float32,
167 ArrowDataType::Float64 => DataType::Float64,
168 #[cfg(feature = "dtype-array")]
169 ArrowDataType::FixedSizeList(f, size) => {
170 DataType::Array(DataType::from_arrow_field(f).boxed(), *size)
171 },
172 ArrowDataType::LargeList(f) | ArrowDataType::List(f) => {
173 DataType::List(DataType::from_arrow_field(f).boxed())
174 },
175 ArrowDataType::Date32 => DataType::Date,
176 ArrowDataType::Timestamp(tu, tz) => {
177 DataType::Datetime(tu.into(), TimeZone::opt_try_new(tz.clone()).unwrap())
178 },
179 ArrowDataType::Duration(tu) => DataType::Duration(tu.into()),
180 ArrowDataType::Date64 => DataType::Datetime(TimeUnit::Milliseconds, None),
181 ArrowDataType::Time64(_) | ArrowDataType::Time32(_) => DataType::Time,
182
183 #[cfg(feature = "dtype-categorical")]
184 ArrowDataType::Dictionary(_, value_type, _) => {
185 if let Some(mut enum_md) = md.and_then(|md| md.pl_enum_metadata()) {
187 let cats = move || {
188 if enum_md.is_empty() {
189 return None;
190 }
191
192 let len;
193 (len, enum_md) = enum_md.split_once(';').unwrap();
194 let len = len.parse::<usize>().unwrap();
195 let cat;
196 (cat, enum_md) = enum_md.split_at(len);
197 Some(cat)
198 };
199
200 let fcats = FrozenCategories::new(std::iter::from_fn(cats)).unwrap();
201 DataType::from_frozen_categories(fcats)
202 } else if let Some(mut cat_md) = md.and_then(|md| md.pl_categorical_metadata()) {
203 let name_len;
204 (name_len, cat_md) = cat_md.split_once(';').unwrap();
205 let name_len = name_len.parse::<usize>().unwrap();
206 let name;
207 (name, cat_md) = cat_md.split_at(name_len);
208
209 let namespace_len;
210 (namespace_len, cat_md) = cat_md.split_once(';').unwrap();
211 let namespace_len = namespace_len.parse::<usize>().unwrap();
212 let namespace;
213 (namespace, cat_md) = cat_md.split_at(namespace_len);
214
215 let (physical, _rest) = cat_md.split_once(';').unwrap();
216
217 let physical: CategoricalPhysical = physical.parse().ok().unwrap();
218 let cats = Categories::new(
219 PlSmallStr::from_str(name),
220 PlSmallStr::from_str(namespace),
221 physical,
222 );
223 DataType::from_categories(cats)
224 } else if matches!(
225 value_type.as_ref(),
226 ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View
227 ) {
228 DataType::from_categories(Categories::global())
229 } else {
230 Self::from_arrow(value_type, None)
231 }
232 },
233
234 #[cfg(feature = "dtype-struct")]
235 ArrowDataType::Struct(fields) => {
236 DataType::Struct(fields.iter().map(|fld| fld.into()).collect())
237 },
238 #[cfg(not(feature = "dtype-struct"))]
239 ArrowDataType::Struct(_) => {
240 panic!("activate the 'dtype-struct' feature to handle struct data types")
241 },
242 ArrowDataType::Extension(ext) if ext.name.as_str() == EXTENSION_NAME => {
243 #[cfg(feature = "object")]
244 {
245 DataType::Object("object")
246 }
247 #[cfg(not(feature = "object"))]
248 {
249 panic!("activate the 'object' feature to be able to load POLARS_EXTENSION_TYPE")
250 }
251 },
252 #[cfg(feature = "dtype-decimal")]
253 ArrowDataType::Decimal(precision, scale) => {
254 DataType::Decimal(Some(*precision), Some(*scale))
255 },
256 ArrowDataType::Utf8View | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8 => {
257 DataType::String
258 },
259 ArrowDataType::BinaryView => DataType::Binary,
260 ArrowDataType::LargeBinary if md.is_some() => {
261 let md = md.unwrap();
262 if md.maintain_type() {
263 DataType::BinaryOffset
264 } else {
265 DataType::Binary
266 }
267 },
268 ArrowDataType::LargeBinary | ArrowDataType::Binary => DataType::Binary,
269 ArrowDataType::FixedSizeBinary(_) => DataType::Binary,
270 ArrowDataType::Map(inner, _is_sorted) => {
271 DataType::List(Self::from_arrow_field(inner).boxed())
272 },
273 dt => panic!(
274 "Arrow datatype {dt:?} not supported by Polars. \
275 You probably need to activate that data-type feature."
276 ),
277 }
278 }
279}
280
281impl From<&ArrowField> for Field {
282 fn from(f: &ArrowField) -> Self {
283 Field::new(f.name.clone(), DataType::from_arrow_field(f))
284 }
285}