polars_core/datatypes/
_serde.rs

//! Having `Object<&'static>` in [`DataType`] makes serde tag the `Deserialize` trait bound `'static`
//! even though we skip serializing `Object`.
//!
//! We could use [serde_1712](https://github.com/serde-rs/serde/issues/1712), but that gave problems caused by
//! [rust_96956](https://github.com/rust-lang/rust/issues/96956), so we make a dummy type without the
//! `'static` lifetime.
6
7use polars_dtype::categorical::CategoricalPhysical;
8use serde::{Deserialize, Serialize};
9
10use super::*;
11
12impl<'a> Deserialize<'a> for DataType {
13    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
14    where
15        D: Deserializer<'a>,
16    {
17        Ok(SerializableDataType::deserialize(deserializer)?.into())
18    }
19}
20
21impl Serialize for DataType {
22    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
23    where
24        S: Serializer,
25    {
26        let dt: SerializableDataType = self.into();
27        dt.serialize(serializer)
28    }
29}
30
/// Schema support for [`DataType`] (only with the `dsl-schema` feature).
///
/// Every method forwards to the `SerializableDataType` implementation, so the
/// name, id and schema reported for `DataType` are exactly those of the mirror type.
#[cfg(feature = "dsl-schema")]
impl schemars::JsonSchema for DataType {
    fn schema_name() -> std::borrow::Cow<'static, str> {
        <SerializableDataType as schemars::JsonSchema>::schema_name()
    }

    fn schema_id() -> std::borrow::Cow<'static, str> {
        <SerializableDataType as schemars::JsonSchema>::schema_id()
    }

    fn json_schema(generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
        <SerializableDataType as schemars::JsonSchema>::json_schema(generator)
    }
}
45
46#[derive(Serialize, Deserialize)]
47#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
48#[serde(rename = "DataType")]
49enum SerializableDataType {
50    Boolean,
51    UInt8,
52    UInt16,
53    UInt32,
54    UInt64,
55    UInt128,
56    Int8,
57    Int16,
58    Int32,
59    Int64,
60    Int128,
61    Float16,
62    Float32,
63    Float64,
64    String,
65    Binary,
66    BinaryOffset,
67    /// A 32-bit date representing the elapsed time since UNIX epoch (1970-01-01)
68    /// in days (32 bits).
69    Date,
70    /// A 64-bit date representing the elapsed time since UNIX epoch (1970-01-01)
71    /// in the given ms/us/ns TimeUnit (64 bits).
72    Datetime(TimeUnit, Option<TimeZone>),
73    // 64-bit integer representing difference between times in milli|micro|nano seconds
74    Duration(TimeUnit),
75    /// A 64-bit time representing elapsed time since midnight in the given TimeUnit.
76    Time,
77    List(Box<SerializableDataType>),
78    #[cfg(feature = "dtype-array")]
79    Array(Box<SerializableDataType>, usize),
80    Null,
81    #[cfg(feature = "dtype-struct")]
82    Struct(Vec<Field>),
83    // some logical types we cannot know statically, e.g. Datetime
84    Unknown(UnknownKind),
85    #[cfg(feature = "dtype-categorical")]
86    Categorical {
87        name: String,
88        namespace: String,
89        physical: CategoricalPhysical,
90    },
91    #[cfg(feature = "dtype-categorical")]
92    Enum {
93        strings: Series,
94    },
95    #[cfg(feature = "dtype-decimal")]
96    Decimal(usize, usize),
97    #[cfg(feature = "object")]
98    Object(String),
99    #[cfg(feature = "dtype-extension")]
100    Extension {
101        name: String,
102        metadata: Option<String>,
103        storage: Box<SerializableDataType>,
104    },
105}
106
107impl From<&DataType> for SerializableDataType {
108    fn from(dt: &DataType) -> Self {
109        use DataType::*;
110        match dt {
111            Boolean => Self::Boolean,
112            UInt8 => Self::UInt8,
113            UInt16 => Self::UInt16,
114            UInt32 => Self::UInt32,
115            UInt64 => Self::UInt64,
116            UInt128 => Self::UInt128,
117            Int8 => Self::Int8,
118            Int16 => Self::Int16,
119            Int32 => Self::Int32,
120            Int64 => Self::Int64,
121            Int128 => Self::Int128,
122            Float16 => Self::Float16,
123            Float32 => Self::Float32,
124            Float64 => Self::Float64,
125            String => Self::String,
126            Binary => Self::Binary,
127            BinaryOffset => Self::BinaryOffset,
128            Date => Self::Date,
129            Datetime(tu, tz) => Self::Datetime(*tu, tz.clone()),
130            Duration(tu) => Self::Duration(*tu),
131            Time => Self::Time,
132            List(dt) => Self::List(Box::new(dt.as_ref().into())),
133            #[cfg(feature = "dtype-array")]
134            Array(dt, width) => Self::Array(Box::new(dt.as_ref().into()), *width),
135            Null => Self::Null,
136            Unknown(kind) => Self::Unknown(*kind),
137            #[cfg(feature = "dtype-struct")]
138            Struct(flds) => Self::Struct(flds.clone()),
139            #[cfg(feature = "dtype-categorical")]
140            Categorical(cats, _) => Self::Categorical {
141                name: cats.name().to_string(),
142                namespace: cats.namespace().to_string(),
143                physical: cats.physical(),
144            },
145            #[cfg(feature = "dtype-categorical")]
146            Enum(fcats, _) => Self::Enum {
147                strings: StringChunked::with_chunk(
148                    PlSmallStr::from_static("categories"),
149                    fcats.categories().clone(),
150                )
151                .into_series(),
152            },
153            #[cfg(feature = "dtype-decimal")]
154            Decimal(precision, scale) => Self::Decimal(*precision, *scale),
155            #[cfg(feature = "object")]
156            Object(name) => Self::Object(name.to_string()),
157            #[cfg(feature = "dtype-extension")]
158            Extension(typ, storage) => Self::Extension {
159                name: typ.name().to_string(),
160                metadata: typ.serialize_metadata().map(|s| s.into_owned()),
161                storage: Box::new(SerializableDataType::from(storage.as_ref())),
162            },
163        }
164    }
165}
166impl From<SerializableDataType> for DataType {
167    fn from(dt: SerializableDataType) -> Self {
168        use SerializableDataType::*;
169        match dt {
170            Boolean => Self::Boolean,
171            UInt8 => Self::UInt8,
172            UInt16 => Self::UInt16,
173            UInt32 => Self::UInt32,
174            UInt64 => Self::UInt64,
175            UInt128 => Self::UInt128,
176            Int8 => Self::Int8,
177            Int16 => Self::Int16,
178            Int32 => Self::Int32,
179            Int64 => Self::Int64,
180            Int128 => Self::Int128,
181            Float16 => Self::Float16,
182            Float32 => Self::Float32,
183            Float64 => Self::Float64,
184            String => Self::String,
185            Binary => Self::Binary,
186            BinaryOffset => Self::BinaryOffset,
187            Date => Self::Date,
188            Datetime(tu, tz) => Self::Datetime(tu, tz),
189            Duration(tu) => Self::Duration(tu),
190            Time => Self::Time,
191            List(dt) => Self::List(Box::new((*dt).into())),
192            #[cfg(feature = "dtype-array")]
193            Array(dt, width) => Self::Array(Box::new((*dt).into()), width),
194            Null => Self::Null,
195            Unknown(kind) => Self::Unknown(kind),
196            #[cfg(feature = "dtype-struct")]
197            Struct(flds) => Self::Struct(flds),
198            #[cfg(feature = "dtype-categorical")]
199            Categorical {
200                name,
201                namespace,
202                physical,
203            } => {
204                let cats = Categories::new(
205                    PlSmallStr::from(name),
206                    PlSmallStr::from(namespace),
207                    physical,
208                );
209                let mapping = cats.mapping();
210                Self::Categorical(cats, mapping)
211            },
212            #[cfg(feature = "dtype-categorical")]
213            Enum { strings } => {
214                let ca = strings.str().unwrap();
215                let fcats = FrozenCategories::new(ca.iter().flatten()).unwrap();
216                let mapping = fcats.mapping().clone();
217                Self::Enum(fcats, mapping)
218            },
219            #[cfg(feature = "dtype-decimal")]
220            Decimal(precision, scale) => Self::Decimal(precision, scale),
221            #[cfg(feature = "object")]
222            Object(_) => Self::Object("unknown"),
223            #[cfg(feature = "dtype-extension")]
224            Extension {
225                name,
226                metadata,
227                storage,
228            } => {
229                let storage = DataType::from(*storage);
230                let ext_type = crate::datatypes::extension::get_extension_type_or_generic(
231                    &name,
232                    &storage,
233                    metadata.as_deref(),
234                );
235                Self::Extension(ext_type, Box::new(storage))
236            },
237        }
238    }
239}