polars_core/datatypes/
_serde.rs

//! Having `Object<&'static>` in [`DataType`] makes serde tag the `Deserialize` trait bound `'static`,
//! even though we skip serializing `Object`.
//!
//! We could use [serde_1712](https://github.com/serde-rs/serde/issues/1712), but that gave problems caused by
//! [rust_96956](https://github.com/rust-lang/rust/issues/96956), so we make a dummy type without static.
6
7#[cfg(feature = "dtype-categorical")]
8use serde::de::SeqAccess;
9use serde::{Deserialize, Serialize};
10
11use super::*;
12
13impl<'a> Deserialize<'a> for DataType {
14    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
15    where
16        D: Deserializer<'a>,
17    {
18        Ok(SerializableDataType::deserialize(deserializer)?.into())
19    }
20}
21
22impl Serialize for DataType {
23    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
24    where
25        S: Serializer,
26    {
27        let dt: SerializableDataType = self.into();
28        dt.serialize(serializer)
29    }
30}
31
/// Newtype that lets this module attach hand-written serde impls to the wrapped type.
#[cfg(feature = "dtype-categorical")]
struct Wrap<T>(T);

#[cfg(feature = "dtype-categorical")]
impl serde::Serialize for Wrap<Utf8ViewArray> {
    /// Serializes the wrapped array as a sequence of the items yielded by `values_iter()`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let Wrap(array) = self;
        serializer.collect_seq(array.values_iter())
    }
}
44
#[cfg(feature = "dtype-categorical")]
impl<'de> serde::Deserialize<'de> for Wrap<Utf8ViewArray> {
    /// Deserializes a sequence of optional strings into a wrapped [`Utf8ViewArray`].
    ///
    /// Each element is read as `Option<String>` and pushed, in order, into a
    /// `MutablePlString` that is converted into the final array.
    ///
    /// # Errors
    ///
    /// Returns the deserializer's error if the input is not a sequence or if any
    /// element cannot be read as `Option<String>`.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        struct Utf8Visitor;

        impl<'de> Visitor<'de> for Utf8Visitor {
            type Value = Wrap<Utf8ViewArray>;

            fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
                formatter.write_str("Utf8Visitor string sequence.")
            }

            fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
            where
                A: SeqAccess<'de>,
            {
                // The size hint is derived from the input (e.g. a length prefix), so it
                // must not drive an unbounded allocation. Cap the eager reservation; the
                // builder still grows as needed while elements are pushed.
                const MAX_PREALLOC: usize = 4096;
                let capacity = seq.size_hint().unwrap_or(10).min(MAX_PREALLOC);

                let mut utf8array = MutablePlString::with_capacity(capacity);
                while let Some(key) = seq.next_element::<Option<String>>()? {
                    utf8array.push(key);
                }
                Ok(Wrap(utf8array.into()))
            }
        }

        deserializer.deserialize_seq(Utf8Visitor)
    }
}
76
77#[derive(Serialize, Deserialize)]
78enum SerializableDataType {
79    Boolean,
80    UInt8,
81    UInt16,
82    UInt32,
83    UInt64,
84    Int8,
85    Int16,
86    Int32,
87    Int64,
88    Int128,
89    Float32,
90    Float64,
91    String,
92    Binary,
93    /// A 32-bit date representing the elapsed time since UNIX epoch (1970-01-01)
94    /// in days (32 bits).
95    Date,
96    /// A 64-bit date representing the elapsed time since UNIX epoch (1970-01-01)
97    /// in the given ms/us/ns TimeUnit (64 bits).
98    Datetime(TimeUnit, Option<TimeZone>),
99    // 64-bit integer representing difference between times in milli|micro|nano seconds
100    Duration(TimeUnit),
101    /// A 64-bit time representing elapsed time since midnight in the given TimeUnit.
102    Time,
103    List(Box<SerializableDataType>),
104    #[cfg(feature = "dtype-array")]
105    Array(Box<SerializableDataType>, usize),
106    Null,
107    #[cfg(feature = "dtype-struct")]
108    Struct(Vec<Field>),
109    // some logical types we cannot know statically, e.g. Datetime
110    Unknown(UnknownKind),
111    #[cfg(feature = "dtype-categorical")]
112    Categorical(Option<Series>, CategoricalOrdering),
113    #[cfg(feature = "dtype-decimal")]
114    Decimal(Option<usize>, Option<usize>),
115    #[cfg(feature = "dtype-categorical")]
116    Enum(Option<Series>, CategoricalOrdering),
117    #[cfg(feature = "object")]
118    Object(String),
119}
120
121impl From<&DataType> for SerializableDataType {
122    fn from(dt: &DataType) -> Self {
123        use DataType::*;
124        match dt {
125            Boolean => Self::Boolean,
126            UInt8 => Self::UInt8,
127            UInt16 => Self::UInt16,
128            UInt32 => Self::UInt32,
129            UInt64 => Self::UInt64,
130            Int8 => Self::Int8,
131            Int16 => Self::Int16,
132            Int32 => Self::Int32,
133            Int64 => Self::Int64,
134            Int128 => Self::Int128,
135            Float32 => Self::Float32,
136            Float64 => Self::Float64,
137            String => Self::String,
138            Binary => Self::Binary,
139            Date => Self::Date,
140            Datetime(tu, tz) => Self::Datetime(*tu, tz.clone()),
141            Duration(tu) => Self::Duration(*tu),
142            Time => Self::Time,
143            List(dt) => Self::List(Box::new(dt.as_ref().into())),
144            #[cfg(feature = "dtype-array")]
145            Array(dt, width) => Self::Array(Box::new(dt.as_ref().into()), *width),
146            Null => Self::Null,
147            Unknown(kind) => Self::Unknown(*kind),
148            #[cfg(feature = "dtype-struct")]
149            Struct(flds) => Self::Struct(flds.clone()),
150            #[cfg(feature = "dtype-categorical")]
151            Categorical(Some(rev_map), ordering) => Self::Categorical(
152                Some(
153                    StringChunked::with_chunk(PlSmallStr::EMPTY, rev_map.get_categories().clone())
154                        .into_series(),
155                ),
156                *ordering,
157            ),
158            #[cfg(feature = "dtype-categorical")]
159            Categorical(None, ordering) => Self::Categorical(None, *ordering),
160            #[cfg(feature = "dtype-categorical")]
161            Enum(Some(rev_map), ordering) => Self::Enum(
162                Some(
163                    StringChunked::with_chunk(PlSmallStr::EMPTY, rev_map.get_categories().clone())
164                        .into_series(),
165                ),
166                *ordering,
167            ),
168            #[cfg(feature = "dtype-categorical")]
169            Enum(None, ordering) => Self::Enum(None, *ordering),
170            #[cfg(feature = "dtype-decimal")]
171            Decimal(precision, scale) => Self::Decimal(*precision, *scale),
172            #[cfg(feature = "object")]
173            Object(name) => Self::Object(name.to_string()),
174            dt => panic!("{dt:?} not supported"),
175        }
176    }
177}
178impl From<SerializableDataType> for DataType {
179    fn from(dt: SerializableDataType) -> Self {
180        use SerializableDataType::*;
181        match dt {
182            Boolean => Self::Boolean,
183            UInt8 => Self::UInt8,
184            UInt16 => Self::UInt16,
185            UInt32 => Self::UInt32,
186            UInt64 => Self::UInt64,
187            Int8 => Self::Int8,
188            Int16 => Self::Int16,
189            Int32 => Self::Int32,
190            Int64 => Self::Int64,
191            Int128 => Self::Int128,
192            Float32 => Self::Float32,
193            Float64 => Self::Float64,
194            String => Self::String,
195            Binary => Self::Binary,
196            Date => Self::Date,
197            Datetime(tu, tz) => Self::Datetime(tu, tz),
198            Duration(tu) => Self::Duration(tu),
199            Time => Self::Time,
200            List(dt) => Self::List(Box::new((*dt).into())),
201            #[cfg(feature = "dtype-array")]
202            Array(dt, width) => Self::Array(Box::new((*dt).into()), width),
203            Null => Self::Null,
204            Unknown(kind) => Self::Unknown(kind),
205            #[cfg(feature = "dtype-struct")]
206            Struct(flds) => Self::Struct(flds),
207            #[cfg(feature = "dtype-categorical")]
208            Categorical(Some(categories), ordering) => Self::Categorical(
209                Some(Arc::new(RevMapping::build_local(
210                    categories.0.rechunk().chunks()[0]
211                        .as_any()
212                        .downcast_ref::<Utf8ViewArray>()
213                        .unwrap()
214                        .clone(),
215                ))),
216                ordering,
217            ),
218            #[cfg(feature = "dtype-categorical")]
219            Categorical(None, ordering) => Self::Categorical(None, ordering),
220            #[cfg(feature = "dtype-categorical")]
221            Enum(Some(categories), _) => create_enum_dtype(
222                categories.rechunk().chunks()[0]
223                    .as_any()
224                    .downcast_ref::<Utf8ViewArray>()
225                    .unwrap()
226                    .clone(),
227            ),
228            #[cfg(feature = "dtype-categorical")]
229            Enum(None, ordering) => Self::Enum(None, ordering),
230            #[cfg(feature = "dtype-decimal")]
231            Decimal(precision, scale) => Self::Decimal(precision, scale),
232            #[cfg(feature = "object")]
233            Object(_) => Self::Object("unknown"),
234        }
235    }
236}