// polars_io/catalog/unity/models.rs

1use polars_core::prelude::PlHashMap;
2use polars_utils::pl_str::PlSmallStr;
3
4#[derive(Debug, serde::Deserialize)]
5pub struct CatalogInfo {
6    pub name: String,
7
8    pub comment: Option<String>,
9
10    #[serde(default)]
11    pub storage_location: Option<String>,
12
13    #[serde(default, deserialize_with = "null_to_default")]
14    pub properties: PlHashMap<PlSmallStr, String>,
15
16    #[serde(default, deserialize_with = "null_to_default")]
17    pub options: PlHashMap<PlSmallStr, String>,
18
19    #[serde(with = "chrono::serde::ts_milliseconds_option")]
20    pub created_at: Option<chrono::DateTime<chrono::Utc>>,
21
22    pub created_by: Option<String>,
23
24    #[serde(with = "chrono::serde::ts_milliseconds_option")]
25    pub updated_at: Option<chrono::DateTime<chrono::Utc>>,
26
27    pub updated_by: Option<String>,
28}
29
30#[derive(Debug, serde::Deserialize)]
31pub struct NamespaceInfo {
32    pub name: String,
33    pub comment: Option<String>,
34
35    #[serde(default, deserialize_with = "null_to_default")]
36    pub properties: PlHashMap<PlSmallStr, String>,
37
38    #[serde(default)]
39    pub storage_location: Option<String>,
40
41    #[serde(with = "chrono::serde::ts_milliseconds_option")]
42    pub created_at: Option<chrono::DateTime<chrono::Utc>>,
43
44    pub created_by: Option<String>,
45
46    #[serde(with = "chrono::serde::ts_milliseconds_option")]
47    pub updated_at: Option<chrono::DateTime<chrono::Utc>>,
48
49    pub updated_by: Option<String>,
50}
51
52#[derive(Debug, serde::Deserialize)]
53pub struct TableInfo {
54    pub name: String,
55    pub table_id: String,
56    pub table_type: TableType,
57
58    #[serde(default)]
59    pub comment: Option<String>,
60
61    #[serde(default)]
62    pub storage_location: Option<String>,
63
64    #[serde(default)]
65    pub data_source_format: Option<DataSourceFormat>,
66
67    #[serde(default)]
68    pub columns: Option<Vec<ColumnInfo>>,
69
70    #[serde(default, deserialize_with = "null_to_default")]
71    pub properties: PlHashMap<PlSmallStr, String>,
72
73    #[serde(with = "chrono::serde::ts_milliseconds_option")]
74    pub created_at: Option<chrono::DateTime<chrono::Utc>>,
75
76    pub created_by: Option<String>,
77
78    #[serde(with = "chrono::serde::ts_milliseconds_option")]
79    pub updated_at: Option<chrono::DateTime<chrono::Utc>>,
80
81    pub updated_by: Option<String>,
82}
83
84#[derive(
85    Debug, strum_macros::Display, strum_macros::EnumString, serde::Serialize, serde::Deserialize,
86)]
87#[strum(serialize_all = "SCREAMING_SNAKE_CASE")]
88#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
89pub enum TableType {
90    Managed,
91    External,
92    View,
93    MaterializedView,
94    StreamingTable,
95    ManagedShallowClone,
96    Foreign,
97    ExternalShallowClone,
98}
99
100#[derive(
101    Debug, strum_macros::Display, strum_macros::EnumString, serde::Serialize, serde::Deserialize,
102)]
103#[strum(serialize_all = "SCREAMING_SNAKE_CASE")]
104#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
105pub enum DataSourceFormat {
106    Delta,
107    Csv,
108    Json,
109    Avro,
110    Parquet,
111    Orc,
112    Text,
113
114    // Databricks-specific
115    UnityCatalog,
116    Deltasharing,
117    DatabricksFormat,
118    MysqlFormat,
119    PostgresqlFormat,
120    RedshiftFormat,
121    SnowflakeFormat,
122    SqldwFormat,
123    SqlserverFormat,
124    SalesforceFormat,
125    BigqueryFormat,
126    NetsuiteFormat,
127    WorkdayRaasFormat,
128    HiveSerde,
129    HiveCustom,
130    VectorIndexFormat,
131}
132
133#[derive(Debug, serde::Serialize, serde::Deserialize)]
134pub struct ColumnInfo {
135    pub name: PlSmallStr,
136    pub type_name: PlSmallStr,
137    pub type_text: PlSmallStr,
138    pub type_json: String,
139    pub position: Option<u32>,
140    pub comment: Option<String>,
141    pub partition_index: Option<u32>,
142}
143
144/// Note: This struct contains all the field names for a few different possible type / field presence
145/// combinations. We use serde(default) and skip_serializing_if to get the desired serialization
146/// output.
147///
148/// E.g.:
149///
150/// ```text
151/// {
152///     "name": "List",
153///     "type": {"type": "array", "elementType": "long", "containsNull": True},
154///     "nullable": True,
155///     "metadata": {},
156/// }
157/// {
158///     "name": "Struct",
159///     "type": {
160///         "type": "struct",
161///         "fields": [{"name": "x", "type": "long", "nullable": True, "metadata": {}}],
162///     },
163///     "nullable": True,
164///     "metadata": {},
165/// }
166/// {
167///     "name": "ListStruct",
168///     "type": {
169///         "type": "array",
170///         "elementType": {
171///             "type": "struct",
172///             "fields": [{"name": "x", "type": "long", "nullable": True, "metadata": {}}],
173///         },
174///         "containsNull": True,
175///     },
176///     "nullable": True,
177///     "metadata": {},
178/// }
179/// {
180///     "name": "Map",
181///     "type": {
182///         "type": "map",
183///         "keyType": "string",
184///         "valueType": "string",
185///         "valueContainsNull": True,
186///     },
187///     "nullable": True,
188///     "metadata": {},
189/// }
190/// ```
191#[derive(Debug, Default, serde::Serialize, serde::Deserialize)]
192pub struct ColumnTypeJson {
193    #[serde(default, skip_serializing_if = "Option::is_none")]
194    pub name: Option<PlSmallStr>,
195
196    #[serde(rename = "type")]
197    pub type_: ColumnTypeJsonType,
198
199    #[serde(default, skip_serializing_if = "Option::is_none")]
200    pub nullable: Option<bool>,
201
202    #[serde(default, skip_serializing_if = "Option::is_none")]
203    pub metadata: Option<PlHashMap<String, String>>,
204
205    // Used for List types
206    #[serde(
207        default,
208        rename = "elementType",
209        skip_serializing_if = "Option::is_none"
210    )]
211    pub element_type: Option<ColumnTypeJsonType>,
212
213    #[serde(
214        default,
215        rename = "containsNull",
216        skip_serializing_if = "Option::is_none"
217    )]
218    pub contains_null: Option<bool>,
219
220    // Used for Struct types
221    #[serde(default, skip_serializing_if = "Option::is_none")]
222    pub fields: Option<Vec<ColumnTypeJson>>,
223
224    // Used for Map types
225    #[serde(default, rename = "keyType", skip_serializing_if = "Option::is_none")]
226    pub key_type: Option<ColumnTypeJsonType>,
227
228    #[serde(default, rename = "valueType", skip_serializing_if = "Option::is_none")]
229    pub value_type: Option<ColumnTypeJsonType>,
230
231    #[serde(
232        default,
233        rename = "valueContainsNull",
234        skip_serializing_if = "Option::is_none"
235    )]
236    pub value_contains_null: Option<bool>,
237}
238
239#[derive(Debug, serde::Serialize, serde::Deserialize)]
240#[serde(untagged)]
241pub enum ColumnTypeJsonType {
242    /// * `{"type": "name", ..}``
243    TypeName(PlSmallStr),
244    /// * `{"type": {"type": "name", ..}}`
245    TypeJson(Box<ColumnTypeJson>),
246}
247
248impl Default for ColumnTypeJsonType {
249    fn default() -> Self {
250        Self::TypeName(PlSmallStr::EMPTY)
251    }
252}
253
254impl ColumnTypeJsonType {
255    pub const fn from_static_type_name(type_name: &'static str) -> Self {
256        Self::TypeName(PlSmallStr::from_static(type_name))
257    }
258}
259
260#[derive(Debug, serde::Deserialize)]
261pub struct TableCredentials {
262    pub aws_temp_credentials: Option<TableCredentialsAws>,
263    pub azure_user_delegation_sas: Option<TableCredentialsAzure>,
264    pub gcp_oauth_token: Option<TableCredentialsGcp>,
265    pub expiration_time: i64,
266}
267
268impl TableCredentials {
269    pub fn into_enum(self) -> Option<TableCredentialsVariants> {
270        if let v @ Some(_) = self.aws_temp_credentials {
271            v.map(TableCredentialsVariants::Aws)
272        } else if let v @ Some(_) = self.azure_user_delegation_sas {
273            v.map(TableCredentialsVariants::Azure)
274        } else if let v @ Some(_) = self.gcp_oauth_token {
275            v.map(TableCredentialsVariants::Gcp)
276        } else {
277            None
278        }
279    }
280}
281
282pub enum TableCredentialsVariants {
283    Aws(TableCredentialsAws),
284    Azure(TableCredentialsAzure),
285    Gcp(TableCredentialsGcp),
286}
287
288#[derive(Debug, serde::Deserialize)]
289pub struct TableCredentialsAws {
290    pub access_key_id: String,
291    pub secret_access_key: String,
292    pub session_token: Option<String>,
293
294    #[serde(default)]
295    pub access_point: Option<String>,
296}
297
298#[derive(Debug, serde::Deserialize)]
299pub struct TableCredentialsAzure {
300    pub sas_token: String,
301}
302
303#[derive(Debug, serde::Deserialize)]
304pub struct TableCredentialsGcp {
305    pub oauth_token: String,
306}
307
308fn null_to_default<'de, T, D>(d: D) -> Result<T, D::Error>
309where
310    T: Default + serde::de::Deserialize<'de>,
311    D: serde::de::Deserializer<'de>,
312{
313    use serde::Deserialize;
314    let opt_val = Option::<T>::deserialize(d)?;
315    Ok(opt_val.unwrap_or_default())
316}