1use polars_core::prelude::{DataType, Field};
2use polars_core::schema::{Schema, SchemaRef};
3use polars_error::{PolarsResult, polars_bail, polars_err, to_compute_err};
4use polars_utils::error::TruncateErrorDetail;
5use polars_utils::format_pl_smallstr;
6use polars_utils::pl_str::PlSmallStr;
7
8use super::models::{ColumnInfo, ColumnTypeJson, ColumnTypeJsonType, TableInfo};
9use crate::utils::decode_json_response;
10
11pub fn table_info_to_schemas(
13 table_info: &TableInfo,
14) -> PolarsResult<(Option<SchemaRef>, Option<SchemaRef>)> {
15 let Some(columns) = table_info.columns.as_deref() else {
16 return Ok((None, None));
17 };
18
19 let mut schema = Schema::default();
20 let mut hive_schema = Schema::default();
21
22 for (i, col) in columns.iter().enumerate() {
23 if let Some(position) = col.position {
24 if usize::try_from(position).unwrap() != i {
25 polars_bail!(
26 ComputeError:
27 "not yet supported: position was not ordered"
28 )
29 }
30 }
31
32 let field = column_info_to_field(col)?;
33
34 if let Some(i) = col.partition_index {
35 if usize::try_from(i).unwrap() != hive_schema.len() {
36 polars_bail!(
37 ComputeError:
38 "not yet supported: partition_index was not ordered"
39 )
40 }
41
42 hive_schema.extend([field]);
43 } else {
44 schema.extend([field])
45 }
46 }
47
48 Ok((
49 Some(schema.into()),
50 Some(hive_schema)
51 .filter(|x| !x.is_empty())
52 .map(|x| x.into()),
53 ))
54}
55
56pub fn column_info_to_field(column_info: &ColumnInfo) -> PolarsResult<Field> {
57 Ok(Field::new(
58 column_info.name.clone(),
59 parse_type_json_str(&column_info.type_json)?,
60 ))
61}
62
63pub fn parse_type_json_str(type_json: &str) -> PolarsResult<DataType> {
69 let decoded: ColumnTypeJson = decode_json_response(type_json.as_bytes())?;
70
71 parse_type_json(&decoded).map_err(|e| {
72 e.wrap_msg(|e| {
73 format!(
74 "error parsing type response: {}, type_json: {}",
75 e,
76 TruncateErrorDetail(type_json)
77 )
78 })
79 })
80}
81
82pub fn parse_type_json(type_json: &ColumnTypeJson) -> PolarsResult<DataType> {
85 use ColumnTypeJsonType::*;
86
87 let out = match &type_json.type_ {
88 TypeName(name) => match name.as_str() {
89 "array" => {
90 let inner_json: &ColumnTypeJsonType =
91 type_json.element_type.as_ref().ok_or_else(|| {
92 polars_err!(
93 ComputeError:
94 "missing elementType in response for array type"
95 )
96 })?;
97
98 let inner_dtype = parse_type_json_type(inner_json)?;
99
100 DataType::List(Box::new(inner_dtype))
101 },
102
103 "struct" => {
104 let fields_json: &[ColumnTypeJson] =
105 type_json.fields.as_deref().ok_or_else(|| {
106 polars_err!(
107 ComputeError:
108 "missing elementType in response for array type"
109 )
110 })?;
111
112 let fields = fields_json
113 .iter()
114 .map(|x| {
115 let name = x.name.clone().ok_or_else(|| {
116 polars_err!(
117 ComputeError:
118 "missing name in fields response for struct type"
119 )
120 })?;
121 let dtype = parse_type_json(x)?;
122
123 Ok(Field::new(name, dtype))
124 })
125 .collect::<PolarsResult<Vec<_>>>()?;
126
127 DataType::Struct(fields)
128 },
129
130 "map" => {
131 let key_type = type_json.key_type.as_ref().ok_or_else(|| {
132 polars_err!(
133 ComputeError:
134 "missing keyType in response for map type"
135 )
136 })?;
137
138 let value_type = type_json.value_type.as_ref().ok_or_else(|| {
139 polars_err!(
140 ComputeError:
141 "missing valueType in response for map type"
142 )
143 })?;
144
145 DataType::List(Box::new(DataType::Struct(vec![
146 Field::new(
147 PlSmallStr::from_static("key"),
148 parse_type_json_type(key_type)?,
149 ),
150 Field::new(
151 PlSmallStr::from_static("value"),
152 parse_type_json_type(value_type)?,
153 ),
154 ])))
155 },
156
157 name => parse_type_text(name)?,
158 },
159
160 TypeJson(type_json) => parse_type_json(type_json.as_ref())?,
161 };
162
163 Ok(out)
164}
165
166fn parse_type_json_type(type_json_type: &ColumnTypeJsonType) -> PolarsResult<DataType> {
167 use ColumnTypeJsonType::*;
168
169 match type_json_type {
170 TypeName(name) => parse_type_text(name),
171 TypeJson(type_json) => parse_type_json(type_json.as_ref()),
172 }
173}
174
175fn parse_type_text(type_text: &str) -> PolarsResult<DataType> {
188 use DataType::*;
189 use polars_core::prelude::TimeUnit;
190
191 let dtype = match type_text {
192 "boolean" => Boolean,
193
194 "tinyint" | "byte" => Int8,
195 "smallint" | "short" => Int16,
196 "int" | "integer" => Int32,
197 "bigint" | "long" => Int64,
198
199 "float" | "real" => Float32,
200 "double" => Float64,
201
202 "date" => Date,
203 "timestamp" | "timestamp_ntz" | "timestamp_ltz" => Datetime(TimeUnit::Microseconds, None),
204
205 "string" => String,
206 "binary" => Binary,
207
208 "null" | "void" => Null,
209
210 v => {
211 if v.starts_with("decimal") {
212 (|| {
214 let (precision, scale) = v
215 .get(7..)?
216 .strip_prefix('(')?
217 .strip_suffix(')')?
218 .split_once(',')?;
219 let precision: usize = precision.parse().ok()?;
220 let scale: usize = scale.parse().ok()?;
221
222 Some(DataType::Decimal(precision, scale))
223 })()
224 .ok_or_else(|| {
225 polars_err!(
226 ComputeError:
227 "type format did not match decimal(int,int): {}",
228 v
229 )
230 })?
231 } else {
232 polars_bail!(
233 ComputeError:
234 "parse_type_text unknown type name: {}",
235 v
236 )
237 }
238 },
239 };
240
241 Ok(dtype)
242}
243
244pub fn schema_to_column_info_list(schema: &Schema) -> PolarsResult<Vec<ColumnInfo>> {
247 schema
248 .iter()
249 .enumerate()
250 .map(|(i, (name, dtype))| {
251 let name = name.clone();
252 let type_text = dtype_to_type_text(dtype)?;
253 let type_name = dtype_to_type_name(dtype)?;
254 let type_json = serde_json::to_string(&field_to_type_json(name.clone(), dtype)?)
255 .map_err(to_compute_err)?;
256
257 Ok(ColumnInfo {
258 name,
259 type_name,
260 type_text,
261 type_json,
262 position: Some(i.try_into().unwrap()),
263 comment: None,
264 partition_index: None,
265 })
266 })
267 .collect::<PolarsResult<_>>()
268}
269
270fn dtype_to_type_text(dtype: &DataType) -> PolarsResult<PlSmallStr> {
272 use DataType::*;
273 use polars_core::prelude::TimeUnit;
274
275 macro_rules! S {
276 ($e:expr) => {
277 PlSmallStr::from_static($e)
278 };
279 }
280
281 let out = match dtype {
282 Boolean => S!("boolean"),
283
284 Int8 => S!("tinyint"),
285 Int16 => S!("smallint"),
286 Int32 => S!("int"),
287 Int64 => S!("bigint"),
288
289 Float32 => S!("float"),
290 Float64 => S!("double"),
291
292 Date => S!("date"),
293 Datetime(TimeUnit::Microseconds, None) => S!("timestamp_ntz"),
294
295 String => S!("string"),
296 Binary => S!("binary"),
297
298 Null => S!("null"),
299
300 Decimal(precision, scale) => {
301 format_pl_smallstr!("decimal({},{})", precision, scale)
302 },
303
304 List(inner) => {
305 if let Some((key_type, value_type)) = get_list_map_type(inner) {
306 format_pl_smallstr!(
307 "map<{},{}>",
308 dtype_to_type_text(key_type)?,
309 dtype_to_type_text(value_type)?
310 )
311 } else {
312 format_pl_smallstr!("array<{}>", dtype_to_type_text(inner)?)
313 }
314 },
315
316 Struct(fields) => {
317 let mut out = std::string::String::from("struct<");
320
321 for Field { name, dtype } in fields {
322 out.push_str(name);
323 out.push(':');
324 out.push_str(&dtype_to_type_text(dtype)?);
325 out.push(',');
326 }
327
328 if out.ends_with(',') {
329 out.truncate(out.len() - 1);
330 }
331
332 out.push('>');
333
334 out.into()
335 },
336
337 v => polars_bail!(
338 ComputeError:
339 "dtype_to_type_text unsupported type: {}",
340 v
341 ),
342 };
343
344 Ok(out)
345}
346
347fn dtype_to_type_name(dtype: &DataType) -> PolarsResult<PlSmallStr> {
349 use DataType::*;
350 use polars_core::prelude::TimeUnit;
351
352 macro_rules! S {
353 ($e:expr) => {
354 PlSmallStr::from_static($e)
355 };
356 }
357
358 let out = match dtype {
359 Boolean => S!("BOOLEAN"),
360
361 Int8 => S!("BYTE"),
362 Int16 => S!("SHORT"),
363 Int32 => S!("INT"),
364 Int64 => S!("LONG"),
365
366 Float32 => S!("FLOAT"),
367 Float64 => S!("DOUBLE"),
368
369 Date => S!("DATE"),
370 Datetime(TimeUnit::Microseconds, None) => S!("TIMESTAMP_NTZ"),
371 String => S!("STRING"),
372 Binary => S!("BINARY"),
373
374 Null => S!("NULL"),
375
376 Decimal(..) => S!("DECIMAL"),
377
378 List(inner) => {
379 if get_list_map_type(inner).is_some() {
380 S!("MAP")
381 } else {
382 S!("ARRAY")
383 }
384 },
385
386 Struct(..) => S!("STRUCT"),
387
388 v => polars_bail!(
389 ComputeError:
390 "dtype_to_type_text unsupported type: {}",
391 v
392 ),
393 };
394
395 Ok(out)
396}
397
398fn field_to_type_json(name: PlSmallStr, dtype: &DataType) -> PolarsResult<ColumnTypeJson> {
400 Ok(ColumnTypeJson {
401 name: Some(name),
402 type_: dtype_to_type_json(dtype)?,
403 nullable: Some(true),
404 metadata: Some(Default::default()),
406
407 ..Default::default()
408 })
409}
410
411fn dtype_to_type_json(dtype: &DataType) -> PolarsResult<ColumnTypeJsonType> {
412 use DataType::*;
413 use polars_core::prelude::TimeUnit;
414
415 macro_rules! S {
416 ($e:expr) => {
417 ColumnTypeJsonType::from_static_type_name($e)
418 };
419 }
420
421 let out = match dtype {
422 Boolean => S!("boolean"),
423
424 Int8 => S!("byte"),
425 Int16 => S!("short"),
426 Int32 => S!("integer"),
427 Int64 => S!("long"),
428
429 Float32 => S!("float"),
430 Float64 => S!("double"),
431
432 Date => S!("date"),
433 Datetime(TimeUnit::Microseconds, None) => S!("timestamp_ntz"),
434
435 String => S!("string"),
436 Binary => S!("binary"),
437
438 Null => S!("null"),
439
440 Decimal(..) => ColumnTypeJsonType::TypeName(dtype_to_type_text(dtype)?),
441
442 List(inner) => {
443 let out = if let Some((key_type, value_type)) = get_list_map_type(inner) {
444 ColumnTypeJson {
445 type_: ColumnTypeJsonType::from_static_type_name("map"),
446 key_type: Some(dtype_to_type_json(key_type)?),
447 value_type: Some(dtype_to_type_json(value_type)?),
448 value_contains_null: Some(true),
449
450 ..Default::default()
451 }
452 } else {
453 ColumnTypeJson {
454 type_: ColumnTypeJsonType::from_static_type_name("array"),
455 element_type: Some(dtype_to_type_json(inner)?),
456 contains_null: Some(true),
457
458 ..Default::default()
459 }
460 };
461
462 ColumnTypeJsonType::TypeJson(Box::new(out))
463 },
464
465 Struct(fields) => {
466 let out = ColumnTypeJson {
467 type_: ColumnTypeJsonType::from_static_type_name("struct"),
468 fields: Some(
469 fields
470 .iter()
471 .map(|Field { name, dtype }| field_to_type_json(name.clone(), dtype))
472 .collect::<PolarsResult<_>>()?,
473 ),
474
475 ..Default::default()
476 };
477
478 ColumnTypeJsonType::TypeJson(Box::new(out))
479 },
480
481 v => polars_bail!(
482 ComputeError:
483 "dtype_to_type_text unsupported type: {}",
484 v
485 ),
486 };
487
488 Ok(out)
489}
490
491fn get_list_map_type(list_inner_dtype: &DataType) -> Option<(&DataType, &DataType)> {
496 let DataType::Struct(fields) = list_inner_dtype else {
497 return None;
498 };
499
500 let [fld1, fld2] = fields.as_slice() else {
501 return None;
502 };
503
504 if !(fld1.name == "key" && fld2.name == "value") {
505 return None;
506 }
507
508 Some((fld1.dtype(), fld2.dtype()))
509}