polars_sql/
types.rs

1//! This module supports mapping SQL datatypes to Polars datatypes.
2//!
3//! It also provides utility functions for working with SQL datatypes.
4use polars_core::datatypes::{DataType, TimeUnit};
5use polars_error::{PolarsResult, polars_bail};
6use polars_plan::dsl::{Expr, lit};
7use sqlparser::ast::{
8    ArrayElemTypeDef, DataType as SQLDataType, ExactNumberInfo, Ident, ObjectName, TimezoneInfo,
9};
10
11polars_utils::regex_cache::cached_regex! {
12    static DATETIME_LITERAL_RE = r"^\d{4}-[01]\d-[0-3]\d[ T](?:[01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](\.\d{1,9})?$";
13    static DATE_LITERAL_RE = r"^\d{4}-[01]\d-[0-3]\d$";
14    static TIME_LITERAL_RE = r"^(?:[01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](\.\d{1,9})?$";
15}
16
17pub fn bitstring_to_bytes_literal(b: &String) -> PolarsResult<Expr> {
18    let n_bits = b.len();
19    if !b.chars().all(|c| c == '0' || c == '1') || n_bits > 64 {
20        polars_bail!(
21            SQLSyntax:
22            "bit string literal should contain only 0s and 1s and have length <= 64; found '{}' with length {}", b, n_bits
23        )
24    }
25    let s = b.as_str();
26    Ok(lit(match n_bits {
27        0 => b"".to_vec(),
28        1..=8 => u8::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
29        9..=16 => u16::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
30        17..=32 => u32::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
31        _ => u64::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
32    }))
33}
34
35pub fn is_iso_datetime(value: &str) -> bool {
36    DATETIME_LITERAL_RE.is_match(value)
37}
38
39pub fn is_iso_date(value: &str) -> bool {
40    DATE_LITERAL_RE.is_match(value)
41}
42
43pub fn is_iso_time(value: &str) -> bool {
44    TIME_LITERAL_RE.is_match(value)
45}
46
47fn timeunit_from_precision(prec: &Option<u64>) -> PolarsResult<TimeUnit> {
48    Ok(match prec {
49        None => TimeUnit::Microseconds,
50        Some(n) if (1u64..=3u64).contains(n) => TimeUnit::Milliseconds,
51        Some(n) if (4u64..=6u64).contains(n) => TimeUnit::Microseconds,
52        Some(n) if (7u64..=9u64).contains(n) => TimeUnit::Nanoseconds,
53        Some(n) => {
54            polars_bail!(SQLSyntax: "invalid temporal type precision (expected 1-9, found {})", n)
55        },
56    })
57}
58
59pub(crate) fn map_sql_dtype_to_polars(dtype: &SQLDataType) -> PolarsResult<DataType> {
60    Ok(match dtype {
61        // ---------------------------------
62        // array/list
63        // ---------------------------------
64        SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_type))
65        | SQLDataType::Array(ArrayElemTypeDef::SquareBracket(inner_type, _)) => {
66            DataType::List(Box::new(map_sql_dtype_to_polars(inner_type)?))
67        },
68
69        // ---------------------------------
70        // binary
71        // ---------------------------------
72        SQLDataType::Bytea
73        | SQLDataType::Bytes(_)
74        | SQLDataType::Binary(_)
75        | SQLDataType::Blob(_)
76        | SQLDataType::Varbinary(_) => DataType::Binary,
77
78        // ---------------------------------
79        // boolean
80        // ---------------------------------
81        SQLDataType::Boolean | SQLDataType::Bool => DataType::Boolean,
82
83        // ---------------------------------
84        // signed integer
85        // ---------------------------------
86        SQLDataType::Int(_) | SQLDataType::Integer(_) => DataType::Int32,
87        SQLDataType::Int2(_) | SQLDataType::SmallInt(_) => DataType::Int16,
88        SQLDataType::Int4(_) | SQLDataType::MediumInt(_) => DataType::Int32,
89        SQLDataType::Int8(_) | SQLDataType::BigInt(_) => DataType::Int64,
90        SQLDataType::TinyInt(_) => DataType::Int8,
91
92        // ---------------------------------
93        // unsigned integer: the following do not map to PostgreSQL types/syntax, but
94        // are enabled for wider compatibility (eg: "CAST(col AS BIGINT UNSIGNED)").
95        // ---------------------------------
96        SQLDataType::UnsignedTinyInt(_) => DataType::UInt8, // see also: "custom" types below
97        SQLDataType::UnsignedInt(_) | SQLDataType::UnsignedInteger(_) => DataType::UInt32,
98        SQLDataType::UnsignedInt2(_) | SQLDataType::UnsignedSmallInt(_) => DataType::UInt16,
99        SQLDataType::UnsignedInt4(_) | SQLDataType::UnsignedMediumInt(_) => DataType::UInt32,
100        SQLDataType::UnsignedInt8(_) | SQLDataType::UnsignedBigInt(_) | SQLDataType::UInt8 => {
101            DataType::UInt64
102        },
103
104        // ---------------------------------
105        // float
106        // ---------------------------------
107        SQLDataType::Double | SQLDataType::DoublePrecision | SQLDataType::Float8 => {
108            DataType::Float64
109        },
110        SQLDataType::Float(n_bytes) => match n_bytes {
111            Some(n) if (1u64..=24u64).contains(n) => DataType::Float32,
112            Some(n) if (25u64..=53u64).contains(n) => DataType::Float64,
113            Some(n) => {
114                polars_bail!(SQLSyntax: "unsupported `float` size (expected a value between 1 and 53, found {})", n)
115            },
116            None => DataType::Float64,
117        },
118        SQLDataType::Float4 | SQLDataType::Real => DataType::Float32,
119
120        // ---------------------------------
121        // decimal
122        // ---------------------------------
123        #[cfg(feature = "dtype-decimal")]
124        SQLDataType::Dec(info) | SQLDataType::Decimal(info) | SQLDataType::Numeric(info) => {
125            match *info {
126                ExactNumberInfo::PrecisionAndScale(p, s) => {
127                    DataType::Decimal(Some(p as usize), Some(s as usize))
128                },
129                ExactNumberInfo::Precision(p) => DataType::Decimal(Some(p as usize), Some(0)),
130                ExactNumberInfo::None => DataType::Decimal(Some(38), Some(9)),
131            }
132        },
133
134        // ---------------------------------
135        // temporal
136        // ---------------------------------
137        SQLDataType::Date => DataType::Date,
138        SQLDataType::Interval => DataType::Duration(TimeUnit::Microseconds),
139        SQLDataType::Time(_, tz) => match tz {
140            TimezoneInfo::None => DataType::Time,
141            _ => {
142                polars_bail!(SQLInterface: "`time` with timezone is not supported; found tz={}", tz)
143            },
144        },
145        SQLDataType::Datetime(prec) => DataType::Datetime(timeunit_from_precision(prec)?, None),
146        SQLDataType::Timestamp(prec, tz) => match tz {
147            TimezoneInfo::None => DataType::Datetime(timeunit_from_precision(prec)?, None),
148            _ => {
149                polars_bail!(SQLInterface: "`timestamp` with timezone is not (yet) supported")
150            },
151        },
152
153        // ---------------------------------
154        // string
155        // ---------------------------------
156        SQLDataType::Char(_)
157        | SQLDataType::CharVarying(_)
158        | SQLDataType::Character(_)
159        | SQLDataType::CharacterVarying(_)
160        | SQLDataType::Clob(_)
161        | SQLDataType::String(_)
162        | SQLDataType::Text
163        | SQLDataType::Uuid
164        | SQLDataType::Varchar(_) => DataType::String,
165
166        // ---------------------------------
167        // custom
168        // ---------------------------------
169        SQLDataType::Custom(ObjectName(idents), _) => match idents.as_slice() {
170            [Ident { value, .. }] => match value.to_lowercase().as_str() {
171                // these integer types are not supported by the PostgreSQL core distribution,
172                // but they ARE available via `pguint` (https://github.com/petere/pguint), an
173                // extension maintained by one of the PostgreSQL core developers.
174                "uint1" => DataType::UInt8,
175                "uint2" => DataType::UInt16,
176                "uint4" | "uint" => DataType::UInt32,
177                "uint8" => DataType::UInt64,
178                // `pguint` also provides a 1 byte (8bit) integer type alias
179                "int1" => DataType::Int8,
180                _ => {
181                    polars_bail!(SQLInterface: "datatype {:?} is not currently supported", value)
182                },
183            },
184            _ => {
185                polars_bail!(SQLInterface: "datatype {:?} is not currently supported", idents)
186            },
187        },
188        _ => {
189            polars_bail!(SQLInterface: "datatype {:?} is not currently supported", dtype)
190        },
191    })
192}