polars_sql/
types.rs

1//! This module supports mapping SQL datatypes to Polars datatypes.
2//!
3//! It also provides utility functions for working with SQL datatypes.
4use polars_core::datatypes::{DataType, TimeUnit};
5use polars_error::{PolarsResult, polars_bail};
6use polars_plan::dsl::{Expr, lit};
7use sqlparser::ast::{
8    ArrayElemTypeDef, DataType as SQLDataType, ExactNumberInfo, Ident, ObjectName, ObjectNamePart,
9    TimezoneInfo,
10};
11
12polars_utils::regex_cache::cached_regex! {
13    static DATETIME_LITERAL_RE = r"^\d{4}-[01]\d-[0-3]\d[ T](?:[01][0-9]|2[0-3]):[0-5][0-9](?::[0-5][0-9](\.\d{1,9})?)?$";
14    static DATE_LITERAL_RE = r"^\d{4}-[01]\d-[0-3]\d$";
15    static TIME_LITERAL_RE = r"^(?:[01][0-9]|2[0-3]):[0-5][0-9](?::[0-5][0-9](\.\d{1,9})?)?$";
16}
17
18pub fn bitstring_to_bytes_literal(b: &String) -> PolarsResult<Expr> {
19    let n_bits = b.len();
20    if !b.chars().all(|c| c == '0' || c == '1') || n_bits > 64 {
21        polars_bail!(
22            SQLSyntax:
23            "bit string literal should contain only 0s and 1s and have length <= 64; found '{}' with length {}", b, n_bits
24        )
25    }
26    let s = b.as_str();
27    Ok(lit(match n_bits {
28        0 => b"".to_vec(),
29        1..=8 => u8::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
30        9..=16 => u16::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
31        17..=32 => u32::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
32        _ => u64::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
33    }))
34}
35
36pub fn is_iso_datetime(value: &str) -> bool {
37    DATETIME_LITERAL_RE.is_match(value)
38}
39
40pub fn is_iso_date(value: &str) -> bool {
41    DATE_LITERAL_RE.is_match(value)
42}
43
44pub fn is_iso_time(value: &str) -> bool {
45    TIME_LITERAL_RE.is_match(value)
46}
47
48fn timeunit_from_precision(prec: &Option<u64>) -> PolarsResult<TimeUnit> {
49    Ok(match prec {
50        None => TimeUnit::Microseconds,
51        Some(n) if (1u64..=3u64).contains(n) => TimeUnit::Milliseconds,
52        Some(n) if (4u64..=6u64).contains(n) => TimeUnit::Microseconds,
53        Some(n) if (7u64..=9u64).contains(n) => TimeUnit::Nanoseconds,
54        Some(n) => {
55            polars_bail!(SQLSyntax: "invalid temporal type precision (expected 1-9, found {})", n)
56        },
57    })
58}
59
60pub(crate) fn map_sql_dtype_to_polars(dtype: &SQLDataType) -> PolarsResult<DataType> {
61    Ok(match dtype {
62        // ---------------------------------
63        // array/list
64        // ---------------------------------
65        SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_type))
66        | SQLDataType::Array(ArrayElemTypeDef::SquareBracket(inner_type, _)) => {
67            DataType::List(Box::new(map_sql_dtype_to_polars(inner_type)?))
68        },
69
70        // ---------------------------------
71        // binary
72        // ---------------------------------
73        SQLDataType::Bytea
74        | SQLDataType::Bytes(_)
75        | SQLDataType::Binary(_)
76        | SQLDataType::Blob(_)
77        | SQLDataType::Varbinary(_) => DataType::Binary,
78
79        // ---------------------------------
80        // boolean
81        // ---------------------------------
82        SQLDataType::Boolean | SQLDataType::Bool => DataType::Boolean,
83
84        // ---------------------------------
85        // signed integer
86        // ---------------------------------
87        SQLDataType::TinyInt(_) => DataType::Int8,
88        SQLDataType::Int16 | SQLDataType::Int2(_) | SQLDataType::SmallInt(_) => DataType::Int16,
89        SQLDataType::Int32
90        | SQLDataType::Int4(_)
91        | SQLDataType::MediumInt(_)
92        | SQLDataType::Integer(_)
93        | SQLDataType::Int(_) => DataType::Int32,
94        SQLDataType::Int64 | SQLDataType::Int8(_) | SQLDataType::BigInt(_) => DataType::Int64,
95        SQLDataType::Int128 | SQLDataType::HugeInt => DataType::Int128,
96
97        // ---------------------------------
98        // unsigned integer
99        // ---------------------------------
100        SQLDataType::UTinyInt | SQLDataType::TinyIntUnsigned(_) => DataType::UInt8,
101        SQLDataType::Int2Unsigned(_)
102        | SQLDataType::SmallIntUnsigned(_)
103        | SQLDataType::USmallInt
104        | SQLDataType::UInt16 => DataType::UInt16,
105        SQLDataType::Int4Unsigned(_) | SQLDataType::MediumIntUnsigned(_) | SQLDataType::UInt32 => {
106            DataType::UInt32
107        },
108        SQLDataType::Int8Unsigned(_)
109        | SQLDataType::BigIntUnsigned(_)
110        | SQLDataType::UBigInt
111        | SQLDataType::UInt64
112        | SQLDataType::UInt8 => DataType::UInt64,
113        SQLDataType::IntUnsigned(_) | SQLDataType::UnsignedInteger => DataType::UInt32,
114        SQLDataType::UHugeInt => DataType::UInt128,
115
116        // ---------------------------------
117        // float
118        // ---------------------------------
119        SQLDataType::Float4 | SQLDataType::Real => DataType::Float32,
120        SQLDataType::Double(_) | SQLDataType::DoublePrecision | SQLDataType::Float8 => {
121            DataType::Float64
122        },
123        SQLDataType::Float(n_bytes) => match n_bytes {
124            ExactNumberInfo::Precision(n) if (1u64..=24u64).contains(n) => DataType::Float32,
125            ExactNumberInfo::Precision(n) if (25u64..=53u64).contains(n) => DataType::Float64,
126            ExactNumberInfo::Precision(n) => {
127                polars_bail!(SQLSyntax: "unsupported `float` size (expected a value between 1 and 53, found {})", n)
128            },
129            ExactNumberInfo::None => DataType::Float64,
130            ExactNumberInfo::PrecisionAndScale(_, _) => {
131                polars_bail!(SQLSyntax: "FLOAT does not support scale parameter")
132            },
133        },
134
135        // ---------------------------------
136        // decimal
137        // ---------------------------------
138        #[cfg(feature = "dtype-decimal")]
139        SQLDataType::Dec(info)
140        | SQLDataType::Decimal(info)
141        | SQLDataType::BigDecimal(info)
142        | SQLDataType::Numeric(info) => match *info {
143            ExactNumberInfo::PrecisionAndScale(p, s) => DataType::Decimal(p as usize, s as usize),
144            ExactNumberInfo::Precision(p) => DataType::Decimal(p as usize, 0),
145            ExactNumberInfo::None => DataType::Decimal(38, 9),
146        },
147
148        // ---------------------------------
149        // temporal
150        // ---------------------------------
151        SQLDataType::Date => DataType::Date,
152        SQLDataType::Interval { fields, precision } => {
153            if !fields.is_none() {
154                // eg: "YEARS TO MONTH"
155                polars_bail!(SQLInterface: "`interval` definition with fields={:?} is not supported", fields)
156            }
157            let time_unit = match precision {
158                Some(p) if (1u64..=3u64).contains(p) => TimeUnit::Milliseconds,
159                Some(p) if (4u64..=6u64).contains(p) => TimeUnit::Microseconds,
160                Some(p) if (7u64..=9u64).contains(p) => TimeUnit::Nanoseconds,
161                Some(p) => {
162                    polars_bail!(SQLSyntax: "invalid `interval` precision (expected 1-9, found {})", p)
163                },
164                None => TimeUnit::Microseconds,
165            };
166            DataType::Duration(time_unit)
167        },
168        SQLDataType::Time(_, tz) => match tz {
169            TimezoneInfo::None => DataType::Time,
170            _ => {
171                polars_bail!(SQLInterface: "`time` with timezone is not supported; found tz={}", tz)
172            },
173        },
174        SQLDataType::Datetime(prec) => DataType::Datetime(timeunit_from_precision(prec)?, None),
175        SQLDataType::Timestamp(prec, tz) => match tz {
176            TimezoneInfo::None => DataType::Datetime(timeunit_from_precision(prec)?, None),
177            _ => {
178                polars_bail!(SQLInterface: "`timestamp` with timezone is not (yet) supported")
179            },
180        },
181
182        // ---------------------------------
183        // string
184        // ---------------------------------
185        SQLDataType::Char(_)
186        | SQLDataType::CharVarying(_)
187        | SQLDataType::Character(_)
188        | SQLDataType::CharacterVarying(_)
189        | SQLDataType::Clob(_)
190        | SQLDataType::String(_)
191        | SQLDataType::Text
192        | SQLDataType::Uuid
193        | SQLDataType::Varchar(_) => DataType::String,
194
195        // ---------------------------------
196        // custom
197        // ---------------------------------
198        SQLDataType::Custom(ObjectName(idents), _) => match idents.as_slice() {
199            [ObjectNamePart::Identifier(Ident { value, .. })] => {
200                match value.to_lowercase().as_str() {
201                    // these integer types are not supported by the PostgreSQL core distribution,
202                    // but they ARE available via `pguint` (https://github.com/petere/pguint), an
203                    // extension maintained by one of the PostgreSQL core developers, and/or DuckDB.
204                    "int1" => DataType::Int8,
205                    "uint1" | "utinyint" => DataType::UInt8,
206                    "uint2" | "usmallint" => DataType::UInt16,
207                    "uint4" | "uinteger" | "uint" => DataType::UInt32,
208                    "uint8" | "ubigint" => DataType::UInt64,
209                    _ => {
210                        polars_bail!(SQLInterface: "datatype {:?} is not currently supported", value)
211                    },
212                }
213            },
214            _ => {
215                polars_bail!(SQLInterface: "datatype {:?} is not currently supported", idents)
216            },
217        },
218        _ => {
219            polars_bail!(SQLInterface: "datatype {:?} is not currently supported", dtype)
220        },
221    })
222}