Skip to main content

polars_sql/
types.rs

1//! This module supports mapping SQL datatypes to Polars datatypes.
2//!
3//! It also provides utility functions for working with SQL datatypes.
4use polars_core::datatypes::{DataType, TimeUnit};
5use polars_error::{PolarsResult, polars_bail};
6use polars_plan::dsl::Expr;
7use polars_plan::dsl::functions::lit;
8use sqlparser::ast::{
9    ArrayElemTypeDef, DataType as SQLDataType, ExactNumberInfo, Ident, ObjectName, ObjectNamePart,
10    TimezoneInfo,
11};
12
13polars_utils::regex_cache::cached_regex! {
14    static DATETIME_LITERAL_RE = r"^\d{4}-[01]\d-[0-3]\d[ T](?:[01][0-9]|2[0-3]):[0-5][0-9](?::[0-5][0-9](\.\d{1,9})?)?$";
15    static DATE_LITERAL_RE = r"^\d{4}-[01]\d-[0-3]\d$";
16    static TIME_LITERAL_RE = r"^(?:[01][0-9]|2[0-3]):[0-5][0-9](?::[0-5][0-9](\.\d{1,9})?)?$";
17}
18
19pub fn bitstring_to_bytes_literal(b: &String) -> PolarsResult<Expr> {
20    let n_bits = b.len();
21    if !b.chars().all(|c| c == '0' || c == '1') || n_bits > 64 {
22        polars_bail!(
23            SQLSyntax:
24            "bit string literal should contain only 0s and 1s and have length <= 64; found '{}' with length {}", b, n_bits
25        )
26    }
27    let s = b.as_str();
28    Ok(lit(match n_bits {
29        0 => b"".to_vec(),
30        1..=8 => u8::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
31        9..=16 => u16::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
32        17..=32 => u32::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
33        _ => u64::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
34    }))
35}
36
37pub fn is_iso_datetime(value: &str) -> bool {
38    DATETIME_LITERAL_RE.is_match(value)
39}
40
41pub fn is_iso_date(value: &str) -> bool {
42    DATE_LITERAL_RE.is_match(value)
43}
44
45pub fn is_iso_time(value: &str) -> bool {
46    TIME_LITERAL_RE.is_match(value)
47}
48
49pub(crate) fn timeunit_from_precision(prec: &Option<u64>) -> PolarsResult<TimeUnit> {
50    Ok(match prec {
51        None => TimeUnit::Microseconds,
52        Some(n) if (1u64..=3u64).contains(n) => TimeUnit::Milliseconds,
53        Some(n) if (4u64..=6u64).contains(n) => TimeUnit::Microseconds,
54        Some(n) if (7u64..=9u64).contains(n) => TimeUnit::Nanoseconds,
55        Some(n) => {
56            polars_bail!(SQLSyntax: "invalid temporal type precision (expected 1-9, found {})", n)
57        },
58    })
59}
60
61pub(crate) fn map_sql_dtype_to_polars(dtype: &SQLDataType) -> PolarsResult<DataType> {
62    Ok(match dtype {
63        // ---------------------------------
64        // array/list
65        // ---------------------------------
66        SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_type))
67        | SQLDataType::Array(ArrayElemTypeDef::SquareBracket(inner_type, _)) => {
68            DataType::List(Box::new(map_sql_dtype_to_polars(inner_type)?))
69        },
70
71        // ---------------------------------
72        // binary
73        // ---------------------------------
74        SQLDataType::Bytea
75        | SQLDataType::Bytes(_)
76        | SQLDataType::Binary(_)
77        | SQLDataType::Blob(_)
78        | SQLDataType::Varbinary(_) => DataType::Binary,
79
80        // ---------------------------------
81        // boolean
82        // ---------------------------------
83        SQLDataType::Boolean | SQLDataType::Bool => DataType::Boolean,
84
85        // ---------------------------------
86        // signed integer
87        // ---------------------------------
88        SQLDataType::TinyInt(_) => DataType::Int8,
89        SQLDataType::Int16 | SQLDataType::Int2(_) | SQLDataType::SmallInt(_) => DataType::Int16,
90        SQLDataType::Int32
91        | SQLDataType::Int4(_)
92        | SQLDataType::MediumInt(_)
93        | SQLDataType::Integer(_)
94        | SQLDataType::Int(_) => DataType::Int32,
95        SQLDataType::Int64 | SQLDataType::Int8(_) | SQLDataType::BigInt(_) => DataType::Int64,
96        SQLDataType::Int128 | SQLDataType::HugeInt => DataType::Int128,
97
98        // ---------------------------------
99        // unsigned integer
100        // ---------------------------------
101        SQLDataType::UTinyInt | SQLDataType::TinyIntUnsigned(_) => DataType::UInt8,
102        SQLDataType::Int2Unsigned(_)
103        | SQLDataType::SmallIntUnsigned(_)
104        | SQLDataType::USmallInt
105        | SQLDataType::UInt16 => DataType::UInt16,
106        SQLDataType::Int4Unsigned(_) | SQLDataType::MediumIntUnsigned(_) | SQLDataType::UInt32 => {
107            DataType::UInt32
108        },
109        SQLDataType::Int8Unsigned(_)
110        | SQLDataType::BigIntUnsigned(_)
111        | SQLDataType::UBigInt
112        | SQLDataType::UInt64
113        | SQLDataType::UInt8 => DataType::UInt64,
114        SQLDataType::IntUnsigned(_) | SQLDataType::UnsignedInteger => DataType::UInt32,
115        SQLDataType::UHugeInt => DataType::UInt128,
116
117        // ---------------------------------
118        // float
119        // ---------------------------------
120        SQLDataType::Float4 | SQLDataType::Real => DataType::Float32,
121        SQLDataType::Double(_) | SQLDataType::DoublePrecision | SQLDataType::Float8 => {
122            DataType::Float64
123        },
124        SQLDataType::Float(n_bytes) => match n_bytes {
125            ExactNumberInfo::Precision(n) if (1u64..=24u64).contains(n) => DataType::Float32,
126            ExactNumberInfo::Precision(n) if (25u64..=53u64).contains(n) => DataType::Float64,
127            ExactNumberInfo::Precision(n) => {
128                polars_bail!(SQLSyntax: "unsupported `float` size (expected a value between 1 and 53, found {})", n)
129            },
130            ExactNumberInfo::None => DataType::Float64,
131            ExactNumberInfo::PrecisionAndScale(_, _) => {
132                polars_bail!(SQLSyntax: "FLOAT does not support scale parameter")
133            },
134        },
135
136        // ---------------------------------
137        // decimal
138        // ---------------------------------
139        #[cfg(feature = "dtype-decimal")]
140        SQLDataType::Dec(info)
141        | SQLDataType::Decimal(info)
142        | SQLDataType::BigDecimal(info)
143        | SQLDataType::Numeric(info) => match *info {
144            ExactNumberInfo::PrecisionAndScale(p, s) => DataType::Decimal(p as usize, s as usize),
145            ExactNumberInfo::Precision(p) => DataType::Decimal(p as usize, 0),
146            ExactNumberInfo::None => DataType::Decimal(38, 9),
147        },
148
149        // ---------------------------------
150        // temporal
151        // ---------------------------------
152        SQLDataType::Date => DataType::Date,
153        SQLDataType::Interval { fields, precision } => {
154            if !fields.is_none() {
155                // eg: "YEARS TO MONTH"
156                polars_bail!(SQLInterface: "`interval` definition with fields={:?} is not supported", fields)
157            }
158            let time_unit = match precision {
159                Some(p) if (1u64..=3u64).contains(p) => TimeUnit::Milliseconds,
160                Some(p) if (4u64..=6u64).contains(p) => TimeUnit::Microseconds,
161                Some(p) if (7u64..=9u64).contains(p) => TimeUnit::Nanoseconds,
162                Some(p) => {
163                    polars_bail!(SQLSyntax: "invalid `interval` precision (expected 1-9, found {})", p)
164                },
165                None => TimeUnit::Microseconds,
166            };
167            DataType::Duration(time_unit)
168        },
169        SQLDataType::Time(_, tz) => match tz {
170            TimezoneInfo::None => DataType::Time,
171            _ => {
172                polars_bail!(SQLInterface: "`time` with timezone is not supported; found tz={}", tz)
173            },
174        },
175        SQLDataType::Datetime(prec) => DataType::Datetime(timeunit_from_precision(prec)?, None),
176        SQLDataType::Timestamp(prec, tz) => match tz {
177            TimezoneInfo::None => DataType::Datetime(timeunit_from_precision(prec)?, None),
178            _ => {
179                polars_bail!(SQLInterface: "`timestamp` with timezone is not (yet) supported")
180            },
181        },
182
183        // ---------------------------------
184        // string
185        // ---------------------------------
186        SQLDataType::Char(_)
187        | SQLDataType::CharVarying(_)
188        | SQLDataType::Character(_)
189        | SQLDataType::CharacterVarying(_)
190        | SQLDataType::Clob(_)
191        | SQLDataType::String(_)
192        | SQLDataType::Text
193        | SQLDataType::Uuid
194        | SQLDataType::Varchar(_) => DataType::String,
195
196        // ---------------------------------
197        // custom
198        // ---------------------------------
199        SQLDataType::Custom(ObjectName(idents), _) => match idents.as_slice() {
200            [ObjectNamePart::Identifier(Ident { value, .. })] => {
201                match value.to_lowercase().as_str() {
202                    // these integer types are not supported by the PostgreSQL core distribution,
203                    // but they ARE available via `pguint` (https://github.com/petere/pguint), an
204                    // extension maintained by one of the PostgreSQL core developers, and/or DuckDB.
205                    "int1" => DataType::Int8,
206                    "uint1" | "utinyint" => DataType::UInt8,
207                    "uint2" | "usmallint" => DataType::UInt16,
208                    "uint4" | "uinteger" | "uint" => DataType::UInt32,
209                    "uint8" | "ubigint" => DataType::UInt64,
210                    _ => {
211                        polars_bail!(SQLInterface: "datatype {:?} is not currently supported", value)
212                    },
213                }
214            },
215            _ => {
216                polars_bail!(SQLInterface: "datatype {:?} is not currently supported", idents)
217            },
218        },
219        _ => {
220            polars_bail!(SQLInterface: "datatype {:?} is not currently supported", dtype)
221        },
222    })
223}