1use polars_core::datatypes::{DataType, TimeUnit};
5use polars_error::{PolarsResult, polars_bail};
6use polars_plan::dsl::{Expr, lit};
7use sqlparser::ast::{
8 ArrayElemTypeDef, DataType as SQLDataType, ExactNumberInfo, Ident, ObjectName, TimezoneInfo,
9};
10
11polars_utils::regex_cache::cached_regex! {
12 static DATETIME_LITERAL_RE = r"^\d{4}-[01]\d-[0-3]\d[ T](?:[01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](\.\d{1,9})?$";
13 static DATE_LITERAL_RE = r"^\d{4}-[01]\d-[0-3]\d$";
14 static TIME_LITERAL_RE = r"^(?:[01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](\.\d{1,9})?$";
15}
16
17pub fn bitstring_to_bytes_literal(b: &String) -> PolarsResult<Expr> {
18 let n_bits = b.len();
19 if !b.chars().all(|c| c == '0' || c == '1') || n_bits > 64 {
20 polars_bail!(
21 SQLSyntax:
22 "bit string literal should contain only 0s and 1s and have length <= 64; found '{}' with length {}", b, n_bits
23 )
24 }
25 let s = b.as_str();
26 Ok(lit(match n_bits {
27 0 => b"".to_vec(),
28 1..=8 => u8::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
29 9..=16 => u16::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
30 17..=32 => u32::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
31 _ => u64::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
32 }))
33}
34
35pub fn is_iso_datetime(value: &str) -> bool {
36 DATETIME_LITERAL_RE.is_match(value)
37}
38
39pub fn is_iso_date(value: &str) -> bool {
40 DATE_LITERAL_RE.is_match(value)
41}
42
43pub fn is_iso_time(value: &str) -> bool {
44 TIME_LITERAL_RE.is_match(value)
45}
46
47fn timeunit_from_precision(prec: &Option<u64>) -> PolarsResult<TimeUnit> {
48 Ok(match prec {
49 None => TimeUnit::Microseconds,
50 Some(n) if (1u64..=3u64).contains(n) => TimeUnit::Milliseconds,
51 Some(n) if (4u64..=6u64).contains(n) => TimeUnit::Microseconds,
52 Some(n) if (7u64..=9u64).contains(n) => TimeUnit::Nanoseconds,
53 Some(n) => {
54 polars_bail!(SQLSyntax: "invalid temporal type precision (expected 1-9, found {})", n)
55 },
56 })
57}
58
59pub(crate) fn map_sql_dtype_to_polars(dtype: &SQLDataType) -> PolarsResult<DataType> {
60 Ok(match dtype {
61 SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_type))
65 | SQLDataType::Array(ArrayElemTypeDef::SquareBracket(inner_type, _)) => {
66 DataType::List(Box::new(map_sql_dtype_to_polars(inner_type)?))
67 },
68
69 SQLDataType::Bytea
73 | SQLDataType::Bytes(_)
74 | SQLDataType::Binary(_)
75 | SQLDataType::Blob(_)
76 | SQLDataType::Varbinary(_) => DataType::Binary,
77
78 SQLDataType::Boolean | SQLDataType::Bool => DataType::Boolean,
82
83 SQLDataType::Int(_) | SQLDataType::Integer(_) => DataType::Int32,
87 SQLDataType::Int2(_) | SQLDataType::SmallInt(_) => DataType::Int16,
88 SQLDataType::Int4(_) | SQLDataType::MediumInt(_) => DataType::Int32,
89 SQLDataType::Int8(_) | SQLDataType::BigInt(_) => DataType::Int64,
90 SQLDataType::TinyInt(_) => DataType::Int8,
91
92 SQLDataType::UnsignedTinyInt(_) => DataType::UInt8, SQLDataType::UnsignedInt(_) | SQLDataType::UnsignedInteger(_) => DataType::UInt32,
98 SQLDataType::UnsignedInt2(_) | SQLDataType::UnsignedSmallInt(_) => DataType::UInt16,
99 SQLDataType::UnsignedInt4(_) | SQLDataType::UnsignedMediumInt(_) => DataType::UInt32,
100 SQLDataType::UnsignedInt8(_) | SQLDataType::UnsignedBigInt(_) | SQLDataType::UInt8 => {
101 DataType::UInt64
102 },
103
104 SQLDataType::Double | SQLDataType::DoublePrecision | SQLDataType::Float8 => {
108 DataType::Float64
109 },
110 SQLDataType::Float(n_bytes) => match n_bytes {
111 Some(n) if (1u64..=24u64).contains(n) => DataType::Float32,
112 Some(n) if (25u64..=53u64).contains(n) => DataType::Float64,
113 Some(n) => {
114 polars_bail!(SQLSyntax: "unsupported `float` size (expected a value between 1 and 53, found {})", n)
115 },
116 None => DataType::Float64,
117 },
118 SQLDataType::Float4 | SQLDataType::Real => DataType::Float32,
119
120 #[cfg(feature = "dtype-decimal")]
124 SQLDataType::Dec(info) | SQLDataType::Decimal(info) | SQLDataType::Numeric(info) => {
125 match *info {
126 ExactNumberInfo::PrecisionAndScale(p, s) => {
127 DataType::Decimal(Some(p as usize), Some(s as usize))
128 },
129 ExactNumberInfo::Precision(p) => DataType::Decimal(Some(p as usize), Some(0)),
130 ExactNumberInfo::None => DataType::Decimal(Some(38), Some(9)),
131 }
132 },
133
134 SQLDataType::Date => DataType::Date,
138 SQLDataType::Interval => DataType::Duration(TimeUnit::Microseconds),
139 SQLDataType::Time(_, tz) => match tz {
140 TimezoneInfo::None => DataType::Time,
141 _ => {
142 polars_bail!(SQLInterface: "`time` with timezone is not supported; found tz={}", tz)
143 },
144 },
145 SQLDataType::Datetime(prec) => DataType::Datetime(timeunit_from_precision(prec)?, None),
146 SQLDataType::Timestamp(prec, tz) => match tz {
147 TimezoneInfo::None => DataType::Datetime(timeunit_from_precision(prec)?, None),
148 _ => {
149 polars_bail!(SQLInterface: "`timestamp` with timezone is not (yet) supported")
150 },
151 },
152
153 SQLDataType::Char(_)
157 | SQLDataType::CharVarying(_)
158 | SQLDataType::Character(_)
159 | SQLDataType::CharacterVarying(_)
160 | SQLDataType::Clob(_)
161 | SQLDataType::String(_)
162 | SQLDataType::Text
163 | SQLDataType::Uuid
164 | SQLDataType::Varchar(_) => DataType::String,
165
166 SQLDataType::Custom(ObjectName(idents), _) => match idents.as_slice() {
170 [Ident { value, .. }] => match value.to_lowercase().as_str() {
171 "uint1" => DataType::UInt8,
175 "uint2" => DataType::UInt16,
176 "uint4" | "uint" => DataType::UInt32,
177 "uint8" => DataType::UInt64,
178 "int1" => DataType::Int8,
180 _ => {
181 polars_bail!(SQLInterface: "datatype {:?} is not currently supported", value)
182 },
183 },
184 _ => {
185 polars_bail!(SQLInterface: "datatype {:?} is not currently supported", idents)
186 },
187 },
188 _ => {
189 polars_bail!(SQLInterface: "datatype {:?} is not currently supported", dtype)
190 },
191 })
192}