1use polars_core::datatypes::{DataType, TimeUnit};
5use polars_error::{PolarsResult, polars_bail};
6use polars_plan::dsl::{Expr, lit};
7use sqlparser::ast::{
8 ArrayElemTypeDef, DataType as SQLDataType, ExactNumberInfo, Ident, ObjectName, ObjectNamePart,
9 TimezoneInfo,
10};
11
12polars_utils::regex_cache::cached_regex! {
13 static DATETIME_LITERAL_RE = r"^\d{4}-[01]\d-[0-3]\d[ T](?:[01][0-9]|2[0-3]):[0-5][0-9](?::[0-5][0-9](\.\d{1,9})?)?$";
14 static DATE_LITERAL_RE = r"^\d{4}-[01]\d-[0-3]\d$";
15 static TIME_LITERAL_RE = r"^(?:[01][0-9]|2[0-3]):[0-5][0-9](?::[0-5][0-9](\.\d{1,9})?)?$";
16}
17
18pub fn bitstring_to_bytes_literal(b: &String) -> PolarsResult<Expr> {
19 let n_bits = b.len();
20 if !b.chars().all(|c| c == '0' || c == '1') || n_bits > 64 {
21 polars_bail!(
22 SQLSyntax:
23 "bit string literal should contain only 0s and 1s and have length <= 64; found '{}' with length {}", b, n_bits
24 )
25 }
26 let s = b.as_str();
27 Ok(lit(match n_bits {
28 0 => b"".to_vec(),
29 1..=8 => u8::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
30 9..=16 => u16::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
31 17..=32 => u32::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
32 _ => u64::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
33 }))
34}
35
36pub fn is_iso_datetime(value: &str) -> bool {
37 DATETIME_LITERAL_RE.is_match(value)
38}
39
40pub fn is_iso_date(value: &str) -> bool {
41 DATE_LITERAL_RE.is_match(value)
42}
43
44pub fn is_iso_time(value: &str) -> bool {
45 TIME_LITERAL_RE.is_match(value)
46}
47
48fn timeunit_from_precision(prec: &Option<u64>) -> PolarsResult<TimeUnit> {
49 Ok(match prec {
50 None => TimeUnit::Microseconds,
51 Some(n) if (1u64..=3u64).contains(n) => TimeUnit::Milliseconds,
52 Some(n) if (4u64..=6u64).contains(n) => TimeUnit::Microseconds,
53 Some(n) if (7u64..=9u64).contains(n) => TimeUnit::Nanoseconds,
54 Some(n) => {
55 polars_bail!(SQLSyntax: "invalid temporal type precision (expected 1-9, found {})", n)
56 },
57 })
58}
59
60pub(crate) fn map_sql_dtype_to_polars(dtype: &SQLDataType) -> PolarsResult<DataType> {
61 Ok(match dtype {
62 SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_type))
66 | SQLDataType::Array(ArrayElemTypeDef::SquareBracket(inner_type, _)) => {
67 DataType::List(Box::new(map_sql_dtype_to_polars(inner_type)?))
68 },
69
70 SQLDataType::Bytea
74 | SQLDataType::Bytes(_)
75 | SQLDataType::Binary(_)
76 | SQLDataType::Blob(_)
77 | SQLDataType::Varbinary(_) => DataType::Binary,
78
79 SQLDataType::Boolean | SQLDataType::Bool => DataType::Boolean,
83
84 SQLDataType::TinyInt(_) => DataType::Int8,
88 SQLDataType::Int16 | SQLDataType::Int2(_) | SQLDataType::SmallInt(_) => DataType::Int16,
89 SQLDataType::Int32
90 | SQLDataType::Int4(_)
91 | SQLDataType::MediumInt(_)
92 | SQLDataType::Integer(_)
93 | SQLDataType::Int(_) => DataType::Int32,
94 SQLDataType::Int64 | SQLDataType::Int8(_) | SQLDataType::BigInt(_) => DataType::Int64,
95 SQLDataType::Int128 | SQLDataType::HugeInt => DataType::Int128,
96
97 SQLDataType::UTinyInt | SQLDataType::TinyIntUnsigned(_) => DataType::UInt8,
101 SQLDataType::Int2Unsigned(_)
102 | SQLDataType::SmallIntUnsigned(_)
103 | SQLDataType::USmallInt
104 | SQLDataType::UInt16 => DataType::UInt16,
105 SQLDataType::Int4Unsigned(_) | SQLDataType::MediumIntUnsigned(_) | SQLDataType::UInt32 => {
106 DataType::UInt32
107 },
108 SQLDataType::Int8Unsigned(_)
109 | SQLDataType::BigIntUnsigned(_)
110 | SQLDataType::UBigInt
111 | SQLDataType::UInt64
112 | SQLDataType::UInt8 => DataType::UInt64,
113 SQLDataType::IntUnsigned(_) | SQLDataType::UnsignedInteger => DataType::UInt32,
114 SQLDataType::UHugeInt => DataType::UInt128,
115
116 SQLDataType::Float4 | SQLDataType::Real => DataType::Float32,
120 SQLDataType::Double(_) | SQLDataType::DoublePrecision | SQLDataType::Float8 => {
121 DataType::Float64
122 },
123 SQLDataType::Float(n_bytes) => match n_bytes {
124 ExactNumberInfo::Precision(n) if (1u64..=24u64).contains(n) => DataType::Float32,
125 ExactNumberInfo::Precision(n) if (25u64..=53u64).contains(n) => DataType::Float64,
126 ExactNumberInfo::Precision(n) => {
127 polars_bail!(SQLSyntax: "unsupported `float` size (expected a value between 1 and 53, found {})", n)
128 },
129 ExactNumberInfo::None => DataType::Float64,
130 ExactNumberInfo::PrecisionAndScale(_, _) => {
131 polars_bail!(SQLSyntax: "FLOAT does not support scale parameter")
132 },
133 },
134
135 #[cfg(feature = "dtype-decimal")]
139 SQLDataType::Dec(info)
140 | SQLDataType::Decimal(info)
141 | SQLDataType::BigDecimal(info)
142 | SQLDataType::Numeric(info) => match *info {
143 ExactNumberInfo::PrecisionAndScale(p, s) => DataType::Decimal(p as usize, s as usize),
144 ExactNumberInfo::Precision(p) => DataType::Decimal(p as usize, 0),
145 ExactNumberInfo::None => DataType::Decimal(38, 9),
146 },
147
148 SQLDataType::Date => DataType::Date,
152 SQLDataType::Interval { fields, precision } => {
153 if !fields.is_none() {
154 polars_bail!(SQLInterface: "`interval` definition with fields={:?} is not supported", fields)
156 }
157 let time_unit = match precision {
158 Some(p) if (1u64..=3u64).contains(p) => TimeUnit::Milliseconds,
159 Some(p) if (4u64..=6u64).contains(p) => TimeUnit::Microseconds,
160 Some(p) if (7u64..=9u64).contains(p) => TimeUnit::Nanoseconds,
161 Some(p) => {
162 polars_bail!(SQLSyntax: "invalid `interval` precision (expected 1-9, found {})", p)
163 },
164 None => TimeUnit::Microseconds,
165 };
166 DataType::Duration(time_unit)
167 },
168 SQLDataType::Time(_, tz) => match tz {
169 TimezoneInfo::None => DataType::Time,
170 _ => {
171 polars_bail!(SQLInterface: "`time` with timezone is not supported; found tz={}", tz)
172 },
173 },
174 SQLDataType::Datetime(prec) => DataType::Datetime(timeunit_from_precision(prec)?, None),
175 SQLDataType::Timestamp(prec, tz) => match tz {
176 TimezoneInfo::None => DataType::Datetime(timeunit_from_precision(prec)?, None),
177 _ => {
178 polars_bail!(SQLInterface: "`timestamp` with timezone is not (yet) supported")
179 },
180 },
181
182 SQLDataType::Char(_)
186 | SQLDataType::CharVarying(_)
187 | SQLDataType::Character(_)
188 | SQLDataType::CharacterVarying(_)
189 | SQLDataType::Clob(_)
190 | SQLDataType::String(_)
191 | SQLDataType::Text
192 | SQLDataType::Uuid
193 | SQLDataType::Varchar(_) => DataType::String,
194
195 SQLDataType::Custom(ObjectName(idents), _) => match idents.as_slice() {
199 [ObjectNamePart::Identifier(Ident { value, .. })] => {
200 match value.to_lowercase().as_str() {
201 "int1" => DataType::Int8,
205 "uint1" | "utinyint" => DataType::UInt8,
206 "uint2" | "usmallint" => DataType::UInt16,
207 "uint4" | "uinteger" | "uint" => DataType::UInt32,
208 "uint8" | "ubigint" => DataType::UInt64,
209 _ => {
210 polars_bail!(SQLInterface: "datatype {:?} is not currently supported", value)
211 },
212 }
213 },
214 _ => {
215 polars_bail!(SQLInterface: "datatype {:?} is not currently supported", idents)
216 },
217 },
218 _ => {
219 polars_bail!(SQLInterface: "datatype {:?} is not currently supported", dtype)
220 },
221 })
222}