1use polars_core::datatypes::{DataType, TimeUnit};
5use polars_error::{PolarsResult, polars_bail};
6use polars_plan::dsl::Expr;
7use polars_plan::dsl::functions::lit;
8use sqlparser::ast::{
9 ArrayElemTypeDef, DataType as SQLDataType, ExactNumberInfo, Ident, ObjectName, ObjectNamePart,
10 TimezoneInfo,
11};
12
13polars_utils::regex_cache::cached_regex! {
14 static DATETIME_LITERAL_RE = r"^\d{4}-[01]\d-[0-3]\d[ T](?:[01][0-9]|2[0-3]):[0-5][0-9](?::[0-5][0-9](\.\d{1,9})?)?$";
15 static DATE_LITERAL_RE = r"^\d{4}-[01]\d-[0-3]\d$";
16 static TIME_LITERAL_RE = r"^(?:[01][0-9]|2[0-3]):[0-5][0-9](?::[0-5][0-9](\.\d{1,9})?)?$";
17}
18
19pub fn bitstring_to_bytes_literal(b: &String) -> PolarsResult<Expr> {
20 let n_bits = b.len();
21 if !b.chars().all(|c| c == '0' || c == '1') || n_bits > 64 {
22 polars_bail!(
23 SQLSyntax:
24 "bit string literal should contain only 0s and 1s and have length <= 64; found '{}' with length {}", b, n_bits
25 )
26 }
27 let s = b.as_str();
28 Ok(lit(match n_bits {
29 0 => b"".to_vec(),
30 1..=8 => u8::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
31 9..=16 => u16::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
32 17..=32 => u32::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
33 _ => u64::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
34 }))
35}
36
37pub fn is_iso_datetime(value: &str) -> bool {
38 DATETIME_LITERAL_RE.is_match(value)
39}
40
41pub fn is_iso_date(value: &str) -> bool {
42 DATE_LITERAL_RE.is_match(value)
43}
44
45pub fn is_iso_time(value: &str) -> bool {
46 TIME_LITERAL_RE.is_match(value)
47}
48
49pub(crate) fn timeunit_from_precision(prec: &Option<u64>) -> PolarsResult<TimeUnit> {
50 Ok(match prec {
51 None => TimeUnit::Microseconds,
52 Some(n) if (1u64..=3u64).contains(n) => TimeUnit::Milliseconds,
53 Some(n) if (4u64..=6u64).contains(n) => TimeUnit::Microseconds,
54 Some(n) if (7u64..=9u64).contains(n) => TimeUnit::Nanoseconds,
55 Some(n) => {
56 polars_bail!(SQLSyntax: "invalid temporal type precision (expected 1-9, found {})", n)
57 },
58 })
59}
60
61pub(crate) fn map_sql_dtype_to_polars(dtype: &SQLDataType) -> PolarsResult<DataType> {
62 Ok(match dtype {
63 SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_type))
67 | SQLDataType::Array(ArrayElemTypeDef::SquareBracket(inner_type, _)) => {
68 DataType::List(Box::new(map_sql_dtype_to_polars(inner_type)?))
69 },
70
71 SQLDataType::Bytea
75 | SQLDataType::Bytes(_)
76 | SQLDataType::Binary(_)
77 | SQLDataType::Blob(_)
78 | SQLDataType::Varbinary(_) => DataType::Binary,
79
80 SQLDataType::Boolean | SQLDataType::Bool => DataType::Boolean,
84
85 SQLDataType::TinyInt(_) => DataType::Int8,
89 SQLDataType::Int16 | SQLDataType::Int2(_) | SQLDataType::SmallInt(_) => DataType::Int16,
90 SQLDataType::Int32
91 | SQLDataType::Int4(_)
92 | SQLDataType::MediumInt(_)
93 | SQLDataType::Integer(_)
94 | SQLDataType::Int(_) => DataType::Int32,
95 SQLDataType::Int64 | SQLDataType::Int8(_) | SQLDataType::BigInt(_) => DataType::Int64,
96 SQLDataType::Int128 | SQLDataType::HugeInt => DataType::Int128,
97
98 SQLDataType::UTinyInt | SQLDataType::TinyIntUnsigned(_) => DataType::UInt8,
102 SQLDataType::Int2Unsigned(_)
103 | SQLDataType::SmallIntUnsigned(_)
104 | SQLDataType::USmallInt
105 | SQLDataType::UInt16 => DataType::UInt16,
106 SQLDataType::Int4Unsigned(_) | SQLDataType::MediumIntUnsigned(_) | SQLDataType::UInt32 => {
107 DataType::UInt32
108 },
109 SQLDataType::Int8Unsigned(_)
110 | SQLDataType::BigIntUnsigned(_)
111 | SQLDataType::UBigInt
112 | SQLDataType::UInt64
113 | SQLDataType::UInt8 => DataType::UInt64,
114 SQLDataType::IntUnsigned(_) | SQLDataType::UnsignedInteger => DataType::UInt32,
115 SQLDataType::UHugeInt => DataType::UInt128,
116
117 SQLDataType::Float4 | SQLDataType::Real => DataType::Float32,
121 SQLDataType::Double(_) | SQLDataType::DoublePrecision | SQLDataType::Float8 => {
122 DataType::Float64
123 },
124 SQLDataType::Float(n_bytes) => match n_bytes {
125 ExactNumberInfo::Precision(n) if (1u64..=24u64).contains(n) => DataType::Float32,
126 ExactNumberInfo::Precision(n) if (25u64..=53u64).contains(n) => DataType::Float64,
127 ExactNumberInfo::Precision(n) => {
128 polars_bail!(SQLSyntax: "unsupported `float` size (expected a value between 1 and 53, found {})", n)
129 },
130 ExactNumberInfo::None => DataType::Float64,
131 ExactNumberInfo::PrecisionAndScale(_, _) => {
132 polars_bail!(SQLSyntax: "FLOAT does not support scale parameter")
133 },
134 },
135
136 #[cfg(feature = "dtype-decimal")]
140 SQLDataType::Dec(info)
141 | SQLDataType::Decimal(info)
142 | SQLDataType::BigDecimal(info)
143 | SQLDataType::Numeric(info) => match *info {
144 ExactNumberInfo::PrecisionAndScale(p, s) => DataType::Decimal(p as usize, s as usize),
145 ExactNumberInfo::Precision(p) => DataType::Decimal(p as usize, 0),
146 ExactNumberInfo::None => DataType::Decimal(38, 9),
147 },
148
149 SQLDataType::Date => DataType::Date,
153 SQLDataType::Interval { fields, precision } => {
154 if !fields.is_none() {
155 polars_bail!(SQLInterface: "`interval` definition with fields={:?} is not supported", fields)
157 }
158 let time_unit = match precision {
159 Some(p) if (1u64..=3u64).contains(p) => TimeUnit::Milliseconds,
160 Some(p) if (4u64..=6u64).contains(p) => TimeUnit::Microseconds,
161 Some(p) if (7u64..=9u64).contains(p) => TimeUnit::Nanoseconds,
162 Some(p) => {
163 polars_bail!(SQLSyntax: "invalid `interval` precision (expected 1-9, found {})", p)
164 },
165 None => TimeUnit::Microseconds,
166 };
167 DataType::Duration(time_unit)
168 },
169 SQLDataType::Time(_, tz) => match tz {
170 TimezoneInfo::None => DataType::Time,
171 _ => {
172 polars_bail!(SQLInterface: "`time` with timezone is not supported; found tz={}", tz)
173 },
174 },
175 SQLDataType::Datetime(prec) => DataType::Datetime(timeunit_from_precision(prec)?, None),
176 SQLDataType::Timestamp(prec, tz) => match tz {
177 TimezoneInfo::None => DataType::Datetime(timeunit_from_precision(prec)?, None),
178 _ => {
179 polars_bail!(SQLInterface: "`timestamp` with timezone is not (yet) supported")
180 },
181 },
182
183 SQLDataType::Char(_)
187 | SQLDataType::CharVarying(_)
188 | SQLDataType::Character(_)
189 | SQLDataType::CharacterVarying(_)
190 | SQLDataType::Clob(_)
191 | SQLDataType::String(_)
192 | SQLDataType::Text
193 | SQLDataType::Uuid
194 | SQLDataType::Varchar(_) => DataType::String,
195
196 SQLDataType::Custom(ObjectName(idents), _) => match idents.as_slice() {
200 [ObjectNamePart::Identifier(Ident { value, .. })] => {
201 match value.to_lowercase().as_str() {
202 "int1" => DataType::Int8,
206 "uint1" | "utinyint" => DataType::UInt8,
207 "uint2" | "usmallint" => DataType::UInt16,
208 "uint4" | "uinteger" | "uint" => DataType::UInt32,
209 "uint8" | "ubigint" => DataType::UInt64,
210 _ => {
211 polars_bail!(SQLInterface: "datatype {:?} is not currently supported", value)
212 },
213 }
214 },
215 _ => {
216 polars_bail!(SQLInterface: "datatype {:?} is not currently supported", idents)
217 },
218 },
219 _ => {
220 polars_bail!(SQLInterface: "datatype {:?} is not currently supported", dtype)
221 },
222 })
223}