1use arrow::array::PrimitiveArray;
2use chrono::format::ParseErrorKind;
3use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime};
4use polars_core::prelude::*;
5
6use super::patterns::{self, Pattern};
7#[cfg(feature = "dtype-date")]
8use crate::chunkedarray::date::naive_date_to_date;
9use crate::chunkedarray::string::strptime;
10use crate::prelude::string::strptime::StrpTimeState;
11
12polars_utils::regex_cache::cached_regex! {
13 static DATETIME_DMY_RE = r#"(?x)
14 ^
15 ['"]? # optional quotes
16 (?:\d{1,2}) # day
17 [-/\.] # separator
18 (?P<month>[01]?\d{1}) # month
19 [-/\.] # separator
20 (?:\d{4,}) # year
21 (?:
22 [T\ ] # separator
23 (?:\d{1,2}) # hour
24 :? # separator
25 (?:\d{1,2}) # minute
26 (?:
27 :? # separator
28 (?:\d{1,2}) # second
29 (?:
30 \.(?:\d{1,9}) # subsecond
31 )?
32 )?
33 )?
34 ['"]? # optional quotes
35 $
36 "#;
37
38 static DATETIME_YMD_RE = r#"(?x)
39 ^
40 ['"]? # optional quotes
41 (?:\d{4,}) # year
42 [-/\.] # separator
43 (?P<month>[01]?\d{1}) # month
44 [-/\.] # separator
45 (?:\d{1,2}) # day
46 (?:
47 [T\ ] # separator
48 (?:\d{1,2}) # hour
49 :? # separator
50 (?:\d{1,2}) # minute
51 (?:
52 :? # separator
53 (?:\d{1,2}) # seconds
54 (?:
55 \.(?:\d{1,9}) # subsecond
56 )?
57 )?
58 )?
59 ['"]? # optional quotes
60 $
61 "#;
62
63 static DATETIME_YMDZ_RE = r#"(?x)
64 ^
65 ['"]? # optional quotes
66 (?:\d{4,}) # year
67 [-/\.] # separator
68 (?P<month>[01]?\d{1}) # month
69 [-/\.] # separator
70 (?:\d{1,2}) # year
71 [T\ ] # separator
72 (?:\d{2}) # hour
73 :? # separator
74 (?:\d{2}) # minute
75 (?:
76 :? # separator
77 (?:\d{2}) # second
78 (?:
79 \.(?:\d{1,9}) # subsecond
80 )?
81 )?
82 (?:
83 # offset (e.g. +01:00, +0100, or +01)
84 [+-](?:\d{2})
85 (?::?\d{2})?
86 # or Zulu suffix
87 |Z
88 )
89 ['"]? # optional quotes
90 $
91 "#;
92}
93
94impl Pattern {
95 pub fn is_inferable(&self, val: &str) -> bool {
96 match self {
97 Pattern::DateDMY => true, Pattern::DateYMD => true, Pattern::Time => true,
100 Pattern::DatetimeDMY => match DATETIME_DMY_RE.captures(val) {
101 Some(search) => (1..=12).contains(
102 &search
103 .name("month")
104 .unwrap()
105 .as_str()
106 .parse::<u8>()
107 .unwrap(),
108 ),
109 None => false,
110 },
111 Pattern::DatetimeYMD => match DATETIME_YMD_RE.captures(val) {
112 Some(search) => (1..=12).contains(
113 &search
114 .name("month")
115 .unwrap()
116 .as_str()
117 .parse::<u8>()
118 .unwrap(),
119 ),
120 None => false,
121 },
122 Pattern::DatetimeYMDZ => match DATETIME_YMDZ_RE.captures(val) {
123 Some(search) => (1..=12).contains(
124 &search
125 .name("month")
126 .unwrap()
127 .as_str()
128 .parse::<u8>()
129 .unwrap(),
130 ),
131 None => false,
132 },
133 }
134 }
135}
136
/// Byte-level parsing interface for temporal inference.
///
/// `T` is the physical value produced for a successfully parsed input.
pub trait StrpTimeParser<T> {
    /// Parses a single value given as raw bytes.
    ///
    /// `time_unit` selects the resolution of the result for implementations
    /// that need one; the implementations in this file return `None` when no
    /// known format matches `val`.
    fn parse_bytes(&mut self, val: &[u8], time_unit: Option<TimeUnit>) -> Option<T>;
}
140
#[cfg(feature = "dtype-datetime")]
impl StrpTimeParser<i64> for DatetimeInfer<Int64Type> {
    /// Parses `val` into a timestamp expressed in `time_unit`.
    ///
    /// The most recently successful format is tried first. If it fails, every
    /// candidate format is tried in order and the first one that parses becomes
    /// the new preferred format. Returns `None` when nothing matches or when
    /// the length of the preferred format cannot be determined.
    ///
    /// # Panics
    ///
    /// Panics if `time_unit` is `None`.
    fn parse_bytes(&mut self, val: &[u8], time_unit: Option<TimeUnit>) -> Option<i64> {
        if self.fmt_len == 0 {
            self.fmt_len = strptime::fmt_len(self.latest_fmt.as_bytes())?;
        }
        let transform = match time_unit {
            Some(TimeUnit::Nanoseconds) => datetime_to_timestamp_ns,
            Some(TimeUnit::Microseconds) => datetime_to_timestamp_us,
            Some(TimeUnit::Milliseconds) => datetime_to_timestamp_ms,
            _ => unreachable!("time_unit must be provided for datetime parsing"),
        };

        // SAFETY: `self.fmt_len` is non-zero here and was computed by
        // `strptime::fmt_len` from `self.latest_fmt`, the format passed alongside it.
        // NOTE(review): assumes this is the contract of `StrpTimeState::parse` — confirm.
        let fast_path = unsafe {
            self.transform_bytes
                .parse(val, self.latest_fmt.as_bytes(), self.fmt_len)
        };
        if let Some(ndt) = fast_path {
            return Some(transform(ndt));
        }

        for fmt in self.patterns {
            // Every candidate needs its own length; reusing the cached length of
            // `latest_fmt` would hand the unsafe parser a mismatched pair.
            let Some(fmt_len) = strptime::fmt_len(fmt.as_bytes()) else {
                continue;
            };
            // SAFETY: `fmt_len` was computed from `fmt` on the line above.
            let attempt = unsafe { self.transform_bytes.parse(val, fmt.as_bytes(), fmt_len) };
            if let Some(ndt) = attempt {
                // Keep the cached length in sync with the cached format.
                self.latest_fmt = fmt;
                self.fmt_len = fmt_len;
                // Use the requested unit here as well, not a fixed microsecond conversion.
                return Some(transform(ndt));
            }
        }
        None
    }
}
178
#[cfg(feature = "dtype-date")]
impl StrpTimeParser<i32> for DatetimeInfer<Int32Type> {
    /// Parses `val` into a physical date value; `_time_unit` is ignored.
    ///
    /// The most recently successful format is tried first. If it fails, every
    /// candidate format is tried in order and the first one that parses becomes
    /// the new preferred format. Returns `None` when nothing matches or when
    /// the length of the preferred format cannot be determined.
    fn parse_bytes(&mut self, val: &[u8], _time_unit: Option<TimeUnit>) -> Option<i32> {
        if self.fmt_len == 0 {
            self.fmt_len = strptime::fmt_len(self.latest_fmt.as_bytes())?;
        }

        // SAFETY: `self.fmt_len` is non-zero here and was computed by
        // `strptime::fmt_len` from `self.latest_fmt`, the format passed alongside it.
        // NOTE(review): assumes this is the contract of `StrpTimeState::parse` — confirm.
        let fast_path = unsafe {
            self.transform_bytes
                .parse(val, self.latest_fmt.as_bytes(), self.fmt_len)
        };
        if let Some(ndt) = fast_path {
            return Some(naive_date_to_date(ndt.date()));
        }

        for fmt in self.patterns {
            // Every candidate needs its own length; reusing the cached length of
            // `latest_fmt` would hand the unsafe parser a mismatched pair.
            let Some(fmt_len) = strptime::fmt_len(fmt.as_bytes()) else {
                continue;
            };
            // SAFETY: `fmt_len` was computed from `fmt` on the line above.
            let attempt = unsafe { self.transform_bytes.parse(val, fmt.as_bytes(), fmt_len) };
            if let Some(ndt) = attempt {
                // Keep the cached length in sync with the cached format.
                self.latest_fmt = fmt;
                self.fmt_len = fmt_len;
                return Some(naive_date_to_date(ndt.date()));
            }
        }
        None
    }
}
210
/// Stateful parser that remembers which format last succeeded and tries it
/// first on the next value.
#[derive(Clone)]
pub struct DatetimeInfer<T: PolarsNumericType> {
    /// Pattern family this instance was built for.
    pub pattern: Pattern,
    /// Candidate format strings, tried in order when `latest_fmt` fails.
    patterns: &'static [&'static str],
    /// Format that is tried first; updated whenever a different candidate succeeds.
    latest_fmt: &'static str,
    /// String-based parser taking `(value, format)`.
    transform: fn(&str, &str) -> Option<T::Native>,
    /// State used by the byte-based parser in `parse_bytes`.
    transform_bytes: StrpTimeState,
    /// Cached `strptime::fmt_len` result for `latest_fmt`; `0` means "not computed yet".
    fmt_len: u16,
    /// Logical dtype the parsed physical values are cast to.
    pub logical_type: DataType,
}
221
/// Fallible conversion from `T` that additionally takes an optional time unit.
pub trait TryFromWithUnit<T>: Sized {
    /// Error type associated with the conversion.
    type Error;
    /// Builds `Self` from `pattern`; `unit` is the time unit to use, if the
    /// target type needs one.
    fn try_from_with_unit(pattern: T, unit: Option<TimeUnit>) -> PolarsResult<Self>;
}
226
#[cfg(feature = "dtype-datetime")]
impl TryFromWithUnit<Pattern> for DatetimeInfer<Int64Type> {
    type Error = PolarsError;

    /// Builds a datetime inferer for `value` producing timestamps in `time_unit`.
    ///
    /// Date-only patterns are widened to their datetime counterparts. The
    /// offset-aware pattern uses the timezone-aware string parser; all other
    /// patterns use the naive one.
    ///
    /// # Errors
    ///
    /// Returns a `ComputeError` when `time_unit` is `None`.
    fn try_from_with_unit(value: Pattern, time_unit: Option<TimeUnit>) -> PolarsResult<Self> {
        // Report a missing unit as an error: this function is already fallible,
        // so there is no reason to panic here.
        let time_unit = time_unit
            .ok_or_else(|| polars_err!(ComputeError: "time_unit must be provided for datetime"))?;

        let transform = match (time_unit, value) {
            (TimeUnit::Milliseconds, Pattern::DatetimeYMDZ) => transform_tzaware_datetime_ms,
            (TimeUnit::Milliseconds, _) => transform_datetime_ms,
            (TimeUnit::Microseconds, Pattern::DatetimeYMDZ) => transform_tzaware_datetime_us,
            (TimeUnit::Microseconds, _) => transform_datetime_us,
            (TimeUnit::Nanoseconds, Pattern::DatetimeYMDZ) => transform_tzaware_datetime_ns,
            (TimeUnit::Nanoseconds, _) => transform_datetime_ns,
        };
        let (pattern, patterns) = match value {
            Pattern::DatetimeDMY | Pattern::DateDMY => {
                (Pattern::DatetimeDMY, patterns::DATETIME_D_M_Y)
            },
            Pattern::DatetimeYMD | Pattern::DateYMD => {
                (Pattern::DatetimeYMD, patterns::DATETIME_Y_M_D)
            },
            Pattern::DatetimeYMDZ => (Pattern::DatetimeYMDZ, patterns::DATETIME_Y_M_D_Z),
            Pattern::Time => (Pattern::Time, patterns::TIME_H_M_S),
        };

        Ok(DatetimeInfer {
            pattern,
            patterns,
            latest_fmt: patterns[0],
            transform,
            transform_bytes: StrpTimeState::default(),
            // 0 marks the format length as "not computed yet".
            fmt_len: 0,
            logical_type: DataType::Datetime(time_unit, None),
        })
    }
}
264
#[cfg(feature = "dtype-date")]
impl TryFromWithUnit<Pattern> for DatetimeInfer<Int32Type> {
    type Error = PolarsError;

    /// Builds a date inferer for one of the two date-only patterns.
    ///
    /// `_time_unit` is ignored. Any pattern other than `DateDMY` / `DateYMD`
    /// yields a `ComputeError`.
    fn try_from_with_unit(value: Pattern, _time_unit: Option<TimeUnit>) -> PolarsResult<Self> {
        let (pattern, patterns) = match value {
            Pattern::DateDMY => (Pattern::DateDMY, patterns::DATE_D_M_Y),
            Pattern::DateYMD => (Pattern::DateYMD, patterns::DATE_Y_M_D),
            _ => polars_bail!(ComputeError: "could not convert pattern"),
        };

        Ok(DatetimeInfer {
            pattern,
            patterns,
            latest_fmt: patterns[0],
            transform: transform_date,
            transform_bytes: StrpTimeState::default(),
            fmt_len: 0,
            logical_type: DataType::Date,
        })
    }
}
293
294impl<T: PolarsNumericType> DatetimeInfer<T> {
295 pub fn parse(&mut self, val: &str) -> Option<T::Native> {
296 match (self.transform)(val, self.latest_fmt) {
297 Some(parsed) => Some(parsed),
298 None => {
300 if !self.pattern.is_inferable(val) {
301 return None;
302 }
303 for fmt in self.patterns {
304 self.fmt_len = 0;
305 if let Some(parsed) = (self.transform)(val, fmt) {
306 self.latest_fmt = fmt;
307 return Some(parsed);
308 }
309 }
310 None
311 },
312 }
313 }
314}
315
316impl<T: PolarsNumericType> DatetimeInfer<T> {
317 fn coerce_string(&mut self, ca: &StringChunked) -> Series {
318 let chunks = ca.downcast_iter().map(|array| {
319 let iter = array
320 .into_iter()
321 .map(|opt_val| opt_val.and_then(|val| self.parse(val)));
322 PrimitiveArray::from_trusted_len_iter(iter)
323 });
324 ChunkedArray::<T>::from_chunk_iter(ca.name().clone(), chunks)
325 .into_series()
326 .cast(&self.logical_type)
327 .unwrap()
328 .with_name(ca.name().clone())
329 }
330}
331
/// Parses `val` as a date using `fmt` and converts it with
/// `naive_date_to_date`; returns `None` if parsing fails.
#[cfg(feature = "dtype-date")]
fn transform_date(val: &str, fmt: &str) -> Option<i32> {
    let date = NaiveDate::parse_from_str(val, fmt).ok()?;
    Some(naive_date_to_date(date))
}
338
/// Parses `val` with `fmt` into a nanosecond timestamp.
///
/// If the datetime parse fails with `NotEnough`, `val` is re-parsed as a date
/// and midnight is used as the time of day. Any other failure yields `None`.
#[cfg(feature = "dtype-datetime")]
pub(crate) fn transform_datetime_ns(val: &str, fmt: &str) -> Option<i64> {
    let ndt = match NaiveDateTime::parse_from_str(val, fmt) {
        Ok(ndt) => ndt,
        Err(err) if matches!(err.kind(), ParseErrorKind::NotEnough) => {
            let date = NaiveDate::parse_from_str(val, fmt).ok()?;
            date.and_hms_opt(0, 0, 0).unwrap()
        },
        Err(_) => return None,
    };
    Some(datetime_to_timestamp_ns(ndt))
}
351
352fn transform_tzaware_datetime_ns(val: &str, fmt: &str) -> Option<i64> {
353 let dt = DateTime::parse_from_str(val, fmt);
354 dt.ok().map(|dt| datetime_to_timestamp_ns(dt.naive_utc()))
355}
356
/// Parses `val` with `fmt` into a microsecond timestamp.
///
/// If the datetime parse fails with `NotEnough`, `val` is re-parsed as a date
/// and midnight is used as the time of day. Any other failure yields `None`.
#[cfg(feature = "dtype-datetime")]
pub(crate) fn transform_datetime_us(val: &str, fmt: &str) -> Option<i64> {
    let ndt = match NaiveDateTime::parse_from_str(val, fmt) {
        Ok(ndt) => ndt,
        Err(err) if matches!(err.kind(), ParseErrorKind::NotEnough) => {
            let date = NaiveDate::parse_from_str(val, fmt).ok()?;
            date.and_hms_opt(0, 0, 0).unwrap()
        },
        Err(_) => return None,
    };
    Some(datetime_to_timestamp_us(ndt))
}
369
370fn transform_tzaware_datetime_us(val: &str, fmt: &str) -> Option<i64> {
371 let dt = DateTime::parse_from_str(val, fmt);
372 dt.ok().map(|dt| datetime_to_timestamp_us(dt.naive_utc()))
373}
374
/// Parses `val` with `fmt` into a millisecond timestamp.
///
/// If the datetime parse fails with `NotEnough`, `val` is re-parsed as a date
/// and midnight is used as the time of day. Any other failure yields `None`.
#[cfg(feature = "dtype-datetime")]
pub(crate) fn transform_datetime_ms(val: &str, fmt: &str) -> Option<i64> {
    let ndt = match NaiveDateTime::parse_from_str(val, fmt) {
        Ok(ndt) => ndt,
        Err(err) if matches!(err.kind(), ParseErrorKind::NotEnough) => {
            let date = NaiveDate::parse_from_str(val, fmt).ok()?;
            date.and_hms_opt(0, 0, 0).unwrap()
        },
        Err(_) => return None,
    };
    Some(datetime_to_timestamp_ms(ndt))
}
387
388fn transform_tzaware_datetime_ms(val: &str, fmt: &str) -> Option<i64> {
389 let dt = DateTime::parse_from_str(val, fmt);
390 dt.ok().map(|dt| datetime_to_timestamp_ms(dt.naive_utc()))
391}
392
393pub fn infer_pattern_single(val: &str) -> Option<Pattern> {
394 infer_pattern_date_single(val)
396 .or_else(|| infer_pattern_time_single(val))
397 .or_else(|| infer_pattern_datetime_single(val))
398}
399
400fn infer_pattern_datetime_single(val: &str) -> Option<Pattern> {
401 if patterns::DATETIME_D_M_Y.iter().any(|fmt| {
402 NaiveDateTime::parse_from_str(val, fmt).is_ok()
403 || NaiveDate::parse_from_str(val, fmt).is_ok()
404 }) {
405 Some(Pattern::DatetimeDMY)
406 } else if patterns::DATETIME_Y_M_D.iter().any(|fmt| {
407 NaiveDateTime::parse_from_str(val, fmt).is_ok()
408 || NaiveDate::parse_from_str(val, fmt).is_ok()
409 }) {
410 Some(Pattern::DatetimeYMD)
411 } else if patterns::DATETIME_Y_M_D_Z
412 .iter()
413 .any(|fmt| NaiveDateTime::parse_from_str(val, fmt).is_ok())
414 {
415 Some(Pattern::DatetimeYMDZ)
416 } else {
417 None
418 }
419}
420
421fn infer_pattern_date_single(val: &str) -> Option<Pattern> {
422 if patterns::DATE_D_M_Y
423 .iter()
424 .any(|fmt| NaiveDate::parse_from_str(val, fmt).is_ok())
425 {
426 Some(Pattern::DateDMY)
427 } else if patterns::DATE_Y_M_D
428 .iter()
429 .any(|fmt| NaiveDate::parse_from_str(val, fmt).is_ok())
430 {
431 Some(Pattern::DateYMD)
432 } else {
433 None
434 }
435}
436
437fn infer_pattern_time_single(val: &str) -> Option<Pattern> {
438 patterns::TIME_H_M_S
439 .iter()
440 .any(|fmt| NaiveTime::parse_from_str(val, fmt).is_ok())
441 .then_some(Pattern::Time)
442}
443
/// Parses `ca` into datetimes of unit `tu` without a caller-supplied time zone.
///
/// With `exact` set, parsing goes through `to_datetime`; otherwise through
/// `as_datetime_not_exact`. With `strict` set, a difference in null count
/// between input and output is reported through `handle_casting_failures`.
#[cfg(feature = "dtype-datetime")]
pub fn to_datetime_with_inferred_tz(
    ca: &StringChunked,
    tu: TimeUnit,
    strict: bool,
    exact: bool,
    ambiguous: &StringChunked,
) -> PolarsResult<DatetimeChunked> {
    use super::StringMethods;

    let parsed = if exact {
        to_datetime(ca, tu, None, ambiguous, false)?
    } else {
        ca.as_datetime_not_exact(None, tu, false, None, ambiguous, false)?
    };

    // Nothing to validate unless strict mode saw new nulls appear.
    if !strict || ca.null_count() == parsed.null_count() {
        return Ok(parsed);
    }

    let input = ca.clone().into_series();
    let output = parsed.clone().into_series();
    polars_core::utils::handle_casting_failures(&input, &output)?;

    Ok(parsed)
}
469
/// Parses `ca` into datetimes of unit `tu`, inferring the format from the data.
///
/// An all-null input yields an all-null datetime column carrying `tz`.
/// Otherwise the pattern is inferred from the first value, starting at the
/// first non-null one, that matches a known datetime pattern, and the whole
/// column is parsed with it.
///
/// # Errors
///
/// Fails when no value matches a known datetime pattern, when
/// `ensure_matching_time_zone` is set for offset-aware input but `tz` is
/// `None`, or when setting/replacing the time zone fails.
#[cfg(feature = "dtype-datetime")]
pub fn to_datetime(
    ca: &StringChunked,
    tu: TimeUnit,
    tz: Option<&TimeZone>,
    _ambiguous: &StringChunked,
    // Only consulted for offset-aware input (requires the `timezones` feature).
    ensure_matching_time_zone: bool,
) -> PolarsResult<DatetimeChunked> {
    match ca.first_non_null() {
        None => {
            Ok(Int64Chunked::full_null(ca.name().clone(), ca.len()).into_datetime(tu, tz.cloned()))
        },
        Some(idx) => {
            // Skip the leading nulls; inference only looks at values from `idx` on.
            let subset = ca.slice(idx as i64, ca.len());
            let pattern = subset
                .into_iter()
                .find_map(|opt_val| opt_val.and_then(infer_pattern_datetime_single))
                .ok_or_else(|| polars_err!(parse_fmt_idk = "date"))?;
            let mut infer = DatetimeInfer::<Int64Type>::try_from_with_unit(pattern, Some(tu))?;
            // Each arm evaluates to a nested result: the outer layer (from
            // `.datetime()`) is unwrapped by `?`, the inner one is returned.
            match pattern {
                // Offset-aware input: tag the result with `tz`, falling back to UTC.
                #[cfg(feature = "timezones")]
                Pattern::DatetimeYMDZ => infer.coerce_string(ca).datetime().map(|ca| {
                    polars_ensure!(
                        !ensure_matching_time_zone || tz.is_some(),
                        to_datetime_tz_mismatch
                    );

                    let mut ca = ca.clone();
                    ca.set_time_unit_and_time_zone(tu, tz.cloned().unwrap_or(TimeZone::UTC))?;
                    Ok(ca)
                })?,
                // Naive input: set the unit, then apply `tz` via `replace_time_zone`
                // when one is given and the `timezones` feature is enabled.
                _ => infer.coerce_string(ca).datetime().map(|ca| {
                    let mut ca = ca.clone();
                    ca.set_time_unit(tu);
                    match tz {
                        #[cfg(feature = "timezones")]
                        Some(tz) => polars_ops::prelude::replace_time_zone(
                            &ca,
                            Some(tz),
                            _ambiguous,
                            NonExistent::Raise,
                        ),
                        _ => Ok(ca),
                    }
                })?,
            }
        },
    }
}
/// Parses `ca` into dates, inferring the format from the data.
///
/// An all-null input yields an all-null date column. Otherwise the pattern is
/// inferred from the first value, starting at the first non-null one, that
/// matches a known date pattern, and the whole column is parsed with it.
///
/// # Errors
///
/// Fails when no value matches a known date pattern or when the inferer
/// cannot be built for the inferred pattern.
#[cfg(feature = "dtype-date")]
pub(crate) fn to_date(ca: &StringChunked) -> PolarsResult<DateChunked> {
    let Some(idx) = ca.first_non_null() else {
        return Ok(Int32Chunked::full_null(ca.name().clone(), ca.len()).into_date());
    };

    // Skip the leading nulls; inference only looks at values from `idx` on.
    let subset = ca.slice(idx as i64, ca.len());
    let pattern = subset
        .into_iter()
        .find_map(|opt_val| opt_val.and_then(infer_pattern_date_single))
        .ok_or_else(|| polars_err!(parse_fmt_idk = "date"))?;
    // Propagate a construction failure instead of panicking, as `to_datetime` does.
    let mut infer = DatetimeInfer::<Int32Type>::try_from_with_unit(pattern, None)?;
    infer.coerce_string(ca).date().cloned()
}
535}