1#![allow(unsafe_op_in_unsafe_fn)]
2use chrono::{NaiveDate, NaiveDateTime};
5
6use crate::chunkedarray::{PolarsResult, polars_bail};
7
// Regexes that detect time-related directives in a user supplied format string.
// Each pattern tolerates an optional `_` or `-` between `%` and the directive letter.
polars_utils::regex_cache::cached_regex! {
    // Any hour directive: `%H`, `%k`, `%I` or `%l`.
    static HOUR_PATTERN = r"%[_-]?[HkIl]";
    // The minute directive `%M`.
    static MINUTE_PATTERN = r"%[_-]?M";
    // The second directive `%S`.
    static SECOND_PATTERN = r"%[_-]?S";
    // The 12-hour clock subset of the hour directives: `%I` or `%l`.
    static TWELVE_HOUR_PATTERN = r"%[_-]?[Il]";
    // The meridiem (AM/PM) directive: `%p` or `%P`.
    static MERIDIEM_PATTERN = r"%[_-]?[pP]";
}
15
16#[inline]
17fn update_and_parse<T: atoi_simd::Parse>(
18 incr: usize,
19 offset: usize,
20 vals: &[u8],
21) -> Option<(T, usize)> {
22 let new_offset = offset + incr;
24 let bytes = vals.get(offset..new_offset)?;
25 let (val, parsed) = atoi_simd::parse_any(bytes).ok()?;
26 if parsed != incr {
27 None
28 } else {
29 Some((val, new_offset))
30 }
31}
32
/// Parses a three-letter English month abbreviation (`Jan` … `Dec`) at `val[offset..]`.
///
/// Returns the 1-based month number together with the offset just past the abbreviation.
/// Yields `None` when the three bytes are not a known abbreviation or when fewer than
/// three bytes are available at `offset` (instead of panicking on the slice).
#[inline]
fn parse_month_abbrev(val: &[u8], offset: usize) -> Option<(u32, usize)> {
    let new_offset = offset.checked_add(3)?;
    // Checked slicing keeps short or truncated input on the `None` path.
    let month = match val.get(offset..new_offset)? {
        b"Jan" => 1,
        b"Feb" => 2,
        b"Mar" => 3,
        b"Apr" => 4,
        b"May" => 5,
        b"Jun" => 6,
        b"Jul" => 7,
        b"Aug" => 8,
        b"Sep" => 9,
        b"Oct" => 10,
        b"Nov" => 11,
        b"Dec" => 12,
        _ => return None,
    };
    Some((month, new_offset))
}
/// Parses a full English month name (`January` … `December`) at `val[offset..]`.
///
/// Returns the 1-based month number together with the offset just past the name.
/// Yields `None` when no month name starts at `offset`; this includes input that is
/// too short to hold the complete name (e.g. `Janu`), which is rejected instead of
/// triggering an out-of-bounds slice panic.
#[inline]
fn parse_month_full(val: &[u8], offset: usize) -> Option<(u32, usize)> {
    let rest = val.get(offset..)?;
    if rest.len() < 3 {
        return None;
    }
    // Every month name is identified by its first three letters; dispatch on those and
    // then verify the remainder of the name.
    let (prefix, tail) = rest.split_at(3);
    let (month, suffix): (u32, &str) = match prefix {
        b"Jan" => (1, "uary"),
        b"Feb" => (2, "ruary"),
        b"Mar" => (3, "ch"),
        b"Apr" => (4, "il"),
        b"May" => (5, ""),
        b"Jun" => (6, "e"),
        b"Jul" => (7, "y"),
        b"Aug" => (8, "ust"),
        b"Sep" => (9, "tember"),
        b"Oct" => (10, "ober"),
        b"Nov" => (11, "ember"),
        b"Dec" => (12, "ember"),
        _ => return None,
    };
    // `starts_with` is length-safe: a truncated name simply does not match.
    if tail.starts_with(suffix.as_bytes()) {
        Some((month, offset + 3 + suffix.len()))
    } else {
        None
    }
}
137pub(super) fn compile_fmt(fmt: &str) -> PolarsResult<String> {
141 if HOUR_PATTERN.is_match(fmt) ^ MINUTE_PATTERN.is_match(fmt) {
146 polars_bail!(ComputeError: "Invalid format string: \
147 Please either specify both hour and minute, or neither.");
148 }
149 if SECOND_PATTERN.is_match(fmt) && !HOUR_PATTERN.is_match(fmt) {
150 polars_bail!(ComputeError: "Invalid format string: \
151 Found seconds directive, but no hours directive.");
152 }
153 if TWELVE_HOUR_PATTERN.is_match(fmt) ^ MERIDIEM_PATTERN.is_match(fmt) {
154 polars_bail!(ComputeError: "Invalid format string: \
155 Please either specify both 12-hour directive and meridiem directive, or neither.");
156 }
157
158 Ok(fmt
159 .replace("%D", "%m/%d/%y")
160 .replace("%R", "%H:%M")
161 .replace("%T", "%H:%M:%S")
162 .replace("%X", "%H:%M:%S")
163 .replace("%F", "%Y-%m-%d"))
164}
165
/// Entry point for the hand-written strptime parser in this module; see its `parse`
/// method.
///
/// The struct has no fields, so `Default` and `Clone` are trivial.
#[derive(Default, Clone)]
pub(super) struct StrpTimeState {}
168
impl StrpTimeState {
    /// Parses `val` according to `fmt`, returning `None` on any mismatch.
    ///
    /// Handled directives are `%Y`, `%y`, `%m`, `%b`, `%B`, `%d`, `%H`, `%M`, `%S` and a
    /// fractional-second directive `%3f`, `%6f` or `%9f`; any other directive yields
    /// `None`. A leading `-` in `val` is accepted as a negative year when `fmt` starts
    /// with `%Y`. Fields that `fmt` does not mention keep their defaults
    /// (year 1, month 1, day 1, time 00:00:00.0).
    ///
    /// `fmt_len_val` is the expected byte length of `val`; with a `%B` directive in `fmt`
    /// it is treated as a minimum instead of an exact length.
    ///
    /// # Safety
    ///
    /// `val` is indexed without bounds checks and the byte following each `%` in `fmt` is
    /// unwrapped without a check. NOTE(review): this appears to require that
    /// `fmt_len_val` is the value `fmt_len` returns for this same `fmt` (which also
    /// guarantees every `%` is followed by a directive byte) — confirm against callers.
    #[inline]
    pub(super) unsafe fn parse(
        &mut self,
        val: &[u8],
        fmt: &[u8],
        fmt_len_val: u16,
    ) -> Option<NaiveDateTime> {
        let mut offset = 0;
        let mut negative = false;
        // A leading minus sign is only meaningful when the year comes first.
        if val.starts_with(b"-") && fmt.starts_with(b"%Y") {
            offset = 1;
            negative = true;
        }
        #[allow(non_snake_case)]
        let has_B_code = fmt.windows(2).any(|w| w == b"%B");
        // `%B` has a variable width, so with it the input only has to be at least as long
        // as `fmt_len_val`; the exact length is re-checked once the month name is consumed.
        let is_too_short = has_B_code && val.len() - offset < (fmt_len_val as usize);
        // Without `%B` every directive is fixed-width and the length must match exactly.
        if (!has_B_code && val.len() - offset != (fmt_len_val as usize)) || is_too_short {
            return None;
        }

        const ESCAPE: u8 = b'%';
        // Defaults for fields that the format does not specify.
        let mut year: i32 = 1;
        let mut month: u32 = 1;
        let mut day: u32 = 1;
        let mut hour: u32 = 0;
        let mut min: u32 = 0;
        let mut sec: u32 = 0;
        let mut nano: u32 = 0;

        let mut fmt_iter = fmt.iter();

        // `fmt_iter` is advanced inside the body as well (to read the directive byte),
        // hence the explicit `while let` instead of a `for` loop.
        while let Some(fmt_b) = fmt_iter.next() {
            debug_assert!(offset < val.len());
            // Unchecked read: relies on the length checks above (see `# Safety`).
            let b = *val.get_unchecked(offset);
            if *fmt_b == ESCAPE {
                let next = fmt_iter.next();
                // A `%` is expected to always be followed by a directive byte.
                debug_assert!(next.is_some());
                match next.unwrap_unchecked() {
                    b'Y' => {
                        (year, offset) = update_and_parse(4, offset, val)?;
                        if negative {
                            year *= -1
                        }
                    },
                    b'm' => {
                        (month, offset) = update_and_parse(2, offset, val)?;
                        if month > 12 {
                            return None;
                        }
                    },
                    b'b' => {
                        (month, offset) = parse_month_abbrev(val, offset)?;
                    },
                    b'B' => {
                        (month, offset) = parse_month_full(val, offset)?;
                        // The month name had a variable width: now that it is consumed,
                        // the rest of the input must match the rest of the format exactly.
                        let new_fmt_len = fmt_len(fmt_iter.as_slice())?;
                        let remaining_val_len = val.len() - offset;
                        if remaining_val_len != (new_fmt_len as usize) {
                            return None;
                        }
                    },
                    b'd' => {
                        (day, offset) = update_and_parse(2, offset, val)?;
                    },
                    b'H' => {
                        (hour, offset) = update_and_parse(2, offset, val)?;
                    },
                    b'M' => {
                        (min, offset) = update_and_parse(2, offset, val)?;
                    },
                    b'S' => {
                        (sec, offset) = update_and_parse(2, offset, val)?;
                    },
                    b'y' => {
                        let new_offset = offset + 2;
                        let bytes = val.get_unchecked(offset..new_offset);

                        // NOTE(review): unlike `update_and_parse`, this only rejects the
                        // case where nothing was parsed; it does not require both bytes
                        // to be consumed — confirm this is intended.
                        let (decade, parsed) = atoi_simd::parse_any::<i32>(bytes).ok()?;
                        if parsed == 0 {
                            return None;
                        }

                        // Two-digit years below 70 map to 20xx, all others to 19xx.
                        if decade < 70 {
                            year = 2000 + decade;
                        } else {
                            year = 1900 + decade;
                        }
                        offset = new_offset;
                    },
                    // Fractional seconds: the digit is the field width. The loop stops
                    // here, leaving the directive's trailing `f` unconsumed; `fmt_len`
                    // only accepts these directives at the very end of the format.
                    b'9' => {
                        (nano, offset) = update_and_parse(9, offset, val)?;
                        break;
                    },
                    b'6' => {
                        (nano, offset) = update_and_parse(6, offset, val)?;
                        // Microseconds to nanoseconds.
                        nano *= 1000;
                        break;
                    },
                    b'3' => {
                        (nano, offset) = update_and_parse(3, offset, val)?;
                        // Milliseconds to nanoseconds.
                        nano *= 1_000_000;
                        break;
                    },
                    _ => return None,
                }
            }
            // Literal format byte: it must match the input byte exactly.
            else if b == *fmt_b {
                offset += 1;
            } else {
                return None;
            }
        }
        // The whole input must have been consumed; out-of-range field values are
        // rejected by the `_opt` constructors.
        if offset == val.len() {
            NaiveDate::from_ymd_opt(year, month, day)
                .and_then(|nd| nd.and_hms_nano_opt(hour, min, sec, nano))
        }
        else {
            None
        }
    }
}
302
303pub(super) fn fmt_len(fmt: &[u8]) -> Option<u16> {
304 let mut iter = fmt.iter();
305 let mut cnt = 0;
306
307 while let Some(&val) = iter.next() {
308 match val {
309 b'%' => match iter.next() {
310 Some(&next_val) => match next_val {
311 b'Y' => cnt += 4,
312 b'y' => cnt += 2,
313 b'd' => cnt += 2,
314 b'm' => cnt += 2,
315 b'b' => cnt += 3,
316 b'B' => cnt += 3, b'H' => cnt += 2,
318 b'M' => cnt += 2,
319 b'S' => cnt += 2,
320 b'9' => {
321 cnt += 9;
322 if matches!(iter.next(), Some(&b'f')) && iter.next().is_none() {
323 return Some(cnt);
324 } else {
325 return None;
326 }
327 },
328 b'6' => {
329 cnt += 6;
330 if matches!(iter.next(), Some(&b'f')) && iter.next().is_none() {
331 return Some(cnt);
332 } else {
333 return None;
334 }
335 },
336 b'3' => {
337 cnt += 3;
338 if matches!(iter.next(), Some(&b'f')) && iter.next().is_none() {
339 return Some(cnt);
340 } else {
341 return None;
342 }
343 },
344 _ => return None,
345 },
346 None => return None,
347 },
348 _ => {
349 cnt += 1;
350 },
351 }
352 }
353 Some(cnt)
354}
355
#[cfg(test)]
mod test {
    use super::*;

    /// Builds an expected timestamp from `(year, month, day)`, `(hour, minute, second)`
    /// and nanoseconds, panicking if any component is out of range.
    fn datetime(ymd: (i32, u32, u32), hms: (u32, u32, u32), nano: u32) -> NaiveDateTime {
        NaiveDate::from_ymd_opt(ymd.0, ymd.1, ymd.2)
            .unwrap()
            .and_hms_nano_opt(hms.0, hms.1, hms.2, nano)
            .unwrap()
    }

    #[test]
    fn test_parsing() {
        // (input, format, expected `fmt_len`, expected timestamp)
        let patterns = [
            (
                "2021-01-01",
                "%Y-%m-%d",
                10,
                datetime((2021, 1, 1), (0, 0, 0), 0),
            ),
            (
                "2021-01-01 07:45:12",
                "%Y-%m-%d %H:%M:%S",
                19,
                datetime((2021, 1, 1), (7, 45, 12), 0),
            ),
            (
                "2021-01-01 07:45:12",
                "%Y-%m-%d %H:%M:%S",
                19,
                datetime((2021, 1, 1), (7, 45, 12), 0),
            ),
            (
                "2019-04-18T02:45:55.555000000",
                "%Y-%m-%dT%H:%M:%S.%9f",
                29,
                datetime((2019, 4, 18), (2, 45, 55), 555000000),
            ),
            (
                "2019-04-18T02:45:55.555000",
                "%Y-%m-%dT%H:%M:%S.%6f",
                26,
                datetime((2019, 4, 18), (2, 45, 55), 555000000),
            ),
            (
                "2019-04-18T02:45:55.555",
                "%Y-%m-%dT%H:%M:%S.%3f",
                23,
                datetime((2019, 4, 18), (2, 45, 55), 555000000),
            ),
        ];

        for (val, fmt, len, expected) in patterns {
            assert_eq!(fmt_len(fmt.as_bytes()), Some(len));
            // The length handed to `parse` is the one `fmt_len` reports for this format,
            // as checked by the assertion above.
            let parsed = unsafe {
                StrpTimeState::default().parse(val.as_bytes(), fmt.as_bytes(), len)
            };
            assert_eq!(parsed, Some(expected));
        }
    }
}