1#![allow(unsafe_op_in_unsafe_fn)]
2use chrono::{NaiveDate, NaiveDateTime};
5
6use crate::chunkedarray::{PolarsResult, polars_bail};
7
// Regexes used by `compile_fmt` to validate which time directives a format string contains.
// Every pattern accepts an optional `_` or `-` between the `%` and the directive letter.
polars_utils::regex_cache::cached_regex! {
    // Any hour directive: `%H`, `%k`, `%I` or `%l`.
    static HOUR_PATTERN = r"%[_-]?[HkIl]";
    // The minute directive `%M`.
    static MINUTE_PATTERN = r"%[_-]?M";
    // The second directive `%S`.
    static SECOND_PATTERN = r"%[_-]?S";
    // The 12-hour subset of the hour directives: `%I` or `%l`.
    static TWELVE_HOUR_PATTERN = r"%[_-]?[Il]";
    // The meridiem (AM/PM) directive: `%p` or `%P`.
    static MERIDIEM_PATTERN = r"%[_-]?[pP]";
}
15
16#[inline]
17fn update_and_parse<T: atoi_simd::Parse>(
18 incr: usize,
19 offset: usize,
20 vals: &[u8],
21) -> Option<(T, usize)> {
22 let new_offset = offset + incr;
24 let bytes = vals.get(offset..new_offset)?;
25 let (val, parsed) = atoi_simd::parse_prefix::<T, true, false>(bytes).ok()?;
26 if parsed != incr {
27 None
28 } else {
29 Some((val, new_offset))
30 }
31}
32
/// Parses a three-letter English month abbreviation (`Jan` … `Dec`) at `offset` in `val`.
///
/// Returns the 1-based month number and the offset just past the abbreviation.
/// Matching is case-sensitive. Returns `None` when the bytes at `offset` are not a
/// known abbreviation, or when fewer than three bytes remain in `val`.
#[inline]
fn parse_month_abbrev(val: &[u8], offset: usize) -> Option<(u32, usize)> {
    let new_offset = offset.checked_add(3)?;
    // Checked slicing: a truncated input yields `None` rather than an indexing panic.
    let month = match val.get(offset..new_offset)? {
        b"Jan" => 1,
        b"Feb" => 2,
        b"Mar" => 3,
        b"Apr" => 4,
        b"May" => 5,
        b"Jun" => 6,
        b"Jul" => 7,
        b"Aug" => 8,
        b"Sep" => 9,
        b"Oct" => 10,
        b"Nov" => 11,
        b"Dec" => 12,
        _ => return None,
    };
    Some((month, new_offset))
}
/// Parses a full English month name (`January` … `December`) at `offset` in `val`.
///
/// Returns the 1-based month number and the offset just past the name. Matching is
/// case-sensitive. Returns `None` when no month name starts at `offset`, including
/// when `val` ends before the name is complete.
#[inline]
fn parse_month_full(val: &[u8], offset: usize) -> Option<(u32, usize)> {
    // Each name is uniquely identified by its first three bytes; the remaining
    // letters are verified afterwards.
    let min_offset = offset.checked_add(3)?;
    let (month, suffix): (u32, &str) = match val.get(offset..min_offset)? {
        b"Jan" => (1, "uary"),
        b"Feb" => (2, "ruary"),
        b"Mar" => (3, "ch"),
        b"Apr" => (4, "il"),
        b"May" => (5, ""),
        b"Jun" => (6, "e"),
        b"Jul" => (7, "y"),
        b"Aug" => (8, "ust"),
        b"Sep" => (9, "tember"),
        b"Oct" => (10, "ober"),
        b"Nov" => (11, "ember"),
        b"Dec" => (12, "ember"),
        _ => return None,
    };

    let new_offset = min_offset.checked_add(suffix.len())?;
    // Checked slicing: an input such as `"Jan"` (prefix present, rest missing) must
    // produce `None` instead of an out-of-bounds panic.
    let tail = val.get(min_offset..new_offset)?;
    (tail == suffix.as_bytes()).then_some((month, new_offset))
}
137pub(super) fn compile_fmt(fmt: &str) -> PolarsResult<String> {
141 if HOUR_PATTERN.is_match(fmt) ^ MINUTE_PATTERN.is_match(fmt) {
146 polars_bail!(ComputeError: "Invalid format string: \
147 Please either specify both hour and minute, or neither.");
148 }
149 if SECOND_PATTERN.is_match(fmt) && !HOUR_PATTERN.is_match(fmt) {
150 polars_bail!(ComputeError: "Invalid format string: \
151 Found seconds directive, but no hours directive.");
152 }
153 if TWELVE_HOUR_PATTERN.is_match(fmt) ^ MERIDIEM_PATTERN.is_match(fmt) {
154 polars_bail!(ComputeError: "Invalid format string: \
155 Please either specify both 12-hour directive and meridiem directive, or neither.");
156 }
157
158 Ok(fmt
159 .replace("%D", "%m/%d/%y")
160 .replace("%R", "%H:%M")
161 .replace("%T", "%H:%M:%S")
162 .replace("%X", "%H:%M:%S")
163 .replace("%F", "%Y-%m-%d"))
164}
165
/// Handle for the hand-written, fixed-width datetime parser implemented in `parse`.
///
/// The struct currently has no fields; `parse` takes `&mut self` but there is no
/// state for it to read or modify.
#[derive(Default, Clone)]
pub(super) struct StrpTimeState {}
168
impl StrpTimeState {
    /// Parses `val` according to `fmt` into a `NaiveDateTime`.
    ///
    /// Supported directives are `%Y`, `%y`, `%m`, `%b`, `%B`, `%d`, `%H`, `%M`, `%S` and
    /// the fractional-second forms starting with `%9`, `%6` or `%3`; any other directive
    /// makes the function return `None`. Bytes of `fmt` that are not part of a directive
    /// must match `val` literally.
    ///
    /// Fields that `fmt` does not mention keep their defaults: year 1, month 1, day 1 and
    /// a time of 00:00:00.0.
    ///
    /// `fmt_len_val` is the expected byte length of `val` (after an optional leading `-`).
    /// Without a `%B` directive the length must match exactly; with `%B` it is treated as
    /// a minimum, because full month names vary in length.
    ///
    /// Returns `None` on any mismatch, when input is left over after `fmt` is exhausted,
    /// or when the parsed fields do not form a valid date/time.
    ///
    /// # Safety
    ///
    /// The body reads `val` with `get_unchecked` and calls `unwrap_unchecked` on the
    /// format iterator, guarded only by `debug_assert!`s. It therefore relies on
    /// - every `%` in `fmt` being followed by another byte, and
    /// - `fmt_len_val` being consistent with `fmt` — presumably the value returned by
    ///   `fmt_len(fmt)`; confirm against the callers.
    #[inline]
    pub(super) unsafe fn parse(
        &mut self,
        val: &[u8],
        fmt: &[u8],
        fmt_len_val: u16,
    ) -> Option<NaiveDateTime> {
        let mut offset = 0;
        let mut negative = false;
        // A leading minus sign is only accepted when the format starts with the year.
        if val.starts_with(b"-") && fmt.starts_with(b"%Y") {
            offset = 1;
            negative = true;
        }
        #[allow(non_snake_case)]
        let has_B_code = fmt.windows(2).any(|w| w == b"%B");
        // With `%B`, `fmt_len_val` is only a lower bound on the input length.
        let is_too_short = has_B_code && val.len() - offset < (fmt_len_val as usize);
        // Without `%B`, every directive is fixed-width, so the lengths must agree exactly.
        if (!has_B_code && val.len() - offset != (fmt_len_val as usize)) || is_too_short {
            return None;
        }

        const ESCAPE: u8 = b'%';
        let mut year: i32 = 1;
        let mut month: u32 = 1;
        let mut day: u32 = 1;
        let mut hour: u32 = 0;
        let mut min: u32 = 0;
        let mut sec: u32 = 0;
        let mut nano: u32 = 0;

        let mut fmt_iter = fmt.iter();

        while let Some(fmt_b) = fmt_iter.next() {
            debug_assert!(offset < val.len());
            // Unchecked read: relies on the length checks above (see `# Safety`).
            let b = *val.get_unchecked(offset);
            if *fmt_b == ESCAPE {
                let next = fmt_iter.next();
                // Relies on `fmt` never ending in a bare `%` (see `# Safety`).
                debug_assert!(next.is_some());
                match next.unwrap_unchecked() {
                    // Four-digit year; negated if the input started with `-`.
                    b'Y' => {
                        (year, offset) = update_and_parse(4, offset, val)?;
                        if negative {
                            year *= -1
                        }
                    },
                    // Two-digit month number; values above 12 are rejected here.
                    b'm' => {
                        (month, offset) = update_and_parse(2, offset, val)?;
                        if month > 12 {
                            return None;
                        }
                    },
                    // Three-letter month abbreviation.
                    b'b' => {
                        (month, offset) = parse_month_abbrev(val, offset)?;
                    },
                    // Full month name (variable width).
                    b'B' => {
                        (month, offset) = parse_month_full(val, offset)?;
                        // Now that the variable-width part is consumed, the rest of the
                        // input must have exactly the length the rest of the format needs.
                        let new_fmt_len = fmt_len(fmt_iter.as_slice())?;
                        let remaining_val_len = val.len() - offset;
                        if remaining_val_len != (new_fmt_len as usize) {
                            return None;
                        }
                    },
                    b'd' => {
                        (day, offset) = update_and_parse(2, offset, val)?;
                    },
                    b'H' => {
                        (hour, offset) = update_and_parse(2, offset, val)?;
                    },
                    b'M' => {
                        (min, offset) = update_and_parse(2, offset, val)?;
                    },
                    b'S' => {
                        (sec, offset) = update_and_parse(2, offset, val)?;
                    },
                    // Two-digit year: values below 70 map to 20xx, the rest to 19xx.
                    b'y' => {
                        let new_offset = offset + 2;
                        let bytes = val.get_unchecked(offset..new_offset);

                        let (decade, parsed) =
                            atoi_simd::parse_prefix::<i32, true, false>(bytes).ok()?;
                        // NOTE(review): unlike `update_and_parse`, this only rejects
                        // `parsed == 0`, yet `offset` still advances by 2 below. It looks
                        // like a single digit followed by a non-digit would be accepted
                        // and the non-digit skipped — confirm whether that is intended.
                        if parsed == 0 {
                            return None;
                        }

                        if decade < 70 {
                            year = 2000 + decade;
                        } else {
                            year = 1900 + decade;
                        }
                        offset = new_offset;
                    },
                    // Fractional seconds with 9, 6 or 3 digits, scaled to nanoseconds.
                    // The loop stops here without looking at the rest of `fmt`
                    // (`fmt_len` only accepts these directives as `%9f`/`%6f`/`%3f` at the
                    // very end of the format).
                    b'9' => {
                        (nano, offset) = update_and_parse(9, offset, val)?;
                        break;
                    },
                    b'6' => {
                        (nano, offset) = update_and_parse(6, offset, val)?;
                        nano *= 1000;
                        break;
                    },
                    b'3' => {
                        (nano, offset) = update_and_parse(3, offset, val)?;
                        nano *= 1_000_000;
                        break;
                    },
                    // Unsupported directive.
                    _ => return None,
                }
            }
            // Literal format byte: it must equal the input byte at the current position.
            else if b == *fmt_b {
                offset += 1;
            } else {
                return None;
            }
        }
        // Succeed only if the whole input was consumed.
        if offset == val.len() {
            NaiveDate::from_ymd_opt(year, month, day)
                .and_then(|nd| nd.and_hms_nano_opt(hour, min, sec, nano))
        }
        // Trailing input that the format did not account for.
        else {
            None
        }
    }
}
303
304pub(super) fn fmt_len(fmt: &[u8]) -> Option<u16> {
305 let mut iter = fmt.iter();
306 let mut cnt = 0;
307
308 while let Some(&val) = iter.next() {
309 match val {
310 b'%' => match iter.next() {
311 Some(&next_val) => match next_val {
312 b'Y' => cnt += 4,
313 b'y' => cnt += 2,
314 b'd' => cnt += 2,
315 b'm' => cnt += 2,
316 b'b' => cnt += 3,
317 b'B' => cnt += 3, b'H' => cnt += 2,
319 b'M' => cnt += 2,
320 b'S' => cnt += 2,
321 b'9' => {
322 cnt += 9;
323 if matches!(iter.next(), Some(&b'f')) && iter.next().is_none() {
324 return Some(cnt);
325 } else {
326 return None;
327 }
328 },
329 b'6' => {
330 cnt += 6;
331 if matches!(iter.next(), Some(&b'f')) && iter.next().is_none() {
332 return Some(cnt);
333 } else {
334 return None;
335 }
336 },
337 b'3' => {
338 cnt += 3;
339 if matches!(iter.next(), Some(&b'f')) && iter.next().is_none() {
340 return Some(cnt);
341 } else {
342 return None;
343 }
344 },
345 _ => return None,
346 },
347 None => return None,
348 },
349 _ => {
350 cnt += 1;
351 },
352 }
353 }
354 Some(cnt)
355}
356
#[cfg(test)]
mod test {
    use super::*;

    /// Builds an expected `NaiveDateTime` from `(year, month, day)`, `(hour, min, sec)`
    /// and a nanosecond part, panicking if the fixture itself is not a valid datetime.
    fn datetime(ymd: (i32, u32, u32), hms: (u32, u32, u32), nano: u32) -> NaiveDateTime {
        NaiveDate::from_ymd_opt(ymd.0, ymd.1, ymd.2)
            .and_then(|date| date.and_hms_nano_opt(hms.0, hms.1, hms.2, nano))
            .expect("test fixture must be a valid datetime")
    }

    #[test]
    fn test_parsing() {
        // (input, format, expected `fmt_len`, expected parse result)
        let patterns = [
            (
                "2021-01-01",
                "%Y-%m-%d",
                10,
                Some(datetime((2021, 1, 1), (0, 0, 0), 0)),
            ),
            (
                "2021-01-01 07:45:12",
                "%Y-%m-%d %H:%M:%S",
                19,
                Some(datetime((2021, 1, 1), (7, 45, 12), 0)),
            ),
            (
                "2019-04-18T02:45:55.555000000",
                "%Y-%m-%dT%H:%M:%S.%9f",
                29,
                Some(datetime((2019, 4, 18), (2, 45, 55), 555000000)),
            ),
            (
                "2019-04-18T02:45:55.555000",
                "%Y-%m-%dT%H:%M:%S.%6f",
                26,
                Some(datetime((2019, 4, 18), (2, 45, 55), 555000000)),
            ),
            (
                "2019-04-18T02:45:55.555",
                "%Y-%m-%dT%H:%M:%S.%3f",
                23,
                Some(datetime((2019, 4, 18), (2, 45, 55), 555000000)),
            ),
            // Input longer than the fixed-width format allows.
            ("2021-01-01 07", "%Y-%m-%d", 10, None),
            // Month number out of range.
            ("2021-13-01", "%Y-%m-%d", 10, None),
        ];

        for (val, fmt, len, expected) in patterns {
            assert_eq!(fmt_len(fmt.as_bytes()), Some(len), "fmt_len({fmt:?})");
            // SAFETY: every `%` in the formats above is followed by a directive byte and
            // `len` was just checked to equal `fmt_len(fmt)`.
            let parsed =
                unsafe { StrpTimeState::default().parse(val.as_bytes(), fmt.as_bytes(), len) };
            assert_eq!(parsed, expected, "parsing {val:?} with {fmt:?}");
        }
    }
}