1#![allow(unsafe_op_in_unsafe_fn)]
2use chrono::{NaiveDate, NaiveDateTime};
5
6use crate::chunkedarray::{PolarsResult, polars_bail};
7
// Regular expressions used to detect which time-of-day directives a format
// string contains. Every pattern allows an optional `_` or `-` flag between
// the `%` and the directive letter.
polars_utils::regex_cache::cached_regex! {
    // Matches `%H`, `%k`, `%I` or `%l`.
    static HOUR_PATTERN = r"%[_-]?[HkIl]";
    // Matches `%M`.
    static MINUTE_PATTERN = r"%[_-]?M";
    // Matches `%S`.
    static SECOND_PATTERN = r"%[_-]?S";
    // Matches `%I` or `%l`, the subset of `HOUR_PATTERN` treated as 12-hour directives.
    static TWELVE_HOUR_PATTERN = r"%[_-]?[Il]";
    // Matches `%p` or `%P`.
    static MERIDIEM_PATTERN = r"%[_-]?[pP]";
}
15
16#[inline]
17fn update_and_parse<T: atoi_simd::Parse>(
18 incr: usize,
19 offset: usize,
20 vals: &[u8],
21) -> Option<(T, usize)> {
22 let new_offset = offset + incr;
24 let bytes = vals.get(offset..new_offset)?;
25 let (val, parsed) = atoi_simd::parse_any(bytes).ok()?;
26 if parsed == 0 {
27 None
28 } else {
29 Some((val, new_offset))
30 }
31}
32
/// Parses a three-letter month abbreviation (`Jan`, `Feb`, …, `Dec`) located
/// at `val[offset..offset + 3]`.
///
/// On success returns the month number (1 for `Jan` through 12 for `Dec`)
/// together with the offset of the first byte after the abbreviation.
///
/// Returns `None` when fewer than three bytes are available at `offset` or
/// when the bytes are not one of the twelve abbreviations. The comparison is
/// case-sensitive.
#[inline]
fn parse_month_abbrev(val: &[u8], offset: usize) -> Option<(u32, usize)> {
    let new_offset = offset.checked_add(3)?;
    // Checked slicing: input that is too short yields `None` rather than a panic.
    let month = match val.get(offset..new_offset)? {
        b"Jan" => 1,
        b"Feb" => 2,
        b"Mar" => 3,
        b"Apr" => 4,
        b"May" => 5,
        b"Jun" => 6,
        b"Jul" => 7,
        b"Aug" => 8,
        b"Sep" => 9,
        b"Oct" => 10,
        b"Nov" => 11,
        b"Dec" => 12,
        _ => return None,
    };
    Some((month, new_offset))
}
52
53pub(super) fn compile_fmt(fmt: &str) -> PolarsResult<String> {
57 if HOUR_PATTERN.is_match(fmt) ^ MINUTE_PATTERN.is_match(fmt) {
62 polars_bail!(ComputeError: "Invalid format string: \
63 Please either specify both hour and minute, or neither.");
64 }
65 if SECOND_PATTERN.is_match(fmt) && !HOUR_PATTERN.is_match(fmt) {
66 polars_bail!(ComputeError: "Invalid format string: \
67 Found seconds directive, but no hours directive.");
68 }
69 if TWELVE_HOUR_PATTERN.is_match(fmt) ^ MERIDIEM_PATTERN.is_match(fmt) {
70 polars_bail!(ComputeError: "Invalid format string: \
71 Please either specify both 12-hour directive and meridiem directive, or neither.");
72 }
73
74 Ok(fmt
75 .replace("%D", "%m/%d/%y")
76 .replace("%R", "%H:%M")
77 .replace("%T", "%H:%M:%S")
78 .replace("%X", "%H:%M:%S")
79 .replace("%F", "%Y-%m-%d"))
80}
81
/// Parser handle exposing the `parse` method for date/datetime strings.
///
/// The struct has no fields; instances are created through `Default`.
#[derive(Default, Clone)]
pub(super) struct StrpTimeState {}
84
85impl StrpTimeState {
86 #[inline]
87 pub(super) unsafe fn parse(
90 &mut self,
91 val: &[u8],
92 fmt: &[u8],
93 fmt_len: u16,
94 ) -> Option<NaiveDateTime> {
95 let mut offset = 0;
96 let mut negative = false;
97 if val.starts_with(b"-") && fmt.starts_with(b"%Y") {
98 offset = 1;
99 negative = true;
100 }
101 if val.len() - offset != (fmt_len as usize) {
102 return None;
103 }
104
105 const ESCAPE: u8 = b'%';
106 let mut year: i32 = 1;
107 let mut month: u32 = 1;
110 let mut day: u32 = 1;
111 let mut hour: u32 = 0;
112 let mut min: u32 = 0;
113 let mut sec: u32 = 0;
114 let mut nano: u32 = 0;
115
116 let mut fmt_iter = fmt.iter();
117
118 while let Some(fmt_b) = fmt_iter.next() {
119 debug_assert!(offset < val.len());
120 let b = *val.get_unchecked(offset);
121 if *fmt_b == ESCAPE {
122 let next = fmt_iter.next();
124 debug_assert!(next.is_some());
125 match next.unwrap_unchecked() {
126 b'Y' => {
127 (year, offset) = update_and_parse(4, offset, val)?;
128 if negative {
129 year *= -1
130 }
131 },
132 b'm' => {
133 (month, offset) = update_and_parse(2, offset, val)?;
134 if month > 12 {
135 return None;
136 }
137 },
138 b'b' => {
139 (month, offset) = parse_month_abbrev(val, offset)?;
140 },
141 b'd' => {
142 (day, offset) = update_and_parse(2, offset, val)?;
143 },
144 b'H' => {
145 (hour, offset) = update_and_parse(2, offset, val)?;
146 },
147 b'M' => {
148 (min, offset) = update_and_parse(2, offset, val)?;
149 },
150 b'S' => {
151 (sec, offset) = update_and_parse(2, offset, val)?;
152 },
153 b'y' => {
154 let new_offset = offset + 2;
155 let bytes = val.get_unchecked(offset..new_offset);
156
157 let (decade, parsed) = atoi_simd::parse_any::<i32>(bytes).ok()?;
158 if parsed == 0 {
159 return None;
160 }
161
162 if decade < 70 {
163 year = 2000 + decade;
164 } else {
165 year = 1900 + decade;
166 }
167 offset = new_offset;
168 },
169 b'9' => {
170 (nano, offset) = update_and_parse(9, offset, val)?;
171 break;
172 },
173 b'6' => {
174 (nano, offset) = update_and_parse(6, offset, val)?;
175 nano *= 1000;
176 break;
177 },
178 b'3' => {
179 (nano, offset) = update_and_parse(3, offset, val)?;
180 nano *= 1_000_000;
181 break;
182 },
183 _ => return None,
184 }
185 }
186 else if b == *fmt_b {
188 offset += 1;
189 } else {
190 return None;
191 }
192 }
193 if offset == val.len() {
195 NaiveDate::from_ymd_opt(year, month, day)
196 .and_then(|nd| nd.and_hms_nano_opt(hour, min, sec, nano))
197 }
198 else {
200 None
201 }
202 }
203}
204
205pub(super) fn fmt_len(fmt: &[u8]) -> Option<u16> {
206 let mut iter = fmt.iter();
207 let mut cnt = 0;
208
209 while let Some(&val) = iter.next() {
210 match val {
211 b'%' => match iter.next() {
212 Some(&next_val) => match next_val {
213 b'Y' => cnt += 4,
214 b'y' => cnt += 2,
215 b'd' => cnt += 2,
216 b'm' => cnt += 2,
217 b'b' => cnt += 3,
218 b'H' => cnt += 2,
219 b'M' => cnt += 2,
220 b'S' => cnt += 2,
221 b'9' => {
222 cnt += 9;
223 debug_assert_eq!(iter.next(), Some(&b'f'));
224 return Some(cnt);
225 },
226 b'6' => {
227 cnt += 6;
228 debug_assert_eq!(iter.next(), Some(&b'f'));
229 return Some(cnt);
230 },
231 b'3' => {
232 cnt += 3;
233 debug_assert_eq!(iter.next(), Some(&b'f'));
234 return Some(cnt);
235 },
236 _ => return None,
237 },
238 None => return None,
239 },
240 _ => {
241 cnt += 1;
242 },
243 }
244 }
245 Some(cnt)
246}
247
#[cfg(test)]
mod test {
    use super::*;

    /// Builds the expected `NaiveDateTime` for a test case.
    fn datetime(ymd: (i32, u32, u32), hms: (u32, u32, u32), nano: u32) -> NaiveDateTime {
        NaiveDate::from_ymd_opt(ymd.0, ymd.1, ymd.2)
            .unwrap()
            .and_hms_nano_opt(hms.0, hms.1, hms.2, nano)
            .unwrap()
    }

    #[test]
    fn test_parsing() {
        // (value, format, expected `fmt_len`, expected parse result)
        let patterns = [
            (
                "2021-01-01",
                "%Y-%m-%d",
                10,
                Some(datetime((2021, 1, 1), (0, 0, 0), 0)),
            ),
            (
                "2021-01-01 07:45:12",
                "%Y-%m-%d %H:%M:%S",
                19,
                Some(datetime((2021, 1, 1), (7, 45, 12), 0)),
            ),
            (
                "2019-04-18T02:45:55.555000000",
                "%Y-%m-%dT%H:%M:%S.%9f",
                29,
                Some(datetime((2019, 4, 18), (2, 45, 55), 555_000_000)),
            ),
            (
                "2019-04-18T02:45:55.555000",
                "%Y-%m-%dT%H:%M:%S.%6f",
                26,
                Some(datetime((2019, 4, 18), (2, 45, 55), 555_000_000)),
            ),
            (
                "2019-04-18T02:45:55.555",
                "%Y-%m-%dT%H:%M:%S.%3f",
                23,
                Some(datetime((2019, 4, 18), (2, 45, 55), 555_000_000)),
            ),
            // Month abbreviation.
            (
                "18-Apr-2019",
                "%d-%b-%Y",
                11,
                Some(datetime((2019, 4, 18), (0, 0, 0), 0)),
            ),
            // Two-digit year below the pivot.
            (
                "21-01-01",
                "%y-%m-%d",
                8,
                Some(datetime((2021, 1, 1), (0, 0, 0), 0)),
            ),
            // Literal separator does not match.
            ("2021/01/01", "%Y-%m-%d", 10, None),
            // Month out of range.
            ("2021-13-01", "%Y-%m-%d", 10, None),
        ];

        for (val, fmt, len, expected) in patterns {
            assert_eq!(fmt_len(fmt.as_bytes()), Some(len), "fmt_len({fmt:?})");
            // SAFETY: `len` was just checked to equal `fmt_len(fmt)`, and no
            // format above ends with a dangling `%`.
            let parsed =
                unsafe { StrpTimeState::default().parse(val.as_bytes(), fmt.as_bytes(), len) };
            assert_eq!(parsed, expected, "parsing {val:?} with {fmt:?}");
        }
    }
}