polars_time/chunkedarray/string/
mod.rs
1pub mod infer;
2use chrono::DateTime;
3mod patterns;
4mod strptime;
5use chrono::ParseError;
6use chrono::format::ParseErrorKind;
7pub use patterns::Pattern;
8#[cfg(feature = "dtype-time")]
9use polars_core::chunked_array::temporal::time_to_time64ns;
10use polars_core::prelude::arity::unary_elementwise;
11use polars_utils::cache::LruCachedFunc;
12
13use super::*;
14#[cfg(feature = "dtype-date")]
15use crate::chunkedarray::date::naive_date_to_date;
16use crate::prelude::string::strptime::StrpTimeState;
17
/// Returns the first built-in time format that `convert` accepts for `val`.
///
/// `convert` is invoked as `convert(val, fmt)` for each candidate in
/// `patterns::TIME_H_M_S`, in declaration order. `None` is returned when no
/// candidate parses.
#[cfg(feature = "dtype-time")]
fn time_pattern<F, K>(val: &str, convert: F) -> Option<&'static str>
where
    F: Fn(&str, &str) -> chrono::ParseResult<K>,
{
    // One pass over the candidates is enough: chaining the same list onto itself
    // would only repeat attempts that have already failed.
    patterns::TIME_H_M_S
        .iter()
        .find(|fmt| convert(val, fmt).is_ok())
        .copied()
}
30
31fn datetime_pattern<F, K>(val: &str, convert: F) -> Option<&'static str>
32where
34 F: Fn(&str, &str) -> chrono::ParseResult<K>,
35{
36 patterns::DATETIME_Y_M_D
37 .iter()
38 .chain(patterns::DATETIME_D_M_Y)
39 .find(|fmt| convert(val, fmt).is_ok())
40 .copied()
41}
42
43fn date_pattern<F, K>(val: &str, convert: F) -> Option<&'static str>
44where
46 F: Fn(&str, &str) -> chrono::ParseResult<K>,
47{
48 patterns::DATE_Y_M_D
49 .iter()
50 .chain(patterns::DATE_D_M_Y)
51 .find(|fmt| convert(val, fmt).is_ok())
52 .copied()
53}
54
55struct ParseErrorByteCopy(ParseErrorKind);
56
57impl From<ParseError> for ParseErrorByteCopy {
58 fn from(e: ParseError) -> Self {
59 ParseErrorByteCopy(e.kind())
60 }
61}
62
63fn get_first_val(ca: &StringChunked) -> PolarsResult<&str> {
64 let idx = ca.first_non_null().ok_or_else(|| {
65 polars_err!(ComputeError:
66 "unable to determine date parsing format, all values are null",
67 )
68 })?;
69 Ok(ca.get(idx).expect("should not be null"))
70}
71
/// Infers a datetime format from the first non-null value of `ca_string`.
///
/// The built-in datetime patterns are first tried with
/// `NaiveDateTime::parse_from_str`; if none matches they are tried again with
/// `NaiveDate::parse_from_str`.
///
/// # Errors
///
/// Fails when every value is null or when no built-in pattern parses the
/// first value.
#[cfg(feature = "dtype-datetime")]
fn sniff_fmt_datetime(ca_string: &StringChunked) -> PolarsResult<&'static str> {
    let first = get_first_val(ca_string)?;
    if let Some(fmt) = datetime_pattern(first, NaiveDateTime::parse_from_str) {
        return Ok(fmt);
    }
    match datetime_pattern(first, NaiveDate::parse_from_str) {
        Some(fmt) => Ok(fmt),
        None => Err(polars_err!(parse_fmt_idk = "datetime")),
    }
}
79
/// Infers a date format from the first non-null value of `ca_string`.
///
/// # Errors
///
/// Fails when every value is null or when no built-in date pattern parses the
/// first value.
#[cfg(feature = "dtype-date")]
fn sniff_fmt_date(ca_string: &StringChunked) -> PolarsResult<&'static str> {
    let first = get_first_val(ca_string)?;
    match date_pattern(first, NaiveDate::parse_from_str) {
        Some(fmt) => Ok(fmt),
        None => Err(polars_err!(parse_fmt_idk = "date")),
    }
}
85
/// Infers a time format from the first non-null value of `ca_string`.
///
/// # Errors
///
/// Fails when every value is null or when no built-in time pattern parses the
/// first value.
#[cfg(feature = "dtype-time")]
fn sniff_fmt_time(ca_string: &StringChunked) -> PolarsResult<&'static str> {
    let first = get_first_val(ca_string)?;
    match time_pattern(first, NaiveTime::parse_from_str) {
        Some(fmt) => Ok(fmt),
        None => Err(polars_err!(parse_fmt_idk = "time")),
    }
}
91
92pub trait StringMethods: AsString {
93 #[cfg(feature = "dtype-time")]
94 fn as_time(&self, fmt: Option<&str>, use_cache: bool) -> PolarsResult<TimeChunked> {
96 let string_ca = self.as_string();
97 let fmt = match fmt {
98 Some(fmt) => fmt,
99 None => sniff_fmt_time(string_ca)?,
100 };
101 let use_cache = use_cache && string_ca.len() > 50;
102
103 let mut convert = LruCachedFunc::new(
104 |s| {
105 let naive_time = NaiveTime::parse_from_str(s, fmt).ok()?;
106 Some(time_to_time64ns(&naive_time))
107 },
108 (string_ca.len() as f64).sqrt() as usize,
109 );
110 let ca = unary_elementwise(string_ca, |opt_s| convert.eval(opt_s?, use_cache));
111 Ok(ca.with_name(string_ca.name().clone()).into())
112 }
113
114 #[cfg(feature = "dtype-date")]
115 fn as_date_not_exact(&self, fmt: Option<&str>) -> PolarsResult<DateChunked> {
119 let string_ca = self.as_string();
120 let fmt = match fmt {
121 Some(fmt) => fmt,
122 None => sniff_fmt_date(string_ca)?,
123 };
124 let ca = unary_elementwise(string_ca, |opt_s| {
125 let mut s = opt_s?;
126 let fmt_len = fmt.len();
127
128 for i in 1..(s.len().saturating_sub(fmt_len)) {
129 if s.is_empty() {
130 return None;
131 }
132 match NaiveDate::parse_from_str(s, fmt).map(naive_date_to_date) {
133 Ok(nd) => return Some(nd),
134 Err(e) => match ParseErrorByteCopy::from(e).0 {
135 ParseErrorKind::TooLong => {
136 s = &s[..s.len() - 1];
137 },
138 _ => {
139 s = &s[i..];
140 },
141 },
142 }
143 }
144 None
145 });
146 Ok(ca.with_name(string_ca.name().clone()).into())
147 }
148
149 #[cfg(feature = "dtype-datetime")]
150 fn as_datetime_not_exact(
154 &self,
155 fmt: Option<&str>,
156 tu: TimeUnit,
157 tz_aware: bool,
158 tz: Option<&TimeZone>,
159 _ambiguous: &StringChunked,
160 ) -> PolarsResult<DatetimeChunked> {
161 let string_ca = self.as_string();
162 let fmt = match fmt {
163 Some(fmt) => fmt,
164 None => sniff_fmt_datetime(string_ca)?,
165 };
166
167 let func = match tu {
168 TimeUnit::Nanoseconds => datetime_to_timestamp_ns,
169 TimeUnit::Microseconds => datetime_to_timestamp_us,
170 TimeUnit::Milliseconds => datetime_to_timestamp_ms,
171 };
172
173 let ca = unary_elementwise(string_ca, |opt_s| {
174 let mut s = opt_s?;
175 let fmt_len = fmt.len();
176
177 for i in 1..(s.len().saturating_sub(fmt_len)) {
178 if s.is_empty() {
179 return None;
180 }
181 let timestamp = if tz_aware {
182 DateTime::parse_from_str(s, fmt).map(|dt| func(dt.naive_utc()))
183 } else {
184 NaiveDateTime::parse_from_str(s, fmt).map(func)
185 };
186 match timestamp {
187 Ok(ts) => return Some(ts),
188 Err(e) => {
189 let e: ParseErrorByteCopy = e.into();
190 match e.0 {
191 ParseErrorKind::TooLong => {
192 s = &s[..s.len() - 1];
193 },
194 _ => {
195 s = &s[i..];
196 },
197 }
198 },
199 }
200 }
201 None
202 })
203 .with_name(string_ca.name().clone());
204 match (tz_aware, tz) {
205 #[cfg(feature = "timezones")]
206 (false, Some(tz)) => polars_ops::prelude::replace_time_zone(
207 &ca.into_datetime(tu, None),
208 Some(tz),
209 _ambiguous,
210 NonExistent::Raise,
211 ),
212 #[cfg(feature = "timezones")]
213 (true, tz) => Ok(ca.into_datetime(
214 tu,
215 tz.cloned().or_else(|| Some(PlSmallStr::from_static("UTC"))),
216 )),
217 _ => Ok(ca.into_datetime(tu, None)),
218 }
219 }
220
221 #[cfg(feature = "dtype-date")]
222 fn as_date(&self, fmt: Option<&str>, use_cache: bool) -> PolarsResult<DateChunked> {
224 let string_ca = self.as_string();
225 let fmt = match fmt {
226 Some(fmt) => fmt,
227 None => return infer::to_date(string_ca),
228 };
229 let use_cache = use_cache && string_ca.len() > 50;
230 let fmt = strptime::compile_fmt(fmt)?;
231
232 let ca = if let Some(fmt_len) = strptime::fmt_len(fmt.as_bytes()) {
234 let mut strptime_cache = StrpTimeState::default();
235 let mut convert = LruCachedFunc::new(
236 |s: &str| {
237 match unsafe { strptime_cache.parse(s.as_bytes(), fmt.as_bytes(), fmt_len) } {
239 None => NaiveDate::parse_from_str(s, &fmt).ok(),
241 Some(ndt) => Some(ndt.date()),
242 }
243 .map(naive_date_to_date)
244 },
245 (string_ca.len() as f64).sqrt() as usize,
246 );
247 unary_elementwise(string_ca, |val| convert.eval(val?, use_cache))
248 } else {
249 let mut convert = LruCachedFunc::new(
250 |s| {
251 let naive_date = NaiveDate::parse_from_str(s, &fmt).ok()?;
252 Some(naive_date_to_date(naive_date))
253 },
254 (string_ca.len() as f64).sqrt() as usize,
255 );
256 unary_elementwise(string_ca, |val| convert.eval(val?, use_cache))
257 };
258
259 Ok(ca.with_name(string_ca.name().clone()).into())
260 }
261
262 #[cfg(feature = "dtype-datetime")]
263 fn as_datetime(
265 &self,
266 fmt: Option<&str>,
267 tu: TimeUnit,
268 use_cache: bool,
269 tz_aware: bool,
270 tz: Option<&TimeZone>,
271 ambiguous: &StringChunked,
272 ) -> PolarsResult<DatetimeChunked> {
273 let string_ca = self.as_string();
274 let fmt = match fmt {
275 Some(fmt) => fmt,
276 None => return infer::to_datetime(string_ca, tu, tz, ambiguous),
277 };
278 let fmt = strptime::compile_fmt(fmt)?;
279 let use_cache = use_cache && string_ca.len() > 50;
280
281 let func = match tu {
282 TimeUnit::Nanoseconds => datetime_to_timestamp_ns,
283 TimeUnit::Microseconds => datetime_to_timestamp_us,
284 TimeUnit::Milliseconds => datetime_to_timestamp_ms,
285 };
286
287 if tz_aware {
288 #[cfg(feature = "timezones")]
289 {
290 let mut convert = LruCachedFunc::new(
291 |s: &str| {
292 let dt = DateTime::parse_from_str(s, &fmt).ok()?;
293 Some(func(dt.naive_utc()))
294 },
295 (string_ca.len() as f64).sqrt() as usize,
296 );
297 Ok(
298 unary_elementwise(string_ca, |opt_s| convert.eval(opt_s?, use_cache))
299 .with_name(string_ca.name().clone())
300 .into_datetime(
301 tu,
302 Some(
303 tz.cloned()
304 .unwrap_or_else(|| PlSmallStr::from_static("UTC")),
305 ),
306 ),
307 )
308 }
309 #[cfg(not(feature = "timezones"))]
310 {
311 panic!("activate 'timezones' feature")
312 }
313 } else {
314 let transform = match tu {
315 TimeUnit::Nanoseconds => infer::transform_datetime_ns,
316 TimeUnit::Microseconds => infer::transform_datetime_us,
317 TimeUnit::Milliseconds => infer::transform_datetime_ms,
318 };
319 let ca = if let Some(fmt_len) = self::strptime::fmt_len(fmt.as_bytes()) {
321 let mut strptime_cache = StrpTimeState::default();
322 let mut convert = LruCachedFunc::new(
323 |s: &str| {
324 match unsafe { strptime_cache.parse(s.as_bytes(), fmt.as_bytes(), fmt_len) }
326 {
327 None => transform(s, &fmt),
328 Some(ndt) => Some(func(ndt)),
329 }
330 },
331 (string_ca.len() as f64).sqrt() as usize,
332 );
333 unary_elementwise(string_ca, |opt_s| convert.eval(opt_s?, use_cache))
334 } else {
335 let mut convert = LruCachedFunc::new(
336 |s| transform(s, &fmt),
337 (string_ca.len() as f64).sqrt() as usize,
338 );
339 unary_elementwise(string_ca, |opt_s| convert.eval(opt_s?, use_cache))
340 };
341 let dt = ca
342 .with_name(string_ca.name().clone())
343 .into_datetime(tu, None);
344 match tz {
345 #[cfg(feature = "timezones")]
346 Some(tz) => polars_ops::prelude::replace_time_zone(
347 &dt,
348 Some(tz),
349 ambiguous,
350 NonExistent::Raise,
351 ),
352 _ => Ok(dt),
353 }
354 }
355 }
356}
357
358pub trait AsString {
359 fn as_string(&self) -> &StringChunked;
360}
361
362impl AsString for StringChunked {
363 fn as_string(&self) -> &StringChunked {
364 self
365 }
366}
367
368impl StringMethods for StringChunked {}