polars_time/chunkedarray/string/
mod.rs1pub mod infer;
2use chrono::DateTime;
3mod patterns;
4mod strptime;
5pub use patterns::Pattern;
6#[cfg(feature = "dtype-time")]
7use polars_core::chunked_array::temporal::time_to_time64ns;
8use polars_core::prelude::arity::unary_elementwise;
9use polars_utils::cache::LruCachedFunc;
10
11use super::*;
12#[cfg(feature = "dtype-date")]
13use crate::chunkedarray::date::naive_date_to_date;
14use crate::prelude::string::strptime::StrpTimeState;
15
/// Find the first built-in time format that parses `val`.
///
/// `convert` is invoked as `convert(val, fmt)` for every candidate in
/// `patterns::TIME_H_M_S`, in declaration order. The first format for which it
/// returns `Ok` is returned; `None` means no candidate matched.
#[cfg(feature = "dtype-time")]
fn time_pattern<F, K>(val: &str, convert: F) -> Option<&'static str>
where
    F: Fn(&str, &str) -> chrono::ParseResult<K>,
{
    // The candidate list is scanned exactly once. It used to be chained onto
    // itself, which only re-tried formats that had already been rejected: for a
    // deterministic `convert` that doubles the work on a miss and can never
    // change the outcome.
    // NOTE(review): if a second, distinct pattern list was intended here, chain
    // that list instead.
    patterns::TIME_H_M_S
        .iter()
        .find(|fmt| convert(val, fmt).is_ok())
        .copied()
}
28
29fn datetime_pattern<F, K>(val: &str, convert: F) -> Option<&'static str>
30where
32 F: Fn(&str, &str) -> chrono::ParseResult<K>,
33{
34 patterns::DATETIME_Y_M_D
35 .iter()
36 .chain(patterns::DATETIME_D_M_Y)
37 .find(|fmt| convert(val, fmt).is_ok())
38 .copied()
39}
40
41fn date_pattern<F, K>(val: &str, convert: F) -> Option<&'static str>
42where
44 F: Fn(&str, &str) -> chrono::ParseResult<K>,
45{
46 patterns::DATE_Y_M_D
47 .iter()
48 .chain(patterns::DATE_D_M_Y)
49 .find(|fmt| convert(val, fmt).is_ok())
50 .copied()
51}
52
53fn get_first_val(ca: &StringChunked) -> PolarsResult<&str> {
54 let idx = ca.first_non_null().ok_or_else(|| {
55 polars_err!(ComputeError:
56 "unable to determine date parsing format, all values are null",
57 )
58 })?;
59 Ok(ca.get(idx).expect("should not be null"))
60}
61
/// Sniff a datetime format from the first non-null value of `ca_string`.
///
/// The candidate formats are first tried with `NaiveDateTime::parse_from_str`;
/// if none of them matches, the same candidates are retried with
/// `NaiveDate::parse_from_str`.
///
/// # Errors
///
/// Propagates the error of `get_first_val`, and returns the
/// `parse_fmt_idk = "datetime"` error when no candidate format matches.
#[cfg(feature = "dtype-datetime")]
fn sniff_fmt_datetime(ca_string: &StringChunked) -> PolarsResult<&'static str> {
    let first = get_first_val(ca_string)?;

    if let Some(fmt) = datetime_pattern(first, NaiveDateTime::parse_from_str) {
        return Ok(fmt);
    }
    match datetime_pattern(first, NaiveDate::parse_from_str) {
        Some(fmt) => Ok(fmt),
        None => Err(polars_err!(parse_fmt_idk = "datetime")),
    }
}
69
/// Sniff a date format from the first non-null value of `ca_string`.
///
/// # Errors
///
/// Propagates the error of `get_first_val`, and returns the
/// `parse_fmt_idk = "date"` error when `date_pattern` finds no matching format.
#[cfg(feature = "dtype-date")]
fn sniff_fmt_date(ca_string: &StringChunked) -> PolarsResult<&'static str> {
    let first = get_first_val(ca_string)?;
    match date_pattern(first, NaiveDate::parse_from_str) {
        Some(fmt) => Ok(fmt),
        None => Err(polars_err!(parse_fmt_idk = "date")),
    }
}
75
/// Sniff a time format from the first non-null value of `ca_string`.
///
/// # Errors
///
/// Propagates the error of `get_first_val`, and returns the
/// `parse_fmt_idk = "time"` error when `time_pattern` finds no matching format.
#[cfg(feature = "dtype-time")]
fn sniff_fmt_time(ca_string: &StringChunked) -> PolarsResult<&'static str> {
    let first = get_first_val(ca_string)?;
    if let Some(fmt) = time_pattern(first, NaiveTime::parse_from_str) {
        Ok(fmt)
    } else {
        Err(polars_err!(parse_fmt_idk = "time"))
    }
}
81
82pub trait StringMethods: AsString {
83 #[cfg(feature = "dtype-time")]
84 fn as_time(&self, fmt: Option<&str>, use_cache: bool) -> PolarsResult<TimeChunked> {
86 let string_ca = self.as_string();
87 let fmt = match fmt {
88 Some(fmt) => fmt,
89 None => sniff_fmt_time(string_ca)?,
90 };
91 let use_cache = use_cache && string_ca.len() > 50;
92
93 let mut convert = LruCachedFunc::new(
94 |s| {
95 let naive_time = NaiveTime::parse_from_str(s, fmt).ok()?;
96 Some(time_to_time64ns(&naive_time))
97 },
98 (string_ca.len() as f64).sqrt() as usize,
99 );
100 let ca = unary_elementwise(string_ca, |opt_s| convert.eval(opt_s?, use_cache));
101 Ok(ca.with_name(string_ca.name().clone()).into_time())
102 }
103
104 #[cfg(feature = "dtype-date")]
105 fn as_date_not_exact(&self, fmt: Option<&str>) -> PolarsResult<DateChunked> {
109 let string_ca = self.as_string();
110 let fmt = match fmt {
111 Some(fmt) => fmt,
112 None => sniff_fmt_date(string_ca)?,
113 };
114 let ca = unary_elementwise(string_ca, |opt_s| {
115 let mut s = opt_s?;
116 while !s.is_empty() {
117 match NaiveDate::parse_and_remainder(s, fmt) {
118 Ok((nd, _)) => return Some(naive_date_to_date(nd)),
119 Err(_) => {
120 let mut it = s.chars();
121 it.next();
122 s = it.as_str();
123 },
124 }
125 }
126
127 None
128 });
129 Ok(ca.with_name(string_ca.name().clone()).into_date())
130 }
131
132 #[cfg(feature = "dtype-datetime")]
133 fn as_datetime_not_exact(
137 &self,
138 fmt: Option<&str>,
139 tu: TimeUnit,
140 tz_aware: bool,
141 tz: Option<&TimeZone>,
142 _ambiguous: &StringChunked,
143 ensure_matching_tz: bool,
145 ) -> PolarsResult<DatetimeChunked> {
146 let string_ca = self.as_string();
147 let had_format = fmt.is_some();
148 let fmt = match fmt {
149 Some(fmt) => fmt,
150 None => sniff_fmt_datetime(string_ca)?,
151 };
152
153 let func = match tu {
154 TimeUnit::Nanoseconds => datetime_to_timestamp_ns,
155 TimeUnit::Microseconds => datetime_to_timestamp_us,
156 TimeUnit::Milliseconds => datetime_to_timestamp_ms,
157 };
158
159 let ca = unary_elementwise(string_ca, |opt_s| {
160 let mut s = opt_s?;
161 while !s.is_empty() {
162 let timestamp = if tz_aware {
163 DateTime::parse_and_remainder(s, fmt)
164 .ok()
165 .map(|(dt, _r)| func(dt.naive_utc()))
166 } else {
167 infer::parse_datetime_and_remainder(s, fmt).map(|(nd, _r)| func(nd))
168 };
169 match timestamp {
170 Some(ts) => return Some(ts),
171 None => {
172 let mut it = s.chars();
173 it.next();
174 s = it.as_str();
175 },
176 }
177 }
178 None
179 })
180 .with_name(string_ca.name().clone());
181
182 polars_ensure!(
183 !ensure_matching_tz || had_format || !(tz_aware && tz.is_none()),
184 to_datetime_tz_mismatch
185 );
186
187 match (tz_aware, tz) {
188 #[cfg(feature = "timezones")]
189 (false, Some(tz)) => polars_ops::prelude::replace_time_zone(
190 &ca.into_datetime(tu, None),
191 Some(tz),
192 _ambiguous,
193 NonExistent::Raise,
194 ),
195 #[cfg(feature = "timezones")]
196 (true, tz) => Ok(ca.into_datetime(tu, Some(tz.cloned().unwrap_or(TimeZone::UTC)))),
197 _ => Ok(ca.into_datetime(tu, None)),
198 }
199 }
200
201 #[cfg(feature = "dtype-date")]
202 fn as_date(&self, fmt: Option<&str>, use_cache: bool) -> PolarsResult<DateChunked> {
204 let string_ca = self.as_string();
205 let fmt = match fmt {
206 Some(fmt) => fmt,
207 None => return infer::to_date(string_ca),
208 };
209 let use_cache = use_cache && string_ca.len() > 50;
210 let fmt = strptime::compile_fmt(fmt)?;
211
212 let ca = if let Some(fmt_len) = strptime::fmt_len(fmt.as_bytes()) {
214 let mut strptime_cache = StrpTimeState::default();
215 let mut convert = LruCachedFunc::new(
216 |s: &str| {
217 match unsafe { strptime_cache.parse(s.as_bytes(), fmt.as_bytes(), fmt_len) } {
219 None => NaiveDate::parse_from_str(s, &fmt).ok(),
221 Some(ndt) => Some(ndt.date()),
222 }
223 .map(naive_date_to_date)
224 },
225 (string_ca.len() as f64).sqrt() as usize,
226 );
227 unary_elementwise(string_ca, |val| convert.eval(val?, use_cache))
228 } else {
229 let mut convert = LruCachedFunc::new(
230 |s| {
231 let naive_date = NaiveDate::parse_from_str(s, &fmt).ok()?;
232 Some(naive_date_to_date(naive_date))
233 },
234 (string_ca.len() as f64).sqrt() as usize,
235 );
236 unary_elementwise(string_ca, |val| convert.eval(val?, use_cache))
237 };
238
239 Ok(ca.with_name(string_ca.name().clone()).into_date())
240 }
241
242 #[cfg(feature = "dtype-datetime")]
243 fn as_datetime(
245 &self,
246 fmt: Option<&str>,
247 tu: TimeUnit,
248 use_cache: bool,
249 tz_aware: bool,
250 tz: Option<&TimeZone>,
251 ambiguous: &StringChunked,
252 ) -> PolarsResult<DatetimeChunked> {
253 let string_ca = self.as_string();
254 let fmt = match fmt {
255 Some(fmt) => fmt,
256 None => return infer::to_datetime(string_ca, tu, tz, ambiguous, true),
257 };
258 let fmt = strptime::compile_fmt(fmt)?;
259 let use_cache = use_cache && string_ca.len() > 50;
260
261 let func = match tu {
262 TimeUnit::Nanoseconds => datetime_to_timestamp_ns,
263 TimeUnit::Microseconds => datetime_to_timestamp_us,
264 TimeUnit::Milliseconds => datetime_to_timestamp_ms,
265 };
266
267 if tz_aware {
268 #[cfg(feature = "timezones")]
269 {
270 let mut convert = LruCachedFunc::new(
271 |s: &str| {
272 let dt = DateTime::parse_from_str(s, &fmt).ok()?;
273 Some(func(dt.naive_utc()))
274 },
275 (string_ca.len() as f64).sqrt() as usize,
276 );
277 Ok(
278 unary_elementwise(string_ca, |opt_s| convert.eval(opt_s?, use_cache))
279 .with_name(string_ca.name().clone())
280 .into_datetime(tu, Some(tz.cloned().unwrap_or(TimeZone::UTC))),
281 )
282 }
283 #[cfg(not(feature = "timezones"))]
284 {
285 panic!("activate 'timezones' feature")
286 }
287 } else {
288 let transform = match tu {
289 TimeUnit::Nanoseconds => infer::transform_datetime_ns,
290 TimeUnit::Microseconds => infer::transform_datetime_us,
291 TimeUnit::Milliseconds => infer::transform_datetime_ms,
292 };
293 let ca = if let Some(fmt_len) = self::strptime::fmt_len(fmt.as_bytes()) {
295 let mut strptime_cache = StrpTimeState::default();
296 let mut convert = LruCachedFunc::new(
297 |s: &str| {
298 match unsafe { strptime_cache.parse(s.as_bytes(), fmt.as_bytes(), fmt_len) }
300 {
301 None => transform(s, &fmt),
302 Some(ndt) => Some(func(ndt)),
303 }
304 },
305 (string_ca.len() as f64).sqrt() as usize,
306 );
307 unary_elementwise(string_ca, |opt_s| convert.eval(opt_s?, use_cache))
308 } else {
309 let mut convert = LruCachedFunc::new(
310 |s| transform(s, &fmt),
311 (string_ca.len() as f64).sqrt() as usize,
312 );
313 unary_elementwise(string_ca, |opt_s| convert.eval(opt_s?, use_cache))
314 };
315 let dt = ca
316 .with_name(string_ca.name().clone())
317 .into_datetime(tu, None);
318 match tz {
319 #[cfg(feature = "timezones")]
320 Some(tz) => polars_ops::prelude::replace_time_zone(
321 &dt,
322 Some(tz),
323 ambiguous,
324 NonExistent::Raise,
325 ),
326 _ => Ok(dt),
327 }
328 }
329 }
330}
331
332pub trait AsString {
333 fn as_string(&self) -> &StringChunked;
334}
335
336impl AsString for StringChunked {
337 fn as_string(&self) -> &StringChunked {
338 self
339 }
340}
341
342impl StringMethods for StringChunked {}