polars_time/chunkedarray/string/
mod.rs1pub mod infer;
2use chrono::DateTime;
3mod patterns;
4mod strptime;
5pub use patterns::Pattern;
6#[cfg(feature = "dtype-time")]
7use polars_core::chunked_array::temporal::time_to_time64ns;
8use polars_core::prelude::arity::unary_elementwise;
9use polars_utils::cache::LruCachedFunc;
10
11use super::*;
12#[cfg(feature = "dtype-date")]
13use crate::chunkedarray::date::naive_date_to_date;
14use crate::prelude::string::strptime::StrpTimeState;
15
/// Finds the first known time format that successfully parses `val`.
///
/// `convert` is invoked as `convert(val, fmt)` for every candidate in
/// `patterns::TIME_H_M_S`, in order. The first candidate for which it returns
/// `Ok` is handed back; `None` means no candidate matched.
#[cfg(feature = "dtype-time")]
fn time_pattern<F, K>(val: &str, convert: F) -> Option<&'static str>
where
    F: Fn(&str, &str) -> chrono::ParseResult<K>,
{
    // NOTE(review): `TIME_H_M_S` was previously chained with itself, which
    // only re-tried the very same formats after all of them had failed. The
    // duplicate pass is dropped here (the result is identical). If a second,
    // different pattern list was meant to be chained, add it — TODO confirm.
    patterns::TIME_H_M_S
        .iter()
        .find(|fmt| convert(val, fmt).is_ok())
        .copied()
}
28
29fn datetime_pattern<F, K>(val: &str, convert: F) -> Option<&'static str>
30where
32 F: Fn(&str, &str) -> chrono::ParseResult<K>,
33{
34 patterns::DATETIME_Y_M_D
35 .iter()
36 .chain(patterns::DATETIME_D_M_Y)
37 .find(|fmt| convert(val, fmt).is_ok())
38 .copied()
39}
40
41fn date_pattern<F, K>(val: &str, convert: F) -> Option<&'static str>
42where
44 F: Fn(&str, &str) -> chrono::ParseResult<K>,
45{
46 patterns::DATE_Y_M_D
47 .iter()
48 .chain(patterns::DATE_D_M_Y)
49 .find(|fmt| convert(val, fmt).is_ok())
50 .copied()
51}
52
/// Infers a datetime format string for `val`.
///
/// Every candidate of `datetime_pattern` is first tried with
/// `NaiveDateTime::parse_from_str`; if none matches, the same candidates are
/// retried with `NaiveDate::parse_from_str`.
///
/// # Errors
///
/// Returns the `parse_fmt_idk = "datetime"` error when no candidate matches.
#[cfg(feature = "dtype-datetime")]
fn sniff_fmt_datetime(val: &str) -> PolarsResult<&'static str> {
    if let Some(fmt) = datetime_pattern(val, NaiveDateTime::parse_from_str) {
        return Ok(fmt);
    }
    match datetime_pattern(val, NaiveDate::parse_from_str) {
        Some(fmt) => Ok(fmt),
        None => Err(polars_err!(parse_fmt_idk = "datetime")),
    }
}
59
/// Infers a date format string for `val` by trying each candidate of
/// `date_pattern` with `NaiveDate::parse_from_str`.
///
/// # Errors
///
/// Returns the `parse_fmt_idk = "date"` error when no candidate matches.
#[cfg(feature = "dtype-date")]
fn sniff_fmt_date(val: &str) -> PolarsResult<&'static str> {
    match date_pattern(val, NaiveDate::parse_from_str) {
        Some(fmt) => Ok(fmt),
        None => Err(polars_err!(parse_fmt_idk = "date")),
    }
}
64
/// Infers a time format string for `val` by trying each candidate of
/// `time_pattern` with `NaiveTime::parse_from_str`.
///
/// # Errors
///
/// Returns the `parse_fmt_idk = "time"` error when no candidate matches.
#[cfg(feature = "dtype-time")]
fn sniff_fmt_time(val: &str) -> PolarsResult<&'static str> {
    let Some(fmt) = time_pattern(val, NaiveTime::parse_from_str) else {
        return Err(polars_err!(parse_fmt_idk = "time"));
    };
    Ok(fmt)
}
69
70pub trait StringMethods: AsString {
71 #[cfg(feature = "dtype-time")]
72 fn as_time(&self, fmt: Option<&str>, use_cache: bool) -> PolarsResult<TimeChunked> {
74 let string_ca = self.as_string();
75 let fmt = match fmt {
76 Some(fmt) => fmt,
77 None => {
78 let Some(idx) = string_ca.first_non_null() else {
79 return Ok(
80 Int64Chunked::full_null(string_ca.name().clone(), string_ca.len())
81 .into_time(),
82 );
83 };
84 let val = string_ca.get(idx).expect("should not be null");
85 sniff_fmt_time(val)?
86 },
87 };
88 let use_cache = use_cache && string_ca.len() > 50;
89
90 let mut convert = LruCachedFunc::new(
91 |s| {
92 let naive_time = NaiveTime::parse_from_str(s, fmt).ok()?;
93 Some(time_to_time64ns(&naive_time))
94 },
95 (string_ca.len() as f64).sqrt() as usize,
96 );
97 let ca = unary_elementwise(string_ca, |opt_s| convert.eval(opt_s?, use_cache));
98 Ok(ca.with_name(string_ca.name().clone()).into_time())
99 }
100
101 #[cfg(feature = "dtype-date")]
102 fn as_date_not_exact(&self, fmt: Option<&str>) -> PolarsResult<DateChunked> {
106 let string_ca = self.as_string();
107 let fmt = match fmt {
108 Some(fmt) => fmt,
109 None => {
110 let Some(idx) = string_ca.first_non_null() else {
111 return Ok(
112 Int32Chunked::full_null(string_ca.name().clone(), string_ca.len())
113 .into_date(),
114 );
115 };
116 let val = string_ca.get(idx).expect("should not be null");
117 sniff_fmt_date(val)?
118 },
119 };
120 let ca = unary_elementwise(string_ca, |opt_s| {
121 let mut s = opt_s?;
122 while !s.is_empty() {
123 match NaiveDate::parse_and_remainder(s, fmt) {
124 Ok((nd, _)) => return Some(naive_date_to_date(nd)),
125 Err(_) => {
126 let mut it = s.chars();
127 it.next();
128 s = it.as_str();
129 },
130 }
131 }
132
133 None
134 });
135 Ok(ca.with_name(string_ca.name().clone()).into_date())
136 }
137
138 #[cfg(feature = "dtype-datetime")]
139 fn as_datetime_not_exact(
143 &self,
144 fmt: Option<&str>,
145 tu: TimeUnit,
146 tz_aware: bool,
147 tz: Option<&TimeZone>,
148 _ambiguous: &StringChunked,
149 ensure_matching_tz: bool,
151 ) -> PolarsResult<DatetimeChunked> {
152 let string_ca = self.as_string();
153 let had_format = fmt.is_some();
154 let fmt = match fmt {
155 Some(fmt) => fmt,
156 None => {
157 let Some(idx) = string_ca.first_non_null() else {
158 return Ok(
159 Int64Chunked::full_null(string_ca.name().clone(), string_ca.len())
160 .into_datetime(tu, tz.cloned()),
161 );
162 };
163 let val = string_ca.get(idx).expect("should not be null");
164 sniff_fmt_datetime(val)?
165 },
166 };
167
168 let func = match tu {
169 TimeUnit::Nanoseconds => datetime_to_timestamp_ns,
170 TimeUnit::Microseconds => datetime_to_timestamp_us,
171 TimeUnit::Milliseconds => datetime_to_timestamp_ms,
172 };
173
174 let ca = unary_elementwise(string_ca, |opt_s| {
175 let mut s = opt_s?;
176 while !s.is_empty() {
177 let timestamp = if tz_aware {
178 DateTime::parse_and_remainder(s, fmt)
179 .ok()
180 .map(|(dt, _r)| func(dt.naive_utc()))
181 } else {
182 infer::parse_datetime_and_remainder(s, fmt).map(|(nd, _r)| func(nd))
183 };
184 match timestamp {
185 Some(ts) => return Some(ts),
186 None => {
187 let mut it = s.chars();
188 it.next();
189 s = it.as_str();
190 },
191 }
192 }
193 None
194 })
195 .with_name(string_ca.name().clone());
196
197 polars_ensure!(
198 !ensure_matching_tz || had_format || !(tz_aware && tz.is_none()),
199 to_datetime_tz_mismatch
200 );
201
202 match (tz_aware, tz) {
203 #[cfg(feature = "timezones")]
204 (false, Some(tz)) => polars_ops::prelude::replace_time_zone(
205 &ca.into_datetime(tu, None),
206 Some(tz),
207 _ambiguous,
208 NonExistent::Raise,
209 ),
210 #[cfg(feature = "timezones")]
211 (true, tz) => Ok(ca.into_datetime(tu, Some(tz.cloned().unwrap_or(TimeZone::UTC)))),
212 _ => Ok(ca.into_datetime(tu, None)),
213 }
214 }
215
216 #[cfg(feature = "dtype-date")]
217 fn as_date(&self, fmt: Option<&str>, use_cache: bool) -> PolarsResult<DateChunked> {
219 let string_ca = self.as_string();
220 let fmt = match fmt {
221 Some(fmt) => fmt,
222 None => return infer::to_date(string_ca),
223 };
224 let use_cache = use_cache && string_ca.len() > 50;
225 let fmt = strptime::compile_fmt(fmt)?;
226
227 let ca = if let Some(fmt_len) = strptime::fmt_len(fmt.as_bytes()) {
229 let mut strptime_cache = StrpTimeState::default();
230 let mut convert = LruCachedFunc::new(
231 |s: &str| {
232 match unsafe { strptime_cache.parse(s.as_bytes(), fmt.as_bytes(), fmt_len) } {
234 None => NaiveDate::parse_from_str(s, &fmt).ok(),
236 Some(ndt) => Some(ndt.date()),
237 }
238 .map(naive_date_to_date)
239 },
240 (string_ca.len() as f64).sqrt() as usize,
241 );
242 unary_elementwise(string_ca, |val| convert.eval(val?, use_cache))
243 } else {
244 let mut convert = LruCachedFunc::new(
245 |s| {
246 let naive_date = NaiveDate::parse_from_str(s, &fmt).ok()?;
247 Some(naive_date_to_date(naive_date))
248 },
249 (string_ca.len() as f64).sqrt() as usize,
250 );
251 unary_elementwise(string_ca, |val| convert.eval(val?, use_cache))
252 };
253
254 Ok(ca.with_name(string_ca.name().clone()).into_date())
255 }
256
257 #[cfg(feature = "dtype-datetime")]
258 fn as_datetime(
260 &self,
261 fmt: Option<&str>,
262 tu: TimeUnit,
263 use_cache: bool,
264 tz_aware: bool,
265 tz: Option<&TimeZone>,
266 ambiguous: &StringChunked,
267 ) -> PolarsResult<DatetimeChunked> {
268 let string_ca = self.as_string();
269 let fmt = match fmt {
270 Some(fmt) => fmt,
271 None => return infer::to_datetime(string_ca, tu, tz, ambiguous, true),
272 };
273 let fmt = strptime::compile_fmt(fmt)?;
274 let use_cache = use_cache && string_ca.len() > 50;
275
276 let func = match tu {
277 TimeUnit::Nanoseconds => datetime_to_timestamp_ns,
278 TimeUnit::Microseconds => datetime_to_timestamp_us,
279 TimeUnit::Milliseconds => datetime_to_timestamp_ms,
280 };
281
282 if tz_aware {
283 #[cfg(feature = "timezones")]
284 {
285 let mut convert = LruCachedFunc::new(
286 |s: &str| {
287 let dt = DateTime::parse_from_str(s, &fmt).ok()?;
288 Some(func(dt.naive_utc()))
289 },
290 (string_ca.len() as f64).sqrt() as usize,
291 );
292 Ok(
293 unary_elementwise(string_ca, |opt_s| convert.eval(opt_s?, use_cache))
294 .with_name(string_ca.name().clone())
295 .into_datetime(tu, Some(tz.cloned().unwrap_or(TimeZone::UTC))),
296 )
297 }
298 #[cfg(not(feature = "timezones"))]
299 {
300 panic!("activate 'timezones' feature")
301 }
302 } else {
303 let transform = match tu {
304 TimeUnit::Nanoseconds => infer::transform_datetime_ns,
305 TimeUnit::Microseconds => infer::transform_datetime_us,
306 TimeUnit::Milliseconds => infer::transform_datetime_ms,
307 };
308 let ca = if let Some(fmt_len) = self::strptime::fmt_len(fmt.as_bytes()) {
310 let mut strptime_cache = StrpTimeState::default();
311 let mut convert = LruCachedFunc::new(
312 |s: &str| {
313 match unsafe { strptime_cache.parse(s.as_bytes(), fmt.as_bytes(), fmt_len) }
315 {
316 None => transform(s, &fmt),
317 Some(ndt) => Some(func(ndt)),
318 }
319 },
320 (string_ca.len() as f64).sqrt() as usize,
321 );
322 unary_elementwise(string_ca, |opt_s| convert.eval(opt_s?, use_cache))
323 } else {
324 let mut convert = LruCachedFunc::new(
325 |s| transform(s, &fmt),
326 (string_ca.len() as f64).sqrt() as usize,
327 );
328 unary_elementwise(string_ca, |opt_s| convert.eval(opt_s?, use_cache))
329 };
330 let dt = ca
331 .with_name(string_ca.name().clone())
332 .into_datetime(tu, None);
333 match tz {
334 #[cfg(feature = "timezones")]
335 Some(tz) => polars_ops::prelude::replace_time_zone(
336 &dt,
337 Some(tz),
338 ambiguous,
339 NonExistent::Raise,
340 ),
341 _ => Ok(dt),
342 }
343 }
344 }
345}
346
347pub trait AsString {
348 fn as_string(&self) -> &StringChunked;
349}
350
351impl AsString for StringChunked {
352 fn as_string(&self) -> &StringChunked {
353 self
354 }
355}
356
357impl StringMethods for StringChunked {}