polars_core/datatypes/temporal/
time_zone.rs1use polars_error::{PolarsResult, polars_bail};
2use polars_utils::pl_str::PlSmallStr;
3
4use crate::config;
5
6#[derive(Clone, Hash, PartialEq, Eq)]
7#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
8#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
9pub struct TimeZone {
10 inner: PlSmallStr,
12}
13
14impl TimeZone {
15 pub const UTC: TimeZone = unsafe { TimeZone::from_static("UTC") };
16
17 #[inline(always)]
23 pub const unsafe fn from_static(tz: &'static str) -> Self {
24 Self {
25 inner: PlSmallStr::from_static(tz),
26 }
27 }
28
29 pub unsafe fn new_unchecked(zone_str: impl Into<PlSmallStr>) -> Self {
33 Self {
34 inner: zone_str.into(),
35 }
36 }
37
38 #[inline]
43 pub fn opt_try_new(zone_str: Option<impl Into<PlSmallStr>>) -> PolarsResult<Option<Self>> {
44 Self::new_impl(zone_str.map(|x| x.into()))
45 }
46
47 fn new_impl(zone_str: Option<PlSmallStr>) -> PolarsResult<Option<Self>> {
48 if zone_str.as_deref() == Some("*") {
50 return Ok(Some(Self {
51 inner: PlSmallStr::from_static("*"),
52 }));
53 }
54
55 let mut canonical_tz = Self::_canonical_timezone_impl(zone_str);
56
57 #[cfg(feature = "timezones")]
58 if let Some(tz) = canonical_tz.as_mut() {
59 if let Err(err) = Self::validate_time_zone(tz) {
60 match parse_fixed_offset(tz) {
61 Ok(v) => *tz = v,
62 Err(_) => {
63 if std::env::var("POLARS_IGNORE_TIMEZONE_PARSE_ERROR").as_deref() == Ok("1")
66 {
67 if config::verbose() {
68 eprintln!("WARN: {err}")
69 }
70 } else {
71 return Err(err.wrap_msg(|s| {
72 format!(
73 "{s}\n\nIf you would like to forcibly disable \
74 timezone validation, set \
75 POLARS_IGNORE_TIMEZONE_PARSE_ERROR=1.",
76 )
77 }));
78 }
79 },
80 }
81 }
82 }
83
84 Ok(canonical_tz.map(|inner| Self { inner }))
85 }
86
87 pub fn eq_none_as_utc(this: Option<&TimeZone>, other: Option<&TimeZone>) -> bool {
89 this.unwrap_or(&Self::UTC) == other.unwrap_or(&Self::UTC)
90 }
91
92 pub fn _canonical_timezone_impl(tz: Option<PlSmallStr>) -> Option<PlSmallStr> {
93 match tz.as_deref() {
94 Some("") | None => None,
95 #[cfg(feature = "timezones")]
96 Some("+00:00") | Some("00:00") | Some("utc") => Some(PlSmallStr::from_static("UTC")),
97 Some(_) => tz,
98 }
99 }
100
101 #[cfg(feature = "timezones")]
102 pub fn from_chrono(tz: &chrono_tz::Tz) -> Self {
103 use polars_utils::format_pl_smallstr;
104
105 Self {
106 inner: format_pl_smallstr!("{}", tz),
107 }
108 }
109
110 #[cfg(feature = "timezones")]
111 pub fn to_chrono(&self) -> PolarsResult<chrono_tz::Tz> {
112 parse_time_zone(self)
113 }
114
115 #[cfg(feature = "timezones")]
116 pub fn validate_time_zone(tz: &str) -> PolarsResult<()> {
117 parse_time_zone(tz).map(|_| ())
118 }
119}
120
121impl std::ops::Deref for TimeZone {
122 type Target = PlSmallStr;
123
124 fn deref(&self) -> &Self::Target {
125 &self.inner
126 }
127}
128
129impl std::fmt::Debug for TimeZone {
130 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
131 std::fmt::Debug::fmt(&self.inner, f)
132 }
133}
134
135impl std::fmt::Display for TimeZone {
136 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
137 std::fmt::Display::fmt(&self.inner, f)
138 }
139}
140
/// Verbose-mode (`(?x)`) pattern for whole-hour fixed offsets.
///
/// It matches an optional sign, a two-digit hour from `00` to `14`, an optional
/// `:` and a literal `00` minute part, e.g. `+02:00`, `-0500` or `0100`.
#[cfg(feature = "timezones")]
static FIXED_OFFSET_PATTERN: &str = r"(?x)
    ^
    (?P<sign>[-+])? # optional sign
    (?P<hour>0[0-9]|1[0-4]) # hour (between 0 and 14)
    :? # optional separator
    00 # minute
    $
    ";

// Regex built from `FIXED_OFFSET_PATTERN` through the project's `cached_regex!` macro.
#[cfg(feature = "timezones")]
polars_utils::regex_cache::cached_regex! {
    static FIXED_OFFSET_RE = FIXED_OFFSET_PATTERN;
}
155
/// Returns the Levenshtein edit distance between `a` and `b`.
///
/// The distance is counted in `char`s (Unicode scalar values), not bytes, and
/// uses unit cost for insertion, deletion and substitution.
fn _levenshtein(a: &str, b: &str) -> usize {
    if a == b {
        return 0;
    }

    let a_len = a.chars().count();
    let b_len = b.chars().count();
    match (a_len, b_len) {
        // Turning an empty string into the other one takes one insertion per char.
        (0, _) => return b_len,
        (_, 0) => return a_len,
        _ => {},
    }

    // Single-row dynamic programme: before processing the j-th char of `b`,
    // `row[i]` is the distance between the first `i + 1` chars of `a` and the
    // first `j` chars of `b`.
    let mut row: Vec<usize> = (1..=a_len).collect();
    let mut distance = 0;

    for (j, char_b) in b.chars().enumerate() {
        // Costs against the empty prefix of `a`.
        let mut diag = j;
        let mut left = j + 1;

        for (slot, char_a) in row.iter_mut().zip(a.chars()) {
            let up = *slot;
            let substitute = diag + usize::from(char_a != char_b);
            let best = substitute.min(left + 1).min(up + 1);

            diag = up;
            left = best;
            *slot = best;
        }

        distance = left;
    }

    distance
}
215
/// Parses `tz` into a `chrono_tz` time zone.
///
/// # Errors
/// When `tz` cannot be parsed, returns a `ComputeError` whose message suggests
/// the entry of `chrono_tz::TZ_VARIANTS` with the smallest Levenshtein distance
/// to `tz`. On ties the earliest entry in that table is suggested.
///
/// # Panics
/// Panics if `chrono_tz::TZ_VARIANTS` is empty, since no suggestion can then be
/// produced.
#[cfg(feature = "timezones")]
pub fn parse_time_zone(tz: &str) -> PolarsResult<chrono_tz::Tz> {
    match tz.parse::<chrono_tz::Tz>() {
        Ok(parsed) => Ok(parsed),
        Err(_) => {
            // `min_by_key` keeps the first of several equally close names.
            let closest = chrono_tz::TZ_VARIANTS
                .iter()
                .map(|zone| zone.name())
                .min_by_key(|name| _levenshtein(tz, name))
                .unwrap();

            polars_bail!(
                ComputeError:
                "unable to parse time zone: '{tz}'. Please check the \
                Time Zone Database for a list of available time zones.\n\nHint: did you mean '{}' instead?",
                closest,
            )
        },
    }
}
238
/// Converts a whole-hour fixed offset such as `+02:00` into the name of the
/// matching `Etc/GMT±h` zone.
///
/// # Errors
/// Returns a `ComputeError` when `tz` does not match `FIXED_OFFSET_RE` or when
/// the resulting `Etc/GMT…` name is not known to `chrono_tz`.
#[cfg(feature = "timezones")]
pub fn parse_fixed_offset(tz: &str) -> PolarsResult<PlSmallStr> {
    use polars_utils::format_pl_smallstr;

    let caps = match FIXED_OFFSET_RE.captures(tz) {
        Some(caps) => caps,
        None => return unable_to_parse_err(tz),
    };

    // The sign is deliberately flipped: `Etc/GMT+h` zone names denote zones
    // *behind* UTC, so an offset of `-05:00` maps to `Etc/GMT+5`. A missing
    // sign is treated like `+`.
    let is_negative = caps.name("sign").map(|m| m.as_str()) == Some("-");
    let sign = if is_negative { "+" } else { "-" };

    // The `hour` group is mandatory in the pattern and only matches two digits,
    // so both unwraps cannot fail after a successful match.
    let hour: i32 = caps.name("hour").unwrap().as_str().parse().unwrap();

    let etc_tz = format_pl_smallstr!("Etc/GMT{}{}", sign, hour);
    match etc_tz.parse::<chrono_tz::Tz>() {
        Ok(_) => Ok(etc_tz),
        Err(_) => unable_to_parse_err(tz),
    }
}
266
/// Always returns a `ComputeError` saying that `tz` could not be parsed.
///
/// Generic over `T` so it can be used as the tail expression of any function
/// returning a `PolarsResult`.
#[cfg(feature = "timezones")]
fn unable_to_parse_err<T>(tz: &str) -> PolarsResult<T> {
    polars_bail!(
        ComputeError:
        "unable to parse time zone: '{}'. Please check the \
        Time Zone Database for a list of available time zones.",
        tz
    )
}