Skip to main content

polars_core/datatypes/temporal/
time_zone.rs

1use polars_error::{PolarsResult, polars_bail};
2use polars_utils::pl_str::PlSmallStr;
3
4use crate::config;
5
// A time zone name held as a small string.
//
// The single field is private, so code outside this module can only obtain a
// `TimeZone` through the constructors defined on the type; the safe constructor
// canonicalizes the name, while the `unsafe` ones store it as given.
//
// NOTE(review): this header is a plain `//` comment on purpose — a `///` doc
// comment here would presumably end up in the derived `schemars` schema
// description; confirm before converting it.
#[derive(Clone, Hash, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct TimeZone {
    /// Private inner to ensure canonical / parsed time zone repr at construction.
    inner: PlSmallStr,
}
13
14impl TimeZone {
15    pub const UTC: TimeZone = unsafe { TimeZone::from_static("UTC") };
16
17    /// Construct from a static string.
18    ///
19    /// # Safety
20    /// This does not perform any validation, the caller is responsible for
21    /// ensuring they pass a valid timezone.
22    #[inline(always)]
23    pub const unsafe fn from_static(tz: &'static str) -> Self {
24        Self {
25            inner: PlSmallStr::from_static(tz),
26        }
27    }
28
29    /// # Safety
30    /// This does not perform any validation, the caller is responsible for
31    /// ensuring they pass a valid timezone.
32    pub unsafe fn new_unchecked(zone_str: impl Into<PlSmallStr>) -> Self {
33        Self {
34            inner: zone_str.into(),
35        }
36    }
37
38    /// Converts timezones to canonical form.
39    ///
40    /// If the "timezones" feature is enabled, additionally performs validation and converts to
41    /// Etc/GMT form where applicable.
42    #[inline]
43    pub fn opt_try_new(zone_str: Option<impl Into<PlSmallStr>>) -> PolarsResult<Option<Self>> {
44        Self::new_impl(zone_str.map(|x| x.into()))
45    }
46
47    fn new_impl(zone_str: Option<PlSmallStr>) -> PolarsResult<Option<Self>> {
48        // Needed for selectors https://github.com/pola-rs/polars/pull/9641
49        if zone_str.as_deref() == Some("*") {
50            return Ok(Some(Self {
51                inner: PlSmallStr::from_static("*"),
52            }));
53        }
54
55        let mut canonical_tz = Self::_canonical_timezone_impl(zone_str);
56
57        #[cfg(feature = "timezones")]
58        if let Some(tz) = canonical_tz.as_mut() {
59            if let Err(err) = Self::validate_time_zone(tz) {
60                match parse_fixed_offset(tz) {
61                    Ok(v) => *tz = v,
62                    Err(_) => {
63                        // This can be used if there are externally created arrow buffers / dtypes
64                        // with unknown timezones.
65                        if std::env::var("POLARS_IGNORE_TIMEZONE_PARSE_ERROR").as_deref() == Ok("1")
66                        {
67                            if config::verbose() {
68                                eprintln!("WARN: {err}")
69                            }
70                        } else {
71                            return Err(err.wrap_msg(|s| {
72                                format!(
73                                    "{s}\n\nIf you would like to forcibly disable \
74                                    timezone validation, set \
75                                    POLARS_IGNORE_TIMEZONE_PARSE_ERROR=1.",
76                                )
77                            }));
78                        }
79                    },
80                }
81            }
82        }
83
84        Ok(canonical_tz.map(|inner| Self { inner }))
85    }
86
87    /// Equality where `None` is treated as UTC.
88    pub fn eq_none_as_utc(this: Option<&TimeZone>, other: Option<&TimeZone>) -> bool {
89        this.unwrap_or(&Self::UTC) == other.unwrap_or(&Self::UTC)
90    }
91
92    pub fn _canonical_timezone_impl(tz: Option<PlSmallStr>) -> Option<PlSmallStr> {
93        match tz.as_deref() {
94            Some("") | None => None,
95            #[cfg(feature = "timezones")]
96            Some("+00:00") | Some("00:00") | Some("utc") => Some(PlSmallStr::from_static("UTC")),
97            Some(_) => tz,
98        }
99    }
100
101    #[cfg(feature = "timezones")]
102    pub fn from_chrono(tz: &chrono_tz::Tz) -> Self {
103        use polars_utils::format_pl_smallstr;
104
105        Self {
106            inner: format_pl_smallstr!("{}", tz),
107        }
108    }
109
110    #[cfg(feature = "timezones")]
111    pub fn to_chrono(&self) -> PolarsResult<chrono_tz::Tz> {
112        parse_time_zone(self)
113    }
114
115    #[cfg(feature = "timezones")]
116    pub fn validate_time_zone(tz: &str) -> PolarsResult<()> {
117        parse_time_zone(tz).map(|_| ())
118    }
119}
120
121impl std::ops::Deref for TimeZone {
122    type Target = PlSmallStr;
123
124    fn deref(&self) -> &Self::Target {
125        &self.inner
126    }
127}
128
129impl std::fmt::Debug for TimeZone {
130    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
131        std::fmt::Debug::fmt(&self.inner, f)
132    }
133}
134
135impl std::fmt::Display for TimeZone {
136    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
137        std::fmt::Display::fmt(&self.inner, f)
138    }
139}
140
/// Regex source (verbose mode, `(?x)`) for whole-hour fixed offsets.
///
/// Accepts an optional `+`/`-` sign, a two-digit hour from `00` to `14`, an optional
/// `:` separator and a literal `00` minute part, anchored at both ends — e.g.
/// `+01:00`, `-0500` or `14:00`. Offsets with non-zero minutes do not match.
#[cfg(feature = "timezones")]
static FIXED_OFFSET_PATTERN: &str = r#"(?x)
    ^
    (?P<sign>[-+])?            # optional sign
    (?P<hour>0[0-9]|1[0-4])    # hour (between 0 and 14)
    :?                         # optional separator
    00                         # minute
    $
    "#;

// Declares `FIXED_OFFSET_RE` from the pattern above via the project's
// `cached_regex!` macro; it is the regex used by `parse_fixed_offset`.
#[cfg(feature = "timezones")]
polars_utils::regex_cache::cached_regex! {
    static FIXED_OFFSET_RE = FIXED_OFFSET_PATTERN;
}
155
// Implementation from: https://github.com/wooorm/levenshtein-rs/blob/9c4730b1973d4e61e187f8fe6d5f299ad5c991fc/src/lib.rs
// (MIT licensed. Copyright (c) 2016 Titus Wormer) <tituswormer@gmail.com>
//
// Levenshtein edit distance between `a` and `b`, counted in `char`s (not bytes).
fn _levenshtein(a: &str, b: &str) -> usize {
    // Identical inputs need no edits.
    if a == b {
        return 0;
    }

    let len_a = a.chars().count();
    let len_b = b.chars().count();

    // If one side is empty, the distance is the length of the other side.
    if len_a == 0 || len_b == 0 {
        return len_a.max(len_b);
    }

    // Single rolling row instead of a full matrix: before processing a character of `b`,
    // `row[i]` holds the distance between the first `i + 1` chars of `a` and the part of
    // `b` consumed so far.
    let mut row: Vec<usize> = (1..=len_a).collect();
    let mut distance = 0;

    for (consumed_b, ch_b) in b.chars().enumerate() {
        // Value diagonally up-left of the cell being computed.
        let mut diagonal = consumed_b;
        // Value directly left of the cell being computed.
        distance = consumed_b;

        for (cell, ch_a) in row.iter_mut().zip(a.chars()) {
            let substitution = diagonal + usize::from(ch_a != ch_b);
            let above = *cell;

            distance = substitution.min(above + 1).min(distance + 1);

            diagonal = above;
            *cell = distance;
        }
    }

    distance
}
215
/// Resolve a time zone name to a [`chrono_tz::Tz`].
///
/// # Errors
/// Returns a `ComputeError` when `tz` is not a name `chrono_tz` can parse. The message
/// carries a hint naming the known zone with the smallest edit distance to `tz`
/// (the first such zone in `chrono_tz::TZ_VARIANTS` order when several tie).
#[cfg(feature = "timezones")]
pub fn parse_time_zone(tz: &str) -> PolarsResult<chrono_tz::Tz> {
    match tz.parse::<chrono_tz::Tz>() {
        Ok(zone) => Ok(zone),
        Err(_) => {
            // `min_by_key` keeps the first minimum, matching a strict `<` comparison scan.
            let closest_name = chrono_tz::TZ_VARIANTS
                .iter()
                .copied()
                .map(|variant| variant.name())
                .min_by_key(|name| _levenshtein(tz, name))
                .unwrap();

            polars_bail!(
                ComputeError:
                "unable to parse time zone: '{tz}'. Please check the \
                Time Zone Database for a list of available time zones.\n\nHint: did you mean '{}' instead?",
                closest_name,
            )
        },
    }
}
238
/// Translate a whole-hour fixed offset into the matching `Etc/GMT` zone name.
///
/// For example `+01:00` becomes `Etc/GMT-1`. An offset without a sign is treated
/// like a `+` offset.
///
/// The sign flip is intentional, see <https://en.wikipedia.org/wiki/Tz_database#Area>:
/// > In order to conform with the POSIX style, those zone names beginning with
/// > "Etc/GMT" have their sign reversed from the standard ISO 8601 convention.
/// > In the "Etc" area, zones west of GMT have a positive sign and those east
/// > have a negative sign in their name (e.g "Etc/GMT-14" is 14 hours ahead of GMT).
///
/// # Errors
/// Returns a `ComputeError` when `tz` does not match the fixed-offset pattern, or when
/// the resulting `Etc/GMT` name is not one `chrono_tz` can parse.
#[cfg(feature = "timezones")]
pub fn parse_fixed_offset(tz: &str) -> PolarsResult<PlSmallStr> {
    use polars_utils::format_pl_smallstr;

    let etc_name = FIXED_OFFSET_RE.captures(tz).and_then(|groups| {
        // Etc/GMT names carry the opposite sign of the ISO 8601 offset.
        let flipped_sign = match groups.name("sign") {
            Some(m) if m.as_str() == "-" => "+",
            _ => "-",
        };
        // The pattern guarantees a two-digit `hour` group; parsing drops the leading zero.
        let hours = groups.name("hour").unwrap().as_str().parse::<i32>().unwrap();

        let name = format_pl_smallstr!("Etc/GMT{}{}", flipped_sign, hours);
        if name.parse::<chrono_tz::Tz>().is_ok() {
            Some(name)
        } else {
            None
        }
    });

    match etc_name {
        Some(name) => Ok(name),
        None => unable_to_parse_err(tz),
    }
}
266
/// Always returns a `ComputeError` stating that `tz` could not be parsed as a time zone.
///
/// Generic over `T` so it can be used as the tail expression of any function that
/// returns a `PolarsResult`.
#[cfg(feature = "timezones")]
fn unable_to_parse_err<T>(tz: &str) -> PolarsResult<T> {
    polars_bail!(
        ComputeError:
        "unable to parse time zone: '{}'. Please check the \
        Time Zone Database for a list of available time zones.",
        tz
    )
}
275}