polars_utils/
regex_cache.rs

1use std::cell::RefCell;
2
3use regex::Regex;
4
5use crate::cache::LruCache;
6
// Regex compilation is expensive, and the resulting regexes can be large as
// well, so we want a good caching scheme.
//
// TODO: add a larger global cache with a time-based flush.
11
12/// A cache for compiled regular expressions.
13pub struct RegexCache {
14    cache: LruCache<String, Regex>,
15}
16
17impl RegexCache {
18    fn new() -> Self {
19        Self {
20            cache: LruCache::with_capacity(32),
21        }
22    }
23
24    pub fn compile(&mut self, re: &str) -> Result<&Regex, regex::Error> {
25        let r = self.cache.try_get_or_insert_with(re, |re| {
26            #[allow(clippy::disallowed_methods)]
27            Regex::new(re)
28        });
29        Ok(&*r?)
30    }
31}
32
33thread_local! {
34    static LOCAL_REGEX_CACHE: RefCell<RegexCache> = RefCell::new(RegexCache::new());
35}
36
37pub fn compile_regex(re: &str) -> Result<Regex, regex::Error> {
38    LOCAL_REGEX_CACHE.with_borrow_mut(|cache| cache.compile(re).cloned())
39}
40
41pub fn with_regex_cache<R, F: FnOnce(&mut RegexCache) -> R>(f: F) -> R {
42    LOCAL_REGEX_CACHE.with_borrow_mut(f)
43}
44
45#[macro_export]
46macro_rules! cached_regex {
47    () => {};
48
49    ($vis:vis static $name:ident = $regex:expr; $($rest:tt)*) => {
50        #[allow(clippy::disallowed_methods)]
51        $vis static $name: std::sync::LazyLock<regex::Regex> = std::sync::LazyLock::new(|| regex::Regex::new($regex).unwrap());
52        $crate::regex_cache::cached_regex!($($rest)*);
53    };
54}
55pub use cached_regex;