use std::hash::{Hash, Hasher};
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
use std::sync::{Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard};
use hashbrown::hash_map::RawEntryMut;
use once_cell::sync::Lazy;
use polars_utils::aliases::PlRandomState;
use polars_utils::pl_str::PlSmallStr;
use crate::datatypes::{InitHashMaps2, PlIdHashMap};
use crate::hashing::_HASHMAP_INIT_SIZE;
/// Number of live users of the string cache: one per `StringCacheHolder`
/// plus one contributed by `enable_string_cache`. The cache is cleared
/// when this count returns to zero.
static STRING_CACHE_REFCOUNT: Mutex<u32> = Mutex::new(0);
/// Whether `enable_string_cache` has turned the cache on process-wide, so
/// `disable_string_cache` only releases the refcount it actually added.
static STRING_CACHE_ENABLED_GLOBALLY: AtomicBool = AtomicBool::new(false);
/// Monotonic counter stamping each fresh `SCacheInner` with a unique id.
static STRING_CACHE_UUID_CTR: AtomicU32 = AtomicU32::new(0);
/// RAII guard that keeps the global string cache alive.
///
/// Creating one (via [`StringCacheHolder::hold`]) increments the global
/// refcount; dropping it decrements the count, and the cache is cleared
/// once the count reaches zero.
pub struct StringCacheHolder {
    // Private zero-sized field: forces construction through `hold()`,
    // which performs the refcount increment.
    #[allow(dead_code)]
    private_zst: (),
}
impl Default for StringCacheHolder {
fn default() -> Self {
Self::hold()
}
}
impl StringCacheHolder {
    /// Hold the string cache: increments the global refcount and returns a
    /// guard that decrements it again when dropped.
    pub fn hold() -> StringCacheHolder {
        increment_string_cache_refcount();
        StringCacheHolder { private_zst: () }
    }
}
impl Drop for StringCacheHolder {
    /// Release this holder's refcount; the cache is cleared when the last
    /// holder (and the global enable flag) is gone.
    fn drop(&mut self) {
        decrement_string_cache_refcount();
    }
}
/// Register one more user of the global string cache.
fn increment_string_cache_refcount() {
    let mut guard = STRING_CACHE_REFCOUNT.lock().unwrap();
    *guard += 1;
}
/// Unregister one user of the global string cache.
///
/// Must be balanced with a prior `increment_string_cache_refcount`
/// (guaranteed by `StringCacheHolder`'s RAII pairing); the subtraction
/// would underflow otherwise.
fn decrement_string_cache_refcount() {
    let mut guard = STRING_CACHE_REFCOUNT.lock().unwrap();
    *guard -= 1;
    // Last user gone: drop all interned strings so memory is reclaimed.
    if *guard == 0 {
        STRING_CACHE.clear()
    }
}
/// Enable the global string cache for the whole process.
///
/// Idempotent: only the first call (since the last disable) contributes a
/// refcount increment, so repeated enables are no-ops.
pub fn enable_string_cache() {
    if !STRING_CACHE_ENABLED_GLOBALLY.swap(true, Ordering::AcqRel) {
        increment_string_cache_refcount();
    }
}
/// Disable the process-wide string cache.
///
/// Only releases the refcount that a previous `enable_string_cache`
/// actually contributed; outstanding `StringCacheHolder`s keep the cache
/// alive independently.
pub fn disable_string_cache() {
    if STRING_CACHE_ENABLED_GLOBALLY.swap(false, Ordering::AcqRel) {
        decrement_string_cache_refcount();
    }
}
/// Whether the string cache is currently in use, i.e. at least one holder
/// or the global enable flag is keeping the refcount above zero.
pub fn using_string_cache() -> bool {
    *STRING_CACHE_REFCOUNT.lock().unwrap() > 0
}
/// Map key for the interner: the precomputed hash of a string together
/// with that string's index into `SCacheInner::payloads`. The string bytes
/// themselves are stored only once, in the payload vec.
#[derive(Copy, Clone)]
struct Key {
    // Hash of the payload string under `StringCache::get_hash_builder()`.
    pub(super) hash: u64,
    // Index of the string in `SCacheInner::payloads`; doubles as the
    // categorical id handed out to callers.
    pub(super) idx: u32,
}
impl Key {
#[inline]
pub(super) fn new(hash: u64, idx: u32) -> Self {
Self { hash, idx }
}
}
impl Hash for Key {
    // Feed the precomputed string hash straight to the hasher. The map is
    // a `PlIdHashMap` (identity-style hasher), so this avoids re-hashing
    // and lets `from_hash` lookups use the same value directly.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        state.write_u64(self.hash)
    }
}
/// The interner behind the global string cache.
///
/// Strings live once in `payloads`; `map` stores only (hash, index) keys
/// and resolves the actual bytes through `payloads` during comparisons.
pub(crate) struct SCacheInner {
    map: PlIdHashMap<Key, ()>,
    // Unique id of this cache generation; changes whenever the cache is
    // rebuilt (see `Default` impl and `StringCache::clear`).
    pub(crate) uuid: u32,
    // Interned strings; a string's position here is its categorical id.
    payloads: Vec<PlSmallStr>,
}
impl SCacheInner {
    /// Return the interned string for category `cat`.
    ///
    /// # Safety
    /// `cat` must be a category id previously returned by `insert` /
    /// `insert_from_hash` on this cache generation; it is used as an
    /// unchecked index into `payloads`.
    #[inline]
    pub(crate) unsafe fn get_unchecked(&self, cat: u32) -> &str {
        self.payloads.get_unchecked(cat as usize).as_str()
    }

    /// Number of unique strings interned so far.
    pub(crate) fn len(&self) -> usize {
        self.map.len()
    }

    /// Intern `s` under the precomputed hash `h`, returning its category id.
    ///
    /// `h` must be the hash of `s` under `StringCache::get_hash_builder()`;
    /// a mismatched hash would silently corrupt lookups.
    #[inline]
    pub(crate) fn insert_from_hash(&mut self, h: u64, s: &str) -> u32 {
        // If `s` turns out to be new it is appended, so its id is the
        // current payload length.
        let mut global_idx = self.payloads.len() as u32;
        // Raw-entry lookup: compare hash first, then resolve the candidate
        // key's string through `payloads` for the full equality check.
        let entry = self.map.raw_entry_mut().from_hash(h, |key| {
            (key.hash == h) && {
                let pos = key.idx as usize;
                // SAFETY: every key in the map was created with the index
                // of a string pushed to `payloads`, so `pos` is in bounds.
                let value = unsafe { self.payloads.get_unchecked(pos) };
                s == value.as_str()
            }
        });
        match entry {
            RawEntryMut::Occupied(entry) => {
                // Already interned: hand back the existing id.
                global_idx = entry.key().idx;
            },
            RawEntryMut::Vacant(entry) => {
                let idx = self.payloads.len() as u32;
                let key = Key::new(h, idx);
                // `h` is exactly the hash this lookup used, so inserting
                // without re-hashing is valid.
                entry.insert_hashed_nocheck(h, key, ());
                self.payloads.push(PlSmallStr::from_str(s));
            },
        }
        global_idx
    }

    /// Look up the category id of `s` without inserting; `None` if `s` has
    /// not been interned.
    #[inline]
    pub(crate) fn get_cat(&self, s: &str) -> Option<u32> {
        let h = StringCache::get_hash_builder().hash_one(s);
        self.map
            .raw_entry()
            .from_hash(h, |key| {
                (key.hash == h) && {
                    let pos = key.idx as usize;
                    // SAFETY: map keys only ever hold in-bounds payload
                    // indices (see `insert_from_hash`).
                    let value = unsafe { self.payloads.get_unchecked(pos) };
                    s == value.as_str()
                }
            })
            .map(|(k, _)| k.idx)
    }

    /// Intern `s`, hashing it with the cache's fixed-seed hash builder.
    #[inline]
    pub(crate) fn insert(&mut self, s: &str) -> u32 {
        let h = StringCache::get_hash_builder().hash_one(s);
        self.insert_from_hash(h, s)
    }
}
impl Default for SCacheInner {
fn default() -> Self {
Self {
map: PlIdHashMap::with_capacity(_HASHMAP_INIT_SIZE),
uuid: STRING_CACHE_UUID_CTR.fetch_add(1, Ordering::AcqRel),
payloads: Vec::with_capacity(_HASHMAP_INIT_SIZE),
}
}
}
/// Thread-safe wrapper around the interner: a newtype over an `RwLock` so
/// reads can proceed concurrently while inserts take the write lock.
#[derive(Default)]
pub(crate) struct StringCache(pub(crate) RwLock<SCacheInner>);
impl StringCache {
    /// The hash builder used for all string-cache hashing.
    ///
    /// Seeded with a fixed value so hashes are reproducible and callers of
    /// `SCacheInner::insert_from_hash` can precompute compatible hashes.
    #[inline]
    pub(crate) fn get_hash_builder() -> PlRandomState {
        PlRandomState::with_seed(0)
    }

    /// Acquire an exclusive (write) lock on the inner cache.
    pub(crate) fn lock_map(&self) -> RwLockWriteGuard<SCacheInner> {
        self.0.write().unwrap()
    }

    /// Acquire a shared (read) lock on the inner cache.
    pub(crate) fn read_map(&self) -> RwLockReadGuard<SCacheInner> {
        self.0.read().unwrap()
    }

    /// Drop all interned strings by swapping in a fresh inner cache, which
    /// also assigns a new uuid.
    pub(crate) fn clear(&self) {
        let mut lock = self.lock_map();
        *lock = Default::default();
    }

    /// Run `fun` under the write lock, returning the cache uuid together
    /// with the closure's result.
    ///
    /// # Panics
    /// Panics if the number of interned categories exceeds `u32::MAX`.
    pub(crate) fn apply<F, T>(&self, fun: F) -> (u32, T)
    where
        F: FnOnce(&mut RwLockWriteGuard<SCacheInner>) -> T,
    {
        // Fix: lock `self` instead of reaching through the global
        // `crate::STRING_CACHE` — a `&self` method that ignored its
        // receiver was misleading. The only instance in practice is the
        // global static, so behavior is unchanged.
        let mut cache = self.lock_map();
        let result = fun(&mut cache);
        if cache.len() > u32::MAX as usize {
            panic!("not more than {} categories supported", u32::MAX)
        };
        (cache.uuid, result)
    }
}
pub(crate) static STRING_CACHE: Lazy<StringCache> = Lazy::new(Default::default);