Skip to main content

polars_core/chunked_array/object/
registry.rs

//! A heap-allocated utility that can be used to register an object type.
//!
//! The registered object type knows its own generic type parameter `T`, so callers can simply
//! pass `&dyn Any` values and don't have to know the generic type themselves.
5use std::any::Any;
6use std::fmt::{Debug, Formatter};
7use std::ops::Deref;
8use std::sync::{Arc, LazyLock, RwLock};
9
10use arrow::array::builder::ArrayBuilder;
11use arrow::array::{Array, ArrayRef};
12use arrow::datatypes::ArrowDataType;
13use polars_utils::pl_str::PlSmallStr;
14
15use crate::chunked_array::object::builder::ObjectChunkedBuilder;
16use crate::datatypes::AnyValue;
17use crate::prelude::{ListBuilderTrait, ObjectChunked, PolarsObject};
18use crate::series::{IntoSeries, Series};
19
20/// Takes a `name` and `capacity` and constructs a new builder.
21pub type BuilderConstructor =
22    Box<dyn Fn(PlSmallStr, usize) -> Box<dyn AnonymousObjectBuilder> + Send + Sync>;
23pub type ObjectConverter = Arc<dyn Fn(AnyValue) -> Box<dyn Any> + Send + Sync>;
24pub type PyObjectConverter = Arc<dyn Fn(AnyValue) -> Box<dyn Any> + Send + Sync>;
25pub type ObjectArrayGetter = Arc<dyn Fn(&dyn Array, usize) -> Option<AnyValue<'_>> + Send + Sync>;
26pub type WithGIL = Arc<dyn Fn(&mut dyn FnMut()) + Send + Sync>;
27
28pub struct ObjectRegistry {
29    /// A function that creates an object builder
30    pub builder_constructor: BuilderConstructor,
31    // A function that converts AnyValue to Box<dyn Any> of the object type
32    object_converter: Option<ObjectConverter>,
33    // A function that converts AnyValue to Box<dyn Any> of the PyObject type
34    pyobject_converter: Option<PyObjectConverter>,
35    pub physical_dtype: ArrowDataType,
36    // A function that gets an AnyValue from a Box<dyn Array>.
37    array_getter: ObjectArrayGetter,
38    // A function which grabs the Python GIL.
39    with_gil: WithGIL,
40}
41
42impl Debug for ObjectRegistry {
43    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
44        write!(f, "object-registry")
45    }
46}
47
48static GLOBAL_OBJECT_REGISTRY: LazyLock<RwLock<Option<ObjectRegistry>>> =
49    LazyLock::new(Default::default);
50
51/// This trait can be registered, after which that global registration
52/// can be used to materialize object types
53pub trait AnonymousObjectBuilder: ArrayBuilder {
54    fn as_array_builder(self: Box<Self>) -> Box<dyn ArrayBuilder>;
55
56    /// # Safety
57    /// Expect `ObjectArray<T>` arrays.
58    unsafe fn from_chunks(self: Box<Self>, chunks: Vec<ArrayRef>) -> Series;
59
60    /// Append a `null` value.
61    fn append_null(&mut self);
62
63    /// Append a `T` of [`ObjectChunked<T>`][ObjectChunked<T>] made generic via the [`Any`] trait.
64    ///
65    /// [ObjectChunked<T>]: crate::chunked_array::object::ObjectChunked
66    fn append_value(&mut self, value: &dyn Any);
67
68    fn append_option(&mut self, value: Option<&dyn Any>) {
69        match value {
70            None => self.append_null(),
71            Some(v) => self.append_value(v),
72        }
73    }
74
75    /// Take the current state and materialize as a [`Series`]
76    /// the builder should not be used after that.
77    fn to_series(&mut self) -> Series;
78
79    fn get_list_builder(
80        &self,
81        name: PlSmallStr,
82        values_capacity: usize,
83        list_capacity: usize,
84    ) -> Box<dyn ListBuilderTrait>;
85}
86
87impl<T: PolarsObject> AnonymousObjectBuilder for ObjectChunkedBuilder<T> {
88    /// # Safety
89    /// Expects `ObjectArray<T>` arrays.
90    unsafe fn from_chunks(self: Box<Self>, chunks: Vec<ArrayRef>) -> Series {
91        ObjectChunked::<T>::new_with_compute_len(Arc::new(self.field().clone()), chunks)
92            .into_series()
93    }
94
95    fn as_array_builder(self: Box<Self>) -> Box<dyn ArrayBuilder> {
96        self
97    }
98
99    fn append_null(&mut self) {
100        self.append_null()
101    }
102
103    fn append_value(&mut self, value: &dyn Any) {
104        let value = value.downcast_ref::<T>().unwrap();
105        self.append_value(value.clone())
106    }
107
108    fn to_series(&mut self) -> Series {
109        let builder = std::mem::take(self);
110        builder.finish().into_series()
111    }
112    fn get_list_builder(
113        &self,
114        name: PlSmallStr,
115        values_capacity: usize,
116        list_capacity: usize,
117    ) -> Box<dyn ListBuilderTrait> {
118        Box::new(super::extension::list::ExtensionListBuilder::<T>::new(
119            name,
120            values_capacity,
121            list_capacity,
122        ))
123    }
124}
125
126pub fn register_object_builder(
127    builder_constructor: BuilderConstructor,
128    object_converter: ObjectConverter,
129    pyobject_converter: PyObjectConverter,
130    physical_dtype: ArrowDataType,
131    array_getter: ObjectArrayGetter,
132    with_gil: WithGIL,
133) {
134    let reg = GLOBAL_OBJECT_REGISTRY.deref();
135    let mut reg = reg.write().unwrap();
136
137    *reg = Some(ObjectRegistry {
138        builder_constructor,
139        object_converter: Some(object_converter),
140        pyobject_converter: Some(pyobject_converter),
141        physical_dtype,
142        array_getter,
143        with_gil,
144    })
145}
146
147#[cold]
148pub fn get_object_physical_type() -> ArrowDataType {
149    let reg = GLOBAL_OBJECT_REGISTRY.read().unwrap();
150    let reg = reg.as_ref().unwrap();
151    reg.physical_dtype.clone()
152}
153
154pub fn get_object_builder(name: PlSmallStr, capacity: usize) -> Box<dyn AnonymousObjectBuilder> {
155    let reg = GLOBAL_OBJECT_REGISTRY.read().unwrap();
156    let reg = reg.as_ref().unwrap();
157    (reg.builder_constructor)(name, capacity)
158}
159
160pub fn get_object_converter() -> ObjectConverter {
161    let reg = GLOBAL_OBJECT_REGISTRY.read().unwrap();
162    let reg = reg.as_ref().unwrap();
163    reg.object_converter.as_ref().unwrap().clone()
164}
165
166pub fn get_pyobject_converter() -> PyObjectConverter {
167    let reg = GLOBAL_OBJECT_REGISTRY.read().unwrap();
168    let reg = reg.as_ref().unwrap();
169    reg.pyobject_converter.as_ref().unwrap().clone()
170}
171
172pub fn get_object_array_getter() -> ObjectArrayGetter {
173    let reg = GLOBAL_OBJECT_REGISTRY.read().unwrap();
174    reg.as_ref().unwrap().array_getter.clone()
175}
176
177/// Run the given function while holding the GIL.
178///
179/// This is sometimes used to avoid the overhead of repeatedly
180/// releasing and acquiring the GIL.
181pub fn run_with_gil<R, F: FnOnce() -> R>(f: F) -> R {
182    let reg = GLOBAL_OBJECT_REGISTRY.read().unwrap();
183    let with_gil = reg.as_ref().unwrap().with_gil.clone();
184    let r = &mut None;
185    let f = &mut Some(f);
186    (with_gil)(&mut || {
187        *r = Some((f.take().unwrap())());
188    });
189    r.take().unwrap()
190}