polars_core/chunked_array/object/
registry.rs

//! This is a heap-allocated utility that can be used to register an object type.
//!
//! That object type knows its own generic type parameter `T`, so callers can simply
//! send `&dyn Any` values and don't have to know the generic type themselves.
5use std::any::Any;
6use std::fmt::{Debug, Formatter};
7use std::ops::Deref;
8use std::sync::{Arc, LazyLock, RwLock};
9
10use arrow::array::builder::ArrayBuilder;
11use arrow::array::{Array, ArrayRef};
12use arrow::datatypes::ArrowDataType;
13use polars_utils::pl_str::PlSmallStr;
14
15use crate::chunked_array::object::builder::ObjectChunkedBuilder;
16use crate::datatypes::AnyValue;
17use crate::prelude::{ListBuilderTrait, ObjectChunked, PolarsObject};
18use crate::series::{IntoSeries, Series};
19
20/// Takes a `name` and `capacity` and constructs a new builder.
21pub type BuilderConstructor =
22    Box<dyn Fn(PlSmallStr, usize) -> Box<dyn AnonymousObjectBuilder> + Send + Sync>;
23pub type ObjectConverter = Arc<dyn Fn(AnyValue) -> Box<dyn Any> + Send + Sync>;
24pub type PyObjectConverter = Arc<dyn Fn(AnyValue) -> Box<dyn Any> + Send + Sync>;
25pub type ObjectArrayGetter = Arc<dyn Fn(&dyn Array, usize) -> Option<AnyValue<'_>> + Send + Sync>;
26
27pub struct ObjectRegistry {
28    /// A function that creates an object builder
29    pub builder_constructor: BuilderConstructor,
30    // A function that converts AnyValue to Box<dyn Any> of the object type
31    object_converter: Option<ObjectConverter>,
32    // A function that converts AnyValue to Box<dyn Any> of the PyObject type
33    pyobject_converter: Option<PyObjectConverter>,
34    pub physical_dtype: ArrowDataType,
35    // A function that gets an AnyValue from a Box<dyn Array>.
36    array_getter: ObjectArrayGetter,
37}
38
39impl Debug for ObjectRegistry {
40    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
41        write!(f, "object-registry")
42    }
43}
44
45static GLOBAL_OBJECT_REGISTRY: LazyLock<RwLock<Option<ObjectRegistry>>> =
46    LazyLock::new(Default::default);
47
48/// This trait can be registered, after which that global registration
49/// can be used to materialize object types
50pub trait AnonymousObjectBuilder: ArrayBuilder {
51    fn as_array_builder(self: Box<Self>) -> Box<dyn ArrayBuilder>;
52
53    /// # Safety
54    /// Expect `ObjectArray<T>` arrays.
55    unsafe fn from_chunks(self: Box<Self>, chunks: Vec<ArrayRef>) -> Series;
56
57    /// Append a `null` value.
58    fn append_null(&mut self);
59
60    /// Append a `T` of [`ObjectChunked<T>`][ObjectChunked<T>] made generic via the [`Any`] trait.
61    ///
62    /// [ObjectChunked<T>]: crate::chunked_array::object::ObjectChunked
63    fn append_value(&mut self, value: &dyn Any);
64
65    fn append_option(&mut self, value: Option<&dyn Any>) {
66        match value {
67            None => self.append_null(),
68            Some(v) => self.append_value(v),
69        }
70    }
71
72    /// Take the current state and materialize as a [`Series`]
73    /// the builder should not be used after that.
74    fn to_series(&mut self) -> Series;
75
76    fn get_list_builder(
77        &self,
78        name: PlSmallStr,
79        values_capacity: usize,
80        list_capacity: usize,
81    ) -> Box<dyn ListBuilderTrait>;
82}
83
84impl<T: PolarsObject> AnonymousObjectBuilder for ObjectChunkedBuilder<T> {
85    /// # Safety
86    /// Expects `ObjectArray<T>` arrays.
87    unsafe fn from_chunks(self: Box<Self>, chunks: Vec<ArrayRef>) -> Series {
88        ObjectChunked::<T>::new_with_compute_len(Arc::new(self.field().clone()), chunks)
89            .into_series()
90    }
91
92    fn as_array_builder(self: Box<Self>) -> Box<dyn ArrayBuilder> {
93        self
94    }
95
96    fn append_null(&mut self) {
97        self.append_null()
98    }
99
100    fn append_value(&mut self, value: &dyn Any) {
101        let value = value.downcast_ref::<T>().unwrap();
102        self.append_value(value.clone())
103    }
104
105    fn to_series(&mut self) -> Series {
106        let builder = std::mem::take(self);
107        builder.finish().into_series()
108    }
109    fn get_list_builder(
110        &self,
111        name: PlSmallStr,
112        values_capacity: usize,
113        list_capacity: usize,
114    ) -> Box<dyn ListBuilderTrait> {
115        Box::new(super::extension::list::ExtensionListBuilder::<T>::new(
116            name,
117            values_capacity,
118            list_capacity,
119        ))
120    }
121}
122
123pub fn register_object_builder(
124    builder_constructor: BuilderConstructor,
125    object_converter: ObjectConverter,
126    pyobject_converter: PyObjectConverter,
127    physical_dtype: ArrowDataType,
128    array_getter: ObjectArrayGetter,
129) {
130    let reg = GLOBAL_OBJECT_REGISTRY.deref();
131    let mut reg = reg.write().unwrap();
132
133    *reg = Some(ObjectRegistry {
134        builder_constructor,
135        object_converter: Some(object_converter),
136        pyobject_converter: Some(pyobject_converter),
137        physical_dtype,
138        array_getter,
139    })
140}
141
142#[cold]
143pub fn get_object_physical_type() -> ArrowDataType {
144    let reg = GLOBAL_OBJECT_REGISTRY.read().unwrap();
145    let reg = reg.as_ref().unwrap();
146    reg.physical_dtype.clone()
147}
148
149pub fn get_object_builder(name: PlSmallStr, capacity: usize) -> Box<dyn AnonymousObjectBuilder> {
150    let reg = GLOBAL_OBJECT_REGISTRY.read().unwrap();
151    let reg = reg.as_ref().unwrap();
152    (reg.builder_constructor)(name, capacity)
153}
154
155pub fn get_object_converter() -> ObjectConverter {
156    let reg = GLOBAL_OBJECT_REGISTRY.read().unwrap();
157    let reg = reg.as_ref().unwrap();
158    reg.object_converter.as_ref().unwrap().clone()
159}
160
161pub fn get_pyobject_converter() -> PyObjectConverter {
162    let reg = GLOBAL_OBJECT_REGISTRY.read().unwrap();
163    let reg = reg.as_ref().unwrap();
164    reg.pyobject_converter.as_ref().unwrap().clone()
165}
166
167pub fn get_object_array_getter() -> ObjectArrayGetter {
168    let reg = GLOBAL_OBJECT_REGISTRY.read().unwrap();
169    reg.as_ref().unwrap().array_getter.clone()
170}