polars_core/chunked_array/object/
registry.rs

1//! This is a heap allocated utility that can be used to register an object type.
2//!
3//! That object type will know its own generic type parameter `T` and callers can simply
4//! send `&Any` values and don't have to know the generic type themselves.
5use std::any::Any;
6use std::fmt::{Debug, Formatter};
7use std::ops::Deref;
8use std::sync::{Arc, LazyLock, RwLock};
9
10use arrow::array::ArrayRef;
11use arrow::array::builder::ArrayBuilder;
12use arrow::datatypes::ArrowDataType;
13use polars_utils::pl_str::PlSmallStr;
14
15use crate::chunked_array::object::builder::ObjectChunkedBuilder;
16use crate::datatypes::AnyValue;
17use crate::prelude::{ListBuilderTrait, ObjectChunked, PolarsObject};
18use crate::series::{IntoSeries, Series};
19
/// Takes a `name` and `capacity` and constructs a new builder.
///
/// The builder is returned type-erased as a boxed [`AnonymousObjectBuilder`], so the
/// caller does not need to name the concrete object type.
pub type BuilderConstructor =
    Box<dyn Fn(PlSmallStr, usize) -> Box<dyn AnonymousObjectBuilder> + Send + Sync>;
/// Converts an [`AnyValue`] into a type-erased `Box<dyn Any>` of the object type.
///
/// Held in an [`Arc`], so cloning a converter only clones the handle.
pub type ObjectConverter = Arc<dyn Fn(AnyValue) -> Box<dyn Any> + Send + Sync>;
/// Converts an [`AnyValue`] into a type-erased `Box<dyn Any>` of the PyObject type.
///
/// Has the same signature as [`ObjectConverter`].
pub type PyObjectConverter = Arc<dyn Fn(AnyValue) -> Box<dyn Any> + Send + Sync>;
25
/// The callbacks and metadata held by the global object registry.
///
/// An instance is installed by `register_object_builder` and read back through
/// the `get_*` accessor functions of this module.
pub struct ObjectRegistry {
    /// A function that creates an object builder
    pub builder_constructor: BuilderConstructor,
    /// A function that converts `AnyValue` to `Box<dyn Any>` of the object type.
    ///
    /// `register_object_builder` always stores `Some` here.
    object_converter: Option<ObjectConverter>,
    /// A function that converts `AnyValue` to `Box<dyn Any>` of the PyObject type.
    ///
    /// `register_object_builder` always stores `Some` here.
    pyobject_converter: Option<PyObjectConverter>,
    /// The Arrow data type handed out by `get_object_physical_type`.
    pub physical_dtype: ArrowDataType,
}
35
36impl Debug for ObjectRegistry {
37    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
38        write!(f, "object-registry")
39    }
40}
41
42static GLOBAL_OBJECT_REGISTRY: LazyLock<RwLock<Option<ObjectRegistry>>> =
43    LazyLock::new(Default::default);
44
45/// This trait can be registered, after which that global registration
46/// can be used to materialize object types
47pub trait AnonymousObjectBuilder: ArrayBuilder {
48    fn as_array_builder(self: Box<Self>) -> Box<dyn ArrayBuilder>;
49
50    /// # Safety
51    /// Expect `ObjectArray<T>` arrays.
52    unsafe fn from_chunks(self: Box<Self>, chunks: Vec<ArrayRef>) -> Series;
53
54    /// Append a `null` value.
55    fn append_null(&mut self);
56
57    /// Append a `T` of [`ObjectChunked<T>`][ObjectChunked<T>] made generic via the [`Any`] trait.
58    ///
59    /// [ObjectChunked<T>]: crate::chunked_array::object::ObjectChunked
60    fn append_value(&mut self, value: &dyn Any);
61
62    fn append_option(&mut self, value: Option<&dyn Any>) {
63        match value {
64            None => self.append_null(),
65            Some(v) => self.append_value(v),
66        }
67    }
68
69    /// Take the current state and materialize as a [`Series`]
70    /// the builder should not be used after that.
71    fn to_series(&mut self) -> Series;
72
73    fn get_list_builder(
74        &self,
75        name: PlSmallStr,
76        values_capacity: usize,
77        list_capacity: usize,
78    ) -> Box<dyn ListBuilderTrait>;
79}
80
81impl<T: PolarsObject> AnonymousObjectBuilder for ObjectChunkedBuilder<T> {
82    /// # Safety
83    /// Expects `ObjectArray<T>` arrays.
84    unsafe fn from_chunks(self: Box<Self>, chunks: Vec<ArrayRef>) -> Series {
85        ObjectChunked::<T>::new_with_compute_len(Arc::new(self.field().clone()), chunks)
86            .into_series()
87    }
88
89    fn as_array_builder(self: Box<Self>) -> Box<dyn ArrayBuilder> {
90        self
91    }
92
93    fn append_null(&mut self) {
94        self.append_null()
95    }
96
97    fn append_value(&mut self, value: &dyn Any) {
98        let value = value.downcast_ref::<T>().unwrap();
99        self.append_value(value.clone())
100    }
101
102    fn to_series(&mut self) -> Series {
103        let builder = std::mem::take(self);
104        builder.finish().into_series()
105    }
106    fn get_list_builder(
107        &self,
108        name: PlSmallStr,
109        values_capacity: usize,
110        list_capacity: usize,
111    ) -> Box<dyn ListBuilderTrait> {
112        Box::new(super::extension::list::ExtensionListBuilder::<T>::new(
113            name,
114            values_capacity,
115            list_capacity,
116        ))
117    }
118}
119
120pub fn register_object_builder(
121    builder_constructor: BuilderConstructor,
122    object_converter: ObjectConverter,
123    pyobject_converter: PyObjectConverter,
124    physical_dtype: ArrowDataType,
125) {
126    let reg = GLOBAL_OBJECT_REGISTRY.deref();
127    let mut reg = reg.write().unwrap();
128
129    *reg = Some(ObjectRegistry {
130        builder_constructor,
131        object_converter: Some(object_converter),
132        pyobject_converter: Some(pyobject_converter),
133        physical_dtype,
134    })
135}
136
137#[cold]
138pub fn get_object_physical_type() -> ArrowDataType {
139    let reg = GLOBAL_OBJECT_REGISTRY.read().unwrap();
140    let reg = reg.as_ref().unwrap();
141    reg.physical_dtype.clone()
142}
143
144pub fn get_object_builder(name: PlSmallStr, capacity: usize) -> Box<dyn AnonymousObjectBuilder> {
145    let reg = GLOBAL_OBJECT_REGISTRY.read().unwrap();
146    let reg = reg.as_ref().unwrap();
147    (reg.builder_constructor)(name, capacity)
148}
149
150pub fn get_object_converter() -> ObjectConverter {
151    let reg = GLOBAL_OBJECT_REGISTRY.read().unwrap();
152    let reg = reg.as_ref().unwrap();
153    reg.object_converter.as_ref().unwrap().clone()
154}
155
156pub fn get_pyobject_converter() -> PyObjectConverter {
157    let reg = GLOBAL_OBJECT_REGISTRY.read().unwrap();
158    let reg = reg.as_ref().unwrap();
159    reg.pyobject_converter.as_ref().unwrap().clone()
160}