polars_core/chunked_array/builder/
mod.rs

1mod boolean;
2#[cfg(feature = "dtype-categorical")]
3mod categorical;
4#[cfg(feature = "dtype-array")]
5pub mod fixed_size_list;
6pub mod list;
7mod null;
8mod primitive;
9mod string;
10
11use std::sync::Arc;
12
13use arrow::array::*;
14use arrow::bitmap::Bitmap;
15pub use boolean::*;
16#[cfg(feature = "dtype-categorical")]
17pub use categorical::*;
18#[cfg(feature = "dtype-array")]
19pub(crate) use fixed_size_list::*;
20pub use list::*;
21pub use null::*;
22pub use primitive::*;
23pub use string::*;
24
25use crate::chunked_array::to_primitive;
26use crate::prelude::*;
27use crate::utils::{NoNull, get_iter_capacity};
28
29// N: the value type; T: the sentinel type
30pub trait ChunkedBuilder<N, T: PolarsDataType> {
31    fn append_value(&mut self, val: N);
32    fn append_null(&mut self);
33    fn append_option(&mut self, opt_val: Option<N>) {
34        match opt_val {
35            Some(v) => self.append_value(v),
36            None => self.append_null(),
37        }
38    }
39    fn finish(self) -> ChunkedArray<T>;
40
41    fn shrink_to_fit(&mut self);
42}
43
44// Used in polars/src/chunked_array/apply.rs:24 to collect from aligned vecs and null bitmaps
45impl<T> FromIterator<(Vec<T::Native>, Option<Bitmap>)> for ChunkedArray<T>
46where
47    T: PolarsNumericType,
48{
49    fn from_iter<I: IntoIterator<Item = (Vec<T::Native>, Option<Bitmap>)>>(iter: I) -> Self {
50        let chunks = iter
51            .into_iter()
52            .map(|(values, opt_buffer)| to_primitive::<T>(values, opt_buffer));
53        ChunkedArray::from_chunk_iter(PlSmallStr::EMPTY, chunks)
54    }
55}
56
/// Constructors that build `Self` from slices or iterators of `N`.
///
/// `T` does not appear in any method signature of this trait; the impls in this
/// module use it for the data-type marker (e.g. `BooleanType` with `N = bool`).
pub trait NewChunkedArray<T, N> {
    /// Create a new ChunkedArray named `name` from a slice of values.
    fn from_slice(name: PlSmallStr, v: &[N]) -> Self;
    /// Create a new ChunkedArray named `name` from a slice of optional values.
    fn from_slice_options(name: PlSmallStr, opt_v: &[Option<N>]) -> Self;

    /// Create a new ChunkedArray from an iterator of optional values.
    fn from_iter_options(name: PlSmallStr, it: impl Iterator<Item = Option<N>>) -> Self;

    /// Create a new ChunkedArray from an iterator of values.
    fn from_iter_values(name: PlSmallStr, it: impl Iterator<Item = N>) -> Self;
}
67
68impl<T> NewChunkedArray<T, T::Native> for ChunkedArray<T>
69where
70    T: PolarsNumericType,
71{
72    fn from_slice(name: PlSmallStr, v: &[T::Native]) -> Self {
73        let arr =
74            PrimitiveArray::from_slice(v).to(T::get_static_dtype().to_arrow(CompatLevel::newest()));
75        ChunkedArray::with_chunk(name, arr)
76    }
77
78    fn from_slice_options(name: PlSmallStr, opt_v: &[Option<T::Native>]) -> Self {
79        Self::from_iter_options(name, opt_v.iter().copied())
80    }
81
82    fn from_iter_options(
83        name: PlSmallStr,
84        it: impl Iterator<Item = Option<T::Native>>,
85    ) -> ChunkedArray<T> {
86        let mut builder = PrimitiveChunkedBuilder::new(name, get_iter_capacity(&it));
87        it.for_each(|opt| builder.append_option(opt));
88        builder.finish()
89    }
90
91    /// Create a new ChunkedArray from an iterator.
92    fn from_iter_values(name: PlSmallStr, it: impl Iterator<Item = T::Native>) -> ChunkedArray<T> {
93        let ca: NoNull<ChunkedArray<_>> = it.collect();
94        let mut ca = ca.into_inner();
95        ca.rename(name);
96        ca
97    }
98}
99
100impl NewChunkedArray<BooleanType, bool> for BooleanChunked {
101    fn from_slice(name: PlSmallStr, v: &[bool]) -> Self {
102        Self::from_iter_values(name, v.iter().copied())
103    }
104
105    fn from_slice_options(name: PlSmallStr, opt_v: &[Option<bool>]) -> Self {
106        Self::from_iter_options(name, opt_v.iter().copied())
107    }
108
109    fn from_iter_options(
110        name: PlSmallStr,
111        it: impl Iterator<Item = Option<bool>>,
112    ) -> ChunkedArray<BooleanType> {
113        let mut builder = BooleanChunkedBuilder::new(name, get_iter_capacity(&it));
114        it.for_each(|opt| builder.append_option(opt));
115        builder.finish()
116    }
117
118    /// Create a new ChunkedArray from an iterator.
119    fn from_iter_values(
120        name: PlSmallStr,
121        it: impl Iterator<Item = bool>,
122    ) -> ChunkedArray<BooleanType> {
123        let mut ca: ChunkedArray<_> = it.collect();
124        ca.rename(name);
125        ca
126    }
127}
128
129impl<S> NewChunkedArray<StringType, S> for StringChunked
130where
131    S: AsRef<str>,
132{
133    fn from_slice(name: PlSmallStr, v: &[S]) -> Self {
134        let arr = Utf8ViewArray::from_slice_values(v);
135        ChunkedArray::with_chunk(name, arr)
136    }
137
138    fn from_slice_options(name: PlSmallStr, opt_v: &[Option<S>]) -> Self {
139        let arr = Utf8ViewArray::from_slice(opt_v);
140        ChunkedArray::with_chunk(name, arr)
141    }
142
143    fn from_iter_options(name: PlSmallStr, it: impl Iterator<Item = Option<S>>) -> Self {
144        let arr = MutableBinaryViewArray::from_iterator(it).freeze();
145        ChunkedArray::with_chunk(name, arr)
146    }
147
148    /// Create a new ChunkedArray from an iterator.
149    fn from_iter_values(name: PlSmallStr, it: impl Iterator<Item = S>) -> Self {
150        let arr = MutableBinaryViewArray::from_values_iter(it).freeze();
151        ChunkedArray::with_chunk(name, arr)
152    }
153}
154
155impl<B> NewChunkedArray<BinaryType, B> for BinaryChunked
156where
157    B: AsRef<[u8]>,
158{
159    fn from_slice(name: PlSmallStr, v: &[B]) -> Self {
160        let arr = BinaryViewArray::from_slice_values(v);
161        ChunkedArray::with_chunk(name, arr)
162    }
163
164    fn from_slice_options(name: PlSmallStr, opt_v: &[Option<B>]) -> Self {
165        let arr = BinaryViewArray::from_slice(opt_v);
166        ChunkedArray::with_chunk(name, arr)
167    }
168
169    fn from_iter_options(name: PlSmallStr, it: impl Iterator<Item = Option<B>>) -> Self {
170        let arr = MutableBinaryViewArray::from_iterator(it).freeze();
171        ChunkedArray::with_chunk(name, arr)
172    }
173
174    /// Create a new ChunkedArray from an iterator.
175    fn from_iter_values(name: PlSmallStr, it: impl Iterator<Item = B>) -> Self {
176        let arr = MutableBinaryViewArray::from_values_iter(it).freeze();
177        ChunkedArray::with_chunk(name, arr)
178    }
179}
180
#[cfg(test)]
mod test {
    use super::*;

    /// Appending a mix of values and nulls round-trips through the primitive builder.
    #[test]
    fn test_primitive_builder() {
        let expected = &[Some(1), None, Some(2), Some(3), None, Some(4)];

        let mut builder =
            PrimitiveChunkedBuilder::<UInt32Type>::new(PlSmallStr::from_static("foo"), 6);
        for opt in expected {
            builder.append_option(*opt);
        }

        let ca = builder.finish();
        assert_eq!(Vec::from(&ca), expected);
    }

    /// Appending series (including a multi-chunk one) and nulls to the list builder.
    #[test]
    fn test_list_builder() {
        // Both halves of the test start from an identically configured builder.
        fn new_builder() -> ListPrimitiveChunkedBuilder<Int32Type> {
            ListPrimitiveChunkedBuilder::<Int32Type>::new(
                PlSmallStr::from_static("a"),
                10,
                5,
                DataType::Int32,
            )
        }

        let mut builder = new_builder();

        // Create a series containing two chunks.
        let s2 = Int32Chunked::from_slice(PlSmallStr::from_static("b"), &[4, 5, 6]).into_series();
        let mut s1 =
            Int32Chunked::from_slice(PlSmallStr::from_static("a"), &[1, 2, 3]).into_series();
        s1.append(&s2).unwrap();

        builder.append_series(&s1).unwrap();
        builder.append_series(&s2).unwrap();
        let ls = builder.finish();

        match ls.get_any_value(0).unwrap() {
            // many chunks are aggregated to one in the ListArray
            AnyValue::List(inner) => assert_eq!(inner.len(), 6),
            _ => panic!(),
        }
        match ls.get_any_value(1).unwrap() {
            AnyValue::List(inner) => assert_eq!(inner.len(), 3),
            _ => panic!(),
        }

        // Test list collect.
        let collected = [&s1, &s2].iter().copied().collect::<ListChunked>();
        assert_eq!(collected.get_as_series(0).unwrap().len(), 6);
        assert_eq!(collected.get_as_series(1).unwrap().len(), 3);

        // A trailing null entry survives `explode` as a single null row.
        let mut builder = new_builder();
        builder.append_series(&s1).unwrap();
        builder.append_null();

        let exploded = builder.finish().explode(false).unwrap();
        assert_eq!(exploded.len(), 7);
        assert_eq!(exploded.get(6).unwrap(), AnyValue::Null);
    }
}
246}