polars_core/chunked_array/
from_iterator.rs

1//! Implementations of upstream traits for [`ChunkedArray<T>`]
2use std::borrow::{Borrow, Cow};
3
4#[cfg(feature = "object")]
5use arrow::bitmap::BitmapBuilder;
6
7use crate::chunked_array::builder::{AnonymousOwnedListBuilder, get_list_builder};
8#[cfg(feature = "object")]
9use crate::chunked_array::object::ObjectArray;
10#[cfg(feature = "object")]
11use crate::chunked_array::object::builder::get_object_type;
12use crate::prelude::*;
13use crate::utils::{NoNull, get_iter_capacity};
14
15/// FromIterator trait
16impl<T> FromIterator<Option<T::Native>> for ChunkedArray<T>
17where
18    T: PolarsNumericType,
19{
20    #[inline]
21    fn from_iter<I: IntoIterator<Item = Option<T::Native>>>(iter: I) -> Self {
22        // TODO: eliminate this FromIterator implementation entirely.
23        iter.into_iter().collect_ca(PlSmallStr::EMPTY)
24    }
25}
26
27// NoNull is only a wrapper needed for specialization
28impl<T> FromIterator<T::Native> for NoNull<ChunkedArray<T>>
29where
30    T: PolarsNumericType,
31{
32    // We use Vec because it is way faster than Arrows builder. We can do this because we
33    // know we don't have null values.
34    #[inline]
35    fn from_iter<I: IntoIterator<Item = T::Native>>(iter: I) -> Self {
36        // 2021-02-07: aligned vec was ~2x faster than arrow collect.
37        let av = iter.into_iter().collect::<Vec<T::Native>>();
38        NoNull::new(ChunkedArray::from_vec(PlSmallStr::EMPTY, av))
39    }
40}
41
42impl FromIterator<Option<bool>> for ChunkedArray<BooleanType> {
43    #[inline]
44    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
45        BooleanArray::from_iter(iter).into()
46    }
47}
48
49impl FromIterator<bool> for BooleanChunked {
50    #[inline]
51    fn from_iter<I: IntoIterator<Item = bool>>(iter: I) -> Self {
52        iter.into_iter().collect_ca(PlSmallStr::EMPTY)
53    }
54}
55
56impl FromIterator<bool> for NoNull<BooleanChunked> {
57    #[inline]
58    fn from_iter<I: IntoIterator<Item = bool>>(iter: I) -> Self {
59        NoNull::new(iter.into_iter().collect_ca(PlSmallStr::EMPTY))
60    }
61}
62
63// FromIterator for StringChunked variants.
64
65impl<Ptr> FromIterator<Option<Ptr>> for StringChunked
66where
67    Ptr: AsRef<str>,
68{
69    #[inline]
70    fn from_iter<I: IntoIterator<Item = Option<Ptr>>>(iter: I) -> Self {
71        let arr = MutableBinaryViewArray::from_iterator(iter.into_iter()).freeze();
72        ChunkedArray::with_chunk(PlSmallStr::EMPTY, arr)
73    }
74}
75
/// Local [`AsRef<T>`] trait to circumvent the orphan rule.
///
/// The trait adds no methods of its own; it only marks the concrete types listed below,
/// all of which already implement `AsRef<T>` for the given `T`.
pub trait PolarsAsRef<T: ?Sized>: AsRef<T> {}

// String-like implementors.
impl PolarsAsRef<str> for &str {}
// &["foo", "bar"]
impl PolarsAsRef<str> for &&str {}
impl PolarsAsRef<str> for String {}
impl PolarsAsRef<str> for Cow<'_, str> {}

// Byte-slice-like implementors.
impl PolarsAsRef<[u8]> for &[u8] {}
// TODO: remove!
impl PolarsAsRef<[u8]> for &&[u8] {}
impl PolarsAsRef<[u8]> for Vec<u8> {}
impl PolarsAsRef<[u8]> for Cow<'_, [u8]> {}
90
91impl<Ptr> FromIterator<Ptr> for StringChunked
92where
93    Ptr: PolarsAsRef<str>,
94{
95    #[inline]
96    fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
97        let arr = MutableBinaryViewArray::from_values_iter(iter.into_iter()).freeze();
98        ChunkedArray::with_chunk(PlSmallStr::EMPTY, arr)
99    }
100}
101
102// FromIterator for BinaryChunked variants.
103impl<Ptr> FromIterator<Option<Ptr>> for BinaryChunked
104where
105    Ptr: AsRef<[u8]>,
106{
107    #[inline]
108    fn from_iter<I: IntoIterator<Item = Option<Ptr>>>(iter: I) -> Self {
109        let arr = MutableBinaryViewArray::from_iter(iter).freeze();
110        ChunkedArray::with_chunk(PlSmallStr::EMPTY, arr)
111    }
112}
113
114impl<Ptr> FromIterator<Ptr> for BinaryChunked
115where
116    Ptr: PolarsAsRef<[u8]>,
117{
118    #[inline]
119    fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
120        let arr = MutableBinaryViewArray::from_values_iter(iter.into_iter()).freeze();
121        ChunkedArray::with_chunk(PlSmallStr::EMPTY, arr)
122    }
123}
124
125impl<Ptr> FromIterator<Ptr> for ListChunked
126where
127    Ptr: Borrow<Series>,
128{
129    #[inline]
130    fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
131        let mut it = iter.into_iter();
132        let capacity = get_iter_capacity(&it);
133
134        // first take one to get the dtype.
135        let v = match it.next() {
136            Some(v) => v,
137            None => return ListChunked::full_null(PlSmallStr::EMPTY, 0),
138        };
139        // We don't know the needed capacity. We arbitrarily choose an average of 5 elements per series.
140        let mut builder = get_list_builder(
141            v.borrow().dtype(),
142            capacity * 5,
143            capacity,
144            PlSmallStr::EMPTY,
145        );
146
147        builder.append_series(v.borrow()).unwrap();
148        for s in it {
149            builder.append_series(s.borrow()).unwrap();
150        }
151        builder.finish()
152    }
153}
154
155impl FromIterator<Option<Column>> for ListChunked {
156    fn from_iter<T: IntoIterator<Item = Option<Column>>>(iter: T) -> Self {
157        ListChunked::from_iter(
158            iter.into_iter()
159                .map(|c| c.map(|c| c.take_materialized_series())),
160        )
161    }
162}
163
164impl FromIterator<Option<Series>> for ListChunked {
165    #[inline]
166    fn from_iter<I: IntoIterator<Item = Option<Series>>>(iter: I) -> Self {
167        let mut it = iter.into_iter();
168        let capacity = get_iter_capacity(&it);
169
170        // get first non None from iter
171        let first_value;
172        let mut init_null_count = 0;
173        loop {
174            match it.next() {
175                Some(Some(s)) => {
176                    first_value = Some(s);
177                    break;
178                },
179                Some(None) => {
180                    init_null_count += 1;
181                },
182                None => return ListChunked::full_null(PlSmallStr::EMPTY, init_null_count),
183            }
184        }
185
186        match first_value {
187            None => {
188                // already returned full_null above
189                unreachable!()
190            },
191            Some(ref first_s) => {
192                // AnyValues with empty lists in python can create
193                // Series of an unknown dtype.
194                // We use the anonymousbuilder without a dtype
195                // the empty arrays is then not added (we add an extra offset instead)
196                // the next non-empty series then must have the correct dtype.
197                if matches!(first_s.dtype(), DataType::Null) && first_s.is_empty() {
198                    let mut builder =
199                        AnonymousOwnedListBuilder::new(PlSmallStr::EMPTY, capacity, None);
200                    for _ in 0..init_null_count {
201                        builder.append_null();
202                    }
203                    builder.append_empty();
204
205                    for opt_s in it {
206                        builder.append_opt_series(opt_s.as_ref()).unwrap();
207                    }
208                    builder.finish()
209                } else {
210                    // We don't know the needed capacity. We arbitrarily choose an average of 5 elements per series.
211                    let mut builder = get_list_builder(
212                        first_s.dtype(),
213                        capacity * 5,
214                        capacity,
215                        PlSmallStr::EMPTY,
216                    );
217
218                    for _ in 0..init_null_count {
219                        builder.append_null();
220                    }
221                    builder.append_series(first_s).unwrap();
222
223                    for opt_s in it {
224                        builder.append_opt_series(opt_s.as_ref()).unwrap();
225                    }
226                    builder.finish()
227                }
228            },
229        }
230    }
231}
232
233impl FromIterator<Option<Box<dyn Array>>> for ListChunked {
234    #[inline]
235    fn from_iter<I: IntoIterator<Item = Option<Box<dyn Array>>>>(iter: I) -> Self {
236        iter.into_iter().collect_ca(PlSmallStr::EMPTY)
237    }
238}
239
/// Builds an [`ObjectChunked`] from optional objects.
///
/// `None` items are stored as `T::default()` in the values buffer and flagged as
/// missing in the validity bitmap that is attached to the resulting [`ObjectArray`].
#[cfg(feature = "object")]
impl<T: PolarsObject> FromIterator<Option<T>> for ObjectChunked<T> {
    fn from_iter<I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
        let items = iter.into_iter();
        // Only the lower bound of the size hint is used to pre-size the bitmap.
        let (lower_bound, _) = items.size_hint();
        let mut validity_builder = BitmapBuilder::with_capacity(lower_bound);

        // Record validity first, then substitute a default value for missing items.
        let values: Vec<T> = items
            .map(|item| {
                validity_builder.push(item.is_some());
                item.unwrap_or_default()
            })
            .collect();

        let validity = validity_builder.into_opt_validity();
        let arr = Box::new(ObjectArray::from(values).with_validity(validity));
        let field = Arc::new(Field::new(PlSmallStr::EMPTY, get_object_type::<T>()));
        Self::new_with_compute_len(field, vec![arr])
    }
}