polars_core/series/
builder.rs

1use arrow::array::builder::{ArrayBuilder, ShareStrategy, make_builder};
2use polars_utils::IdxSize;
3
4#[cfg(feature = "object")]
5use crate::chunked_array::object::registry::get_object_builder;
6use crate::prelude::*;
7use crate::utils::Container;
8
/// Folds the reverse mapping carried by a categorical `dtype` into `rev_map_merger`.
///
/// Dtypes other than `Categorical` with a reverse mapping attached are ignored.
/// The first mapping seen initialises the merger; every later mapping is merged
/// into the existing one.
///
/// # Panics
/// Panics with the string-cache-mismatch error if `rev_map.is_active_global()`
/// is false, or if merging the mapping into the existing merger fails.
#[cfg(feature = "dtype-categorical")]
#[inline(always)]
fn fill_rev_map(dtype: &DataType, rev_map_merger: &mut Option<Box<GlobalRevMapMerger>>) {
    // Nothing to track unless this is a categorical with a reverse mapping.
    let DataType::Categorical(Some(rev_map), _) = dtype else {
        return;
    };

    assert!(
        rev_map.is_active_global(),
        "{}",
        polars_err!(string_cache_mismatch)
    );

    match rev_map_merger {
        Some(merger) => {
            merger.merge_map(rev_map).unwrap();
        },
        None => {
            *rev_map_merger = Some(Box::new(GlobalRevMapMerger::new(rev_map.clone())));
        },
    }
}
25
26/// A type-erased wrapper around ArrayBuilder.
27pub struct SeriesBuilder {
28    dtype: DataType,
29    builder: Box<dyn ArrayBuilder>,
30    #[cfg(feature = "dtype-categorical")]
31    rev_map_merger: Option<Box<GlobalRevMapMerger>>,
32}
33
34impl SeriesBuilder {
35    pub fn new(dtype: DataType) -> Self {
36        // FIXME: get rid of this hack.
37        #[cfg(feature = "object")]
38        if matches!(dtype, DataType::Object(_)) {
39            let builder = get_object_builder(PlSmallStr::EMPTY, 0).as_array_builder();
40            return Self {
41                dtype,
42                builder,
43                #[cfg(feature = "dtype-categorical")]
44                rev_map_merger: None,
45            };
46        }
47
48        let builder = make_builder(&dtype.to_physical().to_arrow(CompatLevel::newest()));
49        Self {
50            dtype,
51            builder,
52            #[cfg(feature = "dtype-categorical")]
53            rev_map_merger: None,
54        }
55    }
56
57    #[inline(always)]
58    pub fn reserve(&mut self, additional: usize) {
59        self.builder.reserve(additional);
60    }
61
62    fn freeze_dtype(&mut self) -> DataType {
63        #[cfg(feature = "dtype-categorical")]
64        if let Some(rev_map_merger) = self.rev_map_merger.take() {
65            let DataType::Categorical(_, order) = self.dtype else {
66                unreachable!()
67            };
68            return DataType::Categorical(Some(rev_map_merger.finish()), order);
69        }
70
71        self.dtype.clone()
72    }
73
74    pub fn freeze(mut self, name: PlSmallStr) -> Series {
75        unsafe {
76            let dtype = self.freeze_dtype();
77            Series::from_chunks_and_dtype_unchecked(name, vec![self.builder.freeze()], &dtype)
78        }
79    }
80
81    pub fn freeze_reset(&mut self, name: PlSmallStr) -> Series {
82        unsafe {
83            Series::from_chunks_and_dtype_unchecked(
84                name,
85                vec![self.builder.freeze_reset()],
86                &self.freeze_dtype(),
87            )
88        }
89    }
90
91    pub fn len(&self) -> usize {
92        self.builder.len()
93    }
94
95    pub fn is_empty(&self) -> bool {
96        self.builder.len() == 0
97    }
98
99    /// Extends this builder with the contents of the given series. May panic if
100    /// other does not match the dtype of this builder.
101    #[inline(always)]
102    pub fn extend(&mut self, other: &Series, share: ShareStrategy) {
103        #[cfg(feature = "dtype-categorical")]
104        {
105            fill_rev_map(other.dtype(), &mut self.rev_map_merger);
106        }
107
108        self.subslice_extend(other, 0, other.len(), share);
109    }
110
111    /// Extends this builder with the contents of the given series subslice.
112    /// May panic if other does not match the dtype of this builder.
113    pub fn subslice_extend(
114        &mut self,
115        other: &Series,
116        mut start: usize,
117        mut length: usize,
118        share: ShareStrategy,
119    ) {
120        #[cfg(feature = "dtype-categorical")]
121        {
122            fill_rev_map(other.dtype(), &mut self.rev_map_merger);
123        }
124
125        if length == 0 || other.is_empty() {
126            return;
127        }
128
129        for chunk in other.chunks() {
130            if start < chunk.len() {
131                let length_in_chunk = length.min(chunk.len() - start);
132                self.builder
133                    .subslice_extend(&**chunk, start, length_in_chunk, share);
134
135                start = 0;
136                length -= length_in_chunk;
137                if length == 0 {
138                    break;
139                }
140            } else {
141                start -= chunk.len();
142            }
143        }
144    }
145
146    pub fn subslice_extend_repeated(
147        &mut self,
148        other: &Series,
149        start: usize,
150        length: usize,
151        repeats: usize,
152        share: ShareStrategy,
153    ) {
154        #[cfg(feature = "dtype-categorical")]
155        {
156            fill_rev_map(other.dtype(), &mut self.rev_map_merger);
157        }
158
159        if length == 0 || other.is_empty() {
160            return;
161        }
162
163        let chunks = other.chunks();
164        if chunks.len() == 1 {
165            self.builder
166                .subslice_extend_repeated(&*chunks[0], start, length, repeats, share);
167        } else {
168            for _ in 0..repeats {
169                self.subslice_extend(other, start, length, share);
170            }
171        }
172    }
173
174    /// Extends this builder with the contents of the given series at the given
175    /// indices. That is, `other[idxs[i]]` is appended to this builder in order,
176    /// for each i=0..idxs.len(). May panic if other does not match the dtype
177    /// of this builder, or if the other series is not rechunked.
178    ///
179    /// # Safety
180    /// The indices must be in-bounds.
181    pub unsafe fn gather_extend(&mut self, other: &Series, idxs: &[IdxSize], share: ShareStrategy) {
182        #[cfg(feature = "dtype-categorical")]
183        {
184            fill_rev_map(other.dtype(), &mut self.rev_map_merger);
185        }
186
187        let chunks = other.chunks();
188        assert!(chunks.len() == 1);
189        self.builder.gather_extend(&*chunks[0], idxs, share);
190    }
191
192    pub fn opt_gather_extend(&mut self, other: &Series, idxs: &[IdxSize], share: ShareStrategy) {
193        #[cfg(feature = "dtype-categorical")]
194        {
195            fill_rev_map(other.dtype(), &mut self.rev_map_merger);
196        }
197
198        let chunks = other.chunks();
199        assert!(chunks.len() == 1);
200        self.builder.opt_gather_extend(&*chunks[0], idxs, share);
201    }
202}