polars_core/series/
builder.rs
1use arrow::array::builder::{ArrayBuilder, ShareStrategy, make_builder};
2use polars_utils::IdxSize;
3
4#[cfg(feature = "object")]
5use crate::chunked_array::object::registry::get_object_builder;
6use crate::prelude::*;
7use crate::utils::Container;
8
/// Folds the reverse mapping carried by a categorical `dtype` into
/// `rev_map_merger`.
///
/// Nothing happens unless `dtype` is `DataType::Categorical` with a reverse
/// mapping attached. The first mapping seen creates the merger; every later
/// one is merged into the existing merger.
///
/// # Panics
/// Panics with the string-cache-mismatch error message when the mapping is not
/// an active global one, and when `merge_map` returns an error.
#[cfg(feature = "dtype-categorical")]
#[inline(always)]
fn fill_rev_map(dtype: &DataType, rev_map_merger: &mut Option<Box<GlobalRevMapMerger>>) {
    // Only categoricals that actually carry a reverse mapping are of interest.
    let DataType::Categorical(Some(incoming), _) = dtype else {
        return;
    };

    assert!(
        incoming.is_active_global(),
        "{}",
        polars_err!(string_cache_mismatch)
    );

    match rev_map_merger {
        Some(existing) => existing.merge_map(incoming).unwrap(),
        None => {
            // First categorical input: seed the merger with its mapping.
            *rev_map_merger = Some(Box::new(GlobalRevMapMerger::new(incoming.clone())));
        },
    }
}
25
/// Incrementally assembles a `Series` of a fixed dtype on top of a boxed,
/// type-erased arrow `ArrayBuilder`.
pub struct SeriesBuilder {
    // Logical dtype passed to `new`.
    dtype: DataType,
    // Boxed arrow builder that accumulates the appended values.
    builder: Box<dyn ArrayBuilder>,
    // Collects the reverse mappings of appended categorical series. Starts as
    // `None` and is taken (left as `None`) whenever the builder is frozen.
    #[cfg(feature = "dtype-categorical")]
    rev_map_merger: Option<Box<GlobalRevMapMerger>>,
}
33
34impl SeriesBuilder {
35 pub fn new(dtype: DataType) -> Self {
36 #[cfg(feature = "object")]
38 if matches!(dtype, DataType::Object(_)) {
39 let builder = get_object_builder(PlSmallStr::EMPTY, 0).as_array_builder();
40 return Self {
41 dtype,
42 builder,
43 #[cfg(feature = "dtype-categorical")]
44 rev_map_merger: None,
45 };
46 }
47
48 let builder = make_builder(&dtype.to_physical().to_arrow(CompatLevel::newest()));
49 Self {
50 dtype,
51 builder,
52 #[cfg(feature = "dtype-categorical")]
53 rev_map_merger: None,
54 }
55 }
56
57 #[inline(always)]
58 pub fn reserve(&mut self, additional: usize) {
59 self.builder.reserve(additional);
60 }
61
62 fn freeze_dtype(&mut self) -> DataType {
63 #[cfg(feature = "dtype-categorical")]
64 if let Some(rev_map_merger) = self.rev_map_merger.take() {
65 let DataType::Categorical(_, order) = self.dtype else {
66 unreachable!()
67 };
68 return DataType::Categorical(Some(rev_map_merger.finish()), order);
69 }
70
71 self.dtype.clone()
72 }
73
74 pub fn freeze(mut self, name: PlSmallStr) -> Series {
75 unsafe {
76 let dtype = self.freeze_dtype();
77 Series::from_chunks_and_dtype_unchecked(name, vec![self.builder.freeze()], &dtype)
78 }
79 }
80
81 pub fn freeze_reset(&mut self, name: PlSmallStr) -> Series {
82 unsafe {
83 Series::from_chunks_and_dtype_unchecked(
84 name,
85 vec![self.builder.freeze_reset()],
86 &self.freeze_dtype(),
87 )
88 }
89 }
90
91 pub fn len(&self) -> usize {
92 self.builder.len()
93 }
94
95 pub fn is_empty(&self) -> bool {
96 self.builder.len() == 0
97 }
98
99 #[inline(always)]
102 pub fn extend(&mut self, other: &Series, share: ShareStrategy) {
103 #[cfg(feature = "dtype-categorical")]
104 {
105 fill_rev_map(other.dtype(), &mut self.rev_map_merger);
106 }
107
108 self.subslice_extend(other, 0, other.len(), share);
109 }
110
111 pub fn subslice_extend(
114 &mut self,
115 other: &Series,
116 mut start: usize,
117 mut length: usize,
118 share: ShareStrategy,
119 ) {
120 #[cfg(feature = "dtype-categorical")]
121 {
122 fill_rev_map(other.dtype(), &mut self.rev_map_merger);
123 }
124
125 if length == 0 || other.is_empty() {
126 return;
127 }
128
129 for chunk in other.chunks() {
130 if start < chunk.len() {
131 let length_in_chunk = length.min(chunk.len() - start);
132 self.builder
133 .subslice_extend(&**chunk, start, length_in_chunk, share);
134
135 start = 0;
136 length -= length_in_chunk;
137 if length == 0 {
138 break;
139 }
140 } else {
141 start -= chunk.len();
142 }
143 }
144 }
145
146 pub fn subslice_extend_repeated(
147 &mut self,
148 other: &Series,
149 start: usize,
150 length: usize,
151 repeats: usize,
152 share: ShareStrategy,
153 ) {
154 #[cfg(feature = "dtype-categorical")]
155 {
156 fill_rev_map(other.dtype(), &mut self.rev_map_merger);
157 }
158
159 if length == 0 || other.is_empty() {
160 return;
161 }
162
163 let chunks = other.chunks();
164 if chunks.len() == 1 {
165 self.builder
166 .subslice_extend_repeated(&*chunks[0], start, length, repeats, share);
167 } else {
168 for _ in 0..repeats {
169 self.subslice_extend(other, start, length, share);
170 }
171 }
172 }
173
174 pub unsafe fn gather_extend(&mut self, other: &Series, idxs: &[IdxSize], share: ShareStrategy) {
182 #[cfg(feature = "dtype-categorical")]
183 {
184 fill_rev_map(other.dtype(), &mut self.rev_map_merger);
185 }
186
187 let chunks = other.chunks();
188 assert!(chunks.len() == 1);
189 self.builder.gather_extend(&*chunks[0], idxs, share);
190 }
191
192 pub fn opt_gather_extend(&mut self, other: &Series, idxs: &[IdxSize], share: ShareStrategy) {
193 #[cfg(feature = "dtype-categorical")]
194 {
195 fill_rev_map(other.dtype(), &mut self.rev_map_merger);
196 }
197
198 let chunks = other.chunks();
199 assert!(chunks.len() == 1);
200 self.builder.opt_gather_extend(&*chunks[0], idxs, share);
201 }
202}