polars_core/chunked_array/ops/
extend.rs

1use arrow::Either;
2use arrow::compute::concatenate::concatenate;
3
4use crate::prelude::append::update_sorted_flag_before_append;
5use crate::prelude::*;
6use crate::series::IsSorted;
7
8fn extend_immutable(immutable: &dyn Array, chunks: &mut Vec<ArrayRef>, other_chunks: &[ArrayRef]) {
9    let out = if chunks.len() == 1 {
10        concatenate(&[immutable, &*other_chunks[0]]).unwrap()
11    } else {
12        let mut arrays = Vec::with_capacity(other_chunks.len() + 1);
13        arrays.push(immutable);
14        arrays.extend(other_chunks.iter().map(|a| &**a));
15        concatenate(&arrays).unwrap()
16    };
17
18    chunks.push(out);
19}
20
21impl<T> ChunkedArray<T>
22where
23    T: PolarsNumericType,
24{
25    /// Extend the memory backed by this array with the values from `other`.
26    ///
27    /// Different from [`ChunkedArray::append`] which adds chunks to this [`ChunkedArray`] `extend`
28    /// appends the data from `other` to the underlying `PrimitiveArray` and thus may cause a reallocation.
29    ///
30    /// However if this does not cause a reallocation, the resulting data structure will not have any extra chunks
31    /// and thus will yield faster queries.
32    ///
33    /// Prefer `extend` over `append` when you want to do a query after a single append. For instance during
34    /// online operations where you add `n` rows and rerun a query.
35    ///
36    /// Prefer `append` over `extend` when you want to append many times before doing a query. For instance
37    /// when you read in multiple files and when to store them in a single `DataFrame`.
38    /// In the latter case finish the sequence of `append` operations with a [`rechunk`](Self::rechunk).
39    pub fn extend(&mut self, other: &Self) -> PolarsResult<()> {
40        update_sorted_flag_before_append::<T>(self, other);
41        // all to a single chunk
42        if self.chunks.len() > 1 {
43            self.append(other)?;
44            self.rechunk_mut();
45            return Ok(());
46        }
47        // Depending on the state of the underlying arrow array we
48        // might be able to get a `MutablePrimitiveArray`
49        //
50        // This is only possible if the reference count of the array and its buffers are 1
51        // So the logic below is needed to keep the reference count 1 if it is
52
53        // First we must obtain an owned version of the array
54        let arr = self.downcast_iter().next().unwrap();
55
56        // increments 1
57        let arr = arr.clone();
58
59        // now we drop our owned ArrayRefs so that
60        // decrements 1
61        {
62            self.chunks.clear();
63        }
64
65        use Either::*;
66
67        if arr.values().is_sliced() {
68            extend_immutable(&arr, &mut self.chunks, &other.chunks);
69        } else {
70            match arr.into_mut() {
71                Left(immutable) => {
72                    extend_immutable(&immutable, &mut self.chunks, &other.chunks);
73                },
74                Right(mut mutable) => {
75                    for arr in other.downcast_iter() {
76                        match arr.null_count() {
77                            0 => mutable.extend_from_slice(arr.values()),
78                            _ => mutable.extend_trusted_len(arr.into_iter()),
79                        }
80                    }
81                    let arr: PrimitiveArray<T::Native> = mutable.into();
82                    self.chunks.push(Box::new(arr) as ArrayRef)
83                },
84            }
85        }
86        self.compute_len();
87        Ok(())
88    }
89}
90
91#[doc(hidden)]
92impl StringChunked {
93    pub fn extend(&mut self, other: &Self) -> PolarsResult<()> {
94        self.set_sorted_flag(IsSorted::Not);
95        self.append(other)
96    }
97}
98
99#[doc(hidden)]
100impl BinaryChunked {
101    pub fn extend(&mut self, other: &Self) -> PolarsResult<()> {
102        self.set_sorted_flag(IsSorted::Not);
103        self.append(other)
104    }
105}
106
107#[doc(hidden)]
108impl BinaryOffsetChunked {
109    pub fn extend(&mut self, other: &Self) -> PolarsResult<()> {
110        self.set_sorted_flag(IsSorted::Not);
111        self.append(other)
112    }
113}
114
115#[doc(hidden)]
116impl BooleanChunked {
117    pub fn extend(&mut self, other: &Self) -> PolarsResult<()> {
118        update_sorted_flag_before_append::<BooleanType>(self, other);
119        // make sure that we are a single chunk already
120        if self.chunks.len() > 1 {
121            self.append(other)?;
122            self.rechunk_mut();
123            return Ok(());
124        }
125        let arr = self.downcast_iter().next().unwrap();
126
127        // increments 1
128        let arr = arr.clone();
129
130        // now we drop our owned ArrayRefs so that
131        // decrements 1
132        {
133            self.chunks.clear();
134        }
135
136        use Either::*;
137
138        match arr.into_mut() {
139            Left(immutable) => {
140                extend_immutable(&immutable, &mut self.chunks, &other.chunks);
141            },
142            Right(mut mutable) => {
143                for arr in other.downcast_iter() {
144                    mutable.extend_trusted_len(arr.into_iter())
145                }
146                let arr: BooleanArray = mutable.into();
147                self.chunks.push(Box::new(arr) as ArrayRef)
148            },
149        }
150        self.compute_len();
151        self.set_sorted_flag(IsSorted::Not);
152        Ok(())
153    }
154}
155
156#[doc(hidden)]
157impl ListChunked {
158    pub fn extend(&mut self, other: &Self) -> PolarsResult<()> {
159        // TODO! properly implement mutation
160        // this is harder because we don't know the inner type of the list
161        self.set_sorted_flag(IsSorted::Not);
162        self.append(other)
163    }
164}
165
#[cfg(feature = "dtype-array")]
#[doc(hidden)]
impl ArrayChunked {
    /// Add the values of `other` to this array.
    ///
    /// The sorted flag is set to `IsSorted::Not` and the work is delegated to `append`.
    pub fn extend(&mut self, other: &Self) -> PolarsResult<()> {
        // TODO! properly implement in-place mutation.
        // This is harder here because the inner type of the array is not known.
        self.set_sorted_flag(IsSorted::Not);
        self.append(other)?;
        Ok(())
    }
}
176
#[cfg(feature = "dtype-struct")]
#[doc(hidden)]
impl StructChunked {
    /// Add the values of `other` to this array.
    ///
    /// The sorted flag is set to `IsSorted::Not` and the work is delegated to `append`.
    pub fn extend(&mut self, other: &Self) -> PolarsResult<()> {
        // TODO! properly implement in-place mutation.
        // This is harder here because the inner types of the struct are not known.
        self.set_sorted_flag(IsSorted::Not);
        self.append(other)?;
        Ok(())
    }
}
187
#[cfg(test)]
mod test {
    use super::*;

    /// Address of the first value of the contiguous slice backing `ca`.
    fn buffer_addr(ca: &Int32Chunked) -> usize {
        ca.cont_slice().unwrap().as_ptr() as usize
    }

    #[test]
    // The clone of `ca.chunks` below is deliberate: it keeps an extra reference
    // alive so that the in-place path cannot be taken.
    #[allow(clippy::redundant_clone)]
    fn test_extend_primitive() -> PolarsResult<()> {
        // Build the vec with spare capacity so that extending does not trigger a
        // realloc; an unchanged address then shows the mutation happened in place.
        let mut initial = Vec::with_capacity(32);
        initial.extend_from_slice(&[1, 2, 3]);
        let mut ca = Int32Chunked::from_vec(PlSmallStr::from_static("a"), initial);
        let addr_before = buffer_addr(&ca);
        let extra = Int32Chunked::new(PlSmallStr::from_static("a"), &[4, 5, 6]);

        ca.extend(&extra)?;
        let addr_after = buffer_addr(&ca);
        assert_eq!(addr_before, addr_after);
        assert_eq!(ca.cont_slice().unwrap(), [1, 2, 3, 4, 5, 6]);

        // Now hold a second reference to the chunks and check that extending still
        // succeeds when the array cannot be turned into a mutable one.
        let _keep_alive = ca.chunks.clone();
        ca.extend(&extra)?;
        let addr_after = buffer_addr(&ca);
        assert_ne!(addr_before, addr_after);
        assert_eq!(ca.cont_slice().unwrap(), [1, 2, 3, 4, 5, 6, 4, 5, 6]);

        Ok(())
    }

    #[test]
    fn test_extend_string() -> PolarsResult<()> {
        let mut ca = StringChunked::new(PlSmallStr::from_static("a"), &["a", "b", "c"]);
        let extra = StringChunked::new(PlSmallStr::from_static("a"), &["a", "b", "e"]);

        ca.extend(&extra)?;
        assert_eq!(ca.len(), 6);
        let collected: Vec<_> = ca.into_no_null_iter().collect();
        assert_eq!(collected, ["a", "b", "c", "a", "b", "e"]);

        Ok(())
    }

    #[test]
    fn test_extend_bool() -> PolarsResult<()> {
        let mut ca = BooleanChunked::new(PlSmallStr::from_static("a"), [true, false]);
        let extra = BooleanChunked::new(PlSmallStr::from_static("a"), &[false, false]);

        ca.extend(&extra)?;
        assert_eq!(ca.len(), 4);
        let collected: Vec<_> = ca.into_no_null_iter().collect();
        assert_eq!(collected, [true, false, false, false]);

        Ok(())
    }
}