polars_core/chunked_array/ops/
append.rs

1use polars_error::constants::LENGTH_LIMIT_MSG;
2
3use crate::prelude::*;
4use crate::series::IsSorted;
5
6pub(crate) fn new_chunks(chunks: &mut Vec<ArrayRef>, other: &[ArrayRef], len: usize) {
7    // Replace an empty array.
8    if chunks.len() == 1 && len == 0 {
9        other.clone_into(chunks);
10    } else {
11        for chunk in other {
12            if !chunk.is_empty() {
13                chunks.push(chunk.clone());
14            }
15        }
16    }
17}
18
19pub(crate) fn new_chunks_owned(chunks: &mut Vec<ArrayRef>, other: Vec<ArrayRef>, len: usize) {
20    // Replace an empty array.
21    if chunks.len() == 1 && len == 0 {
22        *chunks = other;
23    } else {
24        chunks.reserve(other.len());
25        chunks.extend(other.into_iter().filter(|c| !c.is_empty()));
26    }
27}
28
29pub(super) fn update_sorted_flag_before_append<T>(ca: &mut ChunkedArray<T>, other: &ChunkedArray<T>)
30where
31    T: PolarsDataType,
32    for<'a> T::Physical<'a>: TotalOrd,
33{
34    // Note: Do not call (first|last)_non_null on an array here before checking
35    // it is sorted, otherwise it will lead to quadratic behavior.
36    let sorted_flag = match (
37        ca.null_count() != ca.len(),
38        other.null_count() != other.len(),
39    ) {
40        (false, false) => IsSorted::Ascending,
41        (false, true) => {
42            if
43            // lhs is empty, just take sorted flag from rhs
44            ca.is_empty()
45                || (
46                    // lhs is non-empty and all-null, so rhs must have nulls ordered first
47                    other.is_sorted_any() && 1 + other.last_non_null().unwrap() == other.len()
48                )
49            {
50                other.is_sorted_flag()
51            } else {
52                IsSorted::Not
53            }
54        },
55        (true, false) => {
56            if
57            // rhs is empty, just take sorted flag from lhs
58            other.is_empty()
59                || (
60                    // rhs is non-empty and all-null, so lhs must have nulls ordered last
61                    ca.is_sorted_any() && ca.first_non_null().unwrap() == 0
62                )
63            {
64                ca.is_sorted_flag()
65            } else {
66                IsSorted::Not
67            }
68        },
69        (true, true) => {
70            // both arrays have non-null values.
71            // for arrays of unit length we can ignore the sorted flag, as it is
72            // not necessarily set.
73            if !(ca.is_sorted_any() || ca.len() == 1)
74                || !(other.is_sorted_any() || other.len() == 1)
75                || !(
76                    // We will coerce for single values
77                    ca.len() - ca.null_count() == 1
78                        || other.len() - other.null_count() == 1
79                        || ca.is_sorted_flag() == other.is_sorted_flag()
80                )
81            {
82                IsSorted::Not
83            } else {
84                let l_idx = ca.last_non_null().unwrap();
85                let r_idx = other.first_non_null().unwrap();
86
87                let null_pos_check =
88                    // check null positions
89                    // lhs does not end in nulls
90                    (1 + l_idx == ca.len())
91                    // rhs does not start with nulls
92                    && (r_idx == 0)
93                    // if there are nulls, they are all on one end
94                    && !(ca.first_non_null().unwrap() != 0 && 1 + other.last_non_null().unwrap() != other.len());
95
96                if !null_pos_check {
97                    IsSorted::Not
98                } else {
99                    #[allow(unused_assignments)]
100                    let mut out = IsSorted::Not;
101
102                    // This can be relatively expensive because of chunks, so delay as much as possible.
103                    let l_val = unsafe { ca.value_unchecked(l_idx) };
104                    let r_val = unsafe { other.value_unchecked(r_idx) };
105
106                    match (
107                        ca.len() - ca.null_count() == 1,
108                        other.len() - other.null_count() == 1,
109                    ) {
110                        (true, true) => {
111                            out = [IsSorted::Descending, IsSorted::Ascending]
112                                [l_val.tot_le(&r_val) as usize];
113                            drop(l_val);
114                            drop(r_val);
115                            ca.set_sorted_flag(out);
116                            return;
117                        },
118                        (true, false) => out = other.is_sorted_flag(),
119                        _ => out = ca.is_sorted_flag(),
120                    }
121
122                    debug_assert!(!matches!(out, IsSorted::Not));
123
124                    let check = if matches!(out, IsSorted::Ascending) {
125                        l_val.tot_le(&r_val)
126                    } else {
127                        l_val.tot_ge(&r_val)
128                    };
129
130                    if !check {
131                        out = IsSorted::Not
132                    }
133
134                    out
135                }
136            }
137        },
138    };
139
140    ca.set_sorted_flag(sorted_flag);
141}
142
143impl<T> ChunkedArray<T>
144where
145    T: PolarsDataType<IsNested = FalseT, IsObject = FalseT>,
146    for<'a> T::Physical<'a>: TotalOrd,
147{
148    /// Append in place. This is done by adding the chunks of `other` to this [`ChunkedArray`].
149    ///
150    /// See also [`extend`](Self::extend) for appends to the underlying memory
151    pub fn append(&mut self, other: &Self) -> PolarsResult<()> {
152        update_sorted_flag_before_append::<T>(self, other);
153        let len = self.len();
154        self.length = self
155            .length
156            .checked_add(other.length)
157            .ok_or_else(|| polars_err!(ComputeError: LENGTH_LIMIT_MSG))?;
158        self.null_count += other.null_count;
159        new_chunks(&mut self.chunks, &other.chunks, len);
160        Ok(())
161    }
162
163    /// Append in place. This is done by adding the chunks of `other` to this [`ChunkedArray`].
164    ///
165    /// See also [`extend`](Self::extend) for appends to the underlying memory
166    pub fn append_owned(&mut self, mut other: Self) -> PolarsResult<()> {
167        update_sorted_flag_before_append::<T>(self, &other);
168        let len = self.len();
169        self.length = self
170            .length
171            .checked_add(other.length)
172            .ok_or_else(|| polars_err!(ComputeError: LENGTH_LIMIT_MSG))?;
173        self.null_count += other.null_count;
174        new_chunks_owned(&mut self.chunks, std::mem::take(&mut other.chunks), len);
175        Ok(())
176    }
177}
178
179#[doc(hidden)]
180impl ListChunked {
181    pub fn append(&mut self, other: &Self) -> PolarsResult<()> {
182        self.append_owned(other.clone())
183    }
184
185    pub fn append_owned(&mut self, mut other: Self) -> PolarsResult<()> {
186        let dtype = merge_dtypes(self.dtype(), other.dtype())?;
187        self.field = Arc::new(Field::new(self.name().clone(), dtype));
188
189        let len = self.len();
190        self.length = self
191            .length
192            .checked_add(other.length)
193            .ok_or_else(|| polars_err!(ComputeError: LENGTH_LIMIT_MSG))?;
194        self.null_count += other.null_count;
195        self.set_sorted_flag(IsSorted::Not);
196        if !other.get_fast_explode_list() {
197            self.unset_fast_explode_list()
198        }
199
200        new_chunks_owned(&mut self.chunks, std::mem::take(&mut other.chunks), len);
201        Ok(())
202    }
203}
204
205#[cfg(feature = "dtype-array")]
206#[doc(hidden)]
207impl ArrayChunked {
208    pub fn append(&mut self, other: &Self) -> PolarsResult<()> {
209        self.append_owned(other.clone())
210    }
211
212    pub fn append_owned(&mut self, mut other: Self) -> PolarsResult<()> {
213        let dtype = merge_dtypes(self.dtype(), other.dtype())?;
214        self.field = Arc::new(Field::new(self.name().clone(), dtype));
215
216        let len = self.len();
217
218        self.length = self
219            .length
220            .checked_add(other.length)
221            .ok_or_else(|| polars_err!(ComputeError: LENGTH_LIMIT_MSG))?;
222        self.null_count += other.null_count;
223
224        self.set_sorted_flag(IsSorted::Not);
225
226        new_chunks_owned(&mut self.chunks, std::mem::take(&mut other.chunks), len);
227        Ok(())
228    }
229}
230
231#[cfg(feature = "dtype-struct")]
232#[doc(hidden)]
233impl StructChunked {
234    pub fn append(&mut self, other: &Self) -> PolarsResult<()> {
235        self.append_owned(other.clone())
236    }
237
238    pub fn append_owned(&mut self, mut other: Self) -> PolarsResult<()> {
239        let dtype = merge_dtypes(self.dtype(), other.dtype())?;
240        self.field = Arc::new(Field::new(self.name().clone(), dtype));
241
242        let len = self.len();
243
244        self.length = self
245            .length
246            .checked_add(other.length)
247            .ok_or_else(|| polars_err!(ComputeError: LENGTH_LIMIT_MSG))?;
248        self.null_count += other.null_count;
249
250        self.set_sorted_flag(IsSorted::Not);
251
252        new_chunks_owned(&mut self.chunks, std::mem::take(&mut other.chunks), len);
253        Ok(())
254    }
255}
256
257#[cfg(feature = "dtype-categorical")]
258#[doc(hidden)]
259impl<T: PolarsCategoricalType> CategoricalChunked<T> {
260    pub fn append(&mut self, other: &Self) -> PolarsResult<()> {
261        assert!(self.dtype() == other.dtype());
262        self.phys.append(&other.phys)
263    }
264
265    pub fn append_owned(&mut self, other: Self) -> PolarsResult<()> {
266        assert!(self.dtype() == other.dtype());
267        self.phys.append_owned(other.phys)
268    }
269}
270
271#[cfg(feature = "object")]
272#[doc(hidden)]
273impl<T: PolarsObject> ObjectChunked<T> {
274    pub fn append(&mut self, other: &Self) -> PolarsResult<()> {
275        self.append_owned(other.clone())
276    }
277
278    pub fn append_owned(&mut self, mut other: Self) -> PolarsResult<()> {
279        let len = self.len();
280        self.length = self
281            .length
282            .checked_add(other.length)
283            .ok_or_else(|| polars_err!(ComputeError: LENGTH_LIMIT_MSG))?;
284        self.null_count += other.null_count;
285        self.set_sorted_flag(IsSorted::Not);
286
287        new_chunks_owned(&mut self.chunks, std::mem::take(&mut other.chunks), len);
288        Ok(())
289    }
290}