polars_core/frame/
horizontal.rs

1use polars_error::{PolarsResult, polars_err};
2
3use super::Column;
4use crate::datatypes::AnyValue;
5use crate::frame::DataFrame;
6
7impl DataFrame {
8    /// Add columns horizontally.
9    ///
10    /// # Safety
11    /// The caller must ensure:
12    /// - the length of all [`Column`] is equal to the height of this [`DataFrame`]
13    /// - the columns names are unique
14    ///
15    /// Note: If `self` is empty, `self.height` will always be overridden by the height of the first
16    /// column in `columns`.
17    ///
18    /// Note that on a debug build this will panic on duplicates / height mismatch.
19    pub unsafe fn hstack_mut_unchecked(&mut self, columns: &[Column]) -> &mut Self {
20        self.clear_schema();
21        self.columns.extend_from_slice(columns);
22
23        if cfg!(debug_assertions) {
24            if let err @ Err(_) = DataFrame::validate_columns_slice(&self.columns) {
25                // Reset DataFrame state to before extend.
26                self.columns.truncate(self.columns.len() - columns.len());
27                err.unwrap();
28            }
29        }
30
31        if let Some(c) = self.columns.first() {
32            unsafe { self.set_height(c.len()) };
33        }
34
35        self
36    }
37
38    /// Add multiple [`Column`] to a [`DataFrame`].
39    /// Errors if the resulting DataFrame columns have duplicate names or unequal heights.
40    ///
41    /// Note: If `self` is empty, `self.height` will always be overridden by the height of the first
42    /// column in `columns`.
43    ///
44    /// # Example
45    ///
46    /// ```rust
47    /// # use polars_core::prelude::*;
48    /// fn stack(df: &mut DataFrame, columns: &[Column]) {
49    ///     df.hstack_mut(columns);
50    /// }
51    /// ```
52    pub fn hstack_mut(&mut self, columns: &[Column]) -> PolarsResult<&mut Self> {
53        self.clear_schema();
54        self.columns.extend_from_slice(columns);
55
56        if let err @ Err(_) = DataFrame::validate_columns_slice(&self.columns) {
57            // Reset DataFrame state to before extend.
58            self.columns.truncate(self.columns.len() - columns.len());
59            err?;
60        }
61
62        if let Some(c) = self.columns.first() {
63            unsafe { self.set_height(c.len()) };
64        }
65
66        Ok(self)
67    }
68}
69
70/// Concat [`DataFrame`]s horizontally.
71///
72/// If the lengths don't match and strict is false we pad with nulls, or return a `ShapeError` if strict is true.
73pub fn concat_df_horizontal(
74    dfs: &[DataFrame],
75    check_duplicates: bool,
76    strict: bool,
77) -> PolarsResult<DataFrame> {
78    let output_height = dfs
79        .iter()
80        .map(|df| df.height())
81        .max()
82        .ok_or_else(|| polars_err!(ComputeError: "cannot concat empty dataframes"))?;
83
84    let owned_df;
85
86    let mut out_width = 0;
87
88    let all_equal_height = dfs.iter().all(|df| {
89        out_width += df.width();
90        df.height() == output_height
91    });
92
93    // if not all equal length, extend the DataFrame with nulls
94    let dfs = if !all_equal_height {
95        if strict {
96            return Err(
97                polars_err!(ShapeMismatch: "cannot concat dataframes with different heights in 'strict' mode"),
98            );
99        }
100        out_width = 0;
101
102        owned_df = dfs
103            .iter()
104            .cloned()
105            .map(|mut df| {
106                out_width += df.width();
107
108                if df.height() != output_height {
109                    let diff = output_height - df.height();
110
111                    // SAFETY: We extend each column with nulls to the point of being of length
112                    // `output_height`. Then, we set the height of the resulting dataframe.
113                    unsafe { df.get_columns_mut() }.iter_mut().for_each(|c| {
114                        *c = c.extend_constant(AnyValue::Null, diff).unwrap();
115                    });
116                    df.clear_schema();
117                    unsafe {
118                        df.set_height(output_height);
119                    }
120                }
121                df
122            })
123            .collect::<Vec<_>>();
124        owned_df.as_slice()
125    } else {
126        dfs
127    };
128
129    let mut acc_cols = Vec::with_capacity(out_width);
130
131    for df in dfs {
132        acc_cols.extend(df.get_columns().iter().cloned());
133    }
134
135    if check_duplicates {
136        DataFrame::validate_columns_slice(&acc_cols)?;
137    }
138
139    let df = unsafe { DataFrame::new_no_checks_height_from_first(acc_cols) };
140
141    Ok(df)
142}
143
#[cfg(test)]
mod tests {
    use polars_error::PolarsError;

    use crate::frame::DataFrame;
    use crate::prelude::{Column, DataType};

    /// Stacking columns of unequal length onto an empty frame must fail with a shape mismatch
    /// and must leave the frame without any columns.
    #[test]
    fn test_hstack_mut_empty_frame_height_validation() {
        // Two null columns whose lengths (1 and 3) disagree.
        let mismatched = [
            Column::full_null("a".into(), 1, &DataType::Null),
            Column::full_null("b".into(), 3, &DataType::Null),
        ];

        let mut df = DataFrame::empty();
        let result = df.hstack_mut(&mismatched);

        assert!(
            matches!(result, Err(PolarsError::ShapeMismatch(_))),
            "expected shape mismatch error"
        );

        // Ensure the DataFrame is not mutated in the error case.
        assert_eq!(df.width(), 0);
    }
}
166}