1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
use polars_error::{polars_ensure, polars_err, PolarsResult};
use polars_utils::aliases::PlHashSet;

use super::Column;
use crate::datatypes::AnyValue;
use crate::frame::DataFrame;
use crate::prelude::PlSmallStr;

/// Validate that `col` may be appended to a frame of the given `height`.
///
/// The length check is waived when `is_empty` is `true`. On success the column's
/// name is recorded in `names`, so a later call with the same name is rejected.
///
/// # Errors
///
/// - `ShapeMismatch` if the column length differs from `height` and `is_empty` is `false`.
/// - `Duplicate` if `names` already contains the column's name.
fn check_hstack(
    col: &Column,
    names: &mut PlHashSet<PlSmallStr>,
    height: usize,
    is_empty: bool,
) -> PolarsResult<()> {
    let len = col.len();
    let length_ok = is_empty || len == height;
    polars_ensure!(
        length_ok,
        ShapeMismatch: "unable to hstack Series of length {} and DataFrame of height {}",
        len, height,
    );

    let name = col.name();
    // `insert` yields `false` when the name was already in the set.
    let newly_inserted = names.insert(name.clone());
    polars_ensure!(
        newly_inserted,
        Duplicate: "unable to hstack, column with name {:?} already exists", name.as_str(),
    );

    Ok(())
}

impl DataFrame {
    /// Append `columns` to this [`DataFrame`] without any validation.
    ///
    /// The given columns are cloned and pushed after the existing ones.
    ///
    /// # Safety
    /// The caller must ensure:
    /// - the length of all [`Column`] is equal to the height of this [`DataFrame`]
    /// - the columns names are unique
    pub unsafe fn hstack_mut_unchecked(&mut self, columns: &[Column]) -> &mut Self {
        self.columns.extend(columns.iter().cloned());
        self
    }

    /// Append multiple [`Column`]s to this [`DataFrame`] after validating them.
    ///
    /// Every column is checked with `check_hstack` before anything is appended, so the
    /// frame is left unmodified when an error is returned.
    ///
    /// # Errors
    ///
    /// - `ShapeMismatch` if a column's length differs from the height of this frame
    ///   (this check is waived when the frame is empty).
    /// - `Duplicate` if a column name already exists in the frame or occurs more than
    ///   once in `columns`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn stack(df: &mut DataFrame, columns: &[Column]) {
    ///     df.hstack_mut(columns);
    /// }
    /// ```
    pub fn hstack_mut(&mut self, columns: &[Column]) -> PolarsResult<&mut Self> {
        let height = self.height();
        let is_empty = self.is_empty();

        // Validate everything up front; the name set is scoped so it is released
        // before the frame is mutated.
        {
            let mut seen = self
                .columns
                .iter()
                .map(|existing| existing.name().clone())
                .collect::<PlHashSet<_>>();

            columns
                .iter()
                .try_for_each(|candidate| check_hstack(candidate, &mut seen, height, is_empty))?;
        }

        // SAFETY: every column passed `check_hstack`, which enforces unique names and,
        // unless this frame is empty, a length equal to the frame's height.
        Ok(unsafe { self.hstack_mut_unchecked(columns) })
    }
}
/// Concatenate [`DataFrame`]s horizontally.
///
/// The columns of every frame in `dfs` are appended, in order, to a clone of the first
/// frame. If the frames do not all share the same height, the shorter ones are first
/// extended with null values up to the height of the tallest frame.
///
/// When `check_duplicates` is `true`, each column taken from the second frame onwards is
/// validated with `check_hstack`; when it is `false`, no validation is performed.
///
/// # Errors
///
/// - `ComputeError` if `dfs` is empty.
/// - Any error raised while extending a shorter column with nulls.
/// - With `check_duplicates` enabled, the errors reported by `check_hstack`
///   (e.g. `Duplicate` when a column name occurs more than once).
pub fn concat_df_horizontal(dfs: &[DataFrame], check_duplicates: bool) -> PolarsResult<DataFrame> {
    let max_len = dfs
        .iter()
        .map(|df| df.height())
        .max()
        .ok_or_else(|| polars_err!(ComputeError: "cannot concat empty dataframes"))?;

    // Holds the padded copies (if any) so that `dfs` can borrow from them below.
    let owned_df;

    // if not all equal length, extend the DataFrame with nulls
    let dfs = if !dfs.iter().all(|df| df.height() == max_len) {
        owned_df = dfs
            .iter()
            .cloned()
            .map(|mut df| -> PolarsResult<DataFrame> {
                if df.height() != max_len {
                    let diff = max_len - df.height();
                    for col in df.columns.iter_mut() {
                        // @scalar-opt
                        let s = col.into_materialized_series();
                        // Propagate a failure instead of panicking: this function
                        // already reports errors through its `PolarsResult`.
                        *s = s.extend_constant(AnyValue::Null, diff)?;
                    }
                }
                Ok(df)
            })
            .collect::<PolarsResult<Vec<_>>>()?;
        owned_df.as_slice()
    } else {
        dfs
    };

    // `dfs` is non-empty here: an empty input already returned an error above.
    let mut first_df = dfs[0].clone();
    let height = first_df.height();
    let is_empty = first_df.is_empty();

    // The name set is only populated (and consulted) when duplicate checking is requested.
    let mut names = if check_duplicates {
        first_df
            .columns
            .iter()
            .map(|s| s.name().clone())
            .collect::<PlHashSet<_>>()
    } else {
        Default::default()
    };

    for df in &dfs[1..] {
        let cols = df.get_columns();

        if check_duplicates {
            for col in cols {
                check_hstack(col, &mut names, height, is_empty)?;
            }
        }

        // SAFETY: frames were padded to a common height above; name uniqueness is
        // verified only when `check_duplicates` is set, otherwise it is left to the caller.
        unsafe { first_df.hstack_mut_unchecked(cols) };
    }
    Ok(first_df)
}