polars_core/frame/
validation.rs

1use polars_error::{PolarsResult, polars_bail};
2use polars_utils::aliases::{InitHashMaps, PlHashSet};
3
4use super::DataFrame;
5use super::column::Column;
6
7impl DataFrame {
8    /// Ensure all equal height and names are unique.
9    ///
10    /// An Ok() result indicates `columns` is a valid state for a DataFrame.
11    pub fn validate_columns_slice(columns: &[Column]) -> PolarsResult<()> {
12        if columns.len() <= 1 {
13            return Ok(());
14        }
15
16        if columns.len() <= 4 {
17            // Too small to be worth spawning a hashmap for, this is at most 6 comparisons.
18            for i in 0..columns.len() - 1 {
19                let name = columns[i].name();
20                let height = columns[i].len();
21
22                for other in columns.iter().skip(i + 1) {
23                    if other.name() == name {
24                        polars_bail!(duplicate = name);
25                    }
26
27                    if other.len() != height {
28                        polars_bail!(
29                            ShapeMismatch:
30                            "height of column '{}' ({}) does not match height of column '{}' ({})",
31                            other.name(), other.len(), name, height
32                        )
33                    }
34                }
35            }
36        } else {
37            let first = &columns[0];
38
39            let first_len = first.len();
40            let first_name = first.name();
41
42            let mut names = PlHashSet::with_capacity(columns.len());
43            names.insert(first_name);
44
45            for col in &columns[1..] {
46                let col_name = col.name();
47                let col_len = col.len();
48
49                if col_len != first_len {
50                    polars_bail!(
51                        ShapeMismatch:
52                        "height of column '{}' ({}) does not match height of column '{}' ({})",
53                        col_name, col_len, first_name, first_len
54                    )
55                }
56
57                if names.contains(col_name) {
58                    polars_bail!(duplicate = col_name)
59                }
60
61                names.insert(col_name);
62            }
63        }
64
65        Ok(())
66    }
67}