Skip to main content

polars_core/frame/
validation.rs

1use polars_error::{PolarsResult, polars_bail};
2use polars_utils::aliases::{InitHashMaps, PlHashSet};
3
4use crate::frame::column::Column;
5
6/// Checks for duplicates and mismatching heights.
7pub(super) fn validate_columns_slice(
8    expected_height: usize,
9    columns: &[Column],
10) -> PolarsResult<()> {
11    if columns.is_empty() {
12        return Ok(());
13    }
14
15    let expected_height_msg = || {
16        if let Some(c) = columns.iter().find(|c| c.len() == expected_height) {
17            format!("height of column '{}' ({})", c.name(), c.len())
18        } else {
19            format!("DataFrame height ({expected_height})")
20        }
21    };
22
23    if columns.len() <= 4 {
24        // Too small to be worth spawning a hashmap for, this is at most 6 comparisons.
25        for (i, col) in columns.iter().enumerate() {
26            if col.len() != expected_height {
27                polars_bail!(
28                    ShapeMismatch:
29                    "height of column '{}' ({}) does not match {}",
30                    col.name(), col.len(), expected_height_msg()
31                )
32            }
33
34            let name = col.name();
35
36            for other in columns.iter().skip(i + 1) {
37                if other.name() == name {
38                    polars_bail!(duplicate = name);
39                }
40            }
41        }
42    } else {
43        let mut names = PlHashSet::with_capacity(columns.len());
44
45        for col in columns {
46            let col_name = col.name();
47            let col_len = col.len();
48
49            if col_len != expected_height {
50                polars_bail!(
51                    ShapeMismatch:
52                    "height of column '{}' ({}) does not match {}",
53                    col_name, col_len, expected_height_msg()
54                )
55            }
56
57            if names.contains(col_name) {
58                polars_bail!(duplicate = col_name)
59            }
60
61            names.insert(col_name);
62        }
63    }
64
65    Ok(())
66}
67
68pub(super) fn ensure_names_unique<T>(names: &[T]) -> PolarsResult<()>
69where
70    T: AsRef<str>,
71{
72    // Always unique.
73    if names.len() <= 1 {
74        return Ok(());
75    }
76
77    if names.len() <= 4 {
78        // Too small to be worth spawning a hashmap for, this is at most 6 comparisons.
79        for i in 0..names.len() - 1 {
80            let name = names[i].as_ref();
81
82            for other in names.iter().skip(i + 1) {
83                if name == other.as_ref() {
84                    polars_bail!(duplicate = name);
85                }
86            }
87        }
88    } else {
89        let mut names_set: PlHashSet<&str> = PlHashSet::with_capacity(names.len());
90
91        for name in names {
92            let name = name.as_ref();
93
94            if !names_set.insert(name) {
95                polars_bail!(duplicate = name);
96            }
97        }
98    }
99    Ok(())
100}