polars_core/schema/
mod.rs

1use std::fmt::Debug;
2
3use arrow::bitmap::Bitmap;
4use polars_utils::pl_str::PlSmallStr;
5
6use crate::prelude::*;
7use crate::utils::try_get_supertype;
8
9pub mod iceberg;
10
/// A [`Schema`] wrapped in an [`Arc`] so it can be shared without copying the schema itself.
pub type SchemaRef = Arc<Schema>;
/// The generic `polars_schema::Schema` container specialised to polars' own [`DataType`].
pub type Schema = polars_schema::Schema<DataType>;
13
/// Extension methods for [`Schema`]: conversions to and from arrow schemas, owned [`Field`]
/// lookups, supertype merging and bitmap-based projection.
pub trait SchemaExt {
    /// Build a schema from an [`ArrowSchema`].
    fn from_arrow_schema(value: &ArrowSchema) -> Self;

    /// Look up `name` and return an owned [`Field`], or `None` if it is not present.
    fn get_field(&self, name: &str) -> Option<Field>;

    /// Look up `name` and return an owned [`Field`], or an error if it is not present.
    fn try_get_field(&self, name: &str) -> PolarsResult<Field>;

    /// Convert this schema into an [`ArrowSchema`] using the given `compat_level`.
    fn to_arrow(&self, compat_level: CompatLevel) -> ArrowSchema;

    /// Iterate over the schema's entries as owned [`Field`]s.
    fn iter_fields(&self) -> impl ExactSizeIterator<Item = Field> + '_;

    /// Update the dtypes of `self` in place to the supertypes shared with `other`.
    ///
    /// The boolean in the `Ok` case reports whether a change was detected; see the
    /// implementation for the exact criterion.
    fn to_supertype(&mut self, other: &Schema) -> PolarsResult<bool>;

    /// Select fields using a bitmap.
    fn project_select(&self, select: &Bitmap) -> Self;
}
30
31impl SchemaExt for Schema {
32    fn from_arrow_schema(value: &ArrowSchema) -> Self {
33        value
34            .iter_values()
35            .map(|x| (x.name.clone(), DataType::from_arrow_field(x)))
36            .collect()
37    }
38
39    /// Look up the name in the schema and return an owned [`Field`] by cloning the data.
40    ///
41    /// Returns `None` if the field does not exist.
42    ///
43    /// This method constructs the `Field` by cloning the name and dtype. For a version that returns references, see
44    /// [`get`][Self::get] or [`get_full`][Self::get_full].
45    fn get_field(&self, name: &str) -> Option<Field> {
46        self.get_full(name)
47            .map(|(_, name, dtype)| Field::new(name.clone(), dtype.clone()))
48    }
49
50    /// Look up the name in the schema and return an owned [`Field`] by cloning the data.
51    ///
52    /// Returns `Err(PolarsErr)` if the field does not exist.
53    ///
54    /// This method constructs the `Field` by cloning the name and dtype. For a version that returns references, see
55    /// [`get`][Self::get] or [`get_full`][Self::get_full].
56    fn try_get_field(&self, name: &str) -> PolarsResult<Field> {
57        self.get_full(name)
58            .ok_or_else(|| polars_err!(SchemaFieldNotFound: "{}", name))
59            .map(|(_, name, dtype)| Field::new(name.clone(), dtype.clone()))
60    }
61
62    /// Convert self to `ArrowSchema` by cloning the fields.
63    fn to_arrow(&self, compat_level: CompatLevel) -> ArrowSchema {
64        self.iter()
65            .map(|(name, dtype)| {
66                (
67                    name.clone(),
68                    dtype.to_arrow_field(name.clone(), compat_level),
69                )
70            })
71            .collect()
72    }
73
74    /// Iterates the [`Field`]s in this schema, constructing them anew by cloning each `(&name, &dtype)` pair.
75    ///
76    /// Note that this clones each name and dtype in order to form an owned [`Field`]. For a clone-free version, use
77    /// [`iter`][Self::iter], which returns `(&name, &dtype)`.
78    fn iter_fields(&self) -> impl ExactSizeIterator<Item = Field> + '_ {
79        self.iter()
80            .map(|(name, dtype)| Field::new(name.clone(), dtype.clone()))
81    }
82
83    /// Take another [`Schema`] and try to find the supertypes between them.
84    fn to_supertype(&mut self, other: &Schema) -> PolarsResult<bool> {
85        polars_ensure!(self.len() == other.len(), ComputeError: "schema lengths differ");
86
87        let mut changed = false;
88        for ((k, dt), (other_k, other_dt)) in self.iter_mut().zip(other.iter()) {
89            polars_ensure!(k == other_k, ComputeError: "schema names differ: got {}, expected {}", k, other_k);
90
91            let st = try_get_supertype(dt, other_dt)?;
92            changed |= (&st != dt) || (&st != other_dt);
93            *dt = st
94        }
95        Ok(changed)
96    }
97
98    fn project_select(&self, select: &Bitmap) -> Self {
99        assert_eq!(self.len(), select.len());
100        self.iter()
101            .zip(select.iter())
102            .filter(|(_, select)| *select)
103            .map(|((n, dt), _)| (n.clone(), dt.clone()))
104            .collect()
105    }
106}
107
/// Uniform access to the `(name, dtype)` pairs of a schema, regardless of whether it is an
/// arrow schema or a polars schema.
pub trait SchemaNamesAndDtypes {
    /// `true` for the [`ArrowSchema`] implementation, `false` for the polars [`Schema`] one.
    ///
    /// `ensure_matching_schema` reads this flag to decide whether dtypes may be reinterpreted
    /// as [`ArrowDataType`].
    const IS_ARROW: bool;
    /// The dtype stored per column by the implementing schema.
    type DataType: Debug + Clone + Default + PartialEq;

    /// Iterate over borrowed `(name, dtype)` pairs.
    fn iter_names_and_dtypes(
        &self,
    ) -> impl ExactSizeIterator<Item = (&PlSmallStr, &Self::DataType)>;
}
116
/// Arrow schemas expose the name and dtype stored on each arrow field.
impl SchemaNamesAndDtypes for ArrowSchema {
    const IS_ARROW: bool = true;
    type DataType = ArrowDataType;

    /// Yields `(&field.name, &field.dtype)` for every field value in the schema.
    fn iter_names_and_dtypes(
        &self,
    ) -> impl ExactSizeIterator<Item = (&PlSmallStr, &Self::DataType)> {
        self.iter_values().map(|x| (&x.name, &x.dtype))
    }
}
127
/// Polars schemas already iterate as `(name, dtype)` pairs, so this simply forwards to `iter`.
impl SchemaNamesAndDtypes for Schema {
    const IS_ARROW: bool = false;
    type DataType = DataType;

    /// Yields the schema's own `(name, dtype)` pairs unchanged.
    fn iter_names_and_dtypes(
        &self,
    ) -> impl ExactSizeIterator<Item = (&PlSmallStr, &Self::DataType)> {
        self.iter()
    }
}
138
139pub fn ensure_matching_schema<D>(
140    lhs: &polars_schema::Schema<D>,
141    rhs: &polars_schema::Schema<D>,
142) -> PolarsResult<()>
143where
144    polars_schema::Schema<D>: SchemaNamesAndDtypes,
145{
146    let lhs = lhs.iter_names_and_dtypes();
147    let rhs = rhs.iter_names_and_dtypes();
148
149    if lhs.len() != rhs.len() {
150        polars_bail!(
151            SchemaMismatch:
152            "schemas contained differing number of columns: {} != {}",
153            lhs.len(), rhs.len(),
154        );
155    }
156
157    for (i, ((l_name, l_dtype), (r_name, r_dtype))) in lhs.zip(rhs).enumerate() {
158        if l_name != r_name {
159            polars_bail!(
160                SchemaMismatch:
161                "schema names differ at index {}: {} != {}",
162                i, l_name, r_name
163            )
164        }
165        if l_dtype != r_dtype
166            && (!polars_schema::Schema::<D>::IS_ARROW
167                || unsafe {
168                    // For timezone normalization. Easier than writing out the entire PartialEq.
169                    DataType::from_arrow_dtype(std::mem::transmute::<
170                        &<polars_schema::Schema<D> as SchemaNamesAndDtypes>::DataType,
171                        &ArrowDataType,
172                    >(l_dtype))
173                        != DataType::from_arrow_dtype(std::mem::transmute::<
174                            &<polars_schema::Schema<D> as SchemaNamesAndDtypes>::DataType,
175                            &ArrowDataType,
176                        >(r_dtype))
177                })
178        {
179            polars_bail!(
180                SchemaMismatch:
181                "schema dtypes differ at index {} for column {}: {:?} != {:?}",
182                i, l_name, l_dtype, r_dtype
183            )
184        }
185    }
186
187    Ok(())
188}