// polars_core/schema/mod.rs
use std::fmt::Debug;
2
3use arrow::bitmap::Bitmap;
4use polars_utils::pl_str::PlSmallStr;
5
6use crate::prelude::*;
7use crate::utils::try_get_supertype;
8
9pub mod iceberg;
10
11pub type SchemaRef = Arc<Schema>;
12pub type Schema = polars_schema::Schema<DataType>;
13
14pub trait SchemaExt {
15 fn from_arrow_schema(value: &ArrowSchema) -> Self;
16
17 fn get_field(&self, name: &str) -> Option<Field>;
18
19 fn try_get_field(&self, name: &str) -> PolarsResult<Field>;
20
21 fn to_arrow(&self, compat_level: CompatLevel) -> ArrowSchema;
22
23 fn iter_fields(&self) -> impl ExactSizeIterator<Item = Field> + '_;
24
25 fn to_supertype(&mut self, other: &Schema) -> PolarsResult<bool>;
26
27 fn project_select(&self, select: &Bitmap) -> Self;
29}
30
31impl SchemaExt for Schema {
32 fn from_arrow_schema(value: &ArrowSchema) -> Self {
33 value
34 .iter_values()
35 .map(|x| (x.name.clone(), DataType::from_arrow_field(x)))
36 .collect()
37 }
38
39 fn get_field(&self, name: &str) -> Option<Field> {
46 self.get_full(name)
47 .map(|(_, name, dtype)| Field::new(name.clone(), dtype.clone()))
48 }
49
50 fn try_get_field(&self, name: &str) -> PolarsResult<Field> {
57 self.get_full(name)
58 .ok_or_else(|| polars_err!(SchemaFieldNotFound: "{}", name))
59 .map(|(_, name, dtype)| Field::new(name.clone(), dtype.clone()))
60 }
61
62 fn to_arrow(&self, compat_level: CompatLevel) -> ArrowSchema {
64 self.iter()
65 .map(|(name, dtype)| {
66 (
67 name.clone(),
68 dtype.to_arrow_field(name.clone(), compat_level),
69 )
70 })
71 .collect()
72 }
73
74 fn iter_fields(&self) -> impl ExactSizeIterator<Item = Field> + '_ {
79 self.iter()
80 .map(|(name, dtype)| Field::new(name.clone(), dtype.clone()))
81 }
82
83 fn to_supertype(&mut self, other: &Schema) -> PolarsResult<bool> {
85 polars_ensure!(self.len() == other.len(), ComputeError: "schema lengths differ");
86
87 let mut changed = false;
88 for ((k, dt), (other_k, other_dt)) in self.iter_mut().zip(other.iter()) {
89 polars_ensure!(k == other_k, ComputeError: "schema names differ: got {}, expected {}", k, other_k);
90
91 let st = try_get_supertype(dt, other_dt)?;
92 changed |= (&st != dt) || (&st != other_dt);
93 *dt = st
94 }
95 Ok(changed)
96 }
97
98 fn project_select(&self, select: &Bitmap) -> Self {
99 assert_eq!(self.len(), select.len());
100 self.iter()
101 .zip(select.iter())
102 .filter(|(_, select)| *select)
103 .map(|((n, dt), _)| (n.clone(), dt.clone()))
104 .collect()
105 }
106}
107
108pub trait SchemaNamesAndDtypes {
109 const IS_ARROW: bool;
110 type DataType: Debug + Clone + Default + PartialEq;
111
112 fn iter_names_and_dtypes(
113 &self,
114 ) -> impl ExactSizeIterator<Item = (&PlSmallStr, &Self::DataType)>;
115}
116
117impl SchemaNamesAndDtypes for ArrowSchema {
118 const IS_ARROW: bool = true;
119 type DataType = ArrowDataType;
120
121 fn iter_names_and_dtypes(
122 &self,
123 ) -> impl ExactSizeIterator<Item = (&PlSmallStr, &Self::DataType)> {
124 self.iter_values().map(|x| (&x.name, &x.dtype))
125 }
126}
127
128impl SchemaNamesAndDtypes for Schema {
129 const IS_ARROW: bool = false;
130 type DataType = DataType;
131
132 fn iter_names_and_dtypes(
133 &self,
134 ) -> impl ExactSizeIterator<Item = (&PlSmallStr, &Self::DataType)> {
135 self.iter()
136 }
137}
138
139pub fn ensure_matching_schema<D>(
140 lhs: &polars_schema::Schema<D>,
141 rhs: &polars_schema::Schema<D>,
142) -> PolarsResult<()>
143where
144 polars_schema::Schema<D>: SchemaNamesAndDtypes,
145{
146 let lhs = lhs.iter_names_and_dtypes();
147 let rhs = rhs.iter_names_and_dtypes();
148
149 if lhs.len() != rhs.len() {
150 polars_bail!(
151 SchemaMismatch:
152 "schemas contained differing number of columns: {} != {}",
153 lhs.len(), rhs.len(),
154 );
155 }
156
157 for (i, ((l_name, l_dtype), (r_name, r_dtype))) in lhs.zip(rhs).enumerate() {
158 if l_name != r_name {
159 polars_bail!(
160 SchemaMismatch:
161 "schema names differ at index {}: {} != {}",
162 i, l_name, r_name
163 )
164 }
165 if l_dtype != r_dtype
166 && (!polars_schema::Schema::<D>::IS_ARROW
167 || unsafe {
168 DataType::from_arrow_dtype(std::mem::transmute::<
170 &<polars_schema::Schema<D> as SchemaNamesAndDtypes>::DataType,
171 &ArrowDataType,
172 >(l_dtype))
173 != DataType::from_arrow_dtype(std::mem::transmute::<
174 &<polars_schema::Schema<D> as SchemaNamesAndDtypes>::DataType,
175 &ArrowDataType,
176 >(r_dtype))
177 })
178 {
179 polars_bail!(
180 SchemaMismatch:
181 "schema dtypes differ at index {} for column {}: {:?} != {:?}",
182 i, l_name, l_dtype, r_dtype
183 )
184 }
185 }
186
187 Ok(())
188}