polars_core/datatypes/
mod.rs

1//! # Data types supported by Polars.
2//!
//! At the moment Polars doesn't include all data types available in Arrow. The goal is to
//! incrementally support more data types and prioritize these by usability.
5//!
6//! [See the AnyValue variants](enum.AnyValue.html#variants) for the data types that
7//! are currently supported.
8//!
9#[cfg(feature = "serde")]
10mod _serde;
11mod aliases;
12mod any_value;
13mod dtype;
14mod field;
15mod into_scalar;
16#[cfg(feature = "object")]
17mod static_array_collect;
18mod time_unit;
19
20use std::cmp::Ordering;
21use std::fmt::{Display, Formatter};
22use std::hash::{Hash, Hasher};
23use std::ops::{Add, AddAssign, Div, Mul, Rem, Sub, SubAssign};
24
25mod schema;
26pub use aliases::*;
27pub use any_value::*;
28pub use arrow::array::{ArrayCollectIterExt, ArrayFromIter, ArrayFromIterDtype, StaticArray};
29#[cfg(feature = "dtype-categorical")]
30use arrow::datatypes::IntegerType;
31pub use arrow::datatypes::reshape::*;
32pub use arrow::datatypes::{ArrowDataType, TimeUnit as ArrowTimeUnit};
33use arrow::types::NativeType;
34use bytemuck::Zeroable;
35pub use dtype::*;
36pub use field::*;
37pub use into_scalar::*;
38use num_traits::{AsPrimitive, Bounded, FromPrimitive, Num, NumCast, One, Zero};
39use polars_compute::arithmetic::HasPrimitiveArithmeticKernel;
40use polars_compute::float_sum::FloatSum;
41use polars_utils::abs_diff::AbsDiff;
42use polars_utils::float::IsFloat;
43use polars_utils::min_max::MinMax;
44use polars_utils::nulls::IsNull;
45use polars_utils::total_ord::TotalHash;
46pub use schema::SchemaExtPl;
47#[cfg(feature = "serde")]
48use serde::de::{EnumAccess, Error, Unexpected, VariantAccess, Visitor};
49#[cfg(any(feature = "serde", feature = "serde-lazy"))]
50use serde::{Deserialize, Serialize};
51#[cfg(any(feature = "serde", feature = "serde-lazy"))]
52use serde::{Deserializer, Serializer};
53pub use time_unit::*;
54
55pub use crate::chunked_array::logical::*;
56#[cfg(feature = "object")]
57use crate::chunked_array::object::ObjectArray;
58#[cfg(feature = "object")]
59use crate::chunked_array::object::PolarsObjectSafe;
60use crate::prelude::*;
61use crate::utils::Wrap;
62
/// Type-level "false": the value given to a marker associated type of
/// `PolarsDataType` (`IsNested`, `HasViews`, `IsStruct`, `IsObject`,
/// `IsLogical`) when the property does not hold.
pub struct FalseT;
/// Type-level "true": the value given to a marker associated type of
/// `PolarsDataType` (`IsNested`, `HasViews`, `IsStruct`, `IsObject`,
/// `IsLogical`) when the property holds.
pub struct TrueT;
65
66/// # Safety
67///
68/// The StaticArray and dtype return must be correct.
69pub unsafe trait PolarsDataType: Send + Sync + Sized + 'static {
70    type Physical<'a>: std::fmt::Debug + Clone;
71    type OwnedPhysical: std::fmt::Debug + Send + Sync + Clone + PartialEq;
72    type ZeroablePhysical<'a>: Zeroable + From<Self::Physical<'a>>;
73    type Array: for<'a> StaticArray<
74            ValueT<'a> = Self::Physical<'a>,
75            ZeroableValueT<'a> = Self::ZeroablePhysical<'a>,
76        >;
77    type IsNested;
78    type HasViews;
79    type IsStruct;
80    type IsObject;
81    type IsLogical;
82
83    fn get_dtype() -> DataType
84    where
85        Self: Sized;
86}
87
88pub trait PolarsNumericType: 'static
89where
90    Self: for<'a> PolarsDataType<
91            OwnedPhysical = Self::Native,
92            Physical<'a> = Self::Native,
93            ZeroablePhysical<'a> = Self::Native,
94            Array = PrimitiveArray<Self::Native>,
95            IsNested = FalseT,
96            HasViews = FalseT,
97            IsStruct = FalseT,
98            IsObject = FalseT,
99            IsLogical = FalseT,
100        >,
101{
102    type Native: NumericNative;
103}
104
105pub trait PolarsIntegerType: PolarsNumericType {}
106pub trait PolarsFloatType: PolarsNumericType {}
107
/// Declares a numeric marker type and wires it into the type system.
///
/// Arguments, in order:
/// 1. `$kind`    - the marker trait to implement (`PolarsIntegerType` or `PolarsFloatType`),
/// 2. `$name`    - the name of the generated empty struct,
/// 3. `$dt`      - the `DataType` variant returned by `get_dtype`,
/// 4. `$native`  - the native number type (physical, zeroable and array element type),
/// 5. `$owned`   - the owned physical type.
///
/// The generated type implements `PolarsDataType` (all marker flags `FalseT`),
/// `PolarsNumericType` (with `Native = $native`) and `$kind`.
macro_rules! impl_polars_num_datatype {
    ($kind:ident, $name:ident, $dt:ident, $native:ty, $owned:ty) => {
        #[derive(Copy, Clone)]
        pub struct $name {}

        unsafe impl PolarsDataType for $name {
            type Physical<'a> = $native;
            type ZeroablePhysical<'a> = $native;
            type OwnedPhysical = $owned;
            type Array = PrimitiveArray<$native>;
            type IsLogical = FalseT;
            type IsObject = FalseT;
            type IsStruct = FalseT;
            type HasViews = FalseT;
            type IsNested = FalseT;

            #[inline]
            fn get_dtype() -> DataType {
                DataType::$dt
            }
        }

        impl PolarsNumericType for $name {
            type Native = $native;
        }

        impl $kind for $name {}
    };
}
137
/// Declares a non-nested, non-struct, non-object marker type and implements
/// `PolarsDataType` for it, with the dtype given as an arbitrary expression.
///
/// Arguments, in order: struct name, dtype expression returned by
/// `get_dtype`, array type, the lifetime used by the physical types, the
/// physical type, the zeroable physical type, the owned physical type, the
/// `HasViews` marker and the `IsLogical` marker.
macro_rules! impl_polars_datatype_pass_dtype {
    ($name:ident, $dt_expr:expr, $array:ty, $life:lifetime, $value:ty, $zeroable:ty, $owned:ty, $views:ident, $logical:ident) => {
        #[derive(Copy, Clone)]
        pub struct $name {}

        unsafe impl PolarsDataType for $name {
            type Physical<$life> = $value;
            type ZeroablePhysical<$life> = $zeroable;
            type OwnedPhysical = $owned;
            type Array = $array;
            // Caller-selected markers.
            type HasViews = $views;
            type IsLogical = $logical;
            // Fixed markers for every type declared through this macro.
            type IsNested = FalseT;
            type IsStruct = FalseT;
            type IsObject = FalseT;

            #[inline]
            fn get_dtype() -> DataType {
                $dt_expr
            }
        }
    };
}
/// Shorthand over `impl_polars_datatype_pass_dtype!` for view-backed types:
/// the dtype is `DataType::$dt`, `HasViews` is `TrueT` and `IsLogical` is
/// `FalseT`.
macro_rules! impl_polars_binview_datatype {
    ($name:ident, $dt:ident, $array:ty, $life:lifetime, $value:ty, $zeroable:ty, $owned:ty) => {
        impl_polars_datatype_pass_dtype!(
            $name,
            DataType::$dt,
            $array,
            $life,
            $value,
            $zeroable,
            $owned,
            TrueT,
            FalseT
        );
    };
}
176
/// Shorthand over `impl_polars_datatype_pass_dtype!` for types without views:
/// the dtype is `DataType::$dt`, `HasViews` is `FalseT` and `IsLogical` is
/// supplied by the caller as the last argument.
macro_rules! impl_polars_datatype {
    ($name:ident, $dt:ident, $array:ty, $life:lifetime, $value:ty, $zeroable:ty, $owned:ty, $logical:ident) => {
        impl_polars_datatype_pass_dtype!(
            $name,
            DataType::$dt,
            $array,
            $life,
            $value,
            $zeroable,
            $owned,
            FalseT,
            $logical
        );
    };
}
192
193impl_polars_num_datatype!(PolarsIntegerType, UInt8Type, UInt8, u8, u8);
194impl_polars_num_datatype!(PolarsIntegerType, UInt16Type, UInt16, u16, u16);
195impl_polars_num_datatype!(PolarsIntegerType, UInt32Type, UInt32, u32, u32);
196impl_polars_num_datatype!(PolarsIntegerType, UInt64Type, UInt64, u64, u64);
197impl_polars_num_datatype!(PolarsIntegerType, Int8Type, Int8, i8, i8);
198impl_polars_num_datatype!(PolarsIntegerType, Int16Type, Int16, i16, i16);
199impl_polars_num_datatype!(PolarsIntegerType, Int32Type, Int32, i32, i32);
200impl_polars_num_datatype!(PolarsIntegerType, Int64Type, Int64, i64, i64);
201
202#[cfg(feature = "dtype-i128")]
203impl_polars_num_datatype!(PolarsIntegerType, Int128Type, Int128, i128, i128);
204impl_polars_num_datatype!(PolarsFloatType, Float32Type, Float32, f32, f32);
205impl_polars_num_datatype!(PolarsFloatType, Float64Type, Float64, f64, f64);
206impl_polars_datatype!(DateType, Date, PrimitiveArray<i32>, 'a, i32, i32, i32, TrueT);
207impl_polars_datatype!(TimeType, Time, PrimitiveArray<i64>, 'a, i64, i64, i64, TrueT);
208impl_polars_binview_datatype!(StringType, String, Utf8ViewArray, 'a, &'a str, Option<&'a str>, String);
209impl_polars_binview_datatype!(BinaryType, Binary, BinaryViewArray, 'a, &'a [u8], Option<&'a [u8]>, Box<[u8]>);
210impl_polars_datatype!(BinaryOffsetType, BinaryOffset, BinaryArray<i64>, 'a, &'a [u8], Option<&'a [u8]>, Box<[u8]>, FalseT);
211impl_polars_datatype!(BooleanType, Boolean, BooleanArray, 'a, bool, bool, bool, FalseT);
212
213#[cfg(feature = "dtype-decimal")]
214impl_polars_datatype_pass_dtype!(DecimalType, DataType::Unknown(UnknownKind::Any), PrimitiveArray<i128>, 'a, i128, i128, i128, FalseT, TrueT);
215impl_polars_datatype_pass_dtype!(DatetimeType, DataType::Unknown(UnknownKind::Any), PrimitiveArray<i64>, 'a, i64, i64, i64, FalseT, TrueT);
216impl_polars_datatype_pass_dtype!(DurationType, DataType::Unknown(UnknownKind::Any), PrimitiveArray<i64>, 'a, i64, i64, i64, FalseT, TrueT);
217impl_polars_datatype_pass_dtype!(CategoricalType, DataType::Unknown(UnknownKind::Any), PrimitiveArray<u32>, 'a, u32, u32, u32, FalseT, TrueT);
218
219#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
220pub struct ListType {}
221unsafe impl PolarsDataType for ListType {
222    type Physical<'a> = Box<dyn Array>;
223    type OwnedPhysical = Box<dyn Array>;
224    type ZeroablePhysical<'a> = Option<Box<dyn Array>>;
225    type Array = ListArray<i64>;
226    type IsNested = TrueT;
227    type HasViews = FalseT;
228    type IsStruct = FalseT;
229    type IsObject = FalseT;
230    type IsLogical = FalseT;
231
232    fn get_dtype() -> DataType {
233        // Null as we cannot know anything without self.
234        DataType::List(Box::new(DataType::Null))
235    }
236}
237
/// Marker type for struct columns, stored as `StructArray`.
#[cfg(feature = "dtype-struct")]
pub struct StructType {}

#[cfg(feature = "dtype-struct")]
unsafe impl PolarsDataType for StructType {
    type Array = StructArray;

    // The physical types below are deliberately invalid placeholders: using
    // them for structs would be very expensive, so const asserts make sure
    // that traits/methods relying on the physical types are never called
    // for structs.
    type Physical<'a> = ();
    type ZeroablePhysical<'a> = ();
    type OwnedPhysical = ();

    type IsStruct = TrueT;
    type IsNested = TrueT;
    type IsObject = FalseT;
    type IsLogical = FalseT;
    type HasViews = FalseT;

    /// Returns a `Struct` dtype with an empty field list.
    fn get_dtype() -> DataType
    where
        Self: Sized,
    {
        DataType::Struct(Vec::new())
    }
}
264
/// Marker type for fixed-size list (array) columns, stored as `FixedSizeListArray`.
#[cfg(feature = "dtype-array")]
pub struct FixedSizeListType {}

#[cfg(feature = "dtype-array")]
unsafe impl PolarsDataType for FixedSizeListType {
    type Array = FixedSizeListArray;
    type Physical<'a> = Box<dyn Array>;
    type ZeroablePhysical<'a> = Option<Box<dyn Array>>;
    type OwnedPhysical = Box<dyn Array>;
    // The only marker set for fixed-size lists is `IsNested`.
    type IsNested = TrueT;
    type IsStruct = FalseT;
    type IsObject = FalseT;
    type IsLogical = FalseT;
    type HasViews = FalseT;

    /// Returns `Array(Null, 0)`: without `self` nothing is known about the
    /// element type, so `Null` stands in for it.
    fn get_dtype() -> DataType {
        let inner = Box::new(DataType::Null);
        DataType::Array(inner, 0)
    }
}
284
/// Marker type for object columns holding values of type `T`, stored as
/// `ObjectArray<T>`.
#[cfg(feature = "object")]
pub struct ObjectType<T>(T);

#[cfg(feature = "object")]
unsafe impl<T: PolarsObject> PolarsDataType for ObjectType<T> {
    type Array = ObjectArray<T>;
    type Physical<'a> = &'a T;
    type ZeroablePhysical<'a> = Option<&'a T>;
    type OwnedPhysical = T;
    // The only marker set for objects is `IsObject`.
    type IsObject = TrueT;
    type IsNested = FalseT;
    type IsStruct = FalseT;
    type IsLogical = FalseT;
    type HasViews = FalseT;

    /// Returns an `Object` dtype built from `T::type_name()`.
    fn get_dtype() -> DataType {
        let name = T::type_name();
        DataType::Object(name)
    }
}
303
304#[cfg(feature = "dtype-array")]
305pub type ArrayChunked = ChunkedArray<FixedSizeListType>;
306pub type ListChunked = ChunkedArray<ListType>;
307pub type BooleanChunked = ChunkedArray<BooleanType>;
308pub type UInt8Chunked = ChunkedArray<UInt8Type>;
309pub type UInt16Chunked = ChunkedArray<UInt16Type>;
310pub type UInt32Chunked = ChunkedArray<UInt32Type>;
311pub type UInt64Chunked = ChunkedArray<UInt64Type>;
312pub type Int8Chunked = ChunkedArray<Int8Type>;
313pub type Int16Chunked = ChunkedArray<Int16Type>;
314pub type Int32Chunked = ChunkedArray<Int32Type>;
315pub type Int64Chunked = ChunkedArray<Int64Type>;
316#[cfg(feature = "dtype-i128")]
317pub type Int128Chunked = ChunkedArray<Int128Type>;
318pub type Float32Chunked = ChunkedArray<Float32Type>;
319pub type Float64Chunked = ChunkedArray<Float64Type>;
320pub type StringChunked = ChunkedArray<StringType>;
321pub type BinaryChunked = ChunkedArray<BinaryType>;
322pub type BinaryOffsetChunked = ChunkedArray<BinaryOffsetType>;
323#[cfg(feature = "object")]
324pub type ObjectChunked<T> = ChunkedArray<ObjectType<T>>;
325
326pub trait NumericNative:
327    TotalOrd
328    + PartialOrd
329    + TotalHash
330    + NativeType
331    + Num
332    + NumCast
333    + Zero
334    + One
335    // + Simd
336    // + Simd8
337    + std::iter::Sum<Self>
338    + Add<Output = Self>
339    + Sub<Output = Self>
340    + Mul<Output = Self>
341    + Div<Output = Self>
342    + Rem<Output = Self>
343    + AddAssign
344    + SubAssign
345    + AbsDiff
346    + Bounded
347    + FromPrimitive
348    + IsFloat
349    + HasPrimitiveArithmeticKernel<TrueDivT=<Self::TrueDivPolarsType as PolarsNumericType>::Native>
350    + FloatSum<f64>
351    + AsPrimitive<f64>
352    + MinMax
353    + IsNull
354{
355    type PolarsType: PolarsNumericType;
356    type TrueDivPolarsType: PolarsNumericType;
357}
358
359impl NumericNative for i8 {
360    type PolarsType = Int8Type;
361    type TrueDivPolarsType = Float64Type;
362}
363impl NumericNative for i16 {
364    type PolarsType = Int16Type;
365    type TrueDivPolarsType = Float64Type;
366}
367impl NumericNative for i32 {
368    type PolarsType = Int32Type;
369    type TrueDivPolarsType = Float64Type;
370}
371impl NumericNative for i64 {
372    type PolarsType = Int64Type;
373    type TrueDivPolarsType = Float64Type;
374}
375#[cfg(feature = "dtype-i128")]
376impl NumericNative for i128 {
377    type PolarsType = Int128Type;
378    type TrueDivPolarsType = Float64Type;
379}
380impl NumericNative for u8 {
381    type PolarsType = UInt8Type;
382    type TrueDivPolarsType = Float64Type;
383}
384impl NumericNative for u16 {
385    type PolarsType = UInt16Type;
386    type TrueDivPolarsType = Float64Type;
387}
388impl NumericNative for u32 {
389    type PolarsType = UInt32Type;
390    type TrueDivPolarsType = Float64Type;
391}
392impl NumericNative for u64 {
393    type PolarsType = UInt64Type;
394    type TrueDivPolarsType = Float64Type;
395}
396impl NumericNative for f32 {
397    type PolarsType = Float32Type;
398    type TrueDivPolarsType = Float32Type;
399}
400impl NumericNative for f64 {
401    type PolarsType = Float64Type;
402    type TrueDivPolarsType = Float64Type;
403}