polars_core/datatypes/
mod.rs

//! # Data types supported by Polars.
//!
//! At the moment Polars doesn't include all data types available in Arrow. The goal is to
//! incrementally support more data types and prioritize these by usability.
//!
//! [See the AnyValue variants](enum.AnyValue.html#variants) for the data types that
//! are currently supported.
//!
9#[cfg(feature = "serde")]
10mod _serde;
11mod aliases;
12mod any_value;
13#[cfg(feature = "dtype-categorical")]
14mod categories;
15mod dtype;
16mod field;
17mod into_scalar;
18#[cfg(feature = "object")]
19mod static_array_collect;
20mod temporal;
21
22use std::cmp::Ordering;
23use std::fmt::{Display, Formatter};
24use std::hash::{Hash, Hasher};
25use std::ops::{Add, AddAssign, Div, Mul, Rem, Sub, SubAssign};
26use std::sync::Arc;
27
28mod schema;
29pub use aliases::*;
30pub use any_value::*;
31pub use arrow::array::{ArrayCollectIterExt, ArrayFromIter, ArrayFromIterDtype, StaticArray};
32#[cfg(feature = "dtype-categorical")]
33use arrow::datatypes::IntegerType;
34pub use arrow::datatypes::reshape::*;
35pub use arrow::datatypes::{ArrowDataType, TimeUnit as ArrowTimeUnit};
36use arrow::types::NativeType;
37use bytemuck::Zeroable;
38#[cfg(feature = "dtype-categorical")]
39pub use categories::{CategoricalMapping, Categories, FrozenCategories};
40pub use dtype::*;
41pub use field::*;
42pub use into_scalar::*;
43use num_traits::{AsPrimitive, Bounded, FromPrimitive, Num, NumCast, One, Zero};
44use polars_compute::arithmetic::HasPrimitiveArithmeticKernel;
45use polars_compute::float_sum::FloatSum;
46use polars_utils::abs_diff::AbsDiff;
47use polars_utils::float::IsFloat;
48use polars_utils::min_max::MinMax;
49use polars_utils::nulls::IsNull;
50use polars_utils::total_ord::TotalHash;
51pub use schema::SchemaExtPl;
52#[cfg(feature = "serde")]
53use serde::de::Visitor;
54#[cfg(any(feature = "serde", feature = "serde-lazy"))]
55use serde::{Deserialize, Serialize};
56#[cfg(any(feature = "serde", feature = "serde-lazy"))]
57use serde::{Deserializer, Serializer};
58pub use temporal::*;
59
60pub use crate::chunked_array::logical::*;
61#[cfg(feature = "object")]
62use crate::chunked_array::object::ObjectArray;
63#[cfg(feature = "object")]
64use crate::chunked_array::object::PolarsObjectSafe;
65use crate::prelude::*;
66use crate::series::implementations::SeriesWrap;
67use crate::utils::Wrap;
68
/// Type-level marker meaning "true".
///
/// Used as the value of the `IsNested`, `HasViews`, `IsStruct` and `IsObject`
/// associated types of [`PolarsDataType`].
pub struct TrueT;
/// Type-level marker meaning "false"; the counterpart of [`TrueT`].
pub struct FalseT;
71
/// Compile-time description of a Polars data type.
///
/// Implementors in this module are marker structs (e.g. `Int32Type`,
/// `StringType`, `ListType`) that tie a data type to its value representations
/// and to the array type that stores it.
///
/// # Safety
///
/// The `StaticArray` named by [`Self::Array`] and the dtype returned by
/// [`Self::get_static_dtype`] must be correct.
pub unsafe trait PolarsDataType: Send + Sync + Sized + 'static {
    /// Value type of a single element; the bound on [`Self::Array`] requires it
    /// to equal the array's `ValueT<'a>`.
    type Physical<'a>: std::fmt::Debug + Clone;
    /// Owned counterpart of [`Self::Physical`] (e.g. `String` for `&str`).
    type OwnedPhysical: std::fmt::Debug + Send + Sync + Clone + PartialEq;
    /// `Zeroable` value type that any [`Self::Physical`] converts into; the bound
    /// on [`Self::Array`] requires it to equal the array's `ZeroableValueT<'a>`.
    type ZeroablePhysical<'a>: Zeroable + From<Self::Physical<'a>>;
    /// Array type that stores values of this data type.
    type Array: for<'a> StaticArray<
            ValueT<'a> = Self::Physical<'a>,
            ZeroableValueT<'a> = Self::ZeroablePhysical<'a>,
        >;
    /// Type-level flag (`TrueT` / `FalseT`); `TrueT` for the list,
    /// fixed-size-list and struct types in this module.
    type IsNested;
    /// Type-level flag (`TrueT` / `FalseT`); `TrueT` for the string and binary
    /// types in this module, which are stored in view arrays.
    type HasViews;
    /// Type-level flag (`TrueT` / `FalseT`); `TrueT` only for the struct type.
    type IsStruct;
    /// Type-level flag (`TrueT` / `FalseT`); `TrueT` only for the object type.
    type IsObject;

    /// Returns the DataType variant associated with this PolarsDataType.
    /// Not implemented for types whose DataTypes have parameters.
    fn get_static_dtype() -> DataType
    where
        Self: Sized;
}
94
/// A physical type is one backed by a `ChunkedArray` directly, as opposed to
/// logical types which wrap physical data.
pub trait PolarsPhysicalType: PolarsDataType {
    /// Converts a `ChunkedArray` of this physical type into a `Series`.
    fn ca_into_series(ca: ChunkedArray<Self>) -> Series;
}
100
/// A physical data type whose values are primitive numbers.
///
/// The `where` clause pins every value-related associated type of
/// [`PolarsDataType`] to [`Self::Native`], fixes the array type to
/// `PrimitiveArray<Self::Native>` and sets all type-level flags to `FalseT`, so
/// generic code bounded on this trait can rely on those equalities.
pub trait PolarsNumericType: PolarsPhysicalType + 'static
where
    Self: for<'a> PolarsDataType<
            OwnedPhysical = Self::Native,
            Physical<'a> = Self::Native,
            ZeroablePhysical<'a> = Self::Native,
            Array = PrimitiveArray<Self::Native>,
            IsNested = FalseT,
            HasViews = FalseT,
            IsStruct = FalseT,
            IsObject = FalseT,
        >,
{
    /// The native Rust number type backing this data type (e.g. `i32`, `f64`).
    type Native: NumericNative;
}
116
/// Marker trait for numeric data types backed by an integer native type.
pub trait PolarsIntegerType: PolarsNumericType {}
/// Marker trait for numeric data types backed by a floating-point native type.
pub trait PolarsFloatType: PolarsNumericType {}
119
// Generates a field-less marker struct for a primitive numeric data type and
// implements `PolarsDataType`, `PolarsNumericType` and the given marker trait
// (`PolarsIntegerType` or `PolarsFloatType`) for it.
//
// Arguments, in order: marker trait, struct name, `DataType` variant,
// native type, owned physical type.
macro_rules! impl_polars_num_datatype {
    ($marker:ident, $name:ident, $variant:ident, $native:ty, $owned:ty) => {
        #[derive(Clone, Copy)]
        pub struct $name {}

        unsafe impl PolarsDataType for $name {
            // Values are stored in a primitive array and handed out by value,
            // so the borrowed and zeroable forms are the native type itself.
            type Array = PrimitiveArray<$native>;
            type Physical<'a> = $native;
            type ZeroablePhysical<'a> = $native;
            type OwnedPhysical = $owned;

            // All type-level flags are off for primitive numerics.
            type IsNested = FalseT;
            type IsStruct = FalseT;
            type IsObject = FalseT;
            type HasViews = FalseT;

            #[inline]
            fn get_static_dtype() -> DataType {
                DataType::$variant
            }
        }

        impl $marker for $name {}

        impl PolarsNumericType for $name {
            type Native = $native;
        }
    };
}
148
// Generates a field-less marker struct for a non-nested, non-object data type
// and implements `PolarsDataType` for it.
//
// Arguments, in order: struct name, expression producing the static
// `DataType`, array type, the lifetime name used by the two lifetime-generic
// types, borrowed physical type, zeroable physical type, owned physical type,
// and the `HasViews` flag (`TrueT` / `FalseT`).
macro_rules! impl_polars_datatype {
    (
        $name:ident,
        $dtype:expr,
        $array:ty,
        $lt:lifetime,
        $borrowed:ty,
        $zeroable:ty,
        $owned:ty,
        $views:ident
    ) => {
        #[derive(Clone, Copy)]
        pub struct $name {}

        unsafe impl PolarsDataType for $name {
            type Array = $array;
            type Physical<$lt> = $borrowed;
            type ZeroablePhysical<$lt> = $zeroable;
            type OwnedPhysical = $owned;

            // Only `HasViews` varies between the types built by this macro.
            type HasViews = $views;
            type IsNested = FalseT;
            type IsStruct = FalseT;
            type IsObject = FalseT;

            #[inline]
            fn get_static_dtype() -> DataType {
                $dtype
            }
        }
    };
}
171
// Primitive numeric data types.
// Arguments: marker trait, marker struct to generate, `DataType` variant,
// native type, owned physical type (identical to the native type here).
impl_polars_num_datatype!(PolarsIntegerType, UInt8Type, UInt8, u8, u8);
impl_polars_num_datatype!(PolarsIntegerType, UInt16Type, UInt16, u16, u16);
impl_polars_num_datatype!(PolarsIntegerType, UInt32Type, UInt32, u32, u32);
impl_polars_num_datatype!(PolarsIntegerType, UInt64Type, UInt64, u64, u64);
impl_polars_num_datatype!(PolarsIntegerType, Int8Type, Int8, i8, i8);
impl_polars_num_datatype!(PolarsIntegerType, Int16Type, Int16, i16, i16);
impl_polars_num_datatype!(PolarsIntegerType, Int32Type, Int32, i32, i32);
impl_polars_num_datatype!(PolarsIntegerType, Int64Type, Int64, i64, i64);
// 128-bit integers are only available behind the `dtype-i128` feature.
#[cfg(feature = "dtype-i128")]
impl_polars_num_datatype!(PolarsIntegerType, Int128Type, Int128, i128, i128);
impl_polars_num_datatype!(PolarsFloatType, Float32Type, Float32, f32, f32);
impl_polars_num_datatype!(PolarsFloatType, Float64Type, Float64, f64, f64);
184
// Non-numeric data types.
// Arguments: marker struct, static dtype expression, array type, lifetime name,
// borrowed physical, zeroable physical, owned physical, `HasViews` flag.
impl_polars_datatype!(StringType, DataType::String, Utf8ViewArray, 'a, &'a str, Option<&'a str>, String, TrueT);
impl_polars_datatype!(BinaryType, DataType::Binary, BinaryViewArray, 'a, &'a [u8], Option<&'a [u8]>, Box<[u8]>, TrueT);
impl_polars_datatype!(BinaryOffsetType, DataType::BinaryOffset, BinaryArray<i64>, 'a, &'a [u8], Option<&'a [u8]>, Box<[u8]>, FalseT);
impl_polars_datatype!(BooleanType, DataType::Boolean, BooleanArray, 'a, bool, bool, bool, FalseT);

// The types below that pass `unimplemented!()` as their dtype expression have
// no static dtype: calling `get_static_dtype()` on them panics.
// Note that the `cfg` attribute applies to `DecimalType` only.
#[cfg(feature = "dtype-decimal")]
impl_polars_datatype!(DecimalType, unimplemented!(), PrimitiveArray<i128>, 'a, i128, i128, i128, FalseT);
impl_polars_datatype!(DatetimeType, unimplemented!(), PrimitiveArray<i64>, 'a, i64, i64, i64, FalseT);
impl_polars_datatype!(DurationType, unimplemented!(), PrimitiveArray<i64>, 'a, i64, i64, i64, FalseT);
impl_polars_datatype!(CategoricalType, unimplemented!(), PrimitiveArray<u32>, 'a, u32, u32, u32, FalseT);
impl_polars_datatype!(DateType, DataType::Date, PrimitiveArray<i32>, 'a, i32, i32, i32, FalseT);
impl_polars_datatype!(TimeType, DataType::Time, PrimitiveArray<i64>, 'a, i64, i64, i64, FalseT);
197
198#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
199pub struct ListType {}
200unsafe impl PolarsDataType for ListType {
201    type Physical<'a> = Box<dyn Array>;
202    type OwnedPhysical = Box<dyn Array>;
203    type ZeroablePhysical<'a> = Option<Box<dyn Array>>;
204    type Array = ListArray<i64>;
205    type IsNested = TrueT;
206    type HasViews = FalseT;
207    type IsStruct = FalseT;
208    type IsObject = FalseT;
209
210    fn get_static_dtype() -> DataType {
211        // Null as we cannot know anything without self.
212        DataType::List(Box::new(DataType::Null))
213    }
214}
215
216impl PolarsPhysicalType for ListType {
217    fn ca_into_series(ca: ChunkedArray<Self>) -> Series {
218        Series(Arc::new(SeriesWrap(ca)))
219    }
220}
221
/// Marker type for the struct data type, stored in a `StructArray`.
#[cfg(feature = "dtype-struct")]
pub struct StructType {}

#[cfg(feature = "dtype-struct")]
unsafe impl PolarsDataType for StructType {
    type Array = StructArray;

    // The three physical types below are deliberately invalid placeholders:
    // materialising struct values through them would be very expensive, so
    // they must not be used. Const asserts ensure that traits/methods relying
    // on the physical types are not called for structs.
    type Physical<'a> = ();
    type ZeroablePhysical<'a> = ();
    type OwnedPhysical = ();

    type IsNested = TrueT;
    type IsStruct = TrueT;
    type IsObject = FalseT;
    type HasViews = FalseT;

    /// Returns a struct dtype with no fields.
    fn get_static_dtype() -> DataType {
        DataType::Struct(Vec::new())
    }
}
247
/// Marker type for the fixed-size list (array) data type, stored in a
/// `FixedSizeListArray`.
#[cfg(feature = "dtype-array")]
pub struct FixedSizeListType {}

#[cfg(feature = "dtype-array")]
unsafe impl PolarsDataType for FixedSizeListType {
    type Array = FixedSizeListArray;
    // Each element of a fixed-size list column is itself an array.
    type Physical<'a> = Box<dyn Array>;
    type ZeroablePhysical<'a> = Option<Box<dyn Array>>;
    type OwnedPhysical = Box<dyn Array>;

    type IsNested = TrueT;
    type IsStruct = FalseT;
    type IsObject = FalseT;
    type HasViews = FalseT;

    /// Returns `Array(Null, 0)`: neither the inner dtype nor the width can be
    /// known without an instance, so placeholders are used.
    fn get_static_dtype() -> DataType {
        let inner = Box::new(DataType::Null);
        DataType::Array(inner, 0)
    }
}
266
/// Marker type for columns holding values of a user-provided object type `T`,
/// stored in an `ObjectArray<T>`.
#[cfg(feature = "object")]
pub struct ObjectType<T>(T);

#[cfg(feature = "object")]
unsafe impl<T: PolarsObject> PolarsDataType for ObjectType<T> {
    type Array = ObjectArray<T>;
    // Objects are handed out by reference and owned as `T`.
    type Physical<'a> = &'a T;
    type ZeroablePhysical<'a> = Option<&'a T>;
    type OwnedPhysical = T;

    type IsObject = TrueT;
    type IsNested = FalseT;
    type IsStruct = FalseT;
    type HasViews = FalseT;

    /// Returns the object dtype, labelled with `T::type_name()`.
    fn get_static_dtype() -> DataType {
        let name = T::type_name();
        DataType::Object(name)
    }
}
284
// Implements `PolarsPhysicalType` for a data type by wrapping its
// `ChunkedArray` in a `SeriesWrap` and returning that as a `Series`.
macro_rules! impl_phys_dtype {
    ($dtype:ty) => {
        impl PolarsPhysicalType for $dtype {
            fn ca_into_series(ca: ChunkedArray<Self>) -> Series {
                let wrapped = SeriesWrap(ca);
                Series(Arc::new(wrapped))
            }
        }
    };
}
294
// Implements `PolarsPhysicalType` for a data type whose `Series` support is
// gated behind a cargo feature. The trait impl itself is always emitted; only
// the body depends on the feature. Exactly one of the two `cfg` blocks below
// survives compilation and becomes the function's tail expression.
macro_rules! impl_cond_phys_dtype {
    ($dtype:ty, $feature:literal) => {
        impl PolarsPhysicalType for $dtype {
            fn ca_into_series(ca: ChunkedArray<Self>) -> Series {
                // Feature disabled: the `SeriesWrap` path is not referenced at
                // all, so the call panics.
                #[cfg(not(feature = $feature))]
                {
                    unimplemented!()
                }

                // Feature enabled: same conversion as `impl_phys_dtype!`.
                #[cfg(feature = $feature)]
                {
                    let wrapped = SeriesWrap(ca);
                    Series(Arc::new(wrapped))
                }
            }
        }
    };
}
312
// Annoyingly these types always exist but may not have an implementation to refer to.
// Without the named feature, `ca_into_series` for these types panics with
// `unimplemented!()`.
impl_cond_phys_dtype!(UInt8Type, "dtype-u8");
impl_cond_phys_dtype!(UInt16Type, "dtype-u16");
impl_cond_phys_dtype!(Int8Type, "dtype-i8");
impl_cond_phys_dtype!(Int16Type, "dtype-i16");

// Numeric types whose physical implementation is not feature-gated.
impl_phys_dtype!(Int32Type);
impl_phys_dtype!(Int64Type);
impl_phys_dtype!(UInt32Type);
impl_phys_dtype!(UInt64Type);
impl_phys_dtype!(Float32Type);
impl_phys_dtype!(Float64Type);

// String, binary and boolean types.
impl_phys_dtype!(StringType);
impl_phys_dtype!(BinaryType);
impl_phys_dtype!(BinaryOffsetType);
impl_phys_dtype!(BooleanType);

// Types that only exist when their feature is enabled.
#[cfg(feature = "dtype-i128")]
impl_phys_dtype!(Int128Type);

#[cfg(feature = "dtype-array")]
impl_phys_dtype!(FixedSizeListType);

#[cfg(feature = "dtype-struct")]
impl_phys_dtype!(StructType);
339
// Written by hand rather than via `impl_phys_dtype!` because the impl is
// generic over the object type `T`.
#[cfg(feature = "object")]
impl<T: PolarsObject> PolarsPhysicalType for ObjectType<T> {
    /// Wraps the chunked array in a `SeriesWrap` and returns it as a `Series`.
    fn ca_into_series(ca: ChunkedArray<Self>) -> Series {
        let wrapped = SeriesWrap(ca);
        Series(Arc::new(wrapped))
    }
}
346
// Convenience aliases: one `ChunkedArray` alias per data type marker.

/// `ChunkedArray` of fixed-size lists (requires the `dtype-array` feature).
#[cfg(feature = "dtype-array")]
pub type ArrayChunked = ChunkedArray<FixedSizeListType>;
/// `ChunkedArray` of lists.
pub type ListChunked = ChunkedArray<ListType>;
/// `ChunkedArray` of booleans.
pub type BooleanChunked = ChunkedArray<BooleanType>;
/// `ChunkedArray` of `u8` values.
pub type UInt8Chunked = ChunkedArray<UInt8Type>;
/// `ChunkedArray` of `u16` values.
pub type UInt16Chunked = ChunkedArray<UInt16Type>;
/// `ChunkedArray` of `u32` values.
pub type UInt32Chunked = ChunkedArray<UInt32Type>;
/// `ChunkedArray` of `u64` values.
pub type UInt64Chunked = ChunkedArray<UInt64Type>;
/// `ChunkedArray` of `i8` values.
pub type Int8Chunked = ChunkedArray<Int8Type>;
/// `ChunkedArray` of `i16` values.
pub type Int16Chunked = ChunkedArray<Int16Type>;
/// `ChunkedArray` of `i32` values.
pub type Int32Chunked = ChunkedArray<Int32Type>;
/// `ChunkedArray` of `i64` values.
pub type Int64Chunked = ChunkedArray<Int64Type>;
/// `ChunkedArray` of `i128` values (requires the `dtype-i128` feature).
#[cfg(feature = "dtype-i128")]
pub type Int128Chunked = ChunkedArray<Int128Type>;
/// `ChunkedArray` of `f32` values.
pub type Float32Chunked = ChunkedArray<Float32Type>;
/// `ChunkedArray` of `f64` values.
pub type Float64Chunked = ChunkedArray<Float64Type>;
/// `ChunkedArray` of strings, stored in a `Utf8ViewArray`.
pub type StringChunked = ChunkedArray<StringType>;
/// `ChunkedArray` of binary values, stored in a `BinaryViewArray`.
pub type BinaryChunked = ChunkedArray<BinaryType>;
/// `ChunkedArray` of binary values, stored in a `BinaryArray<i64>`.
pub type BinaryOffsetChunked = ChunkedArray<BinaryOffsetType>;
/// `ChunkedArray` of user-provided objects of type `T` (requires the `object` feature).
#[cfg(feature = "object")]
pub type ObjectChunked<T> = ChunkedArray<ObjectType<T>>;
368
/// Native Rust number types that can back a [`PolarsNumericType`].
///
/// The supertrait list gathers the ordering, hashing, arithmetic, conversion
/// and kernel capabilities a native number type must provide, so generic code
/// can bound on this single trait.
pub trait NumericNative:
    TotalOrd
    + PartialOrd
    + TotalHash
    + NativeType
    + Num
    + NumCast
    + Zero
    + One
    // + Simd
    // + Simd8
    + std::iter::Sum<Self>
    + Add<Output = Self>
    + Sub<Output = Self>
    + Mul<Output = Self>
    + Div<Output = Self>
    + Rem<Output = Self>
    + AddAssign
    + SubAssign
    + AbsDiff
    + Bounded
    + FromPrimitive
    + IsFloat
    // The kernel's `TrueDivT` must be the native type of `TrueDivPolarsType`.
    + HasPrimitiveArithmeticKernel<TrueDivT=<Self::TrueDivPolarsType as PolarsNumericType>::Native>
    + FloatSum<f64>
    + AsPrimitive<f64>
    + MinMax
    + IsNull
{
    /// The Polars data type marker corresponding to this native type.
    type PolarsType: PolarsNumericType;
    /// The Polars data type whose native type equals the arithmetic kernel's
    /// `TrueDivT`. In this module it is `Float64Type` for every integer type
    /// and for `f64`, and `Float32Type` for `f32`.
    type TrueDivPolarsType: PolarsNumericType;
}
401
402impl NumericNative for i8 {
403    type PolarsType = Int8Type;
404    type TrueDivPolarsType = Float64Type;
405}
406impl NumericNative for i16 {
407    type PolarsType = Int16Type;
408    type TrueDivPolarsType = Float64Type;
409}
410impl NumericNative for i32 {
411    type PolarsType = Int32Type;
412    type TrueDivPolarsType = Float64Type;
413}
414impl NumericNative for i64 {
415    type PolarsType = Int64Type;
416    type TrueDivPolarsType = Float64Type;
417}
418#[cfg(feature = "dtype-i128")]
419impl NumericNative for i128 {
420    type PolarsType = Int128Type;
421    type TrueDivPolarsType = Float64Type;
422}
423impl NumericNative for u8 {
424    type PolarsType = UInt8Type;
425    type TrueDivPolarsType = Float64Type;
426}
427impl NumericNative for u16 {
428    type PolarsType = UInt16Type;
429    type TrueDivPolarsType = Float64Type;
430}
431impl NumericNative for u32 {
432    type PolarsType = UInt32Type;
433    type TrueDivPolarsType = Float64Type;
434}
435impl NumericNative for u64 {
436    type PolarsType = UInt64Type;
437    type TrueDivPolarsType = Float64Type;
438}
439impl NumericNative for f32 {
440    type PolarsType = Float32Type;
441    type TrueDivPolarsType = Float32Type;
442}
443impl NumericNative for f64 {
444    type PolarsType = Float64Type;
445    type TrueDivPolarsType = Float64Type;
446}