polars_core/datatypes/
mod.rs

//! # Data types supported by Polars.
//!
//! At the moment Polars doesn't include all data types available in Arrow. The goal is to
//! incrementally support more data types and prioritize these by usability.
//!
//! [See the AnyValue variants](enum.AnyValue.html#variants) for the data types that
//! are currently supported.
//!
9#[cfg(feature = "serde")]
10mod _serde;
11mod aliases;
12mod any_value;
13mod dtype;
14#[cfg(feature = "dtype-extension")]
15pub mod extension;
16mod field;
17mod into_scalar;
18#[cfg(feature = "object")]
19mod static_array_collect;
20mod temporal;
21
22#[cfg(feature = "proptest")]
23pub mod proptest;
24
25use std::cmp::Ordering;
26use std::fmt::{Display, Formatter};
27use std::hash::{Hash, Hasher};
28use std::ops::{Add, AddAssign, Div, Mul, Rem, Sub, SubAssign};
29use std::sync::Arc;
30
31mod schema;
32pub use aliases::*;
33pub use any_value::*;
34pub use arrow::array::{ArrayCollectIterExt, ArrayFromIter, ArrayFromIterDtype, StaticArray};
35#[cfg(feature = "dtype-categorical")]
36use arrow::datatypes::IntegerType;
37pub use arrow::datatypes::reshape::*;
38pub use arrow::datatypes::{ArrowDataType, TimeUnit as ArrowTimeUnit};
39use arrow::types::NativeType;
40use bytemuck::Zeroable;
41pub use dtype::*;
42pub use field::*;
43pub use into_scalar::*;
44use num_traits::{AsPrimitive, Bounded, FromPrimitive, Num, NumCast, One, Zero};
45use polars_compute::arithmetic::HasPrimitiveArithmeticKernel;
46use polars_compute::float_sum::FloatSum;
47#[cfg(feature = "dtype-categorical")]
48pub use polars_dtype::categorical::{
49    CatNative, CatSize, CategoricalMapping, CategoricalPhysical, Categories, FrozenCategories,
50    ensure_same_categories, ensure_same_frozen_categories,
51};
52use polars_utils::abs_diff::AbsDiff;
53use polars_utils::float::IsFloat;
54use polars_utils::float16::pf16;
55use polars_utils::min_max::MinMax;
56use polars_utils::nulls::IsNull;
57use polars_utils::total_ord::TotalHash;
58pub use schema::SchemaExtPl;
59#[cfg(any(feature = "serde", feature = "serde-lazy"))]
60use serde::{Deserialize, Serialize};
61#[cfg(any(feature = "serde", feature = "serde-lazy"))]
62use serde::{Deserializer, Serializer};
63pub use temporal::*;
64
65pub use crate::chunked_array::logical::*;
66#[cfg(feature = "object")]
67use crate::chunked_array::object::ObjectArray;
68#[cfg(feature = "object")]
69use crate::chunked_array::object::PolarsObjectSafe;
70use crate::prelude::*;
71use crate::series::implementations::SeriesWrap;
72use crate::utils::Wrap;
73
/// Type-level `true`: used as the value of the flag-like associated types of
/// `PolarsDataType` (`IsNested`, `HasViews`, `IsStruct`, `IsObject`).
pub struct TrueT;
/// Type-level `false`: counterpart of `TrueT` for the same associated types.
pub struct FalseT;
76
/// Compile-time description of a Polars data type: its value types, its
/// backing array type, and a set of type-level flags.
///
/// # Safety
///
/// The StaticArray and dtype return must be correct.
pub unsafe trait PolarsDataType: Send + Sync + Sized + 'static {
    /// Borrowed value type; must be the same type as `Self::Array`'s `ValueT<'a>`.
    type Physical<'a>: std::fmt::Debug + Clone;
    /// Owned counterpart of the value type (e.g. `String` for `StringType`).
    type OwnedPhysical: std::fmt::Debug + Send + Sync + Clone + PartialEq;
    /// Zeroable value type constructible from `Physical`; must be the same
    /// type as `Self::Array`'s `ZeroableValueT<'a>`.
    type ZeroablePhysical<'a>: Zeroable + From<Self::Physical<'a>>;
    /// Array type for this data type; its value types are tied to
    /// `Physical` and `ZeroablePhysical` by the bound below.
    type Array: for<'a> StaticArray<
            ValueT<'a> = Self::Physical<'a>,
            ZeroableValueT<'a> = Self::ZeroablePhysical<'a>,
        >;
    /// Type-level flag (`TrueT`/`FalseT`); e.g. `TrueT` for `ListType`.
    type IsNested;
    /// Type-level flag (`TrueT`/`FalseT`); e.g. `TrueT` for `StringType` and `BinaryType`.
    type HasViews;
    /// Type-level flag (`TrueT`/`FalseT`); e.g. `TrueT` for `StructType`.
    type IsStruct;
    /// Type-level flag (`TrueT`/`FalseT`); e.g. `TrueT` for `ObjectType<T>`.
    type IsObject;

    /// Returns the DataType variant associated with this PolarsDataType.
    /// Not implemented for types whose DataTypes have parameters.
    fn get_static_dtype() -> DataType
    where
        Self: Sized;
}
99
/// A `PolarsDataType` whose `ChunkedArray` can be converted into a `Series`.
pub trait PolarsPhysicalType: PolarsDataType {
    // A physical type is one backed by a ChunkedArray directly, as opposed to
    // logical types which wrap physical data.
    /// Converts a `ChunkedArray` of this type into a `Series`.
    fn ca_into_series(ca: ChunkedArray<Self>) -> Series;
}
105
/// A physical type whose values are plain native numbers.
///
/// The `where` clause pins every associated item of `PolarsDataType`: all
/// three value types equal `Self::Native`, the array type is
/// `PrimitiveArray<Self::Native>`, and all four flags are `FalseT`.
pub trait PolarsNumericType: PolarsPhysicalType + 'static
where
    Self: for<'a> PolarsDataType<
            OwnedPhysical = Self::Native,
            Physical<'a> = Self::Native,
            ZeroablePhysical<'a> = Self::Native,
            Array = PrimitiveArray<Self::Native>,
            IsNested = FalseT,
            HasViews = FalseT,
            IsStruct = FalseT,
            IsObject = FalseT,
        >,
{
    /// The native Rust number type stored by this data type.
    type Native: NumericNative;
}
121
/// Marker for numeric types declared as integers (the `UInt*Type`/`Int*Type` family).
pub trait PolarsIntegerType: PolarsNumericType {}
/// Marker for numeric types declared as floats (the `Float*Type` family).
pub trait PolarsFloatType: PolarsNumericType {}
124
/// A categorical data type, parameterised by the integer type of its keys.
///
/// # Safety
/// The physical() return type must be correct for Native.
#[cfg(feature = "dtype-categorical")]
pub unsafe trait PolarsCategoricalType: PolarsDataType {
    /// Native integer type of the categorical keys.
    type Native: NumericNative + CatNative + DictionaryKey + PartialEq + Eq + Hash;
    /// Integer Polars type whose `Native` is the same key type.
    type PolarsPhysical: PolarsIntegerType<Native = Self::Native>;

    /// Returns the `CategoricalPhysical` variant matching `Self::Native`.
    fn physical() -> CategoricalPhysical;
}
134
// Defines a numeric marker type and all of its trait impls.
//
// Arguments:
// - `$trait`: marker trait to implement (`PolarsIntegerType` or `PolarsFloatType`)
// - `$pdt`: name of the marker struct to generate
// - `$variant`: `DataType` variant returned by `get_static_dtype`
// - `$physical`: native value type (also used as `Native` and the array element type)
// - `$owned_phys`: owned value type
macro_rules! impl_polars_num_datatype {
    ($trait: ident, $pdt:ident, $variant:ident, $physical:ty, $owned_phys:ty) => {
        #[derive(Clone, Copy)]
        pub struct $pdt {}

        unsafe impl PolarsDataType for $pdt {
            type Physical<'a> = $physical;
            type OwnedPhysical = $owned_phys;
            type ZeroablePhysical<'a> = $physical;
            type Array = PrimitiveArray<$physical>;
            type IsNested = FalseT;
            type HasViews = FalseT;
            type IsStruct = FalseT;
            type IsObject = FalseT;

            #[inline]
            fn get_static_dtype() -> DataType {
                DataType::$variant
            }
        }

        impl PolarsNumericType for $pdt {
            type Native = $physical;
        }

        impl $trait for $pdt {}
    };
}
163
// Defines a non-nested, non-struct, non-object marker type and its
// `PolarsDataType` impl.
//
// Arguments:
// - `$pdt`: name of the marker struct to generate
// - `$dtype`: expression used as the body of `get_static_dtype`
//   (callers may pass `unimplemented!()`, in which case the method panics)
// - `$arr`: array type
// - `$lt`: lifetime name used by `$phys` / `$zerophys`
// - `$phys`, `$zerophys`, `$owned_phys`: the three value types
// - `$has_views`: `TrueT` or `FalseT`
macro_rules! impl_polars_datatype {
    ($pdt:ident, $dtype:expr, $arr:ty, $lt:lifetime, $phys:ty, $zerophys:ty, $owned_phys:ty, $has_views:ident) => {
        #[derive(Clone, Copy)]
        pub struct $pdt {}

        unsafe impl PolarsDataType for $pdt {
            type Physical<$lt> = $phys;
            type OwnedPhysical = $owned_phys;
            type ZeroablePhysical<$lt> = $zerophys;
            type Array = $arr;
            type IsNested = FalseT;
            type HasViews = $has_views;
            type IsStruct = FalseT;
            type IsObject = FalseT;

            #[inline]
            fn get_static_dtype() -> DataType {
                $dtype
            }
        }
    };
}
186
// Defines a categorical marker type backed by `PrimitiveArray<$native>`.
//
// Arguments:
// - `$pdt`: name of the marker struct to generate
// - `$phys`: integer Polars type used as `PolarsPhysical`
// - `$native`: native key type
// - `$phys_variant`: `CategoricalPhysical` variant returned by `physical()`
macro_rules! impl_polars_categorical_datatype {
    ($pdt:ident, $phys:ty, $native:ty, $phys_variant:ident) => {
        // The dtype expression is `unimplemented!()`, so `get_static_dtype`
        // panics for the generated type.
        impl_polars_datatype!(
            $pdt,
            unimplemented!(),
            PrimitiveArray<$native>,
            'a,
            $native,
            $native,
            $native,
            FalseT
        );

        // The marker struct is always generated; only this impl is feature-gated.
        #[cfg(feature = "dtype-categorical")]
        unsafe impl PolarsCategoricalType for $pdt {
            type Native = $native;
            type PolarsPhysical = $phys;

            fn physical() -> CategoricalPhysical {
                CategoricalPhysical::$phys_variant
            }
        }
    }
}
211
// Numeric marker types.
// Arguments: marker trait, type name, `DataType` variant, native type, owned type.
// The 128-bit integer and 16-bit float types only exist behind their features.
impl_polars_num_datatype!(PolarsIntegerType, UInt8Type, UInt8, u8, u8);
impl_polars_num_datatype!(PolarsIntegerType, UInt16Type, UInt16, u16, u16);
impl_polars_num_datatype!(PolarsIntegerType, UInt32Type, UInt32, u32, u32);
impl_polars_num_datatype!(PolarsIntegerType, UInt64Type, UInt64, u64, u64);
#[cfg(feature = "dtype-u128")]
impl_polars_num_datatype!(PolarsIntegerType, UInt128Type, UInt128, u128, u128);
impl_polars_num_datatype!(PolarsIntegerType, Int8Type, Int8, i8, i8);
impl_polars_num_datatype!(PolarsIntegerType, Int16Type, Int16, i16, i16);
impl_polars_num_datatype!(PolarsIntegerType, Int32Type, Int32, i32, i32);
impl_polars_num_datatype!(PolarsIntegerType, Int64Type, Int64, i64, i64);
#[cfg(feature = "dtype-i128")]
impl_polars_num_datatype!(PolarsIntegerType, Int128Type, Int128, i128, i128);
#[cfg(feature = "dtype-f16")]
impl_polars_num_datatype!(PolarsFloatType, Float16Type, Float16, pf16, pf16);
impl_polars_num_datatype!(PolarsFloatType, Float32Type, Float32, f32, f32);
impl_polars_num_datatype!(PolarsFloatType, Float64Type, Float64, f64, f64);
228
// String, binary and boolean marker types.
// Arguments: type name, static dtype, array type, lifetime, physical value,
// zeroable value, owned value, `HasViews` flag.
impl_polars_datatype!(StringType, DataType::String, Utf8ViewArray, 'a, &'a str, Option<&'a str>, String, TrueT);
impl_polars_datatype!(BinaryType, DataType::Binary, BinaryViewArray, 'a, &'a [u8], Option<&'a [u8]>, Box<[u8]>, TrueT);
impl_polars_datatype!(BinaryOffsetType, DataType::BinaryOffset, BinaryArray<i64>, 'a, &'a [u8], Option<&'a [u8]>, Box<[u8]>, FalseT);
impl_polars_datatype!(BooleanType, DataType::Boolean, BooleanArray, 'a, bool, bool, bool, FalseT);
233
// Logical marker types backed by primitive arrays.
// Decimal, Datetime, Duration and Categorical pass `unimplemented!()` as the
// dtype expression, so `get_static_dtype` panics for them; Date and Time have
// a static dtype. The `cfg` attribute applies to `DecimalType` only.
#[cfg(feature = "dtype-decimal")]
impl_polars_datatype!(DecimalType, unimplemented!(), PrimitiveArray<i128>, 'a, i128, i128, i128, FalseT);
impl_polars_datatype!(DatetimeType, unimplemented!(), PrimitiveArray<i64>, 'a, i64, i64, i64, FalseT);
impl_polars_datatype!(DurationType, unimplemented!(), PrimitiveArray<i64>, 'a, i64, i64, i64, FalseT);
impl_polars_datatype!(CategoricalType, unimplemented!(), PrimitiveArray<u32>, 'a, u32, u32, u32, FalseT);
impl_polars_datatype!(DateType, DataType::Date, PrimitiveArray<i32>, 'a, i32, i32, i32, FalseT);
impl_polars_datatype!(TimeType, DataType::Time, PrimitiveArray<i64>, 'a, i64, i64, i64, FalseT);
241
// Categorical marker types, one per key width.
// Arguments: type name, integer Polars type, native key type, `CategoricalPhysical` variant.
impl_polars_categorical_datatype!(Categorical8Type, UInt8Type, u8, U8);
impl_polars_categorical_datatype!(Categorical16Type, UInt16Type, u16, U16);
impl_polars_categorical_datatype!(Categorical32Type, UInt32Type, u32, U32);
245
246#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
247pub struct ListType {}
248unsafe impl PolarsDataType for ListType {
249    type Physical<'a> = Box<dyn Array>;
250    type OwnedPhysical = Box<dyn Array>;
251    type ZeroablePhysical<'a> = Option<Box<dyn Array>>;
252    type Array = ListArray<i64>;
253    type IsNested = TrueT;
254    type HasViews = FalseT;
255    type IsStruct = FalseT;
256    type IsObject = FalseT;
257
258    fn get_static_dtype() -> DataType {
259        // Null as we cannot know anything without self.
260        DataType::List(Box::new(DataType::Null))
261    }
262}
263
264impl PolarsPhysicalType for ListType {
265    fn ca_into_series(ca: ChunkedArray<Self>) -> Series {
266        Series(Arc::new(SeriesWrap(ca)))
267    }
268}
269
/// Marker type for the struct data type, backed by `StructArray`.
#[cfg(feature = "dtype-struct")]
pub struct StructType {}

#[cfg(feature = "dtype-struct")]
unsafe impl PolarsDataType for StructType {
    type Array = StructArray;

    // The value types below are deliberately meaningless placeholders:
    // going through physical values would be very expensive for structs.
    // Const asserts ensure that traits/methods relying on the physical
    // types are never called for structs.
    type Physical<'a> = ();
    type OwnedPhysical = ();
    type ZeroablePhysical<'a> = ();

    type IsNested = TrueT;
    type IsStruct = TrueT;
    type IsObject = FalseT;
    type HasViews = FalseT;

    /// Returns a struct dtype with no fields.
    fn get_static_dtype() -> DataType {
        DataType::Struct(Vec::new())
    }
}
295
/// Marker type for the fixed-size list data type, backed by `FixedSizeListArray`.
#[cfg(feature = "dtype-array")]
pub struct FixedSizeListType {}

#[cfg(feature = "dtype-array")]
unsafe impl PolarsDataType for FixedSizeListType {
    type Array = FixedSizeListArray;
    type Physical<'a> = Box<dyn Array>;
    type OwnedPhysical = Box<dyn Array>;
    type ZeroablePhysical<'a> = Option<Box<dyn Array>>;
    type IsNested = TrueT;
    type IsStruct = FalseT;
    type IsObject = FalseT;
    type HasViews = FalseT;

    /// Returns `Array(Null, 0)`: inner dtype and width cannot be known
    /// without an instance.
    fn get_static_dtype() -> DataType {
        let inner = Box::new(DataType::Null);
        DataType::Array(inner, 0)
    }
}
314
/// Marker type for columns holding values of type `T`, backed by `ObjectArray<T>`.
#[cfg(feature = "object")]
pub struct ObjectType<T>(T);

#[cfg(feature = "object")]
unsafe impl<T: PolarsObject> PolarsDataType for ObjectType<T> {
    type Array = ObjectArray<T>;
    type Physical<'a> = &'a T;
    type OwnedPhysical = T;
    type ZeroablePhysical<'a> = Option<&'a T>;
    type IsNested = FalseT;
    type IsStruct = FalseT;
    type IsObject = TrueT;
    type HasViews = FalseT;

    /// Returns an object dtype carrying `T::type_name()`.
    fn get_static_dtype() -> DataType {
        let type_name = T::type_name();
        DataType::Object(type_name)
    }
}
332
// Implements `PolarsPhysicalType` for `$dtype` by wrapping the chunked array
// in a `SeriesWrap` behind an `Arc`.
macro_rules! impl_phys_dtype {
    ($dtype:ty) => {
        impl PolarsPhysicalType for $dtype {
            fn ca_into_series(ca: ChunkedArray<Self>) -> Series {
                let wrapped = Arc::new(SeriesWrap(ca));
                Series(wrapped)
            }
        }
    };
}
342
// Implements `PolarsPhysicalType` for `$pdt`, with a body that depends on
// the cargo feature named by `$feat`:
// - feature enabled: wraps the chunked array in a `SeriesWrap`;
// - feature disabled: `ca_into_series` panics via `unimplemented!()`.
macro_rules! impl_cond_phys_dtype {
    ($pdt:ty, $feat:literal) => {
        impl PolarsPhysicalType for $pdt {
            fn ca_into_series(ca: ChunkedArray<Self>) -> Series {
                // Exactly one of the two blocks below survives cfg-stripping
                // and becomes the function's tail expression.
                #[cfg(feature = $feat)]
                {
                    Series(Arc::new(SeriesWrap(ca)))
                }

                #[cfg(not(feature = $feat))]
                {
                    unimplemented!()
                }
            }
        }
    };
}
360
// Annoyingly these types always exist but may not have an implementation to refer to.
// With the named feature disabled, `ca_into_series` panics for these types.
impl_cond_phys_dtype!(UInt8Type, "dtype-u8");
impl_cond_phys_dtype!(UInt16Type, "dtype-u16");
impl_cond_phys_dtype!(Int8Type, "dtype-i8");
impl_cond_phys_dtype!(Int16Type, "dtype-i16");

// Numeric types that get an unconditional body (Float16Type itself is feature-gated).
impl_phys_dtype!(Int32Type);
impl_phys_dtype!(Int64Type);
impl_phys_dtype!(UInt32Type);
impl_phys_dtype!(UInt64Type);
#[cfg(feature = "dtype-f16")]
impl_phys_dtype!(Float16Type);
impl_phys_dtype!(Float32Type);
impl_phys_dtype!(Float64Type);

// String, binary and boolean types.
impl_phys_dtype!(StringType);
impl_phys_dtype!(BinaryType);
impl_phys_dtype!(BinaryOffsetType);
impl_phys_dtype!(BooleanType);

// Types that only exist behind a feature flag.
#[cfg(feature = "dtype-u128")]
impl_phys_dtype!(UInt128Type);
#[cfg(feature = "dtype-i128")]
impl_phys_dtype!(Int128Type);

#[cfg(feature = "dtype-array")]
impl_phys_dtype!(FixedSizeListType);

#[cfg(feature = "dtype-struct")]
impl_phys_dtype!(StructType);
391
#[cfg(feature = "object")]
impl<T: PolarsObject> PolarsPhysicalType for ObjectType<T> {
    /// Wraps the chunked array in a `SeriesWrap` and exposes it as a `Series`.
    fn ca_into_series(ca: ChunkedArray<Self>) -> Series {
        let wrapped = Arc::new(SeriesWrap(ca));
        Series(wrapped)
    }
}
398
// Convenience aliases: one `ChunkedArray` alias per marker type.
// Aliases for feature-gated marker types carry the same feature gate.
#[cfg(feature = "dtype-array")]
pub type ArrayChunked = ChunkedArray<FixedSizeListType>;
pub type ListChunked = ChunkedArray<ListType>;
pub type BooleanChunked = ChunkedArray<BooleanType>;
pub type UInt8Chunked = ChunkedArray<UInt8Type>;
pub type UInt16Chunked = ChunkedArray<UInt16Type>;
pub type UInt32Chunked = ChunkedArray<UInt32Type>;
pub type UInt64Chunked = ChunkedArray<UInt64Type>;
#[cfg(feature = "dtype-u128")]
pub type UInt128Chunked = ChunkedArray<UInt128Type>;
pub type Int8Chunked = ChunkedArray<Int8Type>;
pub type Int16Chunked = ChunkedArray<Int16Type>;
pub type Int32Chunked = ChunkedArray<Int32Type>;
pub type Int64Chunked = ChunkedArray<Int64Type>;
#[cfg(feature = "dtype-i128")]
pub type Int128Chunked = ChunkedArray<Int128Type>;
#[cfg(feature = "dtype-f16")]
pub type Float16Chunked = ChunkedArray<Float16Type>;
pub type Float32Chunked = ChunkedArray<Float32Type>;
pub type Float64Chunked = ChunkedArray<Float64Type>;
pub type StringChunked = ChunkedArray<StringType>;
pub type BinaryChunked = ChunkedArray<BinaryType>;
pub type BinaryOffsetChunked = ChunkedArray<BinaryOffsetType>;
#[cfg(feature = "object")]
pub type ObjectChunked<T> = ChunkedArray<ObjectType<T>>;
424
/// Native Rust number types usable as the `Native` type of a
/// `PolarsNumericType`.
///
/// The supertrait list bundles the ordering, hashing, arithmetic, casting
/// and aggregation capabilities required of such a type.
pub trait NumericNative:
    TotalOrd
    + PartialOrd
    + TotalHash
    + NativeType
    + Num
    + NumCast
    + Zero
    + One
    // + Simd
    // + Simd8
    + std::iter::Sum<Self>
    + Add<Output = Self>
    + Sub<Output = Self>
    + Mul<Output = Self>
    + Div<Output = Self>
    + Rem<Output = Self>
    + AddAssign
    + SubAssign
    + AbsDiff
    + Bounded
    + FromPrimitive
    + IsFloat
    // The kernel's true-division output must be the native type of `TrueDivPolarsType`.
    + HasPrimitiveArithmeticKernel<TrueDivT=<Self::TrueDivPolarsType as PolarsNumericType>::Native>
    + FloatSum<f64>
    + AsPrimitive<f64>
    + MinMax
    + IsNull
{
    /// The Polars numeric type paired with this native type (e.g. `Int8Type` for `i8`).
    type PolarsType: PolarsNumericType;
    /// The Polars numeric type whose native type is the result of true division.
    type TrueDivPolarsType: PolarsNumericType;
}
457
// Signed integers: each maps to its `Int*Type`; true division yields `Float64Type`.
impl NumericNative for i8 {
    type PolarsType = Int8Type;
    type TrueDivPolarsType = Float64Type;
}
impl NumericNative for i16 {
    type PolarsType = Int16Type;
    type TrueDivPolarsType = Float64Type;
}
impl NumericNative for i32 {
    type PolarsType = Int32Type;
    type TrueDivPolarsType = Float64Type;
}
impl NumericNative for i64 {
    type PolarsType = Int64Type;
    type TrueDivPolarsType = Float64Type;
}
#[cfg(feature = "dtype-i128")]
impl NumericNative for i128 {
    type PolarsType = Int128Type;
    type TrueDivPolarsType = Float64Type;
}
// Unsigned integers: each maps to its `UInt*Type`; true division yields `Float64Type`.
impl NumericNative for u8 {
    type PolarsType = UInt8Type;
    type TrueDivPolarsType = Float64Type;
}
impl NumericNative for u16 {
    type PolarsType = UInt16Type;
    type TrueDivPolarsType = Float64Type;
}
impl NumericNative for u32 {
    type PolarsType = UInt32Type;
    type TrueDivPolarsType = Float64Type;
}
impl NumericNative for u64 {
    type PolarsType = UInt64Type;
    type TrueDivPolarsType = Float64Type;
}
#[cfg(feature = "dtype-u128")]
impl NumericNative for u128 {
    type PolarsType = UInt128Type;
    type TrueDivPolarsType = Float64Type;
}
// Floats: true division keeps the same float type.
#[cfg(feature = "dtype-f16")]
impl NumericNative for pf16 {
    type PolarsType = Float16Type;
    type TrueDivPolarsType = Float16Type;
}
impl NumericNative for f32 {
    type PolarsType = Float32Type;
    type TrueDivPolarsType = Float32Type;
}
impl NumericNative for f64 {
    type PolarsType = Float64Type;
    type TrueDivPolarsType = Float64Type;
}