1#![allow(unsafe_op_in_unsafe_fn)]
2use std::borrow::Cow;
3
4use arrow::types::PrimitiveType;
5use num_traits::ToBytes;
6use polars_compute::cast::SerPrimitive;
7use polars_error::feature_gated;
8use polars_utils::float16::pf16;
9use polars_utils::total_ord::ToTotalOrd;
10
11use super::*;
12use crate::CHEAP_SERIES_HASH_LIMIT;
13#[cfg(feature = "dtype-struct")]
14use crate::prelude::any_value::arr_to_any_value;
15
/// Owning wrapper around a boxed [`PolarsObjectSafe`] trait object.
///
/// This is the payload type of [`AnyValue::ObjectOwned`]; it exists so the
/// boxed object can be given a `Clone` implementation.
#[cfg(feature = "object")]
#[derive(Debug)]
pub struct OwnedObject(pub Box<dyn PolarsObjectSafe>);
19
#[cfg(feature = "object")]
impl Clone for OwnedObject {
    /// Clones the wrapper by asking the inner object for a freshly boxed copy
    /// of itself via `to_boxed`.
    fn clone(&self) -> Self {
        let duplicate = self.0.to_boxed();
        OwnedObject(duplicate)
    }
}
26
27#[cfg(feature = "dtype-decimal")]
28use polars_compute::decimal::{
29 dec128_cmp, dec128_eq, dec128_rescale, dec128_to_f64, dec128_to_i128, f64_to_dec128,
30 i128_to_dec128,
31};
32
/// A single dynamically-typed value.
///
/// Most variants come in two flavours: a borrowed one tied to the lifetime `'a`
/// (e.g. [`AnyValue::String`]) and an owned counterpart (e.g. [`AnyValue::StringOwned`]).
/// The default value is [`AnyValue::Null`].
#[derive(Debug, Clone, Default)]
pub enum AnyValue<'a> {
    /// A missing value.
    #[default]
    Null,
    /// A boolean value.
    Boolean(bool),
    /// A borrowed UTF-8 string slice.
    String(&'a str),
    /// An unsigned 8-bit integer.
    UInt8(u8),
    /// An unsigned 16-bit integer.
    UInt16(u16),
    /// An unsigned 32-bit integer.
    UInt32(u32),
    /// An unsigned 64-bit integer.
    UInt64(u64),
    /// An unsigned 128-bit integer.
    UInt128(u128),
    /// A signed 8-bit integer.
    Int8(i8),
    /// A signed 16-bit integer.
    Int16(i16),
    /// A signed 32-bit integer.
    Int32(i32),
    /// A signed 64-bit integer.
    Int64(i64),
    /// A signed 128-bit integer.
    Int128(i128),
    /// A 16-bit floating point number.
    Float16(pf16),
    /// A 32-bit floating point number.
    Float32(f32),
    /// A 64-bit floating point number.
    Float64(f64),
    /// A date stored as a number of days (casting to a datetime multiplies it by the
    /// per-day unit count). NOTE(review): presumably days since the UNIX epoch — confirm.
    #[cfg(feature = "dtype-date")]
    Date(i32),
    /// A timestamp expressed in the given [`TimeUnit`], with an optional borrowed time zone.
    #[cfg(feature = "dtype-datetime")]
    Datetime(i64, TimeUnit, Option<&'a TimeZone>),
    /// Same as [`AnyValue::Datetime`], but owning its (shared) time zone.
    #[cfg(feature = "dtype-datetime")]
    DatetimeOwned(i64, TimeUnit, Option<Arc<TimeZone>>),
    /// A length of time expressed in the given [`TimeUnit`].
    #[cfg(feature = "dtype-duration")]
    Duration(i64, TimeUnit),
    /// A time of day; casts from a datetime produce it in nanoseconds.
    /// NOTE(review): presumably nanoseconds since midnight — confirm.
    #[cfg(feature = "dtype-time")]
    Time(i64),
    /// A category id together with a borrowed mapping used to resolve it to a string.
    #[cfg(feature = "dtype-categorical")]
    Categorical(CatSize, &'a Arc<CategoricalMapping>),
    /// Same as [`AnyValue::Categorical`], but holding its own `Arc` to the mapping.
    #[cfg(feature = "dtype-categorical")]
    CategoricalOwned(CatSize, Arc<CategoricalMapping>),
    /// An enum category id together with a borrowed mapping used to resolve it to a string.
    #[cfg(feature = "dtype-categorical")]
    Enum(CatSize, &'a Arc<CategoricalMapping>),
    /// Same as [`AnyValue::Enum`], but holding its own `Arc` to the mapping.
    #[cfg(feature = "dtype-categorical")]
    EnumOwned(CatSize, Arc<CategoricalMapping>),
    /// A list value; the elements are stored as a [`Series`].
    List(Series),
    /// A fixed-size array value: the elements as a [`Series`] plus the array width.
    #[cfg(feature = "dtype-array")]
    Array(Series, usize),
    /// A borrowed object value.
    #[cfg(feature = "object")]
    Object(&'a dyn PolarsObjectSafe),
    /// An owned object value.
    #[cfg(feature = "object")]
    ObjectOwned(OwnedObject),
    /// A borrowed struct value: a row index into a [`StructArray`] plus the struct's fields.
    #[cfg(feature = "dtype-struct")]
    Struct(usize, &'a StructArray, &'a [Field]),
    /// An owned struct value: the field values and the matching field definitions.
    #[cfg(feature = "dtype-struct")]
    StructOwned(Box<(Vec<AnyValue<'a>>, Vec<Field>)>),
    /// An owned UTF-8 string.
    StringOwned(PlSmallStr),
    /// A borrowed byte slice.
    Binary(&'a [u8]),
    /// An owned byte buffer.
    BinaryOwned(Vec<u8>),
    /// A decimal: the 128-bit value followed by its precision and scale
    /// (the order in which `dtype()` forwards them to `DataType::Decimal`).
    #[cfg(feature = "dtype-decimal")]
    Decimal(i128, usize, usize),
}
118
119impl AnyValue<'static> {
120 pub fn zero_sum(dtype: &DataType) -> Self {
121 match dtype {
122 DataType::String => AnyValue::StringOwned(PlSmallStr::EMPTY),
123 DataType::Binary => AnyValue::BinaryOwned(Vec::new()),
124 DataType::Boolean => (0 as IdxSize).into(),
125 d if d.is_primitive_numeric() => unsafe {
127 std::mem::transmute::<AnyValue<'_>, AnyValue<'static>>(
128 AnyValue::UInt8(0).cast(dtype),
129 )
130 },
131 #[cfg(feature = "dtype-duration")]
132 DataType::Duration(unit) => AnyValue::Duration(0, *unit),
133 #[cfg(feature = "dtype-decimal")]
134 DataType::Decimal(p, s) => AnyValue::Decimal(0, *p, *s),
135 _ => AnyValue::Null,
136 }
137 }
138
139 pub fn can_have_dtype(&self, dtype: &DataType) -> bool {
141 matches!(self, AnyValue::Null) || dtype == &self.dtype()
142 }
143
144 pub fn default_value(
146 dtype: &DataType,
147 numeric_to_one: bool,
148 num_list_values: usize,
149 ) -> AnyValue<'static> {
150 use {AnyValue as AV, DataType as DT};
151 match dtype {
152 DT::Boolean => AV::Boolean(false),
153 DT::UInt8 => AV::UInt8(numeric_to_one.into()),
154 DT::UInt16 => AV::UInt16(numeric_to_one.into()),
155 DT::UInt32 => AV::UInt32(numeric_to_one.into()),
156 DT::UInt64 => AV::UInt64(numeric_to_one.into()),
157 DT::UInt128 => AV::UInt128(numeric_to_one.into()),
158 DT::Int8 => AV::Int8(numeric_to_one.into()),
159 DT::Int16 => AV::Int16(numeric_to_one.into()),
160 DT::Int32 => AV::Int32(numeric_to_one.into()),
161 DT::Int64 => AV::Int64(numeric_to_one.into()),
162 DT::Int128 => AV::Int128(numeric_to_one.into()),
163 DT::Float16 => AV::Float16(numeric_to_one.into()),
164 DT::Float32 => AV::Float32(numeric_to_one.into()),
165 DT::Float64 => AV::Float64(numeric_to_one.into()),
166 #[cfg(feature = "dtype-decimal")]
167 DT::Decimal(p, s) => AV::Decimal(0, *p, *s),
168 DT::String => AV::String(""),
169 DT::Binary => AV::Binary(&[]),
170 DT::BinaryOffset => AV::Binary(&[]),
171 DT::Date => feature_gated!("dtype-date", AV::Date(0)),
172 DT::Datetime(time_unit, time_zone) => feature_gated!(
173 "dtype-datetime",
174 AV::DatetimeOwned(0, *time_unit, time_zone.clone().map(Arc::new))
175 ),
176 DT::Duration(time_unit) => {
177 feature_gated!("dtype-duration", AV::Duration(0, *time_unit))
178 },
179 DT::Time => feature_gated!("dtype-time", AV::Time(0)),
180 #[cfg(feature = "dtype-array")]
181 DT::Array(inner_dtype, width) => {
182 let inner_value =
183 AnyValue::default_value(inner_dtype, numeric_to_one, num_list_values);
184 AV::Array(
185 Scalar::new(inner_dtype.as_ref().clone(), inner_value)
186 .into_series(PlSmallStr::EMPTY)
187 .new_from_index(0, *width),
188 *width,
189 )
190 },
191 DT::List(inner_dtype) => AV::List(if num_list_values == 0 {
192 Series::new_empty(PlSmallStr::EMPTY, inner_dtype.as_ref())
193 } else {
194 let inner_value =
195 AnyValue::default_value(inner_dtype, numeric_to_one, num_list_values);
196
197 Scalar::new(inner_dtype.as_ref().clone(), inner_value)
198 .into_series(PlSmallStr::EMPTY)
199 .new_from_index(0, num_list_values)
200 }),
201 #[cfg(feature = "object")]
202 DT::Object(_) => AV::Null,
203 DT::Null => AV::Null,
204 #[cfg(feature = "dtype-categorical")]
205 DT::Categorical(_, _) => AV::Null,
206 #[cfg(feature = "dtype-categorical")]
207 DT::Enum(categories, mapping) => match categories.categories().is_empty() {
208 true => AV::Null,
209 false => AV::EnumOwned(0, mapping.clone()),
210 },
211 #[cfg(feature = "dtype-struct")]
212 DT::Struct(fields) => AV::StructOwned(Box::new((
213 fields
214 .iter()
215 .map(|f| AnyValue::default_value(f.dtype(), numeric_to_one, num_list_values))
216 .collect(),
217 fields.clone(),
218 ))),
219 #[cfg(feature = "dtype-extension")]
220 DT::Extension(_typ, storage) => {
221 AnyValue::default_value(storage, numeric_to_one, num_list_values)
222 },
223 DT::Unknown(_) => unreachable!(),
224 }
225 }
226}
227
228impl<'a> AnyValue<'a> {
229 pub fn dtype(&self) -> DataType {
234 use AnyValue::*;
235 match self {
236 Null => DataType::Null,
237 Boolean(_) => DataType::Boolean,
238 Int8(_) => DataType::Int8,
239 Int16(_) => DataType::Int16,
240 Int32(_) => DataType::Int32,
241 Int64(_) => DataType::Int64,
242 Int128(_) => DataType::Int128,
243 UInt8(_) => DataType::UInt8,
244 UInt16(_) => DataType::UInt16,
245 UInt32(_) => DataType::UInt32,
246 UInt64(_) => DataType::UInt64,
247 UInt128(_) => DataType::UInt128,
248 Float16(_) => DataType::Float16,
249 Float32(_) => DataType::Float32,
250 Float64(_) => DataType::Float64,
251 String(_) | StringOwned(_) => DataType::String,
252 Binary(_) | BinaryOwned(_) => DataType::Binary,
253 #[cfg(feature = "dtype-date")]
254 Date(_) => DataType::Date,
255 #[cfg(feature = "dtype-time")]
256 Time(_) => DataType::Time,
257 #[cfg(feature = "dtype-datetime")]
258 Datetime(_, tu, tz) => DataType::Datetime(*tu, (*tz).cloned()),
259 #[cfg(feature = "dtype-datetime")]
260 DatetimeOwned(_, tu, tz) => {
261 DataType::Datetime(*tu, tz.as_ref().map(|v| v.as_ref().clone()))
262 },
263 #[cfg(feature = "dtype-duration")]
264 Duration(_, tu) => DataType::Duration(*tu),
265 #[cfg(feature = "dtype-categorical")]
266 Categorical(_, _) | CategoricalOwned(_, _) => {
267 unimplemented!("can not get dtype of Categorical AnyValue")
268 },
269 #[cfg(feature = "dtype-categorical")]
270 Enum(_, _) | EnumOwned(_, _) => unimplemented!("can not get dtype of Enum AnyValue"),
271 List(s) => DataType::List(Box::new(s.dtype().clone())),
272 #[cfg(feature = "dtype-array")]
273 Array(s, size) => DataType::Array(Box::new(s.dtype().clone()), *size),
274 #[cfg(feature = "dtype-struct")]
275 Struct(_, _, fields) => DataType::Struct(fields.to_vec()),
276 #[cfg(feature = "dtype-struct")]
277 StructOwned(payload) => DataType::Struct(payload.1.clone()),
278 #[cfg(feature = "dtype-decimal")]
279 Decimal(_, p, s) => DataType::Decimal(*p, *s),
280 #[cfg(feature = "object")]
281 Object(o) => DataType::Object(o.type_name()),
282 #[cfg(feature = "object")]
283 ObjectOwned(o) => DataType::Object(o.0.type_name()),
284 }
285 }
286
287 #[doc(hidden)]
289 #[inline]
290 pub fn extract<T: NumCast + IsFloat>(&self) -> Option<T> {
291 use AnyValue::*;
292 match self {
293 Int8(v) => NumCast::from(*v),
294 Int16(v) => NumCast::from(*v),
295 Int32(v) => NumCast::from(*v),
296 Int64(v) => NumCast::from(*v),
297 Int128(v) => NumCast::from(*v),
298 UInt8(v) => NumCast::from(*v),
299 UInt16(v) => NumCast::from(*v),
300 UInt32(v) => NumCast::from(*v),
301 UInt64(v) => NumCast::from(*v),
302 UInt128(v) => NumCast::from(*v),
303 #[cfg(feature = "dtype-f16")]
304 Float16(v) => NumCast::from(*v),
305 Float32(v) => NumCast::from(*v),
306 Float64(v) => NumCast::from(*v),
307 #[cfg(feature = "dtype-date")]
308 Date(v) => NumCast::from(*v),
309 #[cfg(feature = "dtype-datetime")]
310 Datetime(v, _, _) | DatetimeOwned(v, _, _) => NumCast::from(*v),
311 #[cfg(feature = "dtype-time")]
312 Time(v) => NumCast::from(*v),
313 #[cfg(feature = "dtype-duration")]
314 Duration(v, _) => NumCast::from(*v),
315 #[cfg(feature = "dtype-decimal")]
316 Decimal(v, _p, s) => {
317 if T::is_float() {
318 NumCast::from(dec128_to_f64(*v, *s))
319 } else {
320 NumCast::from(dec128_to_i128(*v, *s))
321 }
322 },
323 Boolean(v) => NumCast::from(if *v { 1 } else { 0 }),
324 String(v) => {
325 if let Ok(val) = (*v).parse::<i128>() {
326 NumCast::from(val)
327 } else {
328 NumCast::from((*v).parse::<f64>().ok()?)
329 }
330 },
331 StringOwned(v) => String(v.as_str()).extract(),
332 _ => None,
333 }
334 }
335
336 #[inline]
337 pub fn try_extract<T: NumCast + IsFloat>(&self) -> PolarsResult<T> {
338 self.extract().ok_or_else(|| {
339 polars_err!(
340 ComputeError: "could not extract number from any-value of dtype: '{:?}'",
341 self.dtype(),
342 )
343 })
344 }
345
346 pub fn is_boolean(&self) -> bool {
347 matches!(self, AnyValue::Boolean(_))
348 }
349
350 pub fn is_primitive_numeric(&self) -> bool {
351 self.is_integer() || self.is_float()
352 }
353
354 pub fn is_float(&self) -> bool {
355 matches!(
356 self,
357 AnyValue::Float16(_) | AnyValue::Float32(_) | AnyValue::Float64(_)
358 )
359 }
360
361 pub fn is_integer(&self) -> bool {
362 self.is_signed_integer() || self.is_unsigned_integer()
363 }
364
365 pub fn is_signed_integer(&self) -> bool {
366 matches!(
367 self,
368 AnyValue::Int8(_)
369 | AnyValue::Int16(_)
370 | AnyValue::Int32(_)
371 | AnyValue::Int64(_)
372 | AnyValue::Int128(_)
373 )
374 }
375
376 pub fn is_unsigned_integer(&self) -> bool {
377 matches!(
378 self,
379 AnyValue::UInt8(_)
380 | AnyValue::UInt16(_)
381 | AnyValue::UInt32(_)
382 | AnyValue::UInt64(_)
383 | AnyValue::UInt128(_)
384 )
385 }
386
387 pub fn is_nan(&self) -> bool {
388 match self {
389 AnyValue::Float16(f) => f.is_nan(),
390 AnyValue::Float32(f) => f.is_nan(),
391 AnyValue::Float64(f) => f.is_nan(),
392 _ => false,
393 }
394 }
395
396 pub fn is_null(&self) -> bool {
397 matches!(self, AnyValue::Null)
398 }
399
400 pub fn is_nested_null(&self) -> bool {
401 match self {
402 AnyValue::Null => true,
403 AnyValue::List(s) => s.null_count() == s.len(),
404 #[cfg(feature = "dtype-array")]
405 AnyValue::Array(s, _) => s.null_count() == s.len(),
406 #[cfg(feature = "dtype-struct")]
407 AnyValue::Struct(_, _, _) => self._iter_struct_av().all(|av| av.is_nested_null()),
408 _ => false,
409 }
410 }
411
412 #[inline]
414 pub fn null_to_none(self) -> Option<Self> {
415 match self {
416 AnyValue::Null => None,
417 av => Some(av),
418 }
419 }
420
421 pub fn strict_cast(&self, dtype: &'a DataType) -> Option<AnyValue<'a>> {
424 let new_av = match (self, dtype) {
425 (av, DataType::UInt8) => AnyValue::UInt8(av.extract::<u8>()?),
427 (av, DataType::UInt16) => AnyValue::UInt16(av.extract::<u16>()?),
428 (av, DataType::UInt32) => AnyValue::UInt32(av.extract::<u32>()?),
429 (av, DataType::UInt64) => AnyValue::UInt64(av.extract::<u64>()?),
430 (av, DataType::UInt128) => AnyValue::UInt128(av.extract::<u128>()?),
431 (av, DataType::Int8) => AnyValue::Int8(av.extract::<i8>()?),
432 (av, DataType::Int16) => AnyValue::Int16(av.extract::<i16>()?),
433 (av, DataType::Int32) => AnyValue::Int32(av.extract::<i32>()?),
434 (av, DataType::Int64) => AnyValue::Int64(av.extract::<i64>()?),
435 (av, DataType::Int128) => AnyValue::Int128(av.extract::<i128>()?),
436 (av, DataType::Float16) => AnyValue::Float16(av.extract::<pf16>()?),
437 (av, DataType::Float32) => AnyValue::Float32(av.extract::<f32>()?),
438 (av, DataType::Float64) => AnyValue::Float64(av.extract::<f64>()?),
439
440 (AnyValue::UInt8(v), DataType::Boolean) => AnyValue::Boolean(*v != u8::default()),
442 (AnyValue::UInt16(v), DataType::Boolean) => AnyValue::Boolean(*v != u16::default()),
443 (AnyValue::UInt32(v), DataType::Boolean) => AnyValue::Boolean(*v != u32::default()),
444 (AnyValue::UInt64(v), DataType::Boolean) => AnyValue::Boolean(*v != u64::default()),
445 (AnyValue::UInt128(v), DataType::Boolean) => AnyValue::Boolean(*v != u128::default()),
446 (AnyValue::Int8(v), DataType::Boolean) => AnyValue::Boolean(*v != i8::default()),
447 (AnyValue::Int16(v), DataType::Boolean) => AnyValue::Boolean(*v != i16::default()),
448 (AnyValue::Int32(v), DataType::Boolean) => AnyValue::Boolean(*v != i32::default()),
449 (AnyValue::Int64(v), DataType::Boolean) => AnyValue::Boolean(*v != i64::default()),
450 (AnyValue::Int128(v), DataType::Boolean) => AnyValue::Boolean(*v != i128::default()),
451 (AnyValue::Float16(v), DataType::Boolean) => AnyValue::Boolean(*v != pf16::default()),
452 (AnyValue::Float32(v), DataType::Boolean) => AnyValue::Boolean(*v != f32::default()),
453 (AnyValue::Float64(v), DataType::Boolean) => AnyValue::Boolean(*v != f64::default()),
454
455 #[cfg(feature = "dtype-categorical")]
457 (
458 &AnyValue::Categorical(cat, &ref lmap) | &AnyValue::CategoricalOwned(cat, ref lmap),
459 DataType::Categorical(_, rmap),
460 ) => {
461 if Arc::ptr_eq(lmap, rmap) {
462 self.clone()
463 } else {
464 let s = unsafe { lmap.cat_to_str_unchecked(cat) };
465 let new_cat = rmap.insert_cat(s).unwrap();
466 AnyValue::CategoricalOwned(new_cat, rmap.clone())
467 }
468 },
469
470 #[cfg(feature = "dtype-categorical")]
471 (
472 &AnyValue::Enum(cat, &ref lmap) | &AnyValue::EnumOwned(cat, ref lmap),
473 DataType::Enum(_, rmap),
474 ) => {
475 if Arc::ptr_eq(lmap, rmap) {
476 self.clone()
477 } else {
478 let s = unsafe { lmap.cat_to_str_unchecked(cat) };
479 let new_cat = rmap.get_cat(s)?;
480 AnyValue::EnumOwned(new_cat, rmap.clone())
481 }
482 },
483
484 #[cfg(feature = "dtype-categorical")]
485 (
486 &AnyValue::Categorical(cat, &ref map)
487 | &AnyValue::CategoricalOwned(cat, ref map)
488 | &AnyValue::Enum(cat, &ref map)
489 | &AnyValue::EnumOwned(cat, ref map),
490 DataType::String,
491 ) => {
492 let s = unsafe { map.cat_to_str_unchecked(cat) };
493 AnyValue::StringOwned(PlSmallStr::from(s))
494 },
495
496 #[cfg(feature = "dtype-categorical")]
497 (AnyValue::String(s), DataType::Categorical(_, map)) => {
498 AnyValue::CategoricalOwned(map.insert_cat(s).unwrap(), map.clone())
499 },
500
501 #[cfg(feature = "dtype-categorical")]
502 (AnyValue::StringOwned(s), DataType::Categorical(_, map)) => {
503 AnyValue::CategoricalOwned(map.insert_cat(s).unwrap(), map.clone())
504 },
505
506 #[cfg(feature = "dtype-categorical")]
507 (AnyValue::String(s), DataType::Enum(_, map)) => {
508 AnyValue::CategoricalOwned(map.get_cat(s)?, map.clone())
509 },
510
511 #[cfg(feature = "dtype-categorical")]
512 (AnyValue::StringOwned(s), DataType::Enum(_, map)) => {
513 AnyValue::CategoricalOwned(map.get_cat(s)?, map.clone())
514 },
515
516 (AnyValue::String(v), DataType::String) => AnyValue::String(v),
518 (AnyValue::StringOwned(v), DataType::String) => AnyValue::StringOwned(v.clone()),
519
520 (av, DataType::String) => {
521 let mut tmp = vec![];
522 if av.is_unsigned_integer() {
523 let val = av.extract::<u64>()?;
524 SerPrimitive::write(&mut tmp, val);
525 } else if av.is_float() {
526 let val = av.extract::<f64>()?;
527 SerPrimitive::write(&mut tmp, val);
528 } else {
529 let val = av.extract::<i64>()?;
530 SerPrimitive::write(&mut tmp, val);
531 }
532 AnyValue::StringOwned(PlSmallStr::from_str(std::str::from_utf8(&tmp).unwrap()))
533 },
534
535 (AnyValue::String(v), DataType::Binary) => AnyValue::Binary(v.as_bytes()),
537
538 #[cfg(feature = "dtype-datetime")]
540 (av, DataType::Datetime(tu, tz)) if av.is_primitive_numeric() => {
541 AnyValue::Datetime(av.extract::<i64>()?, *tu, tz.as_ref())
542 },
543 #[cfg(all(feature = "dtype-datetime", feature = "dtype-date"))]
544 (AnyValue::Date(v), DataType::Datetime(tu, _)) => AnyValue::Datetime(
545 match tu {
546 TimeUnit::Nanoseconds => (*v as i64) * NS_IN_DAY,
547 TimeUnit::Microseconds => (*v as i64) * US_IN_DAY,
548 TimeUnit::Milliseconds => (*v as i64) * MS_IN_DAY,
549 },
550 *tu,
551 None,
552 ),
553 #[cfg(feature = "dtype-datetime")]
554 (
555 AnyValue::Datetime(v, tu, _) | AnyValue::DatetimeOwned(v, tu, _),
556 DataType::Datetime(tu_r, tz_r),
557 ) => AnyValue::Datetime(
558 match (tu, tu_r) {
559 (TimeUnit::Nanoseconds, TimeUnit::Microseconds) => *v / 1_000i64,
560 (TimeUnit::Nanoseconds, TimeUnit::Milliseconds) => *v / 1_000_000i64,
561 (TimeUnit::Microseconds, TimeUnit::Nanoseconds) => *v * 1_000i64,
562 (TimeUnit::Microseconds, TimeUnit::Milliseconds) => *v / 1_000i64,
563 (TimeUnit::Milliseconds, TimeUnit::Microseconds) => *v * 1_000i64,
564 (TimeUnit::Milliseconds, TimeUnit::Nanoseconds) => *v * 1_000_000i64,
565 _ => *v,
566 },
567 *tu_r,
568 tz_r.as_ref(),
569 ),
570
571 #[cfg(feature = "dtype-date")]
573 (av, DataType::Date) if av.is_primitive_numeric() => {
574 AnyValue::Date(av.extract::<i32>()?)
575 },
576 #[cfg(all(feature = "dtype-date", feature = "dtype-datetime"))]
577 (AnyValue::Datetime(v, tu, _) | AnyValue::DatetimeOwned(v, tu, _), DataType::Date) => {
578 AnyValue::Date(match tu {
579 TimeUnit::Nanoseconds => *v / NS_IN_DAY,
580 TimeUnit::Microseconds => *v / US_IN_DAY,
581 TimeUnit::Milliseconds => *v / MS_IN_DAY,
582 } as i32)
583 },
584
585 #[cfg(feature = "dtype-time")]
587 (av, DataType::Time) if av.is_primitive_numeric() => {
588 AnyValue::Time(av.extract::<i64>()?)
589 },
590 #[cfg(all(feature = "dtype-time", feature = "dtype-datetime"))]
591 (AnyValue::Datetime(v, tu, _) | AnyValue::DatetimeOwned(v, tu, _), DataType::Time) => {
592 AnyValue::Time(match tu {
593 TimeUnit::Nanoseconds => *v % NS_IN_DAY,
594 TimeUnit::Microseconds => (*v % US_IN_DAY) * 1_000i64,
595 TimeUnit::Milliseconds => (*v % MS_IN_DAY) * 1_000_000i64,
596 })
597 },
598
599 #[cfg(feature = "dtype-duration")]
601 (av, DataType::Duration(tu)) if av.is_primitive_numeric() => {
602 AnyValue::Duration(av.extract::<i64>()?, *tu)
603 },
604 #[cfg(all(feature = "dtype-duration", feature = "dtype-time"))]
605 (AnyValue::Time(v), DataType::Duration(tu)) => AnyValue::Duration(
606 match *tu {
607 TimeUnit::Nanoseconds => *v,
608 TimeUnit::Microseconds => *v / 1_000i64,
609 TimeUnit::Milliseconds => *v / 1_000_000i64,
610 },
611 *tu,
612 ),
613 #[cfg(feature = "dtype-duration")]
614 (AnyValue::Duration(v, tu), DataType::Duration(tu_r)) => AnyValue::Duration(
615 match (tu, tu_r) {
616 (_, _) if tu == tu_r => *v,
617 (TimeUnit::Nanoseconds, TimeUnit::Microseconds) => *v / 1_000i64,
618 (TimeUnit::Nanoseconds, TimeUnit::Milliseconds) => *v / 1_000_000i64,
619 (TimeUnit::Microseconds, TimeUnit::Nanoseconds) => *v * 1_000i64,
620 (TimeUnit::Microseconds, TimeUnit::Milliseconds) => *v / 1_000i64,
621 (TimeUnit::Milliseconds, TimeUnit::Microseconds) => *v * 1_000i64,
622 (TimeUnit::Milliseconds, TimeUnit::Nanoseconds) => *v * 1_000_000i64,
623 _ => *v,
624 },
625 *tu_r,
626 ),
627
628 #[cfg(feature = "dtype-decimal")]
629 (av, DataType::Decimal(p, s)) if av.is_integer() => {
630 let int = av.try_extract::<i128>().ok()?;
631 let dec = i128_to_dec128(int, *p, *s)?;
632 AnyValue::Decimal(dec, *p, *s)
633 },
634
635 #[cfg(feature = "dtype-decimal")]
636 (av, DataType::Decimal(p, s)) if av.is_float() => {
637 let f = av.try_extract::<f64>().unwrap();
638 let dec = f64_to_dec128(f, *p, *s)?;
639 AnyValue::Decimal(dec, *p, *s)
640 },
641
642 #[cfg(feature = "dtype-decimal")]
643 (AnyValue::Decimal(value, _old_p, old_s), DataType::Decimal(p, s)) => {
644 let converted = dec128_rescale(*value, *old_s, *p, *s)?;
645 AnyValue::Decimal(converted, *p, *s)
646 },
647
648 (av, dtype) if av.dtype() == *dtype => self.clone(),
650
651 _ => return None,
652 };
653 Some(new_av)
654 }
655
656 pub fn try_strict_cast(&self, dtype: &'a DataType) -> PolarsResult<AnyValue<'a>> {
659 self.strict_cast(dtype).ok_or_else(
660 || polars_err!(ComputeError: "cannot cast any-value {:?} to dtype '{}'", self, dtype),
661 )
662 }
663
664 pub fn cast(&self, dtype: &'a DataType) -> AnyValue<'a> {
665 match self.strict_cast(dtype) {
666 Some(av) => av,
667 None => AnyValue::Null,
668 }
669 }
670
671 pub fn idx(&self) -> IdxSize {
672 match self {
673 #[cfg(not(feature = "bigidx"))]
674 Self::UInt32(v) => *v,
675 #[cfg(feature = "bigidx")]
676 Self::UInt64(v) => *v,
677 _ => panic!("expected index type found {self:?}"),
678 }
679 }
680
681 pub fn str_value(&self) -> Cow<'a, str> {
682 match self {
683 Self::String(s) => Cow::Borrowed(s),
684 Self::StringOwned(s) => Cow::Owned(s.to_string()),
685 Self::Null => Cow::Borrowed("null"),
686 #[cfg(feature = "dtype-categorical")]
687 Self::Categorical(cat, map) | Self::Enum(cat, map) => {
688 Cow::Borrowed(unsafe { map.cat_to_str_unchecked(*cat) })
689 },
690 #[cfg(feature = "dtype-categorical")]
691 Self::CategoricalOwned(cat, map) | Self::EnumOwned(cat, map) => {
692 Cow::Owned(unsafe { map.cat_to_str_unchecked(*cat) }.to_owned())
693 },
694 av => Cow::Owned(av.to_string()),
695 }
696 }
697
698 pub fn to_physical(self) -> Self {
699 match self {
700 Self::Null
701 | Self::Boolean(_)
702 | Self::String(_)
703 | Self::StringOwned(_)
704 | Self::Binary(_)
705 | Self::BinaryOwned(_)
706 | Self::UInt8(_)
707 | Self::UInt16(_)
708 | Self::UInt32(_)
709 | Self::UInt64(_)
710 | Self::UInt128(_)
711 | Self::Int8(_)
712 | Self::Int16(_)
713 | Self::Int32(_)
714 | Self::Int64(_)
715 | Self::Int128(_)
716 | Self::Float16(_)
717 | Self::Float32(_)
718 | Self::Float64(_) => self,
719
720 #[cfg(feature = "object")]
721 Self::Object(_) | Self::ObjectOwned(_) => self,
722
723 #[cfg(feature = "dtype-date")]
724 Self::Date(v) => Self::Int32(v),
725 #[cfg(feature = "dtype-datetime")]
726 Self::Datetime(v, _, _) | Self::DatetimeOwned(v, _, _) => Self::Int64(v),
727
728 #[cfg(feature = "dtype-duration")]
729 Self::Duration(v, _) => Self::Int64(v),
730 #[cfg(feature = "dtype-time")]
731 Self::Time(v) => Self::Int64(v),
732
733 #[cfg(feature = "dtype-categorical")]
734 Self::Categorical(v, &ref m)
735 | Self::CategoricalOwned(v, ref m)
736 | Self::Enum(v, &ref m)
737 | Self::EnumOwned(v, ref m) => {
738 match CategoricalPhysical::smallest_physical(m.max_categories()).unwrap() {
739 CategoricalPhysical::U8 => Self::UInt8(v as u8),
740 CategoricalPhysical::U16 => Self::UInt16(v as u16),
741 CategoricalPhysical::U32 => Self::UInt32(v),
742 }
743 },
744 Self::List(series) => Self::List(series.to_physical_repr().into_owned()),
745
746 #[cfg(feature = "dtype-array")]
747 Self::Array(series, width) => {
748 Self::Array(series.to_physical_repr().into_owned(), width)
749 },
750
751 #[cfg(feature = "dtype-struct")]
752 Self::Struct(_, _, _) => todo!(),
753 #[cfg(feature = "dtype-struct")]
754 Self::StructOwned(values) => Self::StructOwned(Box::new((
755 values.0.into_iter().map(|v| v.to_physical()).collect(),
756 values
757 .1
758 .into_iter()
759 .map(|mut f| {
760 f.dtype = f.dtype.to_physical();
761 f
762 })
763 .collect(),
764 ))),
765
766 #[cfg(feature = "dtype-decimal")]
767 Self::Decimal(v, _, _) => Self::Int128(v),
768 }
769 }
770
771 #[inline]
772 pub fn extract_bool(&self) -> Option<bool> {
773 match self {
774 AnyValue::Boolean(v) => Some(*v),
775 _ => None,
776 }
777 }
778
779 #[inline]
780 pub fn extract_str(&self) -> Option<&str> {
781 match self {
782 AnyValue::String(v) => Some(v),
783 AnyValue::StringOwned(v) => Some(v.as_str()),
784 _ => None,
785 }
786 }
787
788 #[inline]
789 pub fn extract_bytes(&self) -> Option<&[u8]> {
790 match self {
791 AnyValue::Binary(v) => Some(v),
792 AnyValue::BinaryOwned(v) => Some(v.as_slice()),
793 _ => None,
794 }
795 }
796}
797
798impl From<AnyValue<'_>> for DataType {
799 fn from(value: AnyValue<'_>) -> Self {
800 value.dtype()
801 }
802}
803
804impl<'a> From<&AnyValue<'a>> for DataType {
805 fn from(value: &AnyValue<'a>) -> Self {
806 value.dtype()
807 }
808}
809
810impl AnyValue<'_> {
811 pub fn hash_impl<H: Hasher>(&self, state: &mut H, cheap: bool) {
812 use AnyValue::*;
813 std::mem::discriminant(self).hash(state);
814 match self {
815 Int8(v) => v.hash(state),
816 Int16(v) => v.hash(state),
817 Int32(v) => v.hash(state),
818 Int64(v) => v.hash(state),
819 Int128(v) => feature_gated!("dtype-i128", v.hash(state)),
820 UInt8(v) => v.hash(state),
821 UInt16(v) => v.hash(state),
822 UInt32(v) => v.hash(state),
823 UInt64(v) => v.hash(state),
824 UInt128(v) => feature_gated!("dtype-u128", v.hash(state)),
825 String(v) => v.hash(state),
826 StringOwned(v) => v.hash(state),
827 Float16(v) => v.to_ne_bytes().hash(state),
828 Float32(v) => v.to_ne_bytes().hash(state),
829 Float64(v) => v.to_ne_bytes().hash(state),
830 Binary(v) => v.hash(state),
831 BinaryOwned(v) => v.hash(state),
832 Boolean(v) => v.hash(state),
833 List(v) => {
834 if !cheap || v.len() < CHEAP_SERIES_HASH_LIMIT {
835 Hash::hash(&Wrap(v.clone()), state)
836 }
837 },
838 #[cfg(feature = "dtype-array")]
839 Array(v, width) => {
840 if !cheap || v.len() < CHEAP_SERIES_HASH_LIMIT {
841 Hash::hash(&Wrap(v.clone()), state)
842 }
843 width.hash(state)
844 },
845 #[cfg(feature = "dtype-date")]
846 Date(v) => v.hash(state),
847 #[cfg(feature = "dtype-datetime")]
848 Datetime(v, tu, tz) => {
849 v.hash(state);
850 tu.hash(state);
851 tz.hash(state);
852 },
853 #[cfg(feature = "dtype-datetime")]
854 DatetimeOwned(v, tu, tz) => {
855 v.hash(state);
856 tu.hash(state);
857 tz.hash(state);
858 },
859 #[cfg(feature = "dtype-duration")]
860 Duration(v, tz) => {
861 v.hash(state);
862 tz.hash(state);
863 },
864 #[cfg(feature = "dtype-time")]
865 Time(v) => v.hash(state),
866 #[cfg(feature = "dtype-categorical")]
867 Categorical(v, _) | CategoricalOwned(v, _) | Enum(v, _) | EnumOwned(v, _) => {
868 v.hash(state)
869 },
870 #[cfg(feature = "object")]
871 Object(_) => {},
872 #[cfg(feature = "object")]
873 ObjectOwned(_) => {},
874 #[cfg(feature = "dtype-struct")]
875 Struct(_, _, _) => {
876 if !cheap {
877 let mut buf = vec![];
878 self._materialize_struct_av(&mut buf);
879 buf.hash(state)
880 }
881 },
882 #[cfg(feature = "dtype-struct")]
883 StructOwned(v) => v.0.hash(state),
884 #[cfg(feature = "dtype-decimal")]
885 Decimal(v, s, p) => {
886 v.hash(state);
887 s.hash(state);
888 p.hash(state);
889 },
890 Null => {},
891 }
892 }
893}
894
895impl Hash for AnyValue<'_> {
896 fn hash<H: Hasher>(&self, state: &mut H) {
897 self.hash_impl(state, false)
898 }
899}
900
// Marker impl: declares the equality defined by `AnyValue`'s `PartialEq` impl (not part
// of this section of the file) to be a full equivalence relation.
impl Eq for AnyValue<'_> {}
902
903impl<'a, T> From<Option<T>> for AnyValue<'a>
904where
905 T: Into<AnyValue<'a>>,
906{
907 #[inline]
908 fn from(a: Option<T>) -> Self {
909 match a {
910 None => AnyValue::Null,
911 Some(v) => v.into(),
912 }
913 }
914}
915
916impl<'a> AnyValue<'a> {
917 #[cfg(any(feature = "dtype-date", feature = "dtype-datetime"))]
918 pub(crate) fn as_date(&self) -> AnyValue<'static> {
919 match self {
920 #[cfg(feature = "dtype-date")]
921 AnyValue::Int32(v) => AnyValue::Date(*v),
922 AnyValue::Null => AnyValue::Null,
923 av => panic!("cannot create date from other type. dtype: {}", av.dtype()),
924 }
925 }
926
927 #[cfg(feature = "dtype-datetime")]
928 pub(crate) fn as_datetime(&self, tu: TimeUnit, tz: Option<&'a TimeZone>) -> AnyValue<'a> {
929 match self {
930 AnyValue::Int64(v) => AnyValue::Datetime(*v, tu, tz),
931 AnyValue::Null => AnyValue::Null,
932 av => panic!(
933 "cannot create datetime from other type. dtype: {}",
934 av.dtype()
935 ),
936 }
937 }
938
939 #[cfg(feature = "dtype-datetime")]
940 pub(crate) fn as_datetime_owned(
941 &self,
942 tu: TimeUnit,
943 tz: Option<Arc<TimeZone>>,
944 ) -> AnyValue<'static> {
945 match self {
946 AnyValue::Int64(v) => AnyValue::DatetimeOwned(*v, tu, tz),
947 AnyValue::Null => AnyValue::Null,
948 av => panic!(
949 "cannot create datetime from other type. dtype: {}",
950 av.dtype()
951 ),
952 }
953 }
954
955 #[cfg(feature = "dtype-duration")]
956 pub(crate) fn as_duration(&self, tu: TimeUnit) -> AnyValue<'static> {
957 match self {
958 AnyValue::Int64(v) => AnyValue::Duration(*v, tu),
959 AnyValue::Null => AnyValue::Null,
960 av => panic!(
961 "cannot create duration from other type. dtype: {}",
962 av.dtype()
963 ),
964 }
965 }
966
967 #[cfg(feature = "dtype-time")]
968 pub(crate) fn as_time(&self) -> AnyValue<'static> {
969 match self {
970 AnyValue::Int64(v) => AnyValue::Time(*v),
971 AnyValue::Null => AnyValue::Null,
972 av => panic!("cannot create time from other type. dtype: {}", av.dtype()),
973 }
974 }
975
976 pub(crate) fn to_i128(&self) -> Option<i128> {
977 match self {
978 AnyValue::UInt8(v) => Some((*v).into()),
979 AnyValue::UInt16(v) => Some((*v).into()),
980 AnyValue::UInt32(v) => Some((*v).into()),
981 AnyValue::UInt64(v) => Some((*v).into()),
982 AnyValue::Int8(v) => Some((*v).into()),
983 AnyValue::Int16(v) => Some((*v).into()),
984 AnyValue::Int32(v) => Some((*v).into()),
985 AnyValue::Int64(v) => Some((*v).into()),
986 AnyValue::Int128(v) => Some(*v),
987 _ => None,
988 }
989 }
990
991 pub(crate) fn to_f64(&self) -> Option<f64> {
992 match self {
993 AnyValue::Float16(v) => Some((*v).into()),
994 AnyValue::Float32(v) => Some((*v).into()),
995 AnyValue::Float64(v) => Some(*v),
996 _ => None,
997 }
998 }
999
1000 #[must_use]
1001 pub fn add(&self, rhs: &AnyValue) -> AnyValue<'static> {
1002 use AnyValue::*;
1003 match (self, rhs) {
1004 (Null, r) => r.clone().into_static(),
1005 (l, Null) => l.clone().into_static(),
1006 (Int32(l), Int32(r)) => Int32(l + r),
1007 (Int64(l), Int64(r)) => Int64(l + r),
1008 (UInt32(l), UInt32(r)) => UInt32(l + r),
1009 (UInt64(l), UInt64(r)) => UInt64(l + r),
1010 (Float16(l), Float16(r)) => Float16(*l + *r),
1011 (Float32(l), Float32(r)) => Float32(l + r),
1012 (Float64(l), Float64(r)) => Float64(l + r),
1013 #[cfg(feature = "dtype-duration")]
1014 (Duration(l, lu), Duration(r, ru)) => {
1015 if lu != ru {
1016 unimplemented!("adding durations with different units is not supported here");
1017 }
1018
1019 Duration(l + r, *lu)
1020 },
1021 #[cfg(feature = "dtype-decimal")]
1022 (Decimal(l, lp, ls), Decimal(r, rp, rs)) => {
1023 if (lp, ls) != (rp, rs) {
1024 unimplemented!(
1025 "adding decimals with different precisions/scales is not supported here"
1026 );
1027 }
1028
1029 Decimal(l + r, *lp, *ls)
1030 },
1031 _ => unimplemented!(),
1032 }
1033 }
1034
1035 #[inline]
1036 pub fn as_borrowed(&self) -> AnyValue<'_> {
1037 match self {
1038 AnyValue::BinaryOwned(data) => AnyValue::Binary(data),
1039 AnyValue::StringOwned(data) => AnyValue::String(data.as_str()),
1040 #[cfg(feature = "dtype-datetime")]
1041 AnyValue::DatetimeOwned(v, tu, tz) => {
1042 AnyValue::Datetime(*v, *tu, tz.as_ref().map(AsRef::as_ref))
1043 },
1044 #[cfg(feature = "dtype-categorical")]
1045 AnyValue::CategoricalOwned(cat, map) => AnyValue::Categorical(*cat, map),
1046 #[cfg(feature = "dtype-categorical")]
1047 AnyValue::EnumOwned(cat, map) => AnyValue::Enum(*cat, map),
1048 av => av.clone(),
1049 }
1050 }
1051
1052 #[inline]
1055 pub fn into_static(self) -> AnyValue<'static> {
1056 use AnyValue::*;
1057 match self {
1058 Null => Null,
1059 Int8(v) => Int8(v),
1060 Int16(v) => Int16(v),
1061 Int32(v) => Int32(v),
1062 Int64(v) => Int64(v),
1063 Int128(v) => Int128(v),
1064 UInt8(v) => UInt8(v),
1065 UInt16(v) => UInt16(v),
1066 UInt32(v) => UInt32(v),
1067 UInt64(v) => UInt64(v),
1068 UInt128(v) => UInt128(v),
1069 Boolean(v) => Boolean(v),
1070 Float16(v) => Float16(v),
1071 Float32(v) => Float32(v),
1072 Float64(v) => Float64(v),
1073 #[cfg(feature = "dtype-datetime")]
1074 Datetime(v, tu, tz) => DatetimeOwned(v, tu, tz.map(|v| Arc::new(v.clone()))),
1075 #[cfg(feature = "dtype-datetime")]
1076 DatetimeOwned(v, tu, tz) => DatetimeOwned(v, tu, tz),
1077 #[cfg(feature = "dtype-date")]
1078 Date(v) => Date(v),
1079 #[cfg(feature = "dtype-duration")]
1080 Duration(v, tu) => Duration(v, tu),
1081 #[cfg(feature = "dtype-time")]
1082 Time(v) => Time(v),
1083 List(v) => List(v),
1084 #[cfg(feature = "dtype-array")]
1085 Array(s, size) => Array(s, size),
1086 String(v) => StringOwned(PlSmallStr::from_str(v)),
1087 StringOwned(v) => StringOwned(v),
1088 Binary(v) => BinaryOwned(v.to_vec()),
1089 BinaryOwned(v) => BinaryOwned(v),
1090 #[cfg(feature = "object")]
1091 Object(v) => ObjectOwned(OwnedObject(v.to_boxed())),
1092 #[cfg(feature = "dtype-struct")]
1093 Struct(idx, arr, fields) => {
1094 let avs = struct_to_avs_static(idx, arr, fields);
1095 StructOwned(Box::new((avs, fields.to_vec())))
1096 },
1097 #[cfg(feature = "dtype-struct")]
1098 StructOwned(payload) => {
1099 let av = StructOwned(payload);
1100 unsafe { std::mem::transmute::<AnyValue<'a>, AnyValue<'static>>(av) }
1102 },
1103 #[cfg(feature = "object")]
1104 ObjectOwned(payload) => {
1105 let av = ObjectOwned(payload);
1106 unsafe { std::mem::transmute::<AnyValue<'a>, AnyValue<'static>>(av) }
1108 },
1109 #[cfg(feature = "dtype-decimal")]
1110 Decimal(val, s, p) => Decimal(val, s, p),
1111 #[cfg(feature = "dtype-categorical")]
1112 Categorical(cat, map) => CategoricalOwned(cat, map.clone()),
1113 #[cfg(feature = "dtype-categorical")]
1114 CategoricalOwned(cat, map) => CategoricalOwned(cat, map),
1115 #[cfg(feature = "dtype-categorical")]
1116 Enum(cat, map) => EnumOwned(cat, map.clone()),
1117 #[cfg(feature = "dtype-categorical")]
1118 EnumOwned(cat, map) => EnumOwned(cat, map),
1119 }
1120 }
1121
1122 pub fn get_str(&self) -> Option<&str> {
1124 match self {
1125 AnyValue::String(s) => Some(s),
1126 AnyValue::StringOwned(s) => Some(s.as_str()),
1127 #[cfg(feature = "dtype-categorical")]
1128 Self::Categorical(cat, map) | Self::Enum(cat, map) => {
1129 Some(unsafe { map.cat_to_str_unchecked(*cat) })
1130 },
1131 #[cfg(feature = "dtype-categorical")]
1132 Self::CategoricalOwned(cat, map) | Self::EnumOwned(cat, map) => {
1133 Some(unsafe { map.cat_to_str_unchecked(*cat) })
1134 },
1135 _ => None,
1136 }
1137 }
1138}
1139
1140impl<'a> From<AnyValue<'a>> for Option<i64> {
1141 fn from(val: AnyValue<'a>) -> Self {
1142 use AnyValue::*;
1143 match val {
1144 Null => None,
1145 Int32(v) => Some(v as i64),
1146 Int64(v) => Some(v),
1147 UInt32(v) => Some(v as i64),
1148 _ => todo!(),
1149 }
1150 }
1151}
1152
1153impl AnyValue<'_> {
1154 #[inline]
1155 pub fn eq_missing(&self, other: &Self, null_equal: bool) -> bool {
1156 fn struct_owned_value_iter<'a>(
1157 v: &'a (Vec<AnyValue<'_>>, Vec<Field>),
1158 ) -> impl ExactSizeIterator<Item = AnyValue<'a>> {
1159 v.0.iter().map(|v| v.as_borrowed())
1160 }
1161 fn struct_value_iter(
1162 idx: usize,
1163 arr: &StructArray,
1164 ) -> impl ExactSizeIterator<Item = AnyValue<'_>> {
1165 assert!(idx < arr.len());
1166
1167 arr.values().iter().map(move |field_arr| unsafe {
1168 field_arr.get_unchecked(idx)
1172 })
1173 }
1174
1175 fn struct_eq_missing<'a>(
1176 l: impl ExactSizeIterator<Item = AnyValue<'a>>,
1177 r: impl ExactSizeIterator<Item = AnyValue<'a>>,
1178 null_equal: bool,
1179 ) -> bool {
1180 if l.len() != r.len() {
1181 return false;
1182 }
1183
1184 l.zip(r).all(|(lv, rv)| lv.eq_missing(&rv, null_equal))
1185 }
1186
1187 use AnyValue::*;
1188 match (self, other) {
1189 (StringOwned(l), r) => AnyValue::String(l.as_str()) == *r,
1191 (BinaryOwned(l), r) => AnyValue::Binary(l.as_slice()) == *r,
1192 #[cfg(feature = "object")]
1193 (ObjectOwned(l), r) => AnyValue::Object(&*l.0) == *r,
1194 (l, StringOwned(r)) => *l == AnyValue::String(r.as_str()),
1195 (l, BinaryOwned(r)) => *l == AnyValue::Binary(r.as_slice()),
1196 #[cfg(feature = "object")]
1197 (l, ObjectOwned(r)) => *l == AnyValue::Object(&*r.0),
1198 #[cfg(feature = "dtype-datetime")]
1199 (DatetimeOwned(lv, ltu, ltz), r) => {
1200 Datetime(*lv, *ltu, ltz.as_ref().map(|v| v.as_ref())) == *r
1201 },
1202 #[cfg(feature = "dtype-datetime")]
1203 (l, DatetimeOwned(rv, rtu, rtz)) => {
1204 *l == Datetime(*rv, *rtu, rtz.as_ref().map(|v| v.as_ref()))
1205 },
1206 #[cfg(feature = "dtype-categorical")]
1207 (CategoricalOwned(cat, map), r) => Categorical(*cat, map) == *r,
1208 #[cfg(feature = "dtype-categorical")]
1209 (l, CategoricalOwned(cat, map)) => *l == Categorical(*cat, map),
1210 #[cfg(feature = "dtype-categorical")]
1211 (EnumOwned(cat, map), r) => Enum(*cat, map) == *r,
1212 #[cfg(feature = "dtype-categorical")]
1213 (l, EnumOwned(cat, map)) => *l == Enum(*cat, map),
1214
1215 (Null, Null) => null_equal,
1217 (Null, _) => false,
1218 (_, Null) => false,
1219
1220 (Boolean(l), Boolean(r)) => *l == *r,
1222 (UInt8(l), UInt8(r)) => *l == *r,
1223 (UInt16(l), UInt16(r)) => *l == *r,
1224 (UInt32(l), UInt32(r)) => *l == *r,
1225 (UInt64(l), UInt64(r)) => *l == *r,
1226 (UInt128(l), UInt128(r)) => *l == *r,
1227 (Int8(l), Int8(r)) => *l == *r,
1228 (Int16(l), Int16(r)) => *l == *r,
1229 (Int32(l), Int32(r)) => *l == *r,
1230 (Int64(l), Int64(r)) => *l == *r,
1231 (Int128(l), Int128(r)) => *l == *r,
1232 (Float16(l), Float16(r)) => l.to_total_ord() == r.to_total_ord(),
1233 (Float32(l), Float32(r)) => l.to_total_ord() == r.to_total_ord(),
1234 (Float64(l), Float64(r)) => l.to_total_ord() == r.to_total_ord(),
1235 (String(l), String(r)) => l == r,
1236 (Binary(l), Binary(r)) => l == r,
1237 #[cfg(feature = "dtype-time")]
1238 (Time(l), Time(r)) => *l == *r,
1239 #[cfg(all(feature = "dtype-datetime", feature = "dtype-date"))]
1240 (Date(l), Date(r)) => *l == *r,
1241 #[cfg(all(feature = "dtype-datetime", feature = "dtype-date"))]
1242 (Datetime(l, tul, tzl), Datetime(r, tur, tzr)) => {
1243 *l == *r && *tul == *tur && tzl == tzr
1244 },
1245 (List(l), List(r)) => l == r,
1246 #[cfg(feature = "dtype-categorical")]
1247 (Categorical(cat_l, map_l), Categorical(cat_r, map_r)) => {
1248 if !Arc::ptr_eq(map_l, map_r) {
1249 unimplemented!(
1252 "comparing categoricals with different Categories is not supported through AnyValue"
1253 );
1254 }
1255
1256 cat_l == cat_r
1257 },
1258 #[cfg(feature = "dtype-categorical")]
1259 (Enum(cat_l, map_l), Enum(cat_r, map_r)) => {
1260 if !Arc::ptr_eq(map_l, map_r) {
1261 unimplemented!(
1264 "comparing enums with different FrozenCategories is not supported through AnyValue"
1265 );
1266 }
1267
1268 cat_l == cat_r
1269 },
1270 #[cfg(feature = "dtype-duration")]
1271 (Duration(l, tu_l), Duration(r, tu_r)) => l == r && tu_l == tu_r,
1272
1273 #[cfg(feature = "dtype-struct")]
1274 (StructOwned(l), StructOwned(r)) => struct_eq_missing(
1275 struct_owned_value_iter(l.as_ref()),
1276 struct_owned_value_iter(r.as_ref()),
1277 null_equal,
1278 ),
1279 #[cfg(feature = "dtype-struct")]
1280 (StructOwned(l), Struct(idx, arr, _)) => struct_eq_missing(
1281 struct_owned_value_iter(l.as_ref()),
1282 struct_value_iter(*idx, arr),
1283 null_equal,
1284 ),
1285 #[cfg(feature = "dtype-struct")]
1286 (Struct(idx, arr, _), StructOwned(r)) => struct_eq_missing(
1287 struct_value_iter(*idx, arr),
1288 struct_owned_value_iter(r.as_ref()),
1289 null_equal,
1290 ),
1291 #[cfg(feature = "dtype-struct")]
1292 (Struct(l_idx, l_arr, _), Struct(r_idx, r_arr, _)) => struct_eq_missing(
1293 struct_value_iter(*l_idx, l_arr),
1294 struct_value_iter(*r_idx, r_arr),
1295 null_equal,
1296 ),
1297 #[cfg(feature = "dtype-decimal")]
1298 (Decimal(lv, _lp, ls), Decimal(rv, _rp, rs)) => dec128_eq(*lv, *ls, *rv, *rs),
1299 #[cfg(feature = "object")]
1300 (Object(l), Object(r)) => l == r,
1301 #[cfg(feature = "dtype-array")]
1302 (Array(l_values, l_size), Array(r_values, r_size)) => {
1303 if l_size != r_size {
1304 return false;
1305 }
1306
1307 debug_assert_eq!(l_values.len(), *l_size);
1308 debug_assert_eq!(r_values.len(), *r_size);
1309
1310 let mut is_equal = true;
1311 for i in 0..*l_size {
1312 let l = unsafe { l_values.get_unchecked(i) };
1313 let r = unsafe { r_values.get_unchecked(i) };
1314
1315 is_equal &= l.eq_missing(&r, null_equal);
1316 }
1317 is_equal
1318 },
1319
1320 (l, r) if l.to_i128().is_some() && r.to_i128().is_some() => l.to_i128() == r.to_i128(),
1321 (l, r) if l.to_f64().is_some() && r.to_f64().is_some() => {
1322 l.to_f64().unwrap().to_total_ord() == r.to_f64().unwrap().to_total_ord()
1323 },
1324
1325 (_, _) => {
1326 unimplemented!(
1327 "scalar eq_missing for mixed dtypes {self:?} and {other:?} is not supported"
1328 )
1329 },
1330 }
1331 }
1332}
1333
1334impl PartialEq for AnyValue<'_> {
1335 #[inline]
1336 fn eq(&self, other: &Self) -> bool {
1337 self.eq_missing(other, true)
1338 }
1339}
1340
1341impl PartialOrd for AnyValue<'_> {
1342 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1344 use AnyValue::*;
1345 match (self, &other) {
1346 (StringOwned(l), r) => AnyValue::String(l.as_str()).partial_cmp(r),
1348 (BinaryOwned(l), r) => AnyValue::Binary(l.as_slice()).partial_cmp(r),
1349 #[cfg(feature = "object")]
1350 (ObjectOwned(l), r) => AnyValue::Object(&*l.0).partial_cmp(r),
1351 (l, StringOwned(r)) => l.partial_cmp(&AnyValue::String(r.as_str())),
1352 (l, BinaryOwned(r)) => l.partial_cmp(&AnyValue::Binary(r.as_slice())),
1353 #[cfg(feature = "object")]
1354 (l, ObjectOwned(r)) => l.partial_cmp(&AnyValue::Object(&*r.0)),
1355 #[cfg(feature = "dtype-datetime")]
1356 (DatetimeOwned(lv, ltu, ltz), r) => {
1357 Datetime(*lv, *ltu, ltz.as_ref().map(|v| v.as_ref())).partial_cmp(r)
1358 },
1359 #[cfg(feature = "dtype-datetime")]
1360 (l, DatetimeOwned(rv, rtu, rtz)) => {
1361 l.partial_cmp(&Datetime(*rv, *rtu, rtz.as_ref().map(|v| v.as_ref())))
1362 },
1363 #[cfg(feature = "dtype-categorical")]
1364 (CategoricalOwned(cat, map), r) => Categorical(*cat, map).partial_cmp(r),
1365 #[cfg(feature = "dtype-categorical")]
1366 (l, CategoricalOwned(cat, map)) => l.partial_cmp(&Categorical(*cat, map)),
1367 #[cfg(feature = "dtype-categorical")]
1368 (EnumOwned(cat, map), r) => Enum(*cat, map).partial_cmp(r),
1369 #[cfg(feature = "dtype-categorical")]
1370 (l, EnumOwned(cat, map)) => l.partial_cmp(&Enum(*cat, map)),
1371
1372 (Null, Null) => Some(Ordering::Equal),
1374 (Null, _) => Some(Ordering::Less),
1375 (_, Null) => Some(Ordering::Greater),
1376
1377 (Boolean(l), Boolean(r)) => l.partial_cmp(r),
1379 (UInt8(l), UInt8(r)) => l.partial_cmp(r),
1380 (UInt16(l), UInt16(r)) => l.partial_cmp(r),
1381 (UInt32(l), UInt32(r)) => l.partial_cmp(r),
1382 (UInt64(l), UInt64(r)) => l.partial_cmp(r),
1383 (UInt128(l), UInt128(r)) => l.partial_cmp(r),
1384 (Int8(l), Int8(r)) => l.partial_cmp(r),
1385 (Int16(l), Int16(r)) => l.partial_cmp(r),
1386 (Int32(l), Int32(r)) => l.partial_cmp(r),
1387 (Int64(l), Int64(r)) => l.partial_cmp(r),
1388 (Int128(l), Int128(r)) => l.partial_cmp(r),
1389 (Float16(l), Float16(r)) => Some(l.tot_cmp(r)),
1390 (Float32(l), Float32(r)) => Some(l.tot_cmp(r)),
1391 (Float64(l), Float64(r)) => Some(l.tot_cmp(r)),
1392 (String(l), String(r)) => l.partial_cmp(r),
1393 (Binary(l), Binary(r)) => l.partial_cmp(r),
1394 #[cfg(feature = "dtype-date")]
1395 (Date(l), Date(r)) => l.partial_cmp(r),
1396 #[cfg(feature = "dtype-datetime")]
1397 (Datetime(lt, lu, lz), Datetime(rt, ru, rz)) => {
1398 if lu != ru || lz != rz {
1399 unimplemented!(
1400 "comparing datetimes with different units or timezones is not supported"
1401 );
1402 }
1403
1404 lt.partial_cmp(rt)
1405 },
1406 #[cfg(feature = "dtype-duration")]
1407 (Duration(lt, lu), Duration(rt, ru)) => {
1408 if lu != ru {
1409 unimplemented!("comparing durations with different units is not supported");
1410 }
1411
1412 lt.partial_cmp(rt)
1413 },
1414 #[cfg(feature = "dtype-time")]
1415 (Time(l), Time(r)) => l.partial_cmp(r),
1416 #[cfg(feature = "dtype-categorical")]
1417 (Categorical(l_cat, l_map), Categorical(r_cat, r_map)) => unsafe {
1418 let l_str = l_map.cat_to_str_unchecked(*l_cat);
1419 let r_str = r_map.cat_to_str_unchecked(*r_cat);
1420 l_str.partial_cmp(r_str)
1421 },
1422 #[cfg(feature = "dtype-categorical")]
1423 (Enum(l_cat, l_map), Enum(r_cat, r_map)) => {
1424 if !Arc::ptr_eq(l_map, r_map) {
1425 unimplemented!("can't order enums from different FrozenCategories")
1426 }
1427 l_cat.partial_cmp(r_cat)
1428 },
1429 (List(_), List(_)) => {
1430 unimplemented!("ordering for List dtype is not supported")
1431 },
1432 #[cfg(feature = "dtype-array")]
1433 (Array(..), Array(..)) => {
1434 unimplemented!("ordering for Array dtype is not supported")
1435 },
1436 #[cfg(feature = "object")]
1437 (Object(_), Object(_)) => {
1438 unimplemented!("ordering for Object dtype is not supported")
1439 },
1440 #[cfg(feature = "dtype-struct")]
1441 (StructOwned(_), StructOwned(_))
1442 | (StructOwned(_), Struct(..))
1443 | (Struct(..), StructOwned(_))
1444 | (Struct(..), Struct(..)) => {
1445 unimplemented!("ordering for Struct dtype is not supported")
1446 },
1447 #[cfg(feature = "dtype-decimal")]
1448 (Decimal(lv, _lp, ls), Decimal(rv, _rp, rs)) => Some(dec128_cmp(*lv, *ls, *rv, *rs)),
1449
1450 (_, _) => {
1451 unimplemented!(
1452 "scalar ordering for mixed dtypes {self:?} and {other:?} is not supported"
1453 )
1454 },
1455 }
1456 }
1457}
1458
1459impl TotalEq for AnyValue<'_> {
1460 #[inline]
1461 fn tot_eq(&self, other: &Self) -> bool {
1462 self.eq_missing(other, true)
1463 }
1464}
1465
/// Materializes row `idx` of a struct array as a vector of owned (`'static`) field values.
///
/// Each field array is paired with the `Field` at the same position, whose dtype is used to
/// interpret the array.
///
/// # Panics
///
/// Panics if `idx` is not smaller than the length of `arr`.
#[cfg(feature = "dtype-struct")]
fn struct_to_avs_static(idx: usize, arr: &StructArray, fields: &[Field]) -> Vec<AnyValue<'static>> {
    assert!(idx < arr.len());

    let columns = arr.values();
    debug_assert_eq!(columns.len(), fields.len());

    let mut row = Vec::with_capacity(columns.len().min(fields.len()));
    for (column, field) in columns.iter().zip(fields) {
        // SAFETY: `idx` was checked against the struct array's length above.
        // NOTE(review): assumes every field array has that length and matches
        // `field.dtype` — confirm against `arr_to_any_value`'s contract.
        let value = unsafe { arr_to_any_value(column.as_ref(), idx, &field.dtype) };
        row.push(value.into_static());
    }
    row
}
1484
/// Unchecked access to a single element as an [`AnyValue`].
pub trait GetAnyValue {
    /// Returns the element at `index` as an [`AnyValue`] without performing checks.
    ///
    /// # Safety
    ///
    /// NOTE(review): the caller presumably must guarantee that `index` is in bounds; the
    /// implementation in this file forwards it to unchecked array accessors — confirm.
    unsafe fn get_unchecked(&self, index: usize) -> AnyValue<'_>;
}
1491
1492impl GetAnyValue for ArrayRef {
1493 unsafe fn get_unchecked(&self, index: usize) -> AnyValue<'_> {
1495 match self.dtype() {
1496 ArrowDataType::Int8 => {
1497 let arr = self
1498 .as_any()
1499 .downcast_ref::<PrimitiveArray<i8>>()
1500 .unwrap_unchecked();
1501 match arr.get_unchecked(index) {
1502 None => AnyValue::Null,
1503 Some(v) => AnyValue::Int8(v),
1504 }
1505 },
1506 ArrowDataType::Int16 => {
1507 let arr = self
1508 .as_any()
1509 .downcast_ref::<PrimitiveArray<i16>>()
1510 .unwrap_unchecked();
1511 match arr.get_unchecked(index) {
1512 None => AnyValue::Null,
1513 Some(v) => AnyValue::Int16(v),
1514 }
1515 },
1516 ArrowDataType::Int32 => {
1517 let arr = self
1518 .as_any()
1519 .downcast_ref::<PrimitiveArray<i32>>()
1520 .unwrap_unchecked();
1521 match arr.get_unchecked(index) {
1522 None => AnyValue::Null,
1523 Some(v) => AnyValue::Int32(v),
1524 }
1525 },
1526 ArrowDataType::Int64 => {
1527 let arr = self
1528 .as_any()
1529 .downcast_ref::<PrimitiveArray<i64>>()
1530 .unwrap_unchecked();
1531 match arr.get_unchecked(index) {
1532 None => AnyValue::Null,
1533 Some(v) => AnyValue::Int64(v),
1534 }
1535 },
1536 ArrowDataType::Int128 => {
1537 let arr = self
1538 .as_any()
1539 .downcast_ref::<PrimitiveArray<i128>>()
1540 .unwrap_unchecked();
1541 match arr.get_unchecked(index) {
1542 None => AnyValue::Null,
1543 Some(v) => AnyValue::Int128(v),
1544 }
1545 },
1546 ArrowDataType::UInt8 => {
1547 let arr = self
1548 .as_any()
1549 .downcast_ref::<PrimitiveArray<u8>>()
1550 .unwrap_unchecked();
1551 match arr.get_unchecked(index) {
1552 None => AnyValue::Null,
1553 Some(v) => AnyValue::UInt8(v),
1554 }
1555 },
1556 ArrowDataType::UInt16 => {
1557 let arr = self
1558 .as_any()
1559 .downcast_ref::<PrimitiveArray<u16>>()
1560 .unwrap_unchecked();
1561 match arr.get_unchecked(index) {
1562 None => AnyValue::Null,
1563 Some(v) => AnyValue::UInt16(v),
1564 }
1565 },
1566 ArrowDataType::UInt32 => {
1567 let arr = self
1568 .as_any()
1569 .downcast_ref::<PrimitiveArray<u32>>()
1570 .unwrap_unchecked();
1571 match arr.get_unchecked(index) {
1572 None => AnyValue::Null,
1573 Some(v) => AnyValue::UInt32(v),
1574 }
1575 },
1576 ArrowDataType::UInt64 => {
1577 let arr = self
1578 .as_any()
1579 .downcast_ref::<PrimitiveArray<u64>>()
1580 .unwrap_unchecked();
1581 match arr.get_unchecked(index) {
1582 None => AnyValue::Null,
1583 Some(v) => AnyValue::UInt64(v),
1584 }
1585 },
1586 ArrowDataType::UInt128 => {
1587 let arr = self
1588 .as_any()
1589 .downcast_ref::<PrimitiveArray<u128>>()
1590 .unwrap_unchecked();
1591 match arr.get_unchecked(index) {
1592 None => AnyValue::Null,
1593 Some(v) => AnyValue::UInt128(v),
1594 }
1595 },
1596 ArrowDataType::Float16 => {
1597 let arr = self
1598 .as_any()
1599 .downcast_ref::<PrimitiveArray<pf16>>()
1600 .unwrap_unchecked();
1601 match arr.get_unchecked(index) {
1602 None => AnyValue::Null,
1603 Some(v) => AnyValue::Float16(v),
1604 }
1605 },
1606 ArrowDataType::Float32 => {
1607 let arr = self
1608 .as_any()
1609 .downcast_ref::<PrimitiveArray<f32>>()
1610 .unwrap_unchecked();
1611 match arr.get_unchecked(index) {
1612 None => AnyValue::Null,
1613 Some(v) => AnyValue::Float32(v),
1614 }
1615 },
1616 ArrowDataType::Float64 => {
1617 let arr = self
1618 .as_any()
1619 .downcast_ref::<PrimitiveArray<f64>>()
1620 .unwrap_unchecked();
1621 match arr.get_unchecked(index) {
1622 None => AnyValue::Null,
1623 Some(v) => AnyValue::Float64(v),
1624 }
1625 },
1626 ArrowDataType::Boolean => {
1627 let arr = self
1628 .as_any()
1629 .downcast_ref::<BooleanArray>()
1630 .unwrap_unchecked();
1631 match arr.get_unchecked(index) {
1632 None => AnyValue::Null,
1633 Some(v) => AnyValue::Boolean(v),
1634 }
1635 },
1636 ArrowDataType::LargeUtf8 => {
1637 let arr = self
1638 .as_any()
1639 .downcast_ref::<LargeStringArray>()
1640 .unwrap_unchecked();
1641 match arr.get_unchecked(index) {
1642 None => AnyValue::Null,
1643 Some(v) => AnyValue::String(v),
1644 }
1645 },
1646 _ => unimplemented!(),
1647 }
1648 }
1649}
1650
1651impl<K: NumericNative> From<K> for AnyValue<'static> {
1652 fn from(value: K) -> Self {
1653 unsafe {
1654 match K::PRIMITIVE {
1655 PrimitiveType::Int8 => AnyValue::Int8(NumCast::from(value).unwrap_unchecked()),
1656 PrimitiveType::Int16 => AnyValue::Int16(NumCast::from(value).unwrap_unchecked()),
1657 PrimitiveType::Int32 => AnyValue::Int32(NumCast::from(value).unwrap_unchecked()),
1658 PrimitiveType::Int64 => AnyValue::Int64(NumCast::from(value).unwrap_unchecked()),
1659 PrimitiveType::Int128 => AnyValue::Int128(NumCast::from(value).unwrap_unchecked()),
1660 PrimitiveType::UInt8 => AnyValue::UInt8(NumCast::from(value).unwrap_unchecked()),
1661 PrimitiveType::UInt16 => AnyValue::UInt16(NumCast::from(value).unwrap_unchecked()),
1662 PrimitiveType::UInt32 => AnyValue::UInt32(NumCast::from(value).unwrap_unchecked()),
1663 PrimitiveType::UInt64 => AnyValue::UInt64(NumCast::from(value).unwrap_unchecked()),
1664 PrimitiveType::UInt128 => {
1665 AnyValue::UInt128(NumCast::from(value).unwrap_unchecked())
1666 },
1667 PrimitiveType::Float16 => {
1668 AnyValue::Float16(NumCast::from(value).unwrap_unchecked())
1669 },
1670 PrimitiveType::Float32 => {
1671 AnyValue::Float32(NumCast::from(value).unwrap_unchecked())
1672 },
1673 PrimitiveType::Float64 => {
1674 AnyValue::Float64(NumCast::from(value).unwrap_unchecked())
1675 },
1676 _ => unreachable!(),
1678 }
1679 }
1680 }
1681}
1682
1683impl<'a> From<&'a [u8]> for AnyValue<'a> {
1684 fn from(value: &'a [u8]) -> Self {
1685 AnyValue::Binary(value)
1686 }
1687}
1688
1689impl<'a> From<&'a str> for AnyValue<'a> {
1690 fn from(value: &'a str) -> Self {
1691 AnyValue::String(value)
1692 }
1693}
1694
1695impl From<bool> for AnyValue<'static> {
1696 fn from(value: bool) -> Self {
1697 AnyValue::Boolean(value)
1698 }
1699}
1700
#[cfg(test)]
mod test {
    #[cfg(feature = "dtype-categorical")]
    use super::*;

    /// Checks that `DataType::from_arrow_dtype` maps each Arrow dtype in the table to the
    /// expected Polars dtype.
    #[test]
    #[cfg(feature = "dtype-categorical")]
    fn test_arrow_dtypes_to_polars() {
        // Arrow item field shared by the two list flavours at the end of the table.
        let float_item = || {
            Box::new(ArrowField::new(
                LIST_VALUES_NAME,
                ArrowDataType::Float64,
                true,
            ))
        };

        // (Arrow input, expected Polars dtype)
        let cases = [
            // Durations keep their unit.
            (
                ArrowDataType::Duration(ArrowTimeUnit::Nanosecond),
                DataType::Duration(TimeUnit::Nanoseconds),
            ),
            (
                ArrowDataType::Duration(ArrowTimeUnit::Millisecond),
                DataType::Duration(TimeUnit::Milliseconds),
            ),
            // Date64 and timestamps become datetimes.
            (
                ArrowDataType::Date64,
                DataType::Datetime(TimeUnit::Milliseconds, None),
            ),
            (
                ArrowDataType::Timestamp(ArrowTimeUnit::Nanosecond, None),
                DataType::Datetime(TimeUnit::Nanoseconds, None),
            ),
            (
                ArrowDataType::Timestamp(ArrowTimeUnit::Microsecond, None),
                DataType::Datetime(TimeUnit::Microseconds, None),
            ),
            (
                ArrowDataType::Timestamp(ArrowTimeUnit::Millisecond, None),
                DataType::Datetime(TimeUnit::Milliseconds, None),
            ),
            (
                ArrowDataType::Timestamp(ArrowTimeUnit::Second, None),
                DataType::Datetime(TimeUnit::Milliseconds, None),
            ),
            (
                ArrowDataType::Timestamp(ArrowTimeUnit::Second, Some(PlSmallStr::EMPTY)),
                DataType::Datetime(TimeUnit::Milliseconds, None),
            ),
            // Strings and binaries, both offset widths.
            (ArrowDataType::LargeUtf8, DataType::String),
            (ArrowDataType::Utf8, DataType::String),
            (ArrowDataType::LargeBinary, DataType::Binary),
            (ArrowDataType::Binary, DataType::Binary),
            // Every Arrow time flavour maps to the single Polars time dtype.
            (
                ArrowDataType::Time64(ArrowTimeUnit::Nanosecond),
                DataType::Time,
            ),
            (
                ArrowDataType::Time64(ArrowTimeUnit::Millisecond),
                DataType::Time,
            ),
            (
                ArrowDataType::Time64(ArrowTimeUnit::Microsecond),
                DataType::Time,
            ),
            (ArrowDataType::Time64(ArrowTimeUnit::Second), DataType::Time),
            (
                ArrowDataType::Time32(ArrowTimeUnit::Nanosecond),
                DataType::Time,
            ),
            (
                ArrowDataType::Time32(ArrowTimeUnit::Millisecond),
                DataType::Time,
            ),
            (
                ArrowDataType::Time32(ArrowTimeUnit::Microsecond),
                DataType::Time,
            ),
            (ArrowDataType::Time32(ArrowTimeUnit::Second), DataType::Time),
            // Both list flavours map to the Polars list dtype.
            (
                ArrowDataType::List(float_item()),
                DataType::List(DataType::Float64.into()),
            ),
            (
                ArrowDataType::LargeList(float_item()),
                DataType::List(DataType::Float64.into()),
            ),
        ];

        for (arrow, expected) in cases {
            assert_eq!(expected, DataType::from_arrow_dtype(&arrow));
        }
    }
}