1#![allow(unsafe_op_in_unsafe_fn)]
2use std::borrow::Cow;
3
4use arrow::types::PrimitiveType;
5use polars_compute::cast::SerPrimitive;
6use polars_error::feature_gated;
7#[cfg(feature = "dtype-categorical")]
8use polars_utils::sync::SyncPtr;
9use polars_utils::total_ord::ToTotalOrd;
10
11use super::*;
12#[cfg(feature = "dtype-struct")]
13use crate::prelude::any_value::arr_to_any_value;
14
/// Owned object payload: a boxed [`PolarsObjectSafe`] trait object.
///
/// Used by [`AnyValue::ObjectOwned`] so that an object value does not have to borrow
/// from its source.
#[cfg(feature = "object")]
#[derive(Debug)]
pub struct OwnedObject(pub Box<dyn PolarsObjectSafe>);

#[cfg(feature = "object")]
impl Clone for OwnedObject {
    /// Clones by asking the wrapped object to re-box itself via `to_boxed`.
    fn clone(&self) -> Self {
        let boxed = self.0.to_boxed();
        OwnedObject(boxed)
    }
}
25
26#[derive(Debug, Clone, Default)]
27pub enum AnyValue<'a> {
28 #[default]
29 Null,
30 Boolean(bool),
32 String(&'a str),
34 UInt8(u8),
36 UInt16(u16),
38 UInt32(u32),
40 UInt64(u64),
42 Int8(i8),
44 Int16(i16),
46 Int32(i32),
48 Int64(i64),
50 Int128(i128),
52 Float32(f32),
54 Float64(f64),
56 #[cfg(feature = "dtype-date")]
59 Date(i32),
60 #[cfg(feature = "dtype-datetime")]
63 Datetime(i64, TimeUnit, Option<&'a TimeZone>),
64 #[cfg(feature = "dtype-datetime")]
67 DatetimeOwned(i64, TimeUnit, Option<Arc<TimeZone>>),
68 #[cfg(feature = "dtype-duration")]
70 Duration(i64, TimeUnit),
71 #[cfg(feature = "dtype-time")]
73 Time(i64),
74 #[cfg(feature = "dtype-categorical")]
77 Categorical(u32, &'a RevMapping, SyncPtr<Utf8ViewArray>),
78 #[cfg(feature = "dtype-categorical")]
81 CategoricalOwned(u32, Arc<RevMapping>, SyncPtr<Utf8ViewArray>),
82 #[cfg(feature = "dtype-categorical")]
83 Enum(u32, &'a RevMapping, SyncPtr<Utf8ViewArray>),
84 #[cfg(feature = "dtype-categorical")]
85 EnumOwned(u32, Arc<RevMapping>, SyncPtr<Utf8ViewArray>),
86 List(Series),
88 #[cfg(feature = "dtype-array")]
89 Array(Series, usize),
90 #[cfg(feature = "object")]
92 Object(&'a dyn PolarsObjectSafe),
93 #[cfg(feature = "object")]
94 ObjectOwned(OwnedObject),
95 #[cfg(feature = "dtype-struct")]
100 Struct(usize, &'a StructArray, &'a [Field]),
101 #[cfg(feature = "dtype-struct")]
102 StructOwned(Box<(Vec<AnyValue<'a>>, Vec<Field>)>),
103 StringOwned(PlSmallStr),
105 Binary(&'a [u8]),
106 BinaryOwned(Vec<u8>),
107 #[cfg(feature = "dtype-decimal")]
109 Decimal(i128, usize),
110}
111
#[cfg(feature = "serde")]
impl Serialize for AnyValue<'_> {
    /// Serializes the value as an externally tagged enum named `AnyValue`.
    ///
    /// Borrowed strings/binaries are written under the same tag as their owned
    /// counterparts. Variants other than the ones listed below are rejected with a
    /// custom serializer error.
    ///
    /// The variant indices must equal the discriminants of the `AvField` enum used by
    /// the `Deserialize` impl, otherwise index-based formats decode the wrong variant.
    /// (`Int128` used to share index 4 with `Int64`, shifting every later index.)
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let name = "AnyValue";
        match self {
            AnyValue::Null => serializer.serialize_unit_variant(name, 0, "Null"),
            AnyValue::Int8(v) => serializer.serialize_newtype_variant(name, 1, "Int8", v),
            AnyValue::Int16(v) => serializer.serialize_newtype_variant(name, 2, "Int16", v),
            AnyValue::Int32(v) => serializer.serialize_newtype_variant(name, 3, "Int32", v),
            AnyValue::Int64(v) => serializer.serialize_newtype_variant(name, 4, "Int64", v),
            AnyValue::Int128(v) => serializer.serialize_newtype_variant(name, 5, "Int128", v),
            AnyValue::UInt8(v) => serializer.serialize_newtype_variant(name, 6, "UInt8", v),
            AnyValue::UInt16(v) => serializer.serialize_newtype_variant(name, 7, "UInt16", v),
            AnyValue::UInt32(v) => serializer.serialize_newtype_variant(name, 8, "UInt32", v),
            AnyValue::UInt64(v) => serializer.serialize_newtype_variant(name, 9, "UInt64", v),
            AnyValue::Float32(v) => serializer.serialize_newtype_variant(name, 10, "Float32", v),
            AnyValue::Float64(v) => serializer.serialize_newtype_variant(name, 11, "Float64", v),
            AnyValue::List(v) => serializer.serialize_newtype_variant(name, 12, "List", v),
            AnyValue::Boolean(v) => serializer.serialize_newtype_variant(name, 13, "Bool", v),
            // Borrowed data is written under the owned tag: deserialization always owns.
            AnyValue::String(v) => serializer.serialize_newtype_variant(name, 14, "StringOwned", v),
            AnyValue::StringOwned(v) => {
                serializer.serialize_newtype_variant(name, 14, "StringOwned", v.as_str())
            },
            AnyValue::Binary(v) => serializer.serialize_newtype_variant(name, 15, "BinaryOwned", v),
            AnyValue::BinaryOwned(v) => {
                serializer.serialize_newtype_variant(name, 15, "BinaryOwned", v)
            },
            _ => Err(serde::ser::Error::custom(
                "Unknown data type. Cannot serialize",
            )),
        }
    }
}
149
#[cfg(feature = "serde")]
impl<'a> Deserialize<'a> for AnyValue<'static> {
    /// Deserializes an owned `AnyValue` from an externally tagged enum.
    ///
    /// The variant tag may be given by name (`"Int8"`, `"Bool"`, ... with `"String"` and
    /// `"Binary"` accepted as aliases of their owned forms) or by index, where the index
    /// is the position of the variant in `AvField`.
    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
    where
        D: Deserializer<'a>,
    {
        /// Variant tag. The declaration order defines the accepted variant indices.
        #[repr(u8)]
        enum AvField {
            Null,
            Int8,
            Int16,
            Int32,
            Int64,
            Int128,
            UInt8,
            UInt16,
            UInt32,
            UInt64,
            Float32,
            Float64,
            List,
            Bool,
            StringOwned,
            BinaryOwned,
        }

        impl AvField {
            /// Maps a variant index onto its tag; `None` when the index is out of range.
            fn from_index(index: u8) -> Option<Self> {
                Some(match index {
                    0 => AvField::Null,
                    1 => AvField::Int8,
                    2 => AvField::Int16,
                    3 => AvField::Int32,
                    4 => AvField::Int64,
                    5 => AvField::Int128,
                    6 => AvField::UInt8,
                    7 => AvField::UInt16,
                    8 => AvField::UInt32,
                    9 => AvField::UInt64,
                    10 => AvField::Float32,
                    11 => AvField::Float64,
                    12 => AvField::List,
                    13 => AvField::Bool,
                    14 => AvField::StringOwned,
                    15 => AvField::BinaryOwned,
                    _ => return None,
                })
            }
        }

        // Kept in `AvField` order and spelled exactly like the names `visit_bytes` accepts.
        const VARIANTS: &[&str] = &[
            "Null",
            "Int8",
            "Int16",
            "Int32",
            "Int64",
            "Int128",
            "UInt8",
            "UInt16",
            "UInt32",
            "UInt64",
            "Float32",
            "Float64",
            "List",
            "Bool",
            "StringOwned",
            "BinaryOwned",
        ];
        // Highest valid variant index; a plain cast is enough for a fieldless enum.
        const LAST: u8 = AvField::BinaryOwned as u8;

        /// Shared index validation for the signed and unsigned visitor entry points.
        fn field_from_index<E>(
            index: Option<u8>,
            unexpected: Unexpected<'_>,
        ) -> std::result::Result<AvField, E>
        where
            E: Error,
        {
            let index = index.ok_or_else(|| {
                serde::de::Error::invalid_value(unexpected, &"expected value that fits into u8")
            })?;
            AvField::from_index(index).ok_or_else(|| {
                serde::de::Error::invalid_value(
                    unexpected,
                    &"expected value that fits into AnyValue's number of fields",
                )
            })
        }

        struct FieldVisitor;

        impl Visitor<'_> for FieldVisitor {
            type Value = AvField;

            fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
                write!(formatter, "an integer between 0-{LAST}")
            }

            fn visit_i64<E>(self, v: i64) -> std::result::Result<Self::Value, E>
            where
                E: Error,
            {
                field_from_index(u8::try_from(v).ok(), Unexpected::Signed(v))
            }

            // Serde's default `visit_u64` rejects the input, and the smaller unsigned
            // visitors forward to it, so unsigned variant indices need this override.
            fn visit_u64<E>(self, v: u64) -> std::result::Result<Self::Value, E>
            where
                E: Error,
            {
                field_from_index(u8::try_from(v).ok(), Unexpected::Unsigned(v))
            }

            fn visit_str<E>(self, v: &str) -> std::result::Result<Self::Value, E>
            where
                E: Error,
            {
                self.visit_bytes(v.as_bytes())
            }

            fn visit_bytes<E>(self, v: &[u8]) -> std::result::Result<Self::Value, E>
            where
                E: Error,
            {
                let field = match v {
                    b"Null" => AvField::Null,
                    b"Int8" => AvField::Int8,
                    b"Int16" => AvField::Int16,
                    b"Int32" => AvField::Int32,
                    b"Int64" => AvField::Int64,
                    b"Int128" => AvField::Int128,
                    b"UInt8" => AvField::UInt8,
                    b"UInt16" => AvField::UInt16,
                    b"UInt32" => AvField::UInt32,
                    b"UInt64" => AvField::UInt64,
                    b"Float32" => AvField::Float32,
                    b"Float64" => AvField::Float64,
                    b"List" => AvField::List,
                    b"Bool" => AvField::Bool,
                    b"StringOwned" | b"String" => AvField::StringOwned,
                    b"BinaryOwned" | b"Binary" => AvField::BinaryOwned,
                    _ => {
                        return Err(serde::de::Error::unknown_variant(
                            &String::from_utf8_lossy(v),
                            VARIANTS,
                        ));
                    },
                };
                Ok(field)
            }
        }

        impl<'a> Deserialize<'a> for AvField {
            fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
            where
                D: Deserializer<'a>,
            {
                deserializer.deserialize_identifier(FieldVisitor)
            }
        }

        struct OuterVisitor;

        impl<'b> Visitor<'b> for OuterVisitor {
            type Value = AnyValue<'static>;

            fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
                write!(formatter, "enum AnyValue")
            }

            fn visit_enum<A>(self, data: A) -> std::result::Result<Self::Value, A::Error>
            where
                A: EnumAccess<'b>,
            {
                let (field, variant) = data.variant()?;
                // The payload type of each `newtype_variant` call is inferred from the
                // `AnyValue` constructor it feeds.
                let out = match field {
                    AvField::Null => {
                        // Consume the unit payload so formats that encode one stay in sync.
                        variant.unit_variant()?;
                        AnyValue::Null
                    },
                    AvField::Int8 => AnyValue::Int8(variant.newtype_variant()?),
                    AvField::Int16 => AnyValue::Int16(variant.newtype_variant()?),
                    AvField::Int32 => AnyValue::Int32(variant.newtype_variant()?),
                    AvField::Int64 => AnyValue::Int64(variant.newtype_variant()?),
                    AvField::Int128 => AnyValue::Int128(variant.newtype_variant()?),
                    AvField::UInt8 => AnyValue::UInt8(variant.newtype_variant()?),
                    AvField::UInt16 => AnyValue::UInt16(variant.newtype_variant()?),
                    AvField::UInt32 => AnyValue::UInt32(variant.newtype_variant()?),
                    AvField::UInt64 => AnyValue::UInt64(variant.newtype_variant()?),
                    AvField::Float32 => AnyValue::Float32(variant.newtype_variant()?),
                    AvField::Float64 => AnyValue::Float64(variant.newtype_variant()?),
                    AvField::Bool => AnyValue::Boolean(variant.newtype_variant()?),
                    AvField::List => AnyValue::List(variant.newtype_variant()?),
                    AvField::StringOwned => {
                        let value: PlSmallStr = variant.newtype_variant()?;
                        AnyValue::StringOwned(value)
                    },
                    AvField::BinaryOwned => AnyValue::BinaryOwned(variant.newtype_variant()?),
                };
                Ok(out)
            }
        }
        deserializer.deserialize_enum("AnyValue", VARIANTS, OuterVisitor)
    }
}
360
361impl AnyValue<'static> {
362 pub fn zero_sum(dtype: &DataType) -> Self {
363 match dtype {
364 DataType::String => AnyValue::StringOwned(PlSmallStr::EMPTY),
365 DataType::Binary => AnyValue::BinaryOwned(Vec::new()),
366 DataType::Boolean => (0 as IdxSize).into(),
367 d if d.is_primitive_numeric() => unsafe {
369 std::mem::transmute::<AnyValue<'_>, AnyValue<'static>>(
370 AnyValue::UInt8(0).cast(dtype),
371 )
372 },
373 #[cfg(feature = "dtype-duration")]
374 DataType::Duration(unit) => AnyValue::Duration(0, *unit),
375 #[cfg(feature = "dtype-decimal")]
376 DataType::Decimal(_p, s) => {
377 AnyValue::Decimal(0, s.expect("unknown scale during execution"))
378 },
379 _ => AnyValue::Null,
380 }
381 }
382
383 pub fn can_have_dtype(&self, dtype: &DataType) -> bool {
385 matches!(self, AnyValue::Null) || dtype == &self.dtype()
386 }
387}
388
389impl<'a> AnyValue<'a> {
390 pub fn dtype(&self) -> DataType {
395 use AnyValue::*;
396 match self {
397 Null => DataType::Null,
398 Boolean(_) => DataType::Boolean,
399 Int8(_) => DataType::Int8,
400 Int16(_) => DataType::Int16,
401 Int32(_) => DataType::Int32,
402 Int64(_) => DataType::Int64,
403 Int128(_) => DataType::Int128,
404 UInt8(_) => DataType::UInt8,
405 UInt16(_) => DataType::UInt16,
406 UInt32(_) => DataType::UInt32,
407 UInt64(_) => DataType::UInt64,
408 Float32(_) => DataType::Float32,
409 Float64(_) => DataType::Float64,
410 String(_) | StringOwned(_) => DataType::String,
411 Binary(_) | BinaryOwned(_) => DataType::Binary,
412 #[cfg(feature = "dtype-date")]
413 Date(_) => DataType::Date,
414 #[cfg(feature = "dtype-time")]
415 Time(_) => DataType::Time,
416 #[cfg(feature = "dtype-datetime")]
417 Datetime(_, tu, tz) => DataType::Datetime(*tu, (*tz).cloned()),
418 #[cfg(feature = "dtype-datetime")]
419 DatetimeOwned(_, tu, tz) => {
420 DataType::Datetime(*tu, tz.as_ref().map(|v| v.as_ref().clone()))
421 },
422 #[cfg(feature = "dtype-duration")]
423 Duration(_, tu) => DataType::Duration(*tu),
424 #[cfg(feature = "dtype-categorical")]
425 Categorical(_, _, _) | CategoricalOwned(_, _, _) => {
426 DataType::Categorical(None, Default::default())
427 },
428 #[cfg(feature = "dtype-categorical")]
429 Enum(_, _, _) | EnumOwned(_, _, _) => DataType::Enum(None, Default::default()),
430 List(s) => DataType::List(Box::new(s.dtype().clone())),
431 #[cfg(feature = "dtype-array")]
432 Array(s, size) => DataType::Array(Box::new(s.dtype().clone()), *size),
433 #[cfg(feature = "dtype-struct")]
434 Struct(_, _, fields) => DataType::Struct(fields.to_vec()),
435 #[cfg(feature = "dtype-struct")]
436 StructOwned(payload) => DataType::Struct(payload.1.clone()),
437 #[cfg(feature = "dtype-decimal")]
438 Decimal(_, scale) => DataType::Decimal(None, Some(*scale)),
439 #[cfg(feature = "object")]
440 Object(o) => DataType::Object(o.type_name()),
441 #[cfg(feature = "object")]
442 ObjectOwned(o) => DataType::Object(o.0.type_name()),
443 }
444 }
445
446 #[doc(hidden)]
448 #[inline]
449 pub fn extract<T: NumCast>(&self) -> Option<T> {
450 use AnyValue::*;
451 match self {
452 Int8(v) => NumCast::from(*v),
453 Int16(v) => NumCast::from(*v),
454 Int32(v) => NumCast::from(*v),
455 Int64(v) => NumCast::from(*v),
456 Int128(v) => NumCast::from(*v),
457 UInt8(v) => NumCast::from(*v),
458 UInt16(v) => NumCast::from(*v),
459 UInt32(v) => NumCast::from(*v),
460 UInt64(v) => NumCast::from(*v),
461 Float32(v) => NumCast::from(*v),
462 Float64(v) => NumCast::from(*v),
463 #[cfg(feature = "dtype-date")]
464 Date(v) => NumCast::from(*v),
465 #[cfg(feature = "dtype-datetime")]
466 Datetime(v, _, _) | DatetimeOwned(v, _, _) => NumCast::from(*v),
467 #[cfg(feature = "dtype-time")]
468 Time(v) => NumCast::from(*v),
469 #[cfg(feature = "dtype-duration")]
470 Duration(v, _) => NumCast::from(*v),
471 #[cfg(feature = "dtype-decimal")]
472 Decimal(v, scale) => {
473 if *scale == 0 {
474 NumCast::from(*v)
475 } else {
476 let f: Option<f64> = NumCast::from(*v);
477 NumCast::from(f? / 10f64.powi(*scale as _))
478 }
479 },
480 Boolean(v) => NumCast::from(if *v { 1 } else { 0 }),
481 String(v) => {
482 if let Ok(val) = (*v).parse::<i128>() {
483 NumCast::from(val)
484 } else {
485 NumCast::from((*v).parse::<f64>().ok()?)
486 }
487 },
488 StringOwned(v) => String(v.as_str()).extract(),
489 _ => None,
490 }
491 }
492
493 #[inline]
494 pub fn try_extract<T: NumCast>(&self) -> PolarsResult<T> {
495 self.extract().ok_or_else(|| {
496 polars_err!(
497 ComputeError: "could not extract number from any-value of dtype: '{:?}'",
498 self.dtype(),
499 )
500 })
501 }
502
503 pub fn is_boolean(&self) -> bool {
504 matches!(self, AnyValue::Boolean(_))
505 }
506
507 pub fn is_primitive_numeric(&self) -> bool {
508 self.is_integer() || self.is_float()
509 }
510
511 pub fn is_float(&self) -> bool {
512 matches!(self, AnyValue::Float32(_) | AnyValue::Float64(_))
513 }
514
515 pub fn is_integer(&self) -> bool {
516 self.is_signed_integer() || self.is_unsigned_integer()
517 }
518
519 pub fn is_signed_integer(&self) -> bool {
520 matches!(
521 self,
522 AnyValue::Int8(_)
523 | AnyValue::Int16(_)
524 | AnyValue::Int32(_)
525 | AnyValue::Int64(_)
526 | AnyValue::Int128(_)
527 )
528 }
529
530 pub fn is_unsigned_integer(&self) -> bool {
531 matches!(
532 self,
533 AnyValue::UInt8(_) | AnyValue::UInt16(_) | AnyValue::UInt32(_) | AnyValue::UInt64(_)
534 )
535 }
536
537 pub fn is_nan(&self) -> bool {
538 match self {
539 AnyValue::Float32(f) => f.is_nan(),
540 AnyValue::Float64(f) => f.is_nan(),
541 _ => false,
542 }
543 }
544
545 pub fn is_null(&self) -> bool {
546 matches!(self, AnyValue::Null)
547 }
548
549 pub fn is_nested_null(&self) -> bool {
550 match self {
551 AnyValue::Null => true,
552 AnyValue::List(s) => s.null_count() == s.len(),
553 #[cfg(feature = "dtype-array")]
554 AnyValue::Array(s, _) => s.null_count() == s.len(),
555 #[cfg(feature = "dtype-struct")]
556 AnyValue::Struct(_, _, _) => self._iter_struct_av().all(|av| av.is_nested_null()),
557 _ => false,
558 }
559 }
560
561 pub fn strict_cast(&self, dtype: &'a DataType) -> Option<AnyValue<'a>> {
564 let new_av = match (self, dtype) {
565 (av, DataType::UInt8) => AnyValue::UInt8(av.extract::<u8>()?),
567 (av, DataType::UInt16) => AnyValue::UInt16(av.extract::<u16>()?),
568 (av, DataType::UInt32) => AnyValue::UInt32(av.extract::<u32>()?),
569 (av, DataType::UInt64) => AnyValue::UInt64(av.extract::<u64>()?),
570 (av, DataType::Int8) => AnyValue::Int8(av.extract::<i8>()?),
571 (av, DataType::Int16) => AnyValue::Int16(av.extract::<i16>()?),
572 (av, DataType::Int32) => AnyValue::Int32(av.extract::<i32>()?),
573 (av, DataType::Int64) => AnyValue::Int64(av.extract::<i64>()?),
574 (av, DataType::Int128) => AnyValue::Int128(av.extract::<i128>()?),
575 (av, DataType::Float32) => AnyValue::Float32(av.extract::<f32>()?),
576 (av, DataType::Float64) => AnyValue::Float64(av.extract::<f64>()?),
577
578 (AnyValue::UInt8(v), DataType::Boolean) => AnyValue::Boolean(*v != u8::default()),
580 (AnyValue::UInt16(v), DataType::Boolean) => AnyValue::Boolean(*v != u16::default()),
581 (AnyValue::UInt32(v), DataType::Boolean) => AnyValue::Boolean(*v != u32::default()),
582 (AnyValue::UInt64(v), DataType::Boolean) => AnyValue::Boolean(*v != u64::default()),
583 (AnyValue::Int8(v), DataType::Boolean) => AnyValue::Boolean(*v != i8::default()),
584 (AnyValue::Int16(v), DataType::Boolean) => AnyValue::Boolean(*v != i16::default()),
585 (AnyValue::Int32(v), DataType::Boolean) => AnyValue::Boolean(*v != i32::default()),
586 (AnyValue::Int64(v), DataType::Boolean) => AnyValue::Boolean(*v != i64::default()),
587 (AnyValue::Int128(v), DataType::Boolean) => AnyValue::Boolean(*v != i128::default()),
588 (AnyValue::Float32(v), DataType::Boolean) => AnyValue::Boolean(*v != f32::default()),
589 (AnyValue::Float64(v), DataType::Boolean) => AnyValue::Boolean(*v != f64::default()),
590
591 (AnyValue::String(v), DataType::String) => AnyValue::String(v),
593 (AnyValue::StringOwned(v), DataType::String) => AnyValue::StringOwned(v.clone()),
594
595 (av, DataType::String) => {
596 let mut tmp = vec![];
597 if av.is_unsigned_integer() {
598 let val = av.extract::<u64>()?;
599 SerPrimitive::write(&mut tmp, val);
600 } else if av.is_float() {
601 let val = av.extract::<f64>()?;
602 SerPrimitive::write(&mut tmp, val);
603 } else {
604 let val = av.extract::<i64>()?;
605 SerPrimitive::write(&mut tmp, val);
606 }
607 AnyValue::StringOwned(PlSmallStr::from_str(std::str::from_utf8(&tmp).unwrap()))
608 },
609
610 (AnyValue::String(v), DataType::Binary) => AnyValue::Binary(v.as_bytes()),
612
613 #[cfg(feature = "dtype-datetime")]
615 (av, DataType::Datetime(tu, tz)) if av.is_primitive_numeric() => {
616 AnyValue::Datetime(av.extract::<i64>()?, *tu, tz.as_ref())
617 },
618 #[cfg(all(feature = "dtype-datetime", feature = "dtype-date"))]
619 (AnyValue::Date(v), DataType::Datetime(tu, _)) => AnyValue::Datetime(
620 match tu {
621 TimeUnit::Nanoseconds => (*v as i64) * NS_IN_DAY,
622 TimeUnit::Microseconds => (*v as i64) * US_IN_DAY,
623 TimeUnit::Milliseconds => (*v as i64) * MS_IN_DAY,
624 },
625 *tu,
626 None,
627 ),
628 #[cfg(feature = "dtype-datetime")]
629 (
630 AnyValue::Datetime(v, tu, _) | AnyValue::DatetimeOwned(v, tu, _),
631 DataType::Datetime(tu_r, tz_r),
632 ) => AnyValue::Datetime(
633 match (tu, tu_r) {
634 (TimeUnit::Nanoseconds, TimeUnit::Microseconds) => *v / 1_000i64,
635 (TimeUnit::Nanoseconds, TimeUnit::Milliseconds) => *v / 1_000_000i64,
636 (TimeUnit::Microseconds, TimeUnit::Nanoseconds) => *v * 1_000i64,
637 (TimeUnit::Microseconds, TimeUnit::Milliseconds) => *v / 1_000i64,
638 (TimeUnit::Milliseconds, TimeUnit::Microseconds) => *v * 1_000i64,
639 (TimeUnit::Milliseconds, TimeUnit::Nanoseconds) => *v * 1_000_000i64,
640 _ => *v,
641 },
642 *tu_r,
643 tz_r.as_ref(),
644 ),
645
646 #[cfg(feature = "dtype-date")]
648 (av, DataType::Date) if av.is_primitive_numeric() => {
649 AnyValue::Date(av.extract::<i32>()?)
650 },
651 #[cfg(all(feature = "dtype-date", feature = "dtype-datetime"))]
652 (AnyValue::Datetime(v, tu, _) | AnyValue::DatetimeOwned(v, tu, _), DataType::Date) => {
653 AnyValue::Date(match tu {
654 TimeUnit::Nanoseconds => *v / NS_IN_DAY,
655 TimeUnit::Microseconds => *v / US_IN_DAY,
656 TimeUnit::Milliseconds => *v / MS_IN_DAY,
657 } as i32)
658 },
659
660 #[cfg(feature = "dtype-time")]
662 (av, DataType::Time) if av.is_primitive_numeric() => {
663 AnyValue::Time(av.extract::<i64>()?)
664 },
665 #[cfg(all(feature = "dtype-time", feature = "dtype-datetime"))]
666 (AnyValue::Datetime(v, tu, _) | AnyValue::DatetimeOwned(v, tu, _), DataType::Time) => {
667 AnyValue::Time(match tu {
668 TimeUnit::Nanoseconds => *v % NS_IN_DAY,
669 TimeUnit::Microseconds => (*v % US_IN_DAY) * 1_000i64,
670 TimeUnit::Milliseconds => (*v % MS_IN_DAY) * 1_000_000i64,
671 })
672 },
673
674 #[cfg(feature = "dtype-duration")]
676 (av, DataType::Duration(tu)) if av.is_primitive_numeric() => {
677 AnyValue::Duration(av.extract::<i64>()?, *tu)
678 },
679 #[cfg(all(feature = "dtype-duration", feature = "dtype-time"))]
680 (AnyValue::Time(v), DataType::Duration(tu)) => AnyValue::Duration(
681 match *tu {
682 TimeUnit::Nanoseconds => *v,
683 TimeUnit::Microseconds => *v / 1_000i64,
684 TimeUnit::Milliseconds => *v / 1_000_000i64,
685 },
686 *tu,
687 ),
688 #[cfg(feature = "dtype-duration")]
689 (AnyValue::Duration(v, tu), DataType::Duration(tu_r)) => AnyValue::Duration(
690 match (tu, tu_r) {
691 (_, _) if tu == tu_r => *v,
692 (TimeUnit::Nanoseconds, TimeUnit::Microseconds) => *v / 1_000i64,
693 (TimeUnit::Nanoseconds, TimeUnit::Milliseconds) => *v / 1_000_000i64,
694 (TimeUnit::Microseconds, TimeUnit::Nanoseconds) => *v * 1_000i64,
695 (TimeUnit::Microseconds, TimeUnit::Milliseconds) => *v / 1_000i64,
696 (TimeUnit::Milliseconds, TimeUnit::Microseconds) => *v * 1_000i64,
697 (TimeUnit::Milliseconds, TimeUnit::Nanoseconds) => *v * 1_000_000i64,
698 _ => *v,
699 },
700 *tu_r,
701 ),
702
703 #[cfg(feature = "dtype-decimal")]
705 (av, DataType::Decimal(prec, scale)) if av.is_integer() => {
706 let value = av.try_extract::<i128>().unwrap();
707 let scale = scale.unwrap_or(0);
708 let factor = 10_i128.pow(scale as _); let converted = value.checked_mul(factor)?;
710
711 let prec = prec.unwrap_or(38) as u32;
713 let num_digits = (converted.abs() as f64).log10().ceil() as u32;
714 if num_digits > prec {
715 return None;
716 }
717
718 AnyValue::Decimal(converted, scale)
719 },
720 #[cfg(feature = "dtype-decimal")]
721 (AnyValue::Decimal(value, scale_av), DataType::Decimal(_, scale)) => {
722 let Some(scale) = scale else {
723 return Some(self.clone());
724 };
725 let scale_diff = scale.checked_sub(*scale_av)?;
727 let factor = 10_i128.pow(scale_diff as _); let converted = value.checked_mul(factor)?;
729 AnyValue::Decimal(converted, *scale)
730 },
731
732 (av, dtype) if av.dtype() == *dtype => self.clone(),
734
735 _ => return None,
736 };
737 Some(new_av)
738 }
739
740 pub fn try_strict_cast(&self, dtype: &'a DataType) -> PolarsResult<AnyValue<'a>> {
743 self.strict_cast(dtype).ok_or_else(
744 || polars_err!(ComputeError: "cannot cast any-value {:?} to dtype '{}'", self, dtype),
745 )
746 }
747
748 pub fn cast(&self, dtype: &'a DataType) -> AnyValue<'a> {
749 match self.strict_cast(dtype) {
750 Some(av) => av,
751 None => AnyValue::Null,
752 }
753 }
754
755 pub fn idx(&self) -> IdxSize {
756 match self {
757 #[cfg(not(feature = "bigidx"))]
758 Self::UInt32(v) => *v,
759 #[cfg(feature = "bigidx")]
760 Self::UInt64(v) => *v,
761 _ => panic!("expected index type found {self:?}"),
762 }
763 }
764
765 pub fn str_value(&self) -> Cow<'a, str> {
766 match self {
767 Self::String(s) => Cow::Borrowed(s),
768 Self::StringOwned(s) => Cow::Owned(s.to_string()),
769 Self::Null => Cow::Borrowed("null"),
770 #[cfg(feature = "dtype-categorical")]
771 Self::Categorical(idx, rev, arr) | AnyValue::Enum(idx, rev, arr) => {
772 if arr.is_null() {
773 Cow::Borrowed(rev.get(*idx))
774 } else {
775 unsafe { Cow::Borrowed(arr.deref_unchecked().value(*idx as usize)) }
776 }
777 },
778 #[cfg(feature = "dtype-categorical")]
779 Self::CategoricalOwned(idx, rev, arr) | AnyValue::EnumOwned(idx, rev, arr) => {
780 if arr.is_null() {
781 Cow::Owned(rev.get(*idx).to_string())
782 } else {
783 unsafe { Cow::Borrowed(arr.deref_unchecked().value(*idx as usize)) }
784 }
785 },
786 av => Cow::Owned(av.to_string()),
787 }
788 }
789}
790
791impl From<AnyValue<'_>> for DataType {
792 fn from(value: AnyValue<'_>) -> Self {
793 value.dtype()
794 }
795}
796
797impl<'a> From<&AnyValue<'a>> for DataType {
798 fn from(value: &AnyValue<'a>) -> Self {
799 value.dtype()
800 }
801}
802
803impl AnyValue<'_> {
804 pub fn hash_impl<H: Hasher>(&self, state: &mut H, cheap: bool) {
805 use AnyValue::*;
806 std::mem::discriminant(self).hash(state);
807 match self {
808 Int8(v) => v.hash(state),
809 Int16(v) => v.hash(state),
810 Int32(v) => v.hash(state),
811 Int64(v) => v.hash(state),
812 Int128(v) => feature_gated!("dtype-i128", v.hash(state)),
813 UInt8(v) => v.hash(state),
814 UInt16(v) => v.hash(state),
815 UInt32(v) => v.hash(state),
816 UInt64(v) => v.hash(state),
817 String(v) => v.hash(state),
818 StringOwned(v) => v.hash(state),
819 Float32(v) => v.to_ne_bytes().hash(state),
820 Float64(v) => v.to_ne_bytes().hash(state),
821 Binary(v) => v.hash(state),
822 BinaryOwned(v) => v.hash(state),
823 Boolean(v) => v.hash(state),
824 List(v) => {
825 if !cheap {
826 Hash::hash(&Wrap(v.clone()), state)
827 }
828 },
829 #[cfg(feature = "dtype-array")]
830 Array(v, width) => {
831 if !cheap {
832 Hash::hash(&Wrap(v.clone()), state)
833 }
834 width.hash(state)
835 },
836 #[cfg(feature = "dtype-date")]
837 Date(v) => v.hash(state),
838 #[cfg(feature = "dtype-datetime")]
839 Datetime(v, tu, tz) => {
840 v.hash(state);
841 tu.hash(state);
842 tz.hash(state);
843 },
844 #[cfg(feature = "dtype-datetime")]
845 DatetimeOwned(v, tu, tz) => {
846 v.hash(state);
847 tu.hash(state);
848 tz.hash(state);
849 },
850 #[cfg(feature = "dtype-duration")]
851 Duration(v, tz) => {
852 v.hash(state);
853 tz.hash(state);
854 },
855 #[cfg(feature = "dtype-time")]
856 Time(v) => v.hash(state),
857 #[cfg(feature = "dtype-categorical")]
858 Categorical(v, _, _)
859 | CategoricalOwned(v, _, _)
860 | Enum(v, _, _)
861 | EnumOwned(v, _, _) => v.hash(state),
862 #[cfg(feature = "object")]
863 Object(_) => {},
864 #[cfg(feature = "object")]
865 ObjectOwned(_) => {},
866 #[cfg(feature = "dtype-struct")]
867 Struct(_, _, _) => {
868 if !cheap {
869 let mut buf = vec![];
870 self._materialize_struct_av(&mut buf);
871 buf.hash(state)
872 }
873 },
874 #[cfg(feature = "dtype-struct")]
875 StructOwned(v) => v.0.hash(state),
876 #[cfg(feature = "dtype-decimal")]
877 Decimal(v, k) => {
878 v.hash(state);
879 k.hash(state);
880 },
881 Null => {},
882 }
883 }
884}
885
886impl Hash for AnyValue<'_> {
887 fn hash<H: Hasher>(&self, state: &mut H) {
888 self.hash_impl(state, false)
889 }
890}
891
892impl Eq for AnyValue<'_> {}
893
894impl<'a, T> From<Option<T>> for AnyValue<'a>
895where
896 T: Into<AnyValue<'a>>,
897{
898 #[inline]
899 fn from(a: Option<T>) -> Self {
900 match a {
901 None => AnyValue::Null,
902 Some(v) => v.into(),
903 }
904 }
905}
906
907impl<'a> AnyValue<'a> {
/// Reinterprets an `Int32` as a `Date`; `Null` stays `Null`.
///
/// The `Int32` arm only exists with the `dtype-date` feature.
///
/// # Panics
///
/// Panics for every other variant.
#[cfg(any(feature = "dtype-date", feature = "dtype-datetime"))]
pub(crate) fn as_date(&self) -> AnyValue<'static> {
    match *self {
        #[cfg(feature = "dtype-date")]
        AnyValue::Int32(days) => AnyValue::Date(days),
        AnyValue::Null => AnyValue::Null,
        ref other => panic!("cannot create date from other type. dtype: {}", other),
    }
}
/// Reinterprets an `Int64` as a `Datetime` with the given unit and time zone; `Null`
/// stays `Null`.
///
/// # Panics
///
/// Panics for every other variant.
#[cfg(feature = "dtype-datetime")]
pub(crate) fn as_datetime(&self, tu: TimeUnit, tz: Option<&'a TimeZone>) -> AnyValue<'a> {
    match self {
        AnyValue::Int64(v) => AnyValue::Datetime(*v, tu, tz),
        AnyValue::Null => AnyValue::Null,
        // The message used to say "date" here (copy-paste from `as_date`).
        dt => panic!("cannot create datetime from other type. dtype: {dt}"),
    }
}
925
/// Reinterprets an `Int64` as a `Duration` with the given unit; `Null` stays `Null`.
///
/// # Panics
///
/// Panics for every other variant.
#[cfg(feature = "dtype-duration")]
pub(crate) fn as_duration(&self, tu: TimeUnit) -> AnyValue<'static> {
    match self {
        AnyValue::Int64(v) => AnyValue::Duration(*v, tu),
        AnyValue::Null => AnyValue::Null,
        // The message used to say "date" here (copy-paste from `as_date`).
        dt => panic!("cannot create duration from other type. dtype: {dt}"),
    }
}
934
/// Reinterprets an `Int64` as a `Time`; `Null` stays `Null`.
///
/// # Panics
///
/// Panics for every other variant.
#[cfg(feature = "dtype-time")]
pub(crate) fn as_time(&self) -> AnyValue<'static> {
    match self {
        AnyValue::Int64(v) => AnyValue::Time(*v),
        AnyValue::Null => AnyValue::Null,
        // The message used to say "date" here (copy-paste from `as_date`).
        dt => panic!("cannot create time from other type. dtype: {dt}"),
    }
}
943
944 pub(crate) fn to_i128(&self) -> Option<i128> {
945 match self {
946 AnyValue::UInt8(v) => Some((*v).into()),
947 AnyValue::UInt16(v) => Some((*v).into()),
948 AnyValue::UInt32(v) => Some((*v).into()),
949 AnyValue::UInt64(v) => Some((*v).into()),
950 AnyValue::Int8(v) => Some((*v).into()),
951 AnyValue::Int16(v) => Some((*v).into()),
952 AnyValue::Int32(v) => Some((*v).into()),
953 AnyValue::Int64(v) => Some((*v).into()),
954 AnyValue::Int128(v) => Some(*v),
955 _ => None,
956 }
957 }
958
959 pub(crate) fn to_f64(&self) -> Option<f64> {
960 match self {
961 AnyValue::Float32(v) => Some((*v).into()),
962 AnyValue::Float64(v) => Some(*v),
963 _ => None,
964 }
965 }
966
967 #[must_use]
968 pub fn add(&self, rhs: &AnyValue) -> AnyValue<'static> {
969 use AnyValue::*;
970 match (self, rhs) {
971 (Null, r) => r.clone().into_static(),
972 (l, Null) => l.clone().into_static(),
973 (Int32(l), Int32(r)) => Int32(l + r),
974 (Int64(l), Int64(r)) => Int64(l + r),
975 (UInt32(l), UInt32(r)) => UInt32(l + r),
976 (UInt64(l), UInt64(r)) => UInt64(l + r),
977 (Float32(l), Float32(r)) => Float32(l + r),
978 (Float64(l), Float64(r)) => Float64(l + r),
979 #[cfg(feature = "dtype-duration")]
980 (Duration(l, lu), Duration(r, ru)) => {
981 if lu != ru {
982 unimplemented!("adding durations with different units is not supported here");
983 }
984
985 Duration(l + r, *lu)
986 },
987 #[cfg(feature = "dtype-decimal")]
988 (Decimal(l, ls), Decimal(r, rs)) => {
989 if ls != rs {
990 unimplemented!("adding decimals with different scales is not supported here");
991 }
992
993 Decimal(l + r, *ls)
994 },
995 _ => unimplemented!(),
996 }
997 }
998
999 #[inline]
1000 pub fn as_borrowed(&self) -> AnyValue<'_> {
1001 match self {
1002 AnyValue::BinaryOwned(data) => AnyValue::Binary(data),
1003 AnyValue::StringOwned(data) => AnyValue::String(data.as_str()),
1004 #[cfg(feature = "dtype-datetime")]
1005 AnyValue::DatetimeOwned(v, tu, tz) => {
1006 AnyValue::Datetime(*v, *tu, tz.as_ref().map(AsRef::as_ref))
1007 },
1008 #[cfg(feature = "dtype-categorical")]
1009 AnyValue::CategoricalOwned(v, rev, arr) => {
1010 AnyValue::Categorical(*v, rev.as_ref(), *arr)
1011 },
1012 #[cfg(feature = "dtype-categorical")]
1013 AnyValue::EnumOwned(v, rev, arr) => AnyValue::Enum(*v, rev.as_ref(), *arr),
1014 av => av.clone(),
1015 }
1016 }
1017
1018 #[inline]
1021 pub fn into_static(self) -> AnyValue<'static> {
1022 use AnyValue::*;
1023 match self {
1024 Null => Null,
1025 Int8(v) => Int8(v),
1026 Int16(v) => Int16(v),
1027 Int32(v) => Int32(v),
1028 Int64(v) => Int64(v),
1029 Int128(v) => Int128(v),
1030 UInt8(v) => UInt8(v),
1031 UInt16(v) => UInt16(v),
1032 UInt32(v) => UInt32(v),
1033 UInt64(v) => UInt64(v),
1034 Boolean(v) => Boolean(v),
1035 Float32(v) => Float32(v),
1036 Float64(v) => Float64(v),
1037 #[cfg(feature = "dtype-datetime")]
1038 Datetime(v, tu, tz) => DatetimeOwned(v, tu, tz.map(|v| Arc::new(v.clone()))),
1039 #[cfg(feature = "dtype-datetime")]
1040 DatetimeOwned(v, tu, tz) => DatetimeOwned(v, tu, tz),
1041 #[cfg(feature = "dtype-date")]
1042 Date(v) => Date(v),
1043 #[cfg(feature = "dtype-duration")]
1044 Duration(v, tu) => Duration(v, tu),
1045 #[cfg(feature = "dtype-time")]
1046 Time(v) => Time(v),
1047 List(v) => List(v),
1048 #[cfg(feature = "dtype-array")]
1049 Array(s, size) => Array(s, size),
1050 String(v) => StringOwned(PlSmallStr::from_str(v)),
1051 StringOwned(v) => StringOwned(v),
1052 Binary(v) => BinaryOwned(v.to_vec()),
1053 BinaryOwned(v) => BinaryOwned(v),
1054 #[cfg(feature = "object")]
1055 Object(v) => ObjectOwned(OwnedObject(v.to_boxed())),
1056 #[cfg(feature = "dtype-struct")]
1057 Struct(idx, arr, fields) => {
1058 let avs = struct_to_avs_static(idx, arr, fields);
1059 StructOwned(Box::new((avs, fields.to_vec())))
1060 },
1061 #[cfg(feature = "dtype-struct")]
1062 StructOwned(payload) => {
1063 let av = StructOwned(payload);
1064 unsafe { std::mem::transmute::<AnyValue<'a>, AnyValue<'static>>(av) }
1066 },
1067 #[cfg(feature = "object")]
1068 ObjectOwned(payload) => {
1069 let av = ObjectOwned(payload);
1070 unsafe { std::mem::transmute::<AnyValue<'a>, AnyValue<'static>>(av) }
1072 },
1073 #[cfg(feature = "dtype-decimal")]
1074 Decimal(val, scale) => Decimal(val, scale),
1075 #[cfg(feature = "dtype-categorical")]
1076 Categorical(v, rev, arr) => CategoricalOwned(v, Arc::new(rev.clone()), arr),
1077 #[cfg(feature = "dtype-categorical")]
1078 CategoricalOwned(v, rev, arr) => CategoricalOwned(v, rev, arr),
1079 #[cfg(feature = "dtype-categorical")]
1080 Enum(v, rev, arr) => EnumOwned(v, Arc::new(rev.clone()), arr),
1081 #[cfg(feature = "dtype-categorical")]
1082 EnumOwned(v, rev, arr) => EnumOwned(v, rev, arr),
1083 }
1084 }
1085
1086 pub fn get_str(&self) -> Option<&str> {
1088 match self {
1089 AnyValue::String(s) => Some(s),
1090 AnyValue::StringOwned(s) => Some(s.as_str()),
1091 #[cfg(feature = "dtype-categorical")]
1092 AnyValue::Categorical(idx, rev, arr) | AnyValue::Enum(idx, rev, arr) => {
1093 let s = if arr.is_null() {
1094 rev.get(*idx)
1095 } else {
1096 unsafe { arr.deref_unchecked().value(*idx as usize) }
1097 };
1098 Some(s)
1099 },
1100 #[cfg(feature = "dtype-categorical")]
1101 AnyValue::CategoricalOwned(idx, rev, arr) | AnyValue::EnumOwned(idx, rev, arr) => {
1102 let s = if arr.is_null() {
1103 rev.get(*idx)
1104 } else {
1105 unsafe { arr.deref_unchecked().value(*idx as usize) }
1106 };
1107 Some(s)
1108 },
1109 _ => None,
1110 }
1111 }
1112}
1113
1114impl<'a> From<AnyValue<'a>> for Option<i64> {
1115 fn from(val: AnyValue<'a>) -> Self {
1116 use AnyValue::*;
1117 match val {
1118 Null => None,
1119 Int32(v) => Some(v as i64),
1120 Int64(v) => Some(v),
1121 UInt32(v) => Some(v as i64),
1122 _ => todo!(),
1123 }
1124 }
1125}
1126
1127impl AnyValue<'_> {
1128 #[inline]
1129 pub fn eq_missing(&self, other: &Self, null_equal: bool) -> bool {
1130 fn struct_owned_value_iter<'a>(
1131 v: &'a (Vec<AnyValue<'_>>, Vec<Field>),
1132 ) -> impl ExactSizeIterator<Item = AnyValue<'a>> {
1133 v.0.iter().map(|v| v.as_borrowed())
1134 }
1135 fn struct_value_iter(
1136 idx: usize,
1137 arr: &StructArray,
1138 ) -> impl ExactSizeIterator<Item = AnyValue<'_>> {
1139 assert!(idx < arr.len());
1140
1141 arr.values().iter().map(move |field_arr| unsafe {
1142 field_arr.get_unchecked(idx)
1146 })
1147 }
1148
1149 fn struct_eq_missing<'a>(
1150 l: impl ExactSizeIterator<Item = AnyValue<'a>>,
1151 r: impl ExactSizeIterator<Item = AnyValue<'a>>,
1152 null_equal: bool,
1153 ) -> bool {
1154 if l.len() != r.len() {
1155 return false;
1156 }
1157
1158 l.zip(r).all(|(lv, rv)| lv.eq_missing(&rv, null_equal))
1159 }
1160
1161 use AnyValue::*;
1162 match (self, other) {
1163 (StringOwned(l), r) => AnyValue::String(l.as_str()) == *r,
1165 (BinaryOwned(l), r) => AnyValue::Binary(l.as_slice()) == *r,
1166 #[cfg(feature = "object")]
1167 (ObjectOwned(l), r) => AnyValue::Object(&*l.0) == *r,
1168 (l, StringOwned(r)) => *l == AnyValue::String(r.as_str()),
1169 (l, BinaryOwned(r)) => *l == AnyValue::Binary(r.as_slice()),
1170 #[cfg(feature = "object")]
1171 (l, ObjectOwned(r)) => *l == AnyValue::Object(&*r.0),
1172 #[cfg(feature = "dtype-datetime")]
1173 (DatetimeOwned(lv, ltu, ltz), r) => {
1174 Datetime(*lv, *ltu, ltz.as_ref().map(|v| v.as_ref())) == *r
1175 },
1176 #[cfg(feature = "dtype-datetime")]
1177 (l, DatetimeOwned(rv, rtu, rtz)) => {
1178 *l == Datetime(*rv, *rtu, rtz.as_ref().map(|v| v.as_ref()))
1179 },
1180 #[cfg(feature = "dtype-categorical")]
1181 (CategoricalOwned(lv, lrev, larr), r) => Categorical(*lv, lrev.as_ref(), *larr) == *r,
1182 #[cfg(feature = "dtype-categorical")]
1183 (l, CategoricalOwned(rv, rrev, rarr)) => *l == Categorical(*rv, rrev.as_ref(), *rarr),
1184 #[cfg(feature = "dtype-categorical")]
1185 (EnumOwned(lv, lrev, larr), r) => Enum(*lv, lrev.as_ref(), *larr) == *r,
1186 #[cfg(feature = "dtype-categorical")]
1187 (l, EnumOwned(rv, rrev, rarr)) => *l == Enum(*rv, rrev.as_ref(), *rarr),
1188
1189 (Null, Null) => null_equal,
1191 (Null, _) => false,
1192 (_, Null) => false,
1193
1194 (Boolean(l), Boolean(r)) => *l == *r,
1196 (UInt8(l), UInt8(r)) => *l == *r,
1197 (UInt16(l), UInt16(r)) => *l == *r,
1198 (UInt32(l), UInt32(r)) => *l == *r,
1199 (UInt64(l), UInt64(r)) => *l == *r,
1200 (Int8(l), Int8(r)) => *l == *r,
1201 (Int16(l), Int16(r)) => *l == *r,
1202 (Int32(l), Int32(r)) => *l == *r,
1203 (Int64(l), Int64(r)) => *l == *r,
1204 (Int128(l), Int128(r)) => *l == *r,
1205 (Float32(l), Float32(r)) => l.to_total_ord() == r.to_total_ord(),
1206 (Float64(l), Float64(r)) => l.to_total_ord() == r.to_total_ord(),
1207 (String(l), String(r)) => l == r,
1208 (Binary(l), Binary(r)) => l == r,
1209 #[cfg(feature = "dtype-time")]
1210 (Time(l), Time(r)) => *l == *r,
1211 #[cfg(all(feature = "dtype-datetime", feature = "dtype-date"))]
1212 (Date(l), Date(r)) => *l == *r,
1213 #[cfg(all(feature = "dtype-datetime", feature = "dtype-date"))]
1214 (Datetime(l, tul, tzl), Datetime(r, tur, tzr)) => {
1215 *l == *r && *tul == *tur && tzl == tzr
1216 },
1217 (List(l), List(r)) => l == r,
1218 #[cfg(feature = "dtype-categorical")]
1219 (Categorical(idx_l, rev_l, ptr_l), Categorical(idx_r, rev_r, ptr_r)) => {
1220 if !same_revmap(rev_l, *ptr_l, rev_r, *ptr_r) {
1221 unimplemented!(
1224 "comparing categoricals with different revmaps is not supported"
1225 );
1226 }
1227
1228 idx_l == idx_r
1229 },
1230 #[cfg(feature = "dtype-categorical")]
1231 (Enum(idx_l, rev_l, ptr_l), Enum(idx_r, rev_r, ptr_r)) => {
1232 if !same_revmap(rev_l, *ptr_l, rev_r, *ptr_r) {
1235 unimplemented!("comparing enums with different revmaps is not supported");
1236 }
1237
1238 idx_l == idx_r
1239 },
1240 #[cfg(feature = "dtype-duration")]
1241 (Duration(l, tu_l), Duration(r, tu_r)) => l == r && tu_l == tu_r,
1242
1243 #[cfg(feature = "dtype-struct")]
1244 (StructOwned(l), StructOwned(r)) => struct_eq_missing(
1245 struct_owned_value_iter(l.as_ref()),
1246 struct_owned_value_iter(r.as_ref()),
1247 null_equal,
1248 ),
1249 #[cfg(feature = "dtype-struct")]
1250 (StructOwned(l), Struct(idx, arr, _)) => struct_eq_missing(
1251 struct_owned_value_iter(l.as_ref()),
1252 struct_value_iter(*idx, arr),
1253 null_equal,
1254 ),
1255 #[cfg(feature = "dtype-struct")]
1256 (Struct(idx, arr, _), StructOwned(r)) => struct_eq_missing(
1257 struct_value_iter(*idx, arr),
1258 struct_owned_value_iter(r.as_ref()),
1259 null_equal,
1260 ),
1261 #[cfg(feature = "dtype-struct")]
1262 (Struct(l_idx, l_arr, _), Struct(r_idx, r_arr, _)) => struct_eq_missing(
1263 struct_value_iter(*l_idx, l_arr),
1264 struct_value_iter(*r_idx, r_arr),
1265 null_equal,
1266 ),
1267 #[cfg(feature = "dtype-decimal")]
1268 (Decimal(l_v, l_s), Decimal(r_v, r_s)) => {
1269 if l_s == r_s && l_v == r_v || *l_v == 0 && *r_v == 0 {
1271 true
1272 } else if l_s < r_s {
1273 if let Some(lhs) = (|| {
1275 let exp = i128::checked_pow(10, (r_s - l_s).try_into().ok()?)?;
1276 l_v.checked_mul(exp)
1277 })() {
1278 lhs == *r_v
1279 } else {
1280 false
1281 }
1282 } else {
1283 if let Some(rhs) = (|| {
1285 let exp = i128::checked_pow(10, (l_s - r_s).try_into().ok()?)?;
1286 r_v.checked_mul(exp)
1287 })() {
1288 *l_v == rhs
1289 } else {
1290 false
1291 }
1292 }
1293 },
1294 #[cfg(feature = "object")]
1295 (Object(l), Object(r)) => l == r,
1296 #[cfg(feature = "dtype-array")]
1297 (Array(l_values, l_size), Array(r_values, r_size)) => {
1298 if l_size != r_size {
1299 return false;
1300 }
1301
1302 debug_assert_eq!(l_values.len(), *l_size);
1303 debug_assert_eq!(r_values.len(), *r_size);
1304
1305 let mut is_equal = true;
1306 for i in 0..*l_size {
1307 let l = unsafe { l_values.get_unchecked(i) };
1308 let r = unsafe { r_values.get_unchecked(i) };
1309
1310 is_equal &= l.eq_missing(&r, null_equal);
1311 }
1312 is_equal
1313 },
1314
1315 (l, r) if l.to_i128().is_some() && r.to_i128().is_some() => l.to_i128() == r.to_i128(),
1316 (l, r) if l.to_f64().is_some() && r.to_f64().is_some() => {
1317 l.to_f64().unwrap().to_total_ord() == r.to_f64().unwrap().to_total_ord()
1318 },
1319
1320 (_, _) => {
1321 unimplemented!(
1322 "scalar eq_missing for mixed dtypes {self:?} and {other:?} is not supported"
1323 )
1324 },
1325 }
1326 }
1327}
1328
1329impl PartialEq for AnyValue<'_> {
1330 #[inline]
1331 fn eq(&self, other: &Self) -> bool {
1332 self.eq_missing(other, true)
1333 }
1334}
1335
1336impl PartialOrd for AnyValue<'_> {
1337 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1339 use AnyValue::*;
1340 match (self, &other) {
1341 (StringOwned(l), r) => AnyValue::String(l.as_str()).partial_cmp(r),
1343 (BinaryOwned(l), r) => AnyValue::Binary(l.as_slice()).partial_cmp(r),
1344 #[cfg(feature = "object")]
1345 (ObjectOwned(l), r) => AnyValue::Object(&*l.0).partial_cmp(r),
1346 (l, StringOwned(r)) => l.partial_cmp(&AnyValue::String(r.as_str())),
1347 (l, BinaryOwned(r)) => l.partial_cmp(&AnyValue::Binary(r.as_slice())),
1348 #[cfg(feature = "object")]
1349 (l, ObjectOwned(r)) => l.partial_cmp(&AnyValue::Object(&*r.0)),
1350 #[cfg(feature = "dtype-datetime")]
1351 (DatetimeOwned(lv, ltu, ltz), r) => {
1352 Datetime(*lv, *ltu, ltz.as_ref().map(|v| v.as_ref())).partial_cmp(r)
1353 },
1354 #[cfg(feature = "dtype-datetime")]
1355 (l, DatetimeOwned(rv, rtu, rtz)) => {
1356 l.partial_cmp(&Datetime(*rv, *rtu, rtz.as_ref().map(|v| v.as_ref())))
1357 },
1358 #[cfg(feature = "dtype-categorical")]
1359 (CategoricalOwned(lv, lrev, larr), r) => {
1360 Categorical(*lv, lrev.as_ref(), *larr).partial_cmp(r)
1361 },
1362 #[cfg(feature = "dtype-categorical")]
1363 (l, CategoricalOwned(rv, rrev, rarr)) => {
1364 l.partial_cmp(&Categorical(*rv, rrev.as_ref(), *rarr))
1365 },
1366 #[cfg(feature = "dtype-categorical")]
1367 (EnumOwned(lv, lrev, larr), r) => Enum(*lv, lrev.as_ref(), *larr).partial_cmp(r),
1368 #[cfg(feature = "dtype-categorical")]
1369 (l, EnumOwned(rv, rrev, rarr)) => l.partial_cmp(&Enum(*rv, rrev.as_ref(), *rarr)),
1370
1371 (Null, Null) => Some(Ordering::Equal),
1373 (Null, _) => Some(Ordering::Less),
1374 (_, Null) => Some(Ordering::Greater),
1375
1376 (Boolean(l), Boolean(r)) => l.partial_cmp(r),
1378 (UInt8(l), UInt8(r)) => l.partial_cmp(r),
1379 (UInt16(l), UInt16(r)) => l.partial_cmp(r),
1380 (UInt32(l), UInt32(r)) => l.partial_cmp(r),
1381 (UInt64(l), UInt64(r)) => l.partial_cmp(r),
1382 (Int8(l), Int8(r)) => l.partial_cmp(r),
1383 (Int16(l), Int16(r)) => l.partial_cmp(r),
1384 (Int32(l), Int32(r)) => l.partial_cmp(r),
1385 (Int64(l), Int64(r)) => l.partial_cmp(r),
1386 (Int128(l), Int128(r)) => l.partial_cmp(r),
1387 (Float32(l), Float32(r)) => Some(l.tot_cmp(r)),
1388 (Float64(l), Float64(r)) => Some(l.tot_cmp(r)),
1389 (String(l), String(r)) => l.partial_cmp(r),
1390 (Binary(l), Binary(r)) => l.partial_cmp(r),
1391 #[cfg(feature = "dtype-date")]
1392 (Date(l), Date(r)) => l.partial_cmp(r),
1393 #[cfg(feature = "dtype-datetime")]
1394 (Datetime(lt, lu, lz), Datetime(rt, ru, rz)) => {
1395 if lu != ru || lz != rz {
1396 unimplemented!(
1397 "comparing datetimes with different units or timezones is not supported"
1398 );
1399 }
1400
1401 lt.partial_cmp(rt)
1402 },
1403 #[cfg(feature = "dtype-duration")]
1404 (Duration(lt, lu), Duration(rt, ru)) => {
1405 if lu != ru {
1406 unimplemented!("comparing durations with different units is not supported");
1407 }
1408
1409 lt.partial_cmp(rt)
1410 },
1411 #[cfg(feature = "dtype-time")]
1412 (Time(l), Time(r)) => l.partial_cmp(r),
1413 #[cfg(feature = "dtype-categorical")]
1414 (Categorical(..), Categorical(..)) => {
1415 unimplemented!(
1416 "can't order categoricals as AnyValues, dtype for ordering is needed"
1417 )
1418 },
1419 #[cfg(feature = "dtype-categorical")]
1420 (Enum(..), Enum(..)) => {
1421 unimplemented!("can't order enums as AnyValues, dtype for ordering is needed")
1422 },
1423 (List(_), List(_)) => {
1424 unimplemented!("ordering for List dtype is not supported")
1425 },
1426 #[cfg(feature = "dtype-array")]
1427 (Array(..), Array(..)) => {
1428 unimplemented!("ordering for Array dtype is not supported")
1429 },
1430 #[cfg(feature = "object")]
1431 (Object(_), Object(_)) => {
1432 unimplemented!("ordering for Object dtype is not supported")
1433 },
1434 #[cfg(feature = "dtype-struct")]
1435 (StructOwned(_), StructOwned(_))
1436 | (StructOwned(_), Struct(..))
1437 | (Struct(..), StructOwned(_))
1438 | (Struct(..), Struct(..)) => {
1439 unimplemented!("ordering for Struct dtype is not supported")
1440 },
1441 #[cfg(feature = "dtype-decimal")]
1442 (Decimal(l_v, l_s), Decimal(r_v, r_s)) => {
1443 if l_s == r_s && l_v == r_v || *l_v == 0 && *r_v == 0 {
1445 Some(Ordering::Equal)
1446 } else if l_s < r_s {
1447 if let Some(lhs) = (|| {
1449 let exp = i128::checked_pow(10, (r_s - l_s).try_into().ok()?)?;
1450 l_v.checked_mul(exp)
1451 })() {
1452 lhs.partial_cmp(r_v)
1453 } else {
1454 Some(Ordering::Greater)
1455 }
1456 } else {
1457 if let Some(rhs) = (|| {
1459 let exp = i128::checked_pow(10, (l_s - r_s).try_into().ok()?)?;
1460 r_v.checked_mul(exp)
1461 })() {
1462 l_v.partial_cmp(&rhs)
1463 } else {
1464 Some(Ordering::Less)
1465 }
1466 }
1467 },
1468
1469 (_, _) => {
1470 unimplemented!(
1471 "scalar ordering for mixed dtypes {self:?} and {other:?} is not supported"
1472 )
1473 },
1474 }
1475 }
1476}
1477
1478impl TotalEq for AnyValue<'_> {
1479 #[inline]
1480 fn tot_eq(&self, other: &Self) -> bool {
1481 self.eq_missing(other, true)
1482 }
1483}
1484
/// Collects row `idx` of a struct array as owned values, one per field.
///
/// Each field array is paired with the `Field` at the same position, whose
/// dtype is used to decode the value before it is made `'static`.
///
/// # Panics
///
/// Panics if `idx` is not below `arr.len()`.
#[cfg(feature = "dtype-struct")]
fn struct_to_avs_static(idx: usize, arr: &StructArray, fields: &[Field]) -> Vec<AnyValue<'static>> {
    assert!(idx < arr.len());

    let columns = arr.values();
    debug_assert_eq!(columns.len(), fields.len());

    let mut row = Vec::with_capacity(columns.len().min(fields.len()));
    for (column, field) in columns.iter().zip(fields) {
        // SAFETY: `idx < arr.len()` is asserted above; this presumes every
        // field array is as long as the struct array and matches `field.dtype`.
        let value = unsafe { arr_to_any_value(column.as_ref(), idx, &field.dtype) };
        row.push(value.into_static());
    }
    row
}
1503
/// Decides whether two categorical values share the same category mapping.
///
/// When at least one array pointer is non-null the pointers themselves are
/// compared. When both are null, the rev-maps must be of the same kind
/// (both global or both local) and carry equal ids.
#[cfg(feature = "dtype-categorical")]
fn same_revmap(
    rev_l: &RevMapping,
    ptr_l: SyncPtr<Utf8ViewArray>,
    rev_r: &RevMapping,
    ptr_r: SyncPtr<Utf8ViewArray>,
) -> bool {
    let both_ptrs_null = ptr_l.is_null() && ptr_r.is_null();
    if !both_ptrs_null {
        return ptr_l == ptr_r;
    }

    match (rev_l, rev_r) {
        (RevMapping::Global(_, _, left_id), RevMapping::Global(_, _, right_id)) => {
            left_id == right_id
        },
        (RevMapping::Local(_, left_id), RevMapping::Local(_, right_id)) => left_id == right_id,
        _ => false,
    }
}
1521
/// Index-based access that yields an element as an [`AnyValue`].
pub trait GetAnyValue {
    /// Returns the element at `index` as an [`AnyValue`].
    ///
    /// # Safety
    ///
    /// NOTE(review): the contract is not spelled out on this declaration. The
    /// `ArrayRef` implementation does an unchecked downcast and an unchecked
    /// element lookup, so `index` presumably has to be in bounds — confirm.
    unsafe fn get_unchecked(&self, index: usize) -> AnyValue;
}
1528
1529impl GetAnyValue for ArrayRef {
1530 unsafe fn get_unchecked(&self, index: usize) -> AnyValue {
1532 match self.dtype() {
1533 ArrowDataType::Int8 => {
1534 let arr = self
1535 .as_any()
1536 .downcast_ref::<PrimitiveArray<i8>>()
1537 .unwrap_unchecked();
1538 match arr.get_unchecked(index) {
1539 None => AnyValue::Null,
1540 Some(v) => AnyValue::Int8(v),
1541 }
1542 },
1543 ArrowDataType::Int16 => {
1544 let arr = self
1545 .as_any()
1546 .downcast_ref::<PrimitiveArray<i16>>()
1547 .unwrap_unchecked();
1548 match arr.get_unchecked(index) {
1549 None => AnyValue::Null,
1550 Some(v) => AnyValue::Int16(v),
1551 }
1552 },
1553 ArrowDataType::Int32 => {
1554 let arr = self
1555 .as_any()
1556 .downcast_ref::<PrimitiveArray<i32>>()
1557 .unwrap_unchecked();
1558 match arr.get_unchecked(index) {
1559 None => AnyValue::Null,
1560 Some(v) => AnyValue::Int32(v),
1561 }
1562 },
1563 ArrowDataType::Int64 => {
1564 let arr = self
1565 .as_any()
1566 .downcast_ref::<PrimitiveArray<i64>>()
1567 .unwrap_unchecked();
1568 match arr.get_unchecked(index) {
1569 None => AnyValue::Null,
1570 Some(v) => AnyValue::Int64(v),
1571 }
1572 },
1573 ArrowDataType::Int128 => {
1574 let arr = self
1575 .as_any()
1576 .downcast_ref::<PrimitiveArray<i128>>()
1577 .unwrap_unchecked();
1578 match arr.get_unchecked(index) {
1579 None => AnyValue::Null,
1580 Some(v) => AnyValue::Int128(v),
1581 }
1582 },
1583 ArrowDataType::UInt8 => {
1584 let arr = self
1585 .as_any()
1586 .downcast_ref::<PrimitiveArray<u8>>()
1587 .unwrap_unchecked();
1588 match arr.get_unchecked(index) {
1589 None => AnyValue::Null,
1590 Some(v) => AnyValue::UInt8(v),
1591 }
1592 },
1593 ArrowDataType::UInt16 => {
1594 let arr = self
1595 .as_any()
1596 .downcast_ref::<PrimitiveArray<u16>>()
1597 .unwrap_unchecked();
1598 match arr.get_unchecked(index) {
1599 None => AnyValue::Null,
1600 Some(v) => AnyValue::UInt16(v),
1601 }
1602 },
1603 ArrowDataType::UInt32 => {
1604 let arr = self
1605 .as_any()
1606 .downcast_ref::<PrimitiveArray<u32>>()
1607 .unwrap_unchecked();
1608 match arr.get_unchecked(index) {
1609 None => AnyValue::Null,
1610 Some(v) => AnyValue::UInt32(v),
1611 }
1612 },
1613 ArrowDataType::UInt64 => {
1614 let arr = self
1615 .as_any()
1616 .downcast_ref::<PrimitiveArray<u64>>()
1617 .unwrap_unchecked();
1618 match arr.get_unchecked(index) {
1619 None => AnyValue::Null,
1620 Some(v) => AnyValue::UInt64(v),
1621 }
1622 },
1623 ArrowDataType::Float32 => {
1624 let arr = self
1625 .as_any()
1626 .downcast_ref::<PrimitiveArray<f32>>()
1627 .unwrap_unchecked();
1628 match arr.get_unchecked(index) {
1629 None => AnyValue::Null,
1630 Some(v) => AnyValue::Float32(v),
1631 }
1632 },
1633 ArrowDataType::Float64 => {
1634 let arr = self
1635 .as_any()
1636 .downcast_ref::<PrimitiveArray<f64>>()
1637 .unwrap_unchecked();
1638 match arr.get_unchecked(index) {
1639 None => AnyValue::Null,
1640 Some(v) => AnyValue::Float64(v),
1641 }
1642 },
1643 ArrowDataType::Boolean => {
1644 let arr = self
1645 .as_any()
1646 .downcast_ref::<BooleanArray>()
1647 .unwrap_unchecked();
1648 match arr.get_unchecked(index) {
1649 None => AnyValue::Null,
1650 Some(v) => AnyValue::Boolean(v),
1651 }
1652 },
1653 ArrowDataType::LargeUtf8 => {
1654 let arr = self
1655 .as_any()
1656 .downcast_ref::<LargeStringArray>()
1657 .unwrap_unchecked();
1658 match arr.get_unchecked(index) {
1659 None => AnyValue::Null,
1660 Some(v) => AnyValue::String(v),
1661 }
1662 },
1663 _ => unimplemented!(),
1664 }
1665 }
1666}
1667
1668impl<K: NumericNative> From<K> for AnyValue<'static> {
1669 fn from(value: K) -> Self {
1670 unsafe {
1671 match K::PRIMITIVE {
1672 PrimitiveType::Int8 => AnyValue::Int8(NumCast::from(value).unwrap_unchecked()),
1673 PrimitiveType::Int16 => AnyValue::Int16(NumCast::from(value).unwrap_unchecked()),
1674 PrimitiveType::Int32 => AnyValue::Int32(NumCast::from(value).unwrap_unchecked()),
1675 PrimitiveType::Int64 => AnyValue::Int64(NumCast::from(value).unwrap_unchecked()),
1676 PrimitiveType::Int128 => AnyValue::Int128(NumCast::from(value).unwrap_unchecked()),
1677 PrimitiveType::UInt8 => AnyValue::UInt8(NumCast::from(value).unwrap_unchecked()),
1678 PrimitiveType::UInt16 => AnyValue::UInt16(NumCast::from(value).unwrap_unchecked()),
1679 PrimitiveType::UInt32 => AnyValue::UInt32(NumCast::from(value).unwrap_unchecked()),
1680 PrimitiveType::UInt64 => AnyValue::UInt64(NumCast::from(value).unwrap_unchecked()),
1681 PrimitiveType::Float32 => {
1682 AnyValue::Float32(NumCast::from(value).unwrap_unchecked())
1683 },
1684 PrimitiveType::Float64 => {
1685 AnyValue::Float64(NumCast::from(value).unwrap_unchecked())
1686 },
1687 _ => unreachable!(),
1689 }
1690 }
1691 }
1692}
1693
1694impl<'a> From<&'a [u8]> for AnyValue<'a> {
1695 fn from(value: &'a [u8]) -> Self {
1696 AnyValue::Binary(value)
1697 }
1698}
1699
1700impl<'a> From<&'a str> for AnyValue<'a> {
1701 fn from(value: &'a str) -> Self {
1702 AnyValue::String(value)
1703 }
1704}
1705
1706impl From<bool> for AnyValue<'static> {
1707 fn from(value: bool) -> Self {
1708 AnyValue::Boolean(value)
1709 }
1710}
1711
#[cfg(test)]
mod test {
    #[cfg(feature = "dtype-categorical")]
    use super::*;

    /// Checks `DataType::from_arrow_dtype` against a table of Arrow dtypes and
    /// the polars dtypes they are expected to map to.
    #[test]
    #[cfg(feature = "dtype-categorical")]
    fn test_arrow_dtypes_to_polars() {
        // Both Arrow list flavours wrap the same nullable Float64 "item" field.
        let float64_item = || {
            Box::new(ArrowField::new(
                PlSmallStr::from_static("item"),
                ArrowDataType::Float64,
                true,
            ))
        };

        // (Arrow dtype, expected polars dtype)
        let cases = [
            // Durations keep their unit.
            (
                ArrowDataType::Duration(ArrowTimeUnit::Nanosecond),
                DataType::Duration(TimeUnit::Nanoseconds),
            ),
            (
                ArrowDataType::Duration(ArrowTimeUnit::Millisecond),
                DataType::Duration(TimeUnit::Milliseconds),
            ),
            // Date64 and timestamps become datetimes.
            (
                ArrowDataType::Date64,
                DataType::Datetime(TimeUnit::Milliseconds, None),
            ),
            (
                ArrowDataType::Timestamp(ArrowTimeUnit::Nanosecond, None),
                DataType::Datetime(TimeUnit::Nanoseconds, None),
            ),
            (
                ArrowDataType::Timestamp(ArrowTimeUnit::Microsecond, None),
                DataType::Datetime(TimeUnit::Microseconds, None),
            ),
            (
                ArrowDataType::Timestamp(ArrowTimeUnit::Millisecond, None),
                DataType::Datetime(TimeUnit::Milliseconds, None),
            ),
            (
                ArrowDataType::Timestamp(ArrowTimeUnit::Second, None),
                DataType::Datetime(TimeUnit::Milliseconds, None),
            ),
            (
                ArrowDataType::Timestamp(ArrowTimeUnit::Second, Some(PlSmallStr::EMPTY)),
                DataType::Datetime(TimeUnit::Milliseconds, None),
            ),
            // Strings and binaries, in both offset widths.
            (ArrowDataType::LargeUtf8, DataType::String),
            (ArrowDataType::Utf8, DataType::String),
            (ArrowDataType::LargeBinary, DataType::Binary),
            (ArrowDataType::Binary, DataType::Binary),
            // Every time-of-day dtype maps to `Time`.
            (
                ArrowDataType::Time64(ArrowTimeUnit::Nanosecond),
                DataType::Time,
            ),
            (
                ArrowDataType::Time64(ArrowTimeUnit::Millisecond),
                DataType::Time,
            ),
            (
                ArrowDataType::Time64(ArrowTimeUnit::Microsecond),
                DataType::Time,
            ),
            (ArrowDataType::Time64(ArrowTimeUnit::Second), DataType::Time),
            (
                ArrowDataType::Time32(ArrowTimeUnit::Nanosecond),
                DataType::Time,
            ),
            (
                ArrowDataType::Time32(ArrowTimeUnit::Millisecond),
                DataType::Time,
            ),
            (
                ArrowDataType::Time32(ArrowTimeUnit::Microsecond),
                DataType::Time,
            ),
            (ArrowDataType::Time32(ArrowTimeUnit::Second), DataType::Time),
            // Both list flavours map to the polars list dtype.
            (
                ArrowDataType::List(float64_item()),
                DataType::List(DataType::Float64.into()),
            ),
            (
                ArrowDataType::LargeList(float64_item()),
                DataType::List(DataType::Float64.into()),
            ),
        ];

        for (arrow_dtype, expected) in cases {
            let converted = DataType::from_arrow_dtype(&arrow_dtype);

            assert_eq!(expected, converted);
        }
    }
}