1#![allow(unsafe_op_in_unsafe_fn)]
2use std::borrow::Cow;
3
4use arrow::types::PrimitiveType;
5use num_traits::ToBytes;
6use polars_compute::cast::SerPrimitive;
7use polars_error::feature_gated;
8use polars_utils::float16::pf16;
9use polars_utils::total_ord::ToTotalOrd;
10
11use super::*;
12use crate::CHEAP_SERIES_HASH_LIMIT;
13#[cfg(feature = "dtype-struct")]
14use crate::prelude::any_value::arr_to_any_value;
15
/// Owning wrapper around a boxed [`PolarsObjectSafe`] trait object.
///
/// This is the payload stored by [`AnyValue::ObjectOwned`].
#[cfg(feature = "object")]
#[derive(Debug)]
pub struct OwnedObject(pub Box<dyn PolarsObjectSafe>);
19
#[cfg(feature = "object")]
impl Clone for OwnedObject {
    /// Clones by asking the wrapped object to box up a copy of itself.
    fn clone(&self) -> Self {
        let duplicate = self.0.to_boxed();
        OwnedObject(duplicate)
    }
}
26
27#[cfg(feature = "dtype-decimal")]
28use polars_compute::decimal::{
29 dec128_cmp, dec128_eq, dec128_rescale, dec128_to_f64, dec128_to_i128, f64_to_dec128,
30 i128_to_dec128,
31};
32
/// A single dynamically typed value.
///
/// Borrowing variants hold references that are valid for `'a`; several of them have an
/// `...Owned` sibling that stores the same data by value.
#[derive(Debug, Clone, Default)]
pub enum AnyValue<'a> {
    /// Missing value; this is also the `Default` variant.
    #[default]
    Null,
    /// A boolean.
    Boolean(bool),
    /// A borrowed UTF-8 string slice.
    String(&'a str),
    /// An unsigned 8-bit integer.
    UInt8(u8),
    /// An unsigned 16-bit integer.
    UInt16(u16),
    /// An unsigned 32-bit integer.
    UInt32(u32),
    /// An unsigned 64-bit integer.
    UInt64(u64),
    /// An unsigned 128-bit integer.
    UInt128(u128),
    /// A signed 8-bit integer.
    Int8(i8),
    /// A signed 16-bit integer.
    Int16(i16),
    /// A signed 32-bit integer.
    Int32(i32),
    /// A signed 64-bit integer.
    Int64(i64),
    /// A signed 128-bit integer.
    Int128(i128),
    /// A 16-bit float.
    Float16(pf16),
    /// A 32-bit float.
    Float32(f32),
    /// A 64-bit float.
    Float64(f64),
    /// A date stored as an `i32`. `strict_cast` scales it by the `*_IN_DAY` constants when
    /// converting to a datetime, i.e. it is handled as a day count.
    #[cfg(feature = "dtype-date")]
    Date(i32),
    /// A datetime: raw `i64` value, the unit it is expressed in, and an optional borrowed
    /// time zone.
    #[cfg(feature = "dtype-datetime")]
    Datetime(i64, TimeUnit, Option<&'a TimeZone>),
    /// Same as [`AnyValue::Datetime`], but the time zone is shared through an `Arc`.
    #[cfg(feature = "dtype-datetime")]
    DatetimeOwned(i64, TimeUnit, Option<Arc<TimeZone>>),
    /// A duration: raw `i64` value and the unit it is expressed in.
    #[cfg(feature = "dtype-duration")]
    Duration(i64, TimeUnit),
    /// A time of day stored as an `i64`. `strict_cast` normalises datetimes to nanoseconds
    /// when producing this variant.
    #[cfg(feature = "dtype-time")]
    Time(i64),
    /// A categorical value: the category id plus a borrowed mapping that resolves ids to
    /// strings.
    #[cfg(feature = "dtype-categorical")]
    Categorical(CatSize, &'a Arc<CategoricalMapping>),
    /// Same as [`AnyValue::Categorical`], but holding its own `Arc` to the mapping.
    #[cfg(feature = "dtype-categorical")]
    CategoricalOwned(CatSize, Arc<CategoricalMapping>),
    /// An enum value: the category id plus a borrowed mapping that resolves ids to strings.
    #[cfg(feature = "dtype-categorical")]
    Enum(CatSize, &'a Arc<CategoricalMapping>),
    /// Same as [`AnyValue::Enum`], but holding its own `Arc` to the mapping.
    #[cfg(feature = "dtype-categorical")]
    EnumOwned(CatSize, Arc<CategoricalMapping>),
    /// A list value; the elements are held in a `Series`.
    List(Series),
    /// A fixed-size array value: the elements and the array width (the `usize` is what
    /// `dtype` reports as the `DataType::Array` size).
    #[cfg(feature = "dtype-array")]
    Array(Series, usize),
    /// A borrowed user-defined object.
    #[cfg(feature = "object")]
    Object(&'a dyn PolarsObjectSafe),
    /// An owned user-defined object.
    #[cfg(feature = "object")]
    ObjectOwned(OwnedObject),
    /// A borrowed struct value: an index, the struct array it refers into and the struct's
    /// fields. NOTE(review): the index is presumably the row within the array — confirm.
    #[cfg(feature = "dtype-struct")]
    Struct(usize, &'a StructArray, &'a [Field]),
    /// An owned struct value: the field values together with their field definitions.
    #[cfg(feature = "dtype-struct")]
    StructOwned(Box<(Vec<AnyValue<'a>>, Vec<Field>)>),
    /// An owned UTF-8 string.
    StringOwned(PlSmallStr),
    /// A borrowed byte slice.
    Binary(&'a [u8]),
    /// An owned byte buffer.
    BinaryOwned(Vec<u8>),
    /// A 128-bit decimal. The two `usize` fields are forwarded to `DataType::Decimal` as `p`
    /// and `s` (presumably precision and scale).
    #[cfg(feature = "dtype-decimal")]
    Decimal(i128, usize, usize),
}
118
119impl AnyValue<'static> {
120 pub fn zero_sum(dtype: &DataType) -> Self {
121 match dtype {
122 DataType::String => AnyValue::StringOwned(PlSmallStr::EMPTY),
123 DataType::Binary => AnyValue::BinaryOwned(Vec::new()),
124 DataType::Boolean => (0 as IdxSize).into(),
125 d if d.is_primitive_numeric() => unsafe {
127 std::mem::transmute::<AnyValue<'_>, AnyValue<'static>>(
128 AnyValue::UInt8(0).cast(dtype),
129 )
130 },
131 #[cfg(feature = "dtype-duration")]
132 DataType::Duration(unit) => AnyValue::Duration(0, *unit),
133 #[cfg(feature = "dtype-decimal")]
134 DataType::Decimal(p, s) => AnyValue::Decimal(0, *p, *s),
135 _ => AnyValue::Null,
136 }
137 }
138
139 pub fn can_have_dtype(&self, dtype: &DataType) -> bool {
141 matches!(self, AnyValue::Null) || dtype == &self.dtype()
142 }
143
    /// Builds a placeholder value for `dtype`.
    ///
    /// * `numeric_to_one` - integer and float dtypes yield one when this is `true` and zero
    ///   otherwise (the flag is converted with `into()`). Decimal and temporal dtypes always
    ///   yield zero.
    /// * `num_list_values` - number of default elements placed in a `List` value; `0` gives an
    ///   empty list of the inner dtype. The same setting is passed down to nested dtypes.
    ///
    /// `Object`, `Null`, `Categorical` and an `Enum` without categories yield
    /// [`AnyValue::Null`]. Struct and array values are built recursively from their inner
    /// dtypes, and an extension dtype uses the default of its storage dtype.
    ///
    /// # Panics
    ///
    /// Hits `unreachable!()` for `DataType::Unknown`.
    pub fn default_value(
        dtype: &DataType,
        numeric_to_one: bool,
        num_list_values: usize,
    ) -> AnyValue<'static> {
        use AnyValue as AV;
        use DataType as DT;
        match dtype {
            DT::Boolean => AV::Boolean(false),
            DT::UInt8 => AV::UInt8(numeric_to_one.into()),
            DT::UInt16 => AV::UInt16(numeric_to_one.into()),
            DT::UInt32 => AV::UInt32(numeric_to_one.into()),
            DT::UInt64 => AV::UInt64(numeric_to_one.into()),
            DT::UInt128 => AV::UInt128(numeric_to_one.into()),
            DT::Int8 => AV::Int8(numeric_to_one.into()),
            DT::Int16 => AV::Int16(numeric_to_one.into()),
            DT::Int32 => AV::Int32(numeric_to_one.into()),
            DT::Int64 => AV::Int64(numeric_to_one.into()),
            DT::Int128 => AV::Int128(numeric_to_one.into()),
            DT::Float16 => AV::Float16(numeric_to_one.into()),
            DT::Float32 => AV::Float32(numeric_to_one.into()),
            DT::Float64 => AV::Float64(numeric_to_one.into()),
            // Decimals ignore `numeric_to_one` and always start at a raw value of zero.
            #[cfg(feature = "dtype-decimal")]
            DT::Decimal(p, s) => AV::Decimal(0, *p, *s),
            DT::String => AV::String(""),
            DT::Binary => AV::Binary(&[]),
            DT::BinaryOffset => AV::Binary(&[]),
            DT::Date => feature_gated!("dtype-date", AV::Date(0)),
            DT::Datetime(time_unit, time_zone) => feature_gated!(
                "dtype-datetime",
                AV::DatetimeOwned(0, *time_unit, time_zone.clone().map(Arc::new))
            ),
            DT::Duration(time_unit) => {
                feature_gated!("dtype-duration", AV::Duration(0, *time_unit))
            },
            DT::Time => feature_gated!("dtype-time", AV::Time(0)),
            #[cfg(feature = "dtype-array")]
            DT::Array(inner_dtype, width) => {
                let inner_value =
                    AnyValue::default_value(inner_dtype, numeric_to_one, num_list_values);
                // Build a one-element series of the inner default, then repeat that element
                // `width` times.
                AV::Array(
                    Scalar::new(inner_dtype.as_ref().clone(), inner_value)
                        .into_series(PlSmallStr::EMPTY)
                        .new_from_index(0, *width),
                    *width,
                )
            },
            DT::List(inner_dtype) => AV::List(if num_list_values == 0 {
                Series::new_empty(PlSmallStr::EMPTY, inner_dtype.as_ref())
            } else {
                let inner_value =
                    AnyValue::default_value(inner_dtype, numeric_to_one, num_list_values);

                // Repeat the inner default `num_list_values` times.
                Scalar::new(inner_dtype.as_ref().clone(), inner_value)
                    .into_series(PlSmallStr::EMPTY)
                    .new_from_index(0, num_list_values)
            }),
            #[cfg(feature = "object")]
            DT::Object(_) => AV::Null,
            DT::Null => AV::Null,
            #[cfg(feature = "dtype-categorical")]
            DT::Categorical(_, _) => AV::Null,
            // An enum defaults to category id 0, unless it has no categories at all.
            #[cfg(feature = "dtype-categorical")]
            DT::Enum(categories, mapping) => match categories.categories().is_empty() {
                true => AV::Null,
                false => AV::EnumOwned(0, mapping.clone()),
            },
            #[cfg(feature = "dtype-struct")]
            DT::Struct(fields) => AV::StructOwned(Box::new((
                fields
                    .iter()
                    .map(|f| AnyValue::default_value(f.dtype(), numeric_to_one, num_list_values))
                    .collect(),
                fields.clone(),
            ))),
            #[cfg(feature = "dtype-extension")]
            DT::Extension(_typ, storage) => {
                AnyValue::default_value(storage, numeric_to_one, num_list_values)
            },
            DT::Unknown(_) => unreachable!(),
        }
    }
227}
228
229impl<'a> AnyValue<'a> {
    /// Returns the [`DataType`] that corresponds to this value.
    ///
    /// Borrowed and owned variants of the same kind report the same dtype. Nested values
    /// (`List`, `Array`, structs) derive their dtype from the data they hold.
    ///
    /// # Panics
    ///
    /// Hits `unimplemented!` for the `Categorical*` and `Enum*` variants.
    pub fn dtype(&self) -> DataType {
        use AnyValue::*;
        match self {
            Null => DataType::Null,
            Boolean(_) => DataType::Boolean,
            Int8(_) => DataType::Int8,
            Int16(_) => DataType::Int16,
            Int32(_) => DataType::Int32,
            Int64(_) => DataType::Int64,
            Int128(_) => DataType::Int128,
            UInt8(_) => DataType::UInt8,
            UInt16(_) => DataType::UInt16,
            UInt32(_) => DataType::UInt32,
            UInt64(_) => DataType::UInt64,
            UInt128(_) => DataType::UInt128,
            Float16(_) => DataType::Float16,
            Float32(_) => DataType::Float32,
            Float64(_) => DataType::Float64,
            String(_) | StringOwned(_) => DataType::String,
            Binary(_) | BinaryOwned(_) => DataType::Binary,
            #[cfg(feature = "dtype-date")]
            Date(_) => DataType::Date,
            #[cfg(feature = "dtype-time")]
            Time(_) => DataType::Time,
            // The time zone is cloned into the returned dtype.
            #[cfg(feature = "dtype-datetime")]
            Datetime(_, tu, tz) => DataType::Datetime(*tu, (*tz).cloned()),
            #[cfg(feature = "dtype-datetime")]
            DatetimeOwned(_, tu, tz) => {
                DataType::Datetime(*tu, tz.as_ref().map(|v| v.as_ref().clone()))
            },
            #[cfg(feature = "dtype-duration")]
            Duration(_, tu) => DataType::Duration(*tu),
            #[cfg(feature = "dtype-categorical")]
            Categorical(_, _) | CategoricalOwned(_, _) => {
                unimplemented!("can not get dtype of Categorical AnyValue")
            },
            #[cfg(feature = "dtype-categorical")]
            Enum(_, _) | EnumOwned(_, _) => unimplemented!("can not get dtype of Enum AnyValue"),
            List(s) => DataType::List(Box::new(s.dtype().clone())),
            #[cfg(feature = "dtype-array")]
            Array(s, size) => DataType::Array(Box::new(s.dtype().clone()), *size),
            #[cfg(feature = "dtype-struct")]
            Struct(_, _, fields) => DataType::Struct(fields.to_vec()),
            #[cfg(feature = "dtype-struct")]
            StructOwned(payload) => DataType::Struct(payload.1.clone()),
            #[cfg(feature = "dtype-decimal")]
            Decimal(_, p, s) => DataType::Decimal(*p, *s),
            #[cfg(feature = "object")]
            Object(o) => DataType::Object(o.type_name()),
            #[cfg(feature = "object")]
            ObjectOwned(o) => DataType::Object(o.0.type_name()),
        }
    }
287
    /// Extracts this value as the numeric type `T`.
    ///
    /// * Integer and float variants are converted with `NumCast::from`.
    /// * Temporal variants (`Date`, `Datetime*`, `Time`, `Duration`) convert their raw
    ///   integer payload.
    /// * `Decimal` goes through `dec128_to_f64` when `T` is a float type and through
    ///   `dec128_to_i128` otherwise.
    /// * `Boolean` becomes `1` or `0`.
    /// * Strings are parsed as `i128` first and as `f64` if that fails.
    ///
    /// Returns `None` for every other variant, for unparsable strings, and whenever
    /// `NumCast::from` yields `None` for the target type.
    #[doc(hidden)]
    #[inline]
    pub fn extract<T: NumCast + IsFloat>(&self) -> Option<T> {
        use AnyValue::*;
        match self {
            Int8(v) => NumCast::from(*v),
            Int16(v) => NumCast::from(*v),
            Int32(v) => NumCast::from(*v),
            Int64(v) => NumCast::from(*v),
            Int128(v) => NumCast::from(*v),
            UInt8(v) => NumCast::from(*v),
            UInt16(v) => NumCast::from(*v),
            UInt32(v) => NumCast::from(*v),
            UInt64(v) => NumCast::from(*v),
            UInt128(v) => NumCast::from(*v),
            // Without the `dtype-f16` feature a `Float16` falls through to the `None` arm.
            #[cfg(feature = "dtype-f16")]
            Float16(v) => NumCast::from(*v),
            Float32(v) => NumCast::from(*v),
            Float64(v) => NumCast::from(*v),
            #[cfg(feature = "dtype-date")]
            Date(v) => NumCast::from(*v),
            #[cfg(feature = "dtype-datetime")]
            Datetime(v, _, _) | DatetimeOwned(v, _, _) => NumCast::from(*v),
            #[cfg(feature = "dtype-time")]
            Time(v) => NumCast::from(*v),
            #[cfg(feature = "dtype-duration")]
            Duration(v, _) => NumCast::from(*v),
            #[cfg(feature = "dtype-decimal")]
            Decimal(v, _p, s) => {
                if T::is_float() {
                    NumCast::from(dec128_to_f64(*v, *s))
                } else {
                    NumCast::from(dec128_to_i128(*v, *s))
                }
            },
            Boolean(v) => NumCast::from(if *v { 1 } else { 0 }),
            String(v) => {
                // Try the integer parse first; only fall back to a float parse when the text
                // is not a valid `i128`.
                if let Ok(val) = (*v).parse::<i128>() {
                    NumCast::from(val)
                } else {
                    NumCast::from((*v).parse::<f64>().ok()?)
                }
            },
            // Reuse the borrowed-string logic for owned strings.
            StringOwned(v) => String(v.as_str()).extract(),
            _ => None,
        }
    }
336
337 #[inline]
338 pub fn try_extract<T: NumCast + IsFloat>(&self) -> PolarsResult<T> {
339 self.extract().ok_or_else(|| {
340 polars_err!(
341 ComputeError: "could not extract number from any-value of dtype: '{:?}'",
342 self.dtype(),
343 )
344 })
345 }
346
347 pub fn is_boolean(&self) -> bool {
348 matches!(self, AnyValue::Boolean(_))
349 }
350
351 pub fn is_primitive_numeric(&self) -> bool {
352 self.is_integer() || self.is_float()
353 }
354
355 pub fn is_float(&self) -> bool {
356 matches!(
357 self,
358 AnyValue::Float16(_) | AnyValue::Float32(_) | AnyValue::Float64(_)
359 )
360 }
361
362 pub fn is_integer(&self) -> bool {
363 self.is_signed_integer() || self.is_unsigned_integer()
364 }
365
366 pub fn is_signed_integer(&self) -> bool {
367 matches!(
368 self,
369 AnyValue::Int8(_)
370 | AnyValue::Int16(_)
371 | AnyValue::Int32(_)
372 | AnyValue::Int64(_)
373 | AnyValue::Int128(_)
374 )
375 }
376
377 pub fn is_unsigned_integer(&self) -> bool {
378 matches!(
379 self,
380 AnyValue::UInt8(_)
381 | AnyValue::UInt16(_)
382 | AnyValue::UInt32(_)
383 | AnyValue::UInt64(_)
384 | AnyValue::UInt128(_)
385 )
386 }
387
388 pub fn is_nan(&self) -> bool {
389 match self {
390 AnyValue::Float16(f) => f.is_nan(),
391 AnyValue::Float32(f) => f.is_nan(),
392 AnyValue::Float64(f) => f.is_nan(),
393 _ => false,
394 }
395 }
396
397 #[inline(always)]
398 pub fn is_null(&self) -> bool {
399 matches!(self, AnyValue::Null)
400 }
401
402 pub fn is_nested_null(&self) -> bool {
403 match self {
404 AnyValue::Null => true,
405 AnyValue::List(s) => s.null_count() == s.len(),
406 #[cfg(feature = "dtype-array")]
407 AnyValue::Array(s, _) => s.null_count() == s.len(),
408 #[cfg(feature = "dtype-struct")]
409 AnyValue::Struct(_, _, _) => self._iter_struct_av().all(|av| av.is_nested_null()),
410 _ => false,
411 }
412 }
413
414 #[inline]
416 pub fn null_to_none(self) -> Option<Self> {
417 match self {
418 AnyValue::Null => None,
419 av => Some(av),
420 }
421 }
422
423 pub fn strict_cast(&self, dtype: &'a DataType) -> Option<AnyValue<'a>> {
426 let new_av = match (self, dtype) {
427 (av, DataType::UInt8) => AnyValue::UInt8(av.extract::<u8>()?),
429 (av, DataType::UInt16) => AnyValue::UInt16(av.extract::<u16>()?),
430 (av, DataType::UInt32) => AnyValue::UInt32(av.extract::<u32>()?),
431 (av, DataType::UInt64) => AnyValue::UInt64(av.extract::<u64>()?),
432 (av, DataType::UInt128) => AnyValue::UInt128(av.extract::<u128>()?),
433 (av, DataType::Int8) => AnyValue::Int8(av.extract::<i8>()?),
434 (av, DataType::Int16) => AnyValue::Int16(av.extract::<i16>()?),
435 (av, DataType::Int32) => AnyValue::Int32(av.extract::<i32>()?),
436 (av, DataType::Int64) => AnyValue::Int64(av.extract::<i64>()?),
437 (av, DataType::Int128) => AnyValue::Int128(av.extract::<i128>()?),
438 (av, DataType::Float16) => AnyValue::Float16(av.extract::<pf16>()?),
439 (av, DataType::Float32) => AnyValue::Float32(av.extract::<f32>()?),
440 (av, DataType::Float64) => AnyValue::Float64(av.extract::<f64>()?),
441
442 (AnyValue::UInt8(v), DataType::Boolean) => AnyValue::Boolean(*v != u8::default()),
444 (AnyValue::UInt16(v), DataType::Boolean) => AnyValue::Boolean(*v != u16::default()),
445 (AnyValue::UInt32(v), DataType::Boolean) => AnyValue::Boolean(*v != u32::default()),
446 (AnyValue::UInt64(v), DataType::Boolean) => AnyValue::Boolean(*v != u64::default()),
447 (AnyValue::UInt128(v), DataType::Boolean) => AnyValue::Boolean(*v != u128::default()),
448 (AnyValue::Int8(v), DataType::Boolean) => AnyValue::Boolean(*v != i8::default()),
449 (AnyValue::Int16(v), DataType::Boolean) => AnyValue::Boolean(*v != i16::default()),
450 (AnyValue::Int32(v), DataType::Boolean) => AnyValue::Boolean(*v != i32::default()),
451 (AnyValue::Int64(v), DataType::Boolean) => AnyValue::Boolean(*v != i64::default()),
452 (AnyValue::Int128(v), DataType::Boolean) => AnyValue::Boolean(*v != i128::default()),
453 (AnyValue::Float16(v), DataType::Boolean) => AnyValue::Boolean(*v != pf16::default()),
454 (AnyValue::Float32(v), DataType::Boolean) => AnyValue::Boolean(*v != f32::default()),
455 (AnyValue::Float64(v), DataType::Boolean) => AnyValue::Boolean(*v != f64::default()),
456
457 #[cfg(feature = "dtype-categorical")]
459 (
460 &AnyValue::Categorical(cat, &ref lmap) | &AnyValue::CategoricalOwned(cat, ref lmap),
461 DataType::Categorical(_, rmap),
462 ) => {
463 if Arc::ptr_eq(lmap, rmap) {
464 self.clone()
465 } else {
466 let s = unsafe { lmap.cat_to_str_unchecked(cat) };
467 let new_cat = rmap.insert_cat(s).unwrap();
468 AnyValue::CategoricalOwned(new_cat, rmap.clone())
469 }
470 },
471
472 #[cfg(feature = "dtype-categorical")]
473 (
474 &AnyValue::Enum(cat, &ref lmap) | &AnyValue::EnumOwned(cat, ref lmap),
475 DataType::Enum(_, rmap),
476 ) => {
477 if Arc::ptr_eq(lmap, rmap) {
478 self.clone()
479 } else {
480 let s = unsafe { lmap.cat_to_str_unchecked(cat) };
481 let new_cat = rmap.get_cat(s)?;
482 AnyValue::EnumOwned(new_cat, rmap.clone())
483 }
484 },
485
486 #[cfg(feature = "dtype-categorical")]
487 (
488 &AnyValue::Categorical(cat, &ref map)
489 | &AnyValue::CategoricalOwned(cat, ref map)
490 | &AnyValue::Enum(cat, &ref map)
491 | &AnyValue::EnumOwned(cat, ref map),
492 DataType::String,
493 ) => {
494 let s = unsafe { map.cat_to_str_unchecked(cat) };
495 AnyValue::StringOwned(PlSmallStr::from(s))
496 },
497
498 #[cfg(feature = "dtype-categorical")]
499 (AnyValue::String(s), DataType::Categorical(_, map)) => {
500 AnyValue::CategoricalOwned(map.insert_cat(s).unwrap(), map.clone())
501 },
502
503 #[cfg(feature = "dtype-categorical")]
504 (AnyValue::StringOwned(s), DataType::Categorical(_, map)) => {
505 AnyValue::CategoricalOwned(map.insert_cat(s).unwrap(), map.clone())
506 },
507
508 #[cfg(feature = "dtype-categorical")]
509 (AnyValue::String(s), DataType::Enum(_, map)) => {
510 AnyValue::CategoricalOwned(map.get_cat(s)?, map.clone())
511 },
512
513 #[cfg(feature = "dtype-categorical")]
514 (AnyValue::StringOwned(s), DataType::Enum(_, map)) => {
515 AnyValue::CategoricalOwned(map.get_cat(s)?, map.clone())
516 },
517
518 (AnyValue::String(v), DataType::String) => AnyValue::String(v),
520 (AnyValue::StringOwned(v), DataType::String) => AnyValue::StringOwned(v.clone()),
521
522 (av, DataType::String) => {
523 let mut tmp = vec![];
524 if av.is_unsigned_integer() {
525 let val = av.extract::<u64>()?;
526 SerPrimitive::write(&mut tmp, val);
527 } else if av.is_float() {
528 let val = av.extract::<f64>()?;
529 SerPrimitive::write(&mut tmp, val);
530 } else {
531 let val = av.extract::<i64>()?;
532 SerPrimitive::write(&mut tmp, val);
533 }
534 AnyValue::StringOwned(PlSmallStr::from_str(std::str::from_utf8(&tmp).unwrap()))
535 },
536
537 (AnyValue::String(v), DataType::Binary) => AnyValue::Binary(v.as_bytes()),
539
540 #[cfg(feature = "dtype-datetime")]
542 (av, DataType::Datetime(tu, tz)) if av.is_primitive_numeric() => {
543 AnyValue::Datetime(av.extract::<i64>()?, *tu, tz.as_ref())
544 },
545 #[cfg(all(feature = "dtype-datetime", feature = "dtype-date"))]
546 (AnyValue::Date(v), DataType::Datetime(tu, _)) => AnyValue::Datetime(
547 match tu {
548 TimeUnit::Nanoseconds => (*v as i64) * NS_IN_DAY,
549 TimeUnit::Microseconds => (*v as i64) * US_IN_DAY,
550 TimeUnit::Milliseconds => (*v as i64) * MS_IN_DAY,
551 },
552 *tu,
553 None,
554 ),
555 #[cfg(feature = "dtype-datetime")]
556 (
557 AnyValue::Datetime(v, tu, _) | AnyValue::DatetimeOwned(v, tu, _),
558 DataType::Datetime(tu_r, tz_r),
559 ) => AnyValue::Datetime(
560 match (tu, tu_r) {
561 (TimeUnit::Nanoseconds, TimeUnit::Microseconds) => *v / 1_000i64,
562 (TimeUnit::Nanoseconds, TimeUnit::Milliseconds) => *v / 1_000_000i64,
563 (TimeUnit::Microseconds, TimeUnit::Nanoseconds) => *v * 1_000i64,
564 (TimeUnit::Microseconds, TimeUnit::Milliseconds) => *v / 1_000i64,
565 (TimeUnit::Milliseconds, TimeUnit::Microseconds) => *v * 1_000i64,
566 (TimeUnit::Milliseconds, TimeUnit::Nanoseconds) => *v * 1_000_000i64,
567 _ => *v,
568 },
569 *tu_r,
570 tz_r.as_ref(),
571 ),
572
573 #[cfg(feature = "dtype-date")]
575 (av, DataType::Date) if av.is_primitive_numeric() => {
576 AnyValue::Date(av.extract::<i32>()?)
577 },
578 #[cfg(all(feature = "dtype-date", feature = "dtype-datetime"))]
579 (AnyValue::Datetime(v, tu, _) | AnyValue::DatetimeOwned(v, tu, _), DataType::Date) => {
580 AnyValue::Date(match tu {
581 TimeUnit::Nanoseconds => *v / NS_IN_DAY,
582 TimeUnit::Microseconds => *v / US_IN_DAY,
583 TimeUnit::Milliseconds => *v / MS_IN_DAY,
584 } as i32)
585 },
586
587 #[cfg(feature = "dtype-time")]
589 (av, DataType::Time) if av.is_primitive_numeric() => {
590 AnyValue::Time(av.extract::<i64>()?)
591 },
592 #[cfg(all(feature = "dtype-time", feature = "dtype-datetime"))]
593 (AnyValue::Datetime(v, tu, _) | AnyValue::DatetimeOwned(v, tu, _), DataType::Time) => {
594 AnyValue::Time(match tu {
595 TimeUnit::Nanoseconds => *v % NS_IN_DAY,
596 TimeUnit::Microseconds => (*v % US_IN_DAY) * 1_000i64,
597 TimeUnit::Milliseconds => (*v % MS_IN_DAY) * 1_000_000i64,
598 })
599 },
600
601 #[cfg(feature = "dtype-duration")]
603 (av, DataType::Duration(tu)) if av.is_primitive_numeric() => {
604 AnyValue::Duration(av.extract::<i64>()?, *tu)
605 },
606 #[cfg(all(feature = "dtype-duration", feature = "dtype-time"))]
607 (AnyValue::Time(v), DataType::Duration(tu)) => AnyValue::Duration(
608 match *tu {
609 TimeUnit::Nanoseconds => *v,
610 TimeUnit::Microseconds => *v / 1_000i64,
611 TimeUnit::Milliseconds => *v / 1_000_000i64,
612 },
613 *tu,
614 ),
615 #[cfg(feature = "dtype-duration")]
616 (AnyValue::Duration(v, tu), DataType::Duration(tu_r)) => AnyValue::Duration(
617 match (tu, tu_r) {
618 (_, _) if tu == tu_r => *v,
619 (TimeUnit::Nanoseconds, TimeUnit::Microseconds) => *v / 1_000i64,
620 (TimeUnit::Nanoseconds, TimeUnit::Milliseconds) => *v / 1_000_000i64,
621 (TimeUnit::Microseconds, TimeUnit::Nanoseconds) => *v * 1_000i64,
622 (TimeUnit::Microseconds, TimeUnit::Milliseconds) => *v / 1_000i64,
623 (TimeUnit::Milliseconds, TimeUnit::Microseconds) => *v * 1_000i64,
624 (TimeUnit::Milliseconds, TimeUnit::Nanoseconds) => *v * 1_000_000i64,
625 _ => *v,
626 },
627 *tu_r,
628 ),
629
630 #[cfg(feature = "dtype-decimal")]
631 (av, DataType::Decimal(p, s)) if av.is_integer() => {
632 let int = av.try_extract::<i128>().ok()?;
633 let dec = i128_to_dec128(int, *p, *s)?;
634 AnyValue::Decimal(dec, *p, *s)
635 },
636
637 #[cfg(feature = "dtype-decimal")]
638 (av, DataType::Decimal(p, s)) if av.is_float() => {
639 let f = av.try_extract::<f64>().unwrap();
640 let dec = f64_to_dec128(f, *p, *s)?;
641 AnyValue::Decimal(dec, *p, *s)
642 },
643
644 #[cfg(feature = "dtype-decimal")]
645 (AnyValue::Decimal(value, _old_p, old_s), DataType::Decimal(p, s)) => {
646 let converted = dec128_rescale(*value, *old_s, *p, *s)?;
647 AnyValue::Decimal(converted, *p, *s)
648 },
649
650 (av, dtype) if av.dtype() == *dtype => self.clone(),
652
653 _ => return None,
654 };
655 Some(new_av)
656 }
657
658 pub fn try_strict_cast(&self, dtype: &'a DataType) -> PolarsResult<AnyValue<'a>> {
661 self.strict_cast(dtype).ok_or_else(
662 || polars_err!(ComputeError: "cannot cast any-value {:?} to dtype '{}'", self, dtype),
663 )
664 }
665
666 pub fn cast(&self, dtype: &'a DataType) -> AnyValue<'a> {
667 match self.strict_cast(dtype) {
668 Some(av) => av,
669 None => AnyValue::Null,
670 }
671 }
672
673 pub fn idx(&self) -> IdxSize {
674 match self {
675 #[cfg(not(feature = "bigidx"))]
676 Self::UInt32(v) => *v,
677 #[cfg(feature = "bigidx")]
678 Self::UInt64(v) => *v,
679 _ => panic!("expected index type found {self:?}"),
680 }
681 }
682
    /// Returns the string content of this value.
    ///
    /// Borrowed strings and borrowed categorical / enum values are returned without
    /// copying; their owned counterparts are copied into an owned string. `Null` yields the
    /// text `"null"`, and every other variant is rendered with `to_string()`.
    pub fn str_value(&self) -> Cow<'a, str> {
        match self {
            Self::String(s) => Cow::Borrowed(s),
            Self::StringOwned(s) => Cow::Owned(s.to_string()),
            Self::Null => Cow::Borrowed("null"),
            // The borrowed mapping lives for `'a`, so the resolved string can be borrowed.
            #[cfg(feature = "dtype-categorical")]
            Self::Categorical(cat, map) | Self::Enum(cat, map) => {
                Cow::Borrowed(unsafe { map.cat_to_str_unchecked(*cat) })
            },
            // The mapping is owned by `self`, so the resolved string has to be copied.
            #[cfg(feature = "dtype-categorical")]
            Self::CategoricalOwned(cat, map) | Self::EnumOwned(cat, map) => {
                Cow::Owned(unsafe { map.cat_to_str_unchecked(*cat) }.to_owned())
            },
            av => Cow::Owned(av.to_string()),
        }
    }
699
    /// Converts a logical value into its physical representation.
    ///
    /// * Null, boolean, string, binary, integer, float and object values are returned
    ///   unchanged.
    /// * `Date` becomes `Int32`; `Datetime*`, `Duration` and `Time` become `Int64`.
    /// * Categorical and enum values become the smallest unsigned integer variant that
    ///   `CategoricalPhysical::smallest_physical` selects for the mapping.
    /// * `List` and `Array` convert the series they hold; `StructOwned` converts every field
    ///   value and every field dtype.
    /// * `Decimal` becomes `Int128`.
    ///
    /// # Panics
    ///
    /// * The borrowed `Struct` variant is not supported and hits `todo!()`.
    /// * The result of `smallest_physical` is unwrapped.
    pub fn to_physical(self) -> Self {
        match self {
            Self::Null
            | Self::Boolean(_)
            | Self::String(_)
            | Self::StringOwned(_)
            | Self::Binary(_)
            | Self::BinaryOwned(_)
            | Self::UInt8(_)
            | Self::UInt16(_)
            | Self::UInt32(_)
            | Self::UInt64(_)
            | Self::UInt128(_)
            | Self::Int8(_)
            | Self::Int16(_)
            | Self::Int32(_)
            | Self::Int64(_)
            | Self::Int128(_)
            | Self::Float16(_)
            | Self::Float32(_)
            | Self::Float64(_) => self,

            #[cfg(feature = "object")]
            Self::Object(_) | Self::ObjectOwned(_) => self,

            #[cfg(feature = "dtype-date")]
            Self::Date(v) => Self::Int32(v),
            #[cfg(feature = "dtype-datetime")]
            Self::Datetime(v, _, _) | Self::DatetimeOwned(v, _, _) => Self::Int64(v),

            #[cfg(feature = "dtype-duration")]
            Self::Duration(v, _) => Self::Int64(v),
            #[cfg(feature = "dtype-time")]
            Self::Time(v) => Self::Int64(v),

            // The integer width depends on how many categories the mapping allows.
            #[cfg(feature = "dtype-categorical")]
            Self::Categorical(v, &ref m)
            | Self::CategoricalOwned(v, ref m)
            | Self::Enum(v, &ref m)
            | Self::EnumOwned(v, ref m) => {
                match CategoricalPhysical::smallest_physical(m.max_categories()).unwrap() {
                    CategoricalPhysical::U8 => Self::UInt8(v as u8),
                    CategoricalPhysical::U16 => Self::UInt16(v as u16),
                    CategoricalPhysical::U32 => Self::UInt32(v),
                }
            },
            Self::List(series) => Self::List(series.to_physical_repr().into_owned()),

            #[cfg(feature = "dtype-array")]
            Self::Array(series, width) => {
                Self::Array(series.to_physical_repr().into_owned(), width)
            },

            #[cfg(feature = "dtype-struct")]
            Self::Struct(_, _, _) => todo!(),
            // Convert the values and the field definitions so both stay in agreement.
            #[cfg(feature = "dtype-struct")]
            Self::StructOwned(values) => Self::StructOwned(Box::new((
                values.0.into_iter().map(|v| v.to_physical()).collect(),
                values
                    .1
                    .into_iter()
                    .map(|mut f| {
                        f.dtype = f.dtype.to_physical();
                        f
                    })
                    .collect(),
            ))),

            #[cfg(feature = "dtype-decimal")]
            Self::Decimal(v, _, _) => Self::Int128(v),
        }
    }
772
773 #[inline]
774 pub fn extract_bool(&self) -> Option<bool> {
775 match self {
776 AnyValue::Boolean(v) => Some(*v),
777 _ => None,
778 }
779 }
780
781 #[inline]
782 pub fn extract_str(&self) -> Option<&str> {
783 match self {
784 AnyValue::String(v) => Some(v),
785 AnyValue::StringOwned(v) => Some(v.as_str()),
786 _ => None,
787 }
788 }
789
790 #[inline]
791 pub fn extract_bytes(&self) -> Option<&[u8]> {
792 match self {
793 AnyValue::Binary(v) => Some(v),
794 AnyValue::BinaryOwned(v) => Some(v.as_slice()),
795 _ => None,
796 }
797 }
798}
799
800impl IsNull for AnyValue<'_> {
801 const HAS_NULLS: bool = true;
802 type Inner = Self;
803
804 fn is_null(&self) -> bool {
805 AnyValue::is_null(self)
806 }
807
808 fn unwrap_inner(self) -> Self::Inner {
809 self
810 }
811}
812
813impl From<AnyValue<'_>> for DataType {
814 fn from(value: AnyValue<'_>) -> Self {
815 value.dtype()
816 }
817}
818
819impl<'a> From<&AnyValue<'a>> for DataType {
820 fn from(value: &AnyValue<'a>) -> Self {
821 value.dtype()
822 }
823}
824
impl AnyValue<'_> {
    /// Feeds this value into `state`.
    ///
    /// The enum discriminant is hashed first, followed by the payload. Floats are hashed
    /// through their native-endian bytes. Categorical and enum values hash only the category
    /// id, object values hash nothing beyond the discriminant.
    ///
    /// With `cheap` set, potentially expensive payloads are skipped: lists and arrays are
    /// only hashed when they are shorter than `CHEAP_SERIES_HASH_LIMIT`, and borrowed
    /// structs are not materialized at all.
    pub fn hash_impl<H: Hasher>(&self, state: &mut H, cheap: bool) {
        use AnyValue::*;
        std::mem::discriminant(self).hash(state);
        match self {
            Int8(v) => v.hash(state),
            Int16(v) => v.hash(state),
            Int32(v) => v.hash(state),
            Int64(v) => v.hash(state),
            Int128(v) => feature_gated!("dtype-i128", v.hash(state)),
            UInt8(v) => v.hash(state),
            UInt16(v) => v.hash(state),
            UInt32(v) => v.hash(state),
            UInt64(v) => v.hash(state),
            UInt128(v) => feature_gated!("dtype-u128", v.hash(state)),
            String(v) => v.hash(state),
            StringOwned(v) => v.hash(state),
            Float16(v) => v.to_ne_bytes().hash(state),
            Float32(v) => v.to_ne_bytes().hash(state),
            Float64(v) => v.to_ne_bytes().hash(state),
            Binary(v) => v.hash(state),
            BinaryOwned(v) => v.hash(state),
            Boolean(v) => v.hash(state),
            List(v) => {
                if !cheap || v.len() < CHEAP_SERIES_HASH_LIMIT {
                    Hash::hash(&Wrap(v.clone()), state)
                }
            },
            #[cfg(feature = "dtype-array")]
            Array(v, width) => {
                if !cheap || v.len() < CHEAP_SERIES_HASH_LIMIT {
                    Hash::hash(&Wrap(v.clone()), state)
                }
                // The width is always hashed, even when the elements were skipped.
                width.hash(state)
            },
            #[cfg(feature = "dtype-date")]
            Date(v) => v.hash(state),
            #[cfg(feature = "dtype-datetime")]
            Datetime(v, tu, tz) => {
                v.hash(state);
                tu.hash(state);
                tz.hash(state);
            },
            #[cfg(feature = "dtype-datetime")]
            DatetimeOwned(v, tu, tz) => {
                v.hash(state);
                tu.hash(state);
                tz.hash(state);
            },
            // Note: `tz` is the duration's `TimeUnit` here, not a time zone.
            #[cfg(feature = "dtype-duration")]
            Duration(v, tz) => {
                v.hash(state);
                tz.hash(state);
            },
            #[cfg(feature = "dtype-time")]
            Time(v) => v.hash(state),
            #[cfg(feature = "dtype-categorical")]
            Categorical(v, _) | CategoricalOwned(v, _) | Enum(v, _) | EnumOwned(v, _) => {
                v.hash(state)
            },
            #[cfg(feature = "object")]
            Object(_) => {},
            #[cfg(feature = "object")]
            ObjectOwned(_) => {},
            #[cfg(feature = "dtype-struct")]
            Struct(_, _, _) => {
                if !cheap {
                    // Materialize the field values first, then hash them as a vector.
                    let mut buf = vec![];
                    self._materialize_struct_av(&mut buf);
                    buf.hash(state)
                }
            },
            #[cfg(feature = "dtype-struct")]
            StructOwned(v) => v.0.hash(state),
            // All three fields are hashed; the binding names `s` and `p` are in the opposite
            // order from the `(p, s)` naming that `dtype` uses for the same fields.
            #[cfg(feature = "dtype-decimal")]
            Decimal(v, s, p) => {
                v.hash(state);
                s.hash(state);
                p.hash(state);
            },
            Null => {},
        }
    }
}
909
910impl Hash for AnyValue<'_> {
911 fn hash<H: Hasher>(&self, state: &mut H) {
912 self.hash_impl(state, false)
913 }
914}
915
// Marker impl: declares equality on `AnyValue` to be a full equivalence relation. The
// `PartialEq` impl it builds on is not in this part of the file.
impl Eq for AnyValue<'_> {}
917
918impl<'a, T> From<Option<T>> for AnyValue<'a>
919where
920 T: Into<AnyValue<'a>>,
921{
922 #[inline]
923 fn from(a: Option<T>) -> Self {
924 match a {
925 None => AnyValue::Null,
926 Some(v) => v.into(),
927 }
928 }
929}
930
931impl<'a> AnyValue<'a> {
932 #[cfg(any(feature = "dtype-date", feature = "dtype-datetime"))]
933 pub(crate) fn as_date(&self) -> AnyValue<'static> {
934 match self {
935 #[cfg(feature = "dtype-date")]
936 AnyValue::Int32(v) => AnyValue::Date(*v),
937 AnyValue::Null => AnyValue::Null,
938 av => panic!("cannot create date from other type. dtype: {}", av.dtype()),
939 }
940 }
941
942 #[cfg(feature = "dtype-datetime")]
943 pub(crate) fn as_datetime(&self, tu: TimeUnit, tz: Option<&'a TimeZone>) -> AnyValue<'a> {
944 match self {
945 AnyValue::Int64(v) => AnyValue::Datetime(*v, tu, tz),
946 AnyValue::Null => AnyValue::Null,
947 av => panic!(
948 "cannot create datetime from other type. dtype: {}",
949 av.dtype()
950 ),
951 }
952 }
953
954 #[cfg(feature = "dtype-datetime")]
955 pub(crate) fn as_datetime_owned(
956 &self,
957 tu: TimeUnit,
958 tz: Option<Arc<TimeZone>>,
959 ) -> AnyValue<'static> {
960 match self {
961 AnyValue::Int64(v) => AnyValue::DatetimeOwned(*v, tu, tz),
962 AnyValue::Null => AnyValue::Null,
963 av => panic!(
964 "cannot create datetime from other type. dtype: {}",
965 av.dtype()
966 ),
967 }
968 }
969
970 #[cfg(feature = "dtype-duration")]
971 pub(crate) fn as_duration(&self, tu: TimeUnit) -> AnyValue<'static> {
972 match self {
973 AnyValue::Int64(v) => AnyValue::Duration(*v, tu),
974 AnyValue::Null => AnyValue::Null,
975 av => panic!(
976 "cannot create duration from other type. dtype: {}",
977 av.dtype()
978 ),
979 }
980 }
981
982 #[cfg(feature = "dtype-time")]
983 pub(crate) fn as_time(&self) -> AnyValue<'static> {
984 match self {
985 AnyValue::Int64(v) => AnyValue::Time(*v),
986 AnyValue::Null => AnyValue::Null,
987 av => panic!("cannot create time from other type. dtype: {}", av.dtype()),
988 }
989 }
990
991 pub(crate) fn to_i128(&self) -> Option<i128> {
992 match self {
993 AnyValue::UInt8(v) => Some((*v).into()),
994 AnyValue::UInt16(v) => Some((*v).into()),
995 AnyValue::UInt32(v) => Some((*v).into()),
996 AnyValue::UInt64(v) => Some((*v).into()),
997 AnyValue::Int8(v) => Some((*v).into()),
998 AnyValue::Int16(v) => Some((*v).into()),
999 AnyValue::Int32(v) => Some((*v).into()),
1000 AnyValue::Int64(v) => Some((*v).into()),
1001 AnyValue::Int128(v) => Some(*v),
1002 _ => None,
1003 }
1004 }
1005
1006 pub(crate) fn to_f64(&self) -> Option<f64> {
1007 match self {
1008 AnyValue::Float16(v) => Some((*v).into()),
1009 AnyValue::Float32(v) => Some((*v).into()),
1010 AnyValue::Float64(v) => Some(*v),
1011 _ => None,
1012 }
1013 }
1014
    /// Adds two values of the same kind and returns the sum as an owned value.
    ///
    /// `Null` acts as the identity: if either side is `Null`, the other side is returned
    /// (converted with `into_static`). Supported pairs are `Int32`, `Int64`, `UInt32`,
    /// `UInt64`, the three float widths, `Duration` and `Decimal`.
    ///
    /// # Panics
    ///
    /// Hits `unimplemented!` for any other pair of variants, for durations with different
    /// time units, and for decimals whose two `usize` parameters differ.
    #[must_use]
    pub fn add(&self, rhs: &AnyValue) -> AnyValue<'static> {
        use AnyValue::*;
        match (self, rhs) {
            (Null, r) => r.clone().into_static(),
            (l, Null) => l.clone().into_static(),
            (Int32(l), Int32(r)) => Int32(l + r),
            (Int64(l), Int64(r)) => Int64(l + r),
            (UInt32(l), UInt32(r)) => UInt32(l + r),
            (UInt64(l), UInt64(r)) => UInt64(l + r),
            (Float16(l), Float16(r)) => Float16(*l + *r),
            (Float32(l), Float32(r)) => Float32(l + r),
            (Float64(l), Float64(r)) => Float64(l + r),
            #[cfg(feature = "dtype-duration")]
            (Duration(l, lu), Duration(r, ru)) => {
                if lu != ru {
                    unimplemented!("adding durations with different units is not supported here");
                }

                Duration(l + r, *lu)
            },
            #[cfg(feature = "dtype-decimal")]
            (Decimal(l, lp, ls), Decimal(r, rp, rs)) => {
                if (lp, ls) != (rp, rs) {
                    unimplemented!(
                        "adding decimals with different precisions/scales is not supported here"
                    );
                }

                // With identical parameters the raw values can be added directly.
                Decimal(l + r, *lp, *ls)
            },
            _ => unimplemented!(),
        }
    }
1049
1050 #[inline]
1051 pub fn as_borrowed(&self) -> AnyValue<'_> {
1052 match self {
1053 AnyValue::BinaryOwned(data) => AnyValue::Binary(data),
1054 AnyValue::StringOwned(data) => AnyValue::String(data.as_str()),
1055 #[cfg(feature = "dtype-datetime")]
1056 AnyValue::DatetimeOwned(v, tu, tz) => {
1057 AnyValue::Datetime(*v, *tu, tz.as_ref().map(AsRef::as_ref))
1058 },
1059 #[cfg(feature = "dtype-categorical")]
1060 AnyValue::CategoricalOwned(cat, map) => AnyValue::Categorical(*cat, map),
1061 #[cfg(feature = "dtype-categorical")]
1062 AnyValue::EnumOwned(cat, map) => AnyValue::Enum(*cat, map),
1063 av => av.clone(),
1064 }
1065 }
1066
1067 #[inline]
1070 pub fn into_static(self) -> AnyValue<'static> {
1071 use AnyValue::*;
1072 match self {
1073 Null => Null,
1074 Int8(v) => Int8(v),
1075 Int16(v) => Int16(v),
1076 Int32(v) => Int32(v),
1077 Int64(v) => Int64(v),
1078 Int128(v) => Int128(v),
1079 UInt8(v) => UInt8(v),
1080 UInt16(v) => UInt16(v),
1081 UInt32(v) => UInt32(v),
1082 UInt64(v) => UInt64(v),
1083 UInt128(v) => UInt128(v),
1084 Boolean(v) => Boolean(v),
1085 Float16(v) => Float16(v),
1086 Float32(v) => Float32(v),
1087 Float64(v) => Float64(v),
1088 #[cfg(feature = "dtype-datetime")]
1089 Datetime(v, tu, tz) => DatetimeOwned(v, tu, tz.map(|v| Arc::new(v.clone()))),
1090 #[cfg(feature = "dtype-datetime")]
1091 DatetimeOwned(v, tu, tz) => DatetimeOwned(v, tu, tz),
1092 #[cfg(feature = "dtype-date")]
1093 Date(v) => Date(v),
1094 #[cfg(feature = "dtype-duration")]
1095 Duration(v, tu) => Duration(v, tu),
1096 #[cfg(feature = "dtype-time")]
1097 Time(v) => Time(v),
1098 List(v) => List(v),
1099 #[cfg(feature = "dtype-array")]
1100 Array(s, size) => Array(s, size),
1101 String(v) => StringOwned(PlSmallStr::from_str(v)),
1102 StringOwned(v) => StringOwned(v),
1103 Binary(v) => BinaryOwned(v.to_vec()),
1104 BinaryOwned(v) => BinaryOwned(v),
1105 #[cfg(feature = "object")]
1106 Object(v) => ObjectOwned(OwnedObject(v.to_boxed())),
1107 #[cfg(feature = "dtype-struct")]
1108 Struct(idx, arr, fields) => {
1109 let avs = struct_to_avs_static(idx, arr, fields);
1110 StructOwned(Box::new((avs, fields.to_vec())))
1111 },
1112 #[cfg(feature = "dtype-struct")]
1113 StructOwned(payload) => {
1114 let av = StructOwned(payload);
1115 unsafe { std::mem::transmute::<AnyValue<'a>, AnyValue<'static>>(av) }
1117 },
1118 #[cfg(feature = "object")]
1119 ObjectOwned(payload) => {
1120 let av = ObjectOwned(payload);
1121 unsafe { std::mem::transmute::<AnyValue<'a>, AnyValue<'static>>(av) }
1123 },
1124 #[cfg(feature = "dtype-decimal")]
1125 Decimal(val, s, p) => Decimal(val, s, p),
1126 #[cfg(feature = "dtype-categorical")]
1127 Categorical(cat, map) => CategoricalOwned(cat, map.clone()),
1128 #[cfg(feature = "dtype-categorical")]
1129 CategoricalOwned(cat, map) => CategoricalOwned(cat, map),
1130 #[cfg(feature = "dtype-categorical")]
1131 Enum(cat, map) => EnumOwned(cat, map.clone()),
1132 #[cfg(feature = "dtype-categorical")]
1133 EnumOwned(cat, map) => EnumOwned(cat, map),
1134 }
1135 }
1136
1137 pub fn get_str(&self) -> Option<&str> {
1139 match self {
1140 AnyValue::String(s) => Some(s),
1141 AnyValue::StringOwned(s) => Some(s.as_str()),
1142 #[cfg(feature = "dtype-categorical")]
1143 Self::Categorical(cat, map) | Self::Enum(cat, map) => {
1144 Some(unsafe { map.cat_to_str_unchecked(*cat) })
1145 },
1146 #[cfg(feature = "dtype-categorical")]
1147 Self::CategoricalOwned(cat, map) | Self::EnumOwned(cat, map) => {
1148 Some(unsafe { map.cat_to_str_unchecked(*cat) })
1149 },
1150 _ => None,
1151 }
1152 }
1153}
1154
1155impl<'a> From<AnyValue<'a>> for Option<i64> {
1156 fn from(val: AnyValue<'a>) -> Self {
1157 use AnyValue::*;
1158 match val {
1159 Null => None,
1160 Int32(v) => Some(v as i64),
1161 Int64(v) => Some(v),
1162 UInt32(v) => Some(v as i64),
1163 _ => todo!(),
1164 }
1165 }
1166}
1167
1168impl AnyValue<'_> {
1169 #[inline]
1170 pub fn eq_missing(&self, other: &Self, null_equal: bool) -> bool {
1171 fn struct_owned_value_iter<'a>(
1172 v: &'a (Vec<AnyValue<'_>>, Vec<Field>),
1173 ) -> impl ExactSizeIterator<Item = AnyValue<'a>> {
1174 v.0.iter().map(|v| v.as_borrowed())
1175 }
1176 fn struct_value_iter(
1177 idx: usize,
1178 arr: &StructArray,
1179 ) -> impl ExactSizeIterator<Item = AnyValue<'_>> {
1180 assert!(idx < arr.len());
1181
1182 arr.values().iter().map(move |field_arr| unsafe {
1183 field_arr.get_unchecked(idx)
1187 })
1188 }
1189
1190 fn struct_eq_missing<'a>(
1191 l: impl ExactSizeIterator<Item = AnyValue<'a>>,
1192 r: impl ExactSizeIterator<Item = AnyValue<'a>>,
1193 null_equal: bool,
1194 ) -> bool {
1195 if l.len() != r.len() {
1196 return false;
1197 }
1198
1199 l.zip(r).all(|(lv, rv)| lv.eq_missing(&rv, null_equal))
1200 }
1201
1202 use AnyValue::*;
1203 match (self, other) {
1204 (StringOwned(l), r) => AnyValue::String(l.as_str()) == *r,
1206 (BinaryOwned(l), r) => AnyValue::Binary(l.as_slice()) == *r,
1207 #[cfg(feature = "object")]
1208 (ObjectOwned(l), r) => AnyValue::Object(&*l.0) == *r,
1209 (l, StringOwned(r)) => *l == AnyValue::String(r.as_str()),
1210 (l, BinaryOwned(r)) => *l == AnyValue::Binary(r.as_slice()),
1211 #[cfg(feature = "object")]
1212 (l, ObjectOwned(r)) => *l == AnyValue::Object(&*r.0),
1213 #[cfg(feature = "dtype-datetime")]
1214 (DatetimeOwned(lv, ltu, ltz), r) => {
1215 Datetime(*lv, *ltu, ltz.as_ref().map(|v| v.as_ref())) == *r
1216 },
1217 #[cfg(feature = "dtype-datetime")]
1218 (l, DatetimeOwned(rv, rtu, rtz)) => {
1219 *l == Datetime(*rv, *rtu, rtz.as_ref().map(|v| v.as_ref()))
1220 },
1221 #[cfg(feature = "dtype-categorical")]
1222 (CategoricalOwned(cat, map), r) => Categorical(*cat, map) == *r,
1223 #[cfg(feature = "dtype-categorical")]
1224 (l, CategoricalOwned(cat, map)) => *l == Categorical(*cat, map),
1225 #[cfg(feature = "dtype-categorical")]
1226 (EnumOwned(cat, map), r) => Enum(*cat, map) == *r,
1227 #[cfg(feature = "dtype-categorical")]
1228 (l, EnumOwned(cat, map)) => *l == Enum(*cat, map),
1229
1230 (Null, Null) => null_equal,
1232 (Null, _) => false,
1233 (_, Null) => false,
1234
1235 (Boolean(l), Boolean(r)) => *l == *r,
1237 (UInt8(l), UInt8(r)) => *l == *r,
1238 (UInt16(l), UInt16(r)) => *l == *r,
1239 (UInt32(l), UInt32(r)) => *l == *r,
1240 (UInt64(l), UInt64(r)) => *l == *r,
1241 (UInt128(l), UInt128(r)) => *l == *r,
1242 (Int8(l), Int8(r)) => *l == *r,
1243 (Int16(l), Int16(r)) => *l == *r,
1244 (Int32(l), Int32(r)) => *l == *r,
1245 (Int64(l), Int64(r)) => *l == *r,
1246 (Int128(l), Int128(r)) => *l == *r,
1247 (Float16(l), Float16(r)) => l.to_total_ord() == r.to_total_ord(),
1248 (Float32(l), Float32(r)) => l.to_total_ord() == r.to_total_ord(),
1249 (Float64(l), Float64(r)) => l.to_total_ord() == r.to_total_ord(),
1250 (String(l), String(r)) => l == r,
1251 (Binary(l), Binary(r)) => l == r,
1252 #[cfg(feature = "dtype-time")]
1253 (Time(l), Time(r)) => *l == *r,
1254 #[cfg(all(feature = "dtype-datetime", feature = "dtype-date"))]
1255 (Date(l), Date(r)) => *l == *r,
1256 #[cfg(all(feature = "dtype-datetime", feature = "dtype-date"))]
1257 (Datetime(l, tul, tzl), Datetime(r, tur, tzr)) => {
1258 *l == *r && *tul == *tur && tzl == tzr
1259 },
1260 (List(l), List(r)) => l == r,
1261 #[cfg(feature = "dtype-categorical")]
1262 (Categorical(cat_l, map_l), Categorical(cat_r, map_r)) => {
1263 if !Arc::ptr_eq(map_l, map_r) {
1264 unimplemented!(
1267 "comparing categoricals with different Categories is not supported through AnyValue"
1268 );
1269 }
1270
1271 cat_l == cat_r
1272 },
1273 #[cfg(feature = "dtype-categorical")]
1274 (Enum(cat_l, map_l), Enum(cat_r, map_r)) => {
1275 if !Arc::ptr_eq(map_l, map_r) {
1276 unimplemented!(
1279 "comparing enums with different FrozenCategories is not supported through AnyValue"
1280 );
1281 }
1282
1283 cat_l == cat_r
1284 },
1285 #[cfg(feature = "dtype-duration")]
1286 (Duration(l, tu_l), Duration(r, tu_r)) => l == r && tu_l == tu_r,
1287
1288 #[cfg(feature = "dtype-struct")]
1289 (StructOwned(l), StructOwned(r)) => struct_eq_missing(
1290 struct_owned_value_iter(l.as_ref()),
1291 struct_owned_value_iter(r.as_ref()),
1292 null_equal,
1293 ),
1294 #[cfg(feature = "dtype-struct")]
1295 (StructOwned(l), Struct(idx, arr, _)) => struct_eq_missing(
1296 struct_owned_value_iter(l.as_ref()),
1297 struct_value_iter(*idx, arr),
1298 null_equal,
1299 ),
1300 #[cfg(feature = "dtype-struct")]
1301 (Struct(idx, arr, _), StructOwned(r)) => struct_eq_missing(
1302 struct_value_iter(*idx, arr),
1303 struct_owned_value_iter(r.as_ref()),
1304 null_equal,
1305 ),
1306 #[cfg(feature = "dtype-struct")]
1307 (Struct(l_idx, l_arr, _), Struct(r_idx, r_arr, _)) => struct_eq_missing(
1308 struct_value_iter(*l_idx, l_arr),
1309 struct_value_iter(*r_idx, r_arr),
1310 null_equal,
1311 ),
1312 #[cfg(feature = "dtype-decimal")]
1313 (Decimal(lv, _lp, ls), Decimal(rv, _rp, rs)) => dec128_eq(*lv, *ls, *rv, *rs),
1314 #[cfg(feature = "object")]
1315 (Object(l), Object(r)) => l == r,
1316 #[cfg(feature = "dtype-array")]
1317 (Array(l_values, l_size), Array(r_values, r_size)) => {
1318 if l_size != r_size {
1319 return false;
1320 }
1321
1322 debug_assert_eq!(l_values.len(), *l_size);
1323 debug_assert_eq!(r_values.len(), *r_size);
1324
1325 let mut is_equal = true;
1326 for i in 0..*l_size {
1327 let l = unsafe { l_values.get_unchecked(i) };
1328 let r = unsafe { r_values.get_unchecked(i) };
1329
1330 is_equal &= l.eq_missing(&r, null_equal);
1331 }
1332 is_equal
1333 },
1334
1335 (l, r) if l.to_i128().is_some() && r.to_i128().is_some() => l.to_i128() == r.to_i128(),
1336 (l, r) if l.to_f64().is_some() && r.to_f64().is_some() => {
1337 l.to_f64().unwrap().to_total_ord() == r.to_f64().unwrap().to_total_ord()
1338 },
1339
1340 (_, _) => false,
1341 }
1342 }
1343}
1344
1345impl PartialEq for AnyValue<'_> {
1346 #[inline]
1347 fn eq(&self, other: &Self) -> bool {
1348 self.eq_missing(other, true)
1349 }
1350}
1351
1352impl PartialOrd for AnyValue<'_> {
1353 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1355 use AnyValue::*;
1356 match (self, &other) {
1357 (StringOwned(l), r) => AnyValue::String(l.as_str()).partial_cmp(r),
1359 (BinaryOwned(l), r) => AnyValue::Binary(l.as_slice()).partial_cmp(r),
1360 #[cfg(feature = "object")]
1361 (ObjectOwned(l), r) => AnyValue::Object(&*l.0).partial_cmp(r),
1362 (l, StringOwned(r)) => l.partial_cmp(&AnyValue::String(r.as_str())),
1363 (l, BinaryOwned(r)) => l.partial_cmp(&AnyValue::Binary(r.as_slice())),
1364 #[cfg(feature = "object")]
1365 (l, ObjectOwned(r)) => l.partial_cmp(&AnyValue::Object(&*r.0)),
1366 #[cfg(feature = "dtype-datetime")]
1367 (DatetimeOwned(lv, ltu, ltz), r) => {
1368 Datetime(*lv, *ltu, ltz.as_ref().map(|v| v.as_ref())).partial_cmp(r)
1369 },
1370 #[cfg(feature = "dtype-datetime")]
1371 (l, DatetimeOwned(rv, rtu, rtz)) => {
1372 l.partial_cmp(&Datetime(*rv, *rtu, rtz.as_ref().map(|v| v.as_ref())))
1373 },
1374 #[cfg(feature = "dtype-categorical")]
1375 (CategoricalOwned(cat, map), r) => Categorical(*cat, map).partial_cmp(r),
1376 #[cfg(feature = "dtype-categorical")]
1377 (l, CategoricalOwned(cat, map)) => l.partial_cmp(&Categorical(*cat, map)),
1378 #[cfg(feature = "dtype-categorical")]
1379 (EnumOwned(cat, map), r) => Enum(*cat, map).partial_cmp(r),
1380 #[cfg(feature = "dtype-categorical")]
1381 (l, EnumOwned(cat, map)) => l.partial_cmp(&Enum(*cat, map)),
1382
1383 (Null, Null) => Some(Ordering::Equal),
1385 (Null, _) => Some(Ordering::Less),
1386 (_, Null) => Some(Ordering::Greater),
1387
1388 (Boolean(l), Boolean(r)) => l.partial_cmp(r),
1390 (UInt8(l), UInt8(r)) => l.partial_cmp(r),
1391 (UInt16(l), UInt16(r)) => l.partial_cmp(r),
1392 (UInt32(l), UInt32(r)) => l.partial_cmp(r),
1393 (UInt64(l), UInt64(r)) => l.partial_cmp(r),
1394 (UInt128(l), UInt128(r)) => l.partial_cmp(r),
1395 (Int8(l), Int8(r)) => l.partial_cmp(r),
1396 (Int16(l), Int16(r)) => l.partial_cmp(r),
1397 (Int32(l), Int32(r)) => l.partial_cmp(r),
1398 (Int64(l), Int64(r)) => l.partial_cmp(r),
1399 (Int128(l), Int128(r)) => l.partial_cmp(r),
1400 (Float16(l), Float16(r)) => Some(l.tot_cmp(r)),
1401 (Float32(l), Float32(r)) => Some(l.tot_cmp(r)),
1402 (Float64(l), Float64(r)) => Some(l.tot_cmp(r)),
1403 (String(l), String(r)) => l.partial_cmp(r),
1404 (Binary(l), Binary(r)) => l.partial_cmp(r),
1405 #[cfg(feature = "dtype-date")]
1406 (Date(l), Date(r)) => l.partial_cmp(r),
1407 #[cfg(feature = "dtype-datetime")]
1408 (Datetime(lt, lu, lz), Datetime(rt, ru, rz)) => {
1409 if lu != ru || lz != rz {
1410 unimplemented!(
1411 "comparing datetimes with different units or timezones is not supported"
1412 );
1413 }
1414
1415 lt.partial_cmp(rt)
1416 },
1417 #[cfg(feature = "dtype-duration")]
1418 (Duration(lt, lu), Duration(rt, ru)) => {
1419 if lu != ru {
1420 unimplemented!("comparing durations with different units is not supported");
1421 }
1422
1423 lt.partial_cmp(rt)
1424 },
1425 #[cfg(feature = "dtype-time")]
1426 (Time(l), Time(r)) => l.partial_cmp(r),
1427 #[cfg(feature = "dtype-categorical")]
1428 (Categorical(l_cat, l_map), Categorical(r_cat, r_map)) => unsafe {
1429 let l_str = l_map.cat_to_str_unchecked(*l_cat);
1430 let r_str = r_map.cat_to_str_unchecked(*r_cat);
1431 l_str.partial_cmp(r_str)
1432 },
1433 #[cfg(feature = "dtype-categorical")]
1434 (Enum(l_cat, l_map), Enum(r_cat, r_map)) => {
1435 if !Arc::ptr_eq(l_map, r_map) {
1436 unimplemented!("can't order enums from different FrozenCategories")
1437 }
1438 l_cat.partial_cmp(r_cat)
1439 },
1440 (List(_), List(_)) => {
1441 unimplemented!("ordering for List dtype is not supported")
1442 },
1443 #[cfg(feature = "dtype-array")]
1444 (Array(..), Array(..)) => {
1445 unimplemented!("ordering for Array dtype is not supported")
1446 },
1447 #[cfg(feature = "object")]
1448 (Object(_), Object(_)) => {
1449 unimplemented!("ordering for Object dtype is not supported")
1450 },
1451 #[cfg(feature = "dtype-struct")]
1452 (StructOwned(_), StructOwned(_))
1453 | (StructOwned(_), Struct(..))
1454 | (Struct(..), StructOwned(_))
1455 | (Struct(..), Struct(..)) => {
1456 unimplemented!("ordering for Struct dtype is not supported")
1457 },
1458 #[cfg(feature = "dtype-decimal")]
1459 (Decimal(lv, _lp, ls), Decimal(rv, _rp, rs)) => Some(dec128_cmp(*lv, *ls, *rv, *rs)),
1460
1461 (_, _) => {
1462 unimplemented!(
1463 "scalar ordering for mixed dtypes {self:?} and {other:?} is not supported"
1464 )
1465 },
1466 }
1467 }
1468}
1469
1470impl TotalEq for AnyValue<'_> {
1471 #[inline]
1472 fn tot_eq(&self, other: &Self) -> bool {
1473 self.eq_missing(other, true)
1474 }
1475}
1476
/// Reads row `idx` of a struct array and returns one owned (`'static`) value per field.
///
/// Each field array is paired with the matching entry of `fields`, whose dtype is
/// passed to `arr_to_any_value`.
///
/// # Panics
///
/// Panics if `idx` is not smaller than `arr.len()`.
#[cfg(feature = "dtype-struct")]
fn struct_to_avs_static(idx: usize, arr: &StructArray, fields: &[Field]) -> Vec<AnyValue<'static>> {
    assert!(idx < arr.len());

    let columns = arr.values();

    debug_assert_eq!(columns.len(), fields.len());

    let mut row = Vec::with_capacity(columns.len().min(fields.len()));
    for (column, field) in columns.iter().zip(fields) {
        // NOTE(review): `idx` was checked against the struct array above; presumably
        // every field array has that same length.
        let value = unsafe { arr_to_any_value(column.as_ref(), idx, &field.dtype) };
        row.push(value.into_static());
    }
    row
}
1495
/// Unchecked access to a single element of an array as an [`AnyValue`].
pub trait GetAnyValue {
    /// Returns the element at `index` as an [`AnyValue`] borrowing from `self`.
    ///
    /// # Safety
    ///
    /// NOTE(review): presumably `index` must be in bounds for `self`; the caller in
    /// this file asserts `idx < arr.len()` before calling - confirm the full contract.
    unsafe fn get_unchecked(&self, index: usize) -> AnyValue<'_>;
}
1502
1503impl GetAnyValue for ArrayRef {
1504 unsafe fn get_unchecked(&self, index: usize) -> AnyValue<'_> {
1506 match self.dtype() {
1507 ArrowDataType::Int8 => {
1508 let arr = self
1509 .as_any()
1510 .downcast_ref::<PrimitiveArray<i8>>()
1511 .unwrap_unchecked();
1512 match arr.get_unchecked(index) {
1513 None => AnyValue::Null,
1514 Some(v) => AnyValue::Int8(v),
1515 }
1516 },
1517 ArrowDataType::Int16 => {
1518 let arr = self
1519 .as_any()
1520 .downcast_ref::<PrimitiveArray<i16>>()
1521 .unwrap_unchecked();
1522 match arr.get_unchecked(index) {
1523 None => AnyValue::Null,
1524 Some(v) => AnyValue::Int16(v),
1525 }
1526 },
1527 ArrowDataType::Int32 => {
1528 let arr = self
1529 .as_any()
1530 .downcast_ref::<PrimitiveArray<i32>>()
1531 .unwrap_unchecked();
1532 match arr.get_unchecked(index) {
1533 None => AnyValue::Null,
1534 Some(v) => AnyValue::Int32(v),
1535 }
1536 },
1537 ArrowDataType::Int64 => {
1538 let arr = self
1539 .as_any()
1540 .downcast_ref::<PrimitiveArray<i64>>()
1541 .unwrap_unchecked();
1542 match arr.get_unchecked(index) {
1543 None => AnyValue::Null,
1544 Some(v) => AnyValue::Int64(v),
1545 }
1546 },
1547 ArrowDataType::Int128 => {
1548 let arr = self
1549 .as_any()
1550 .downcast_ref::<PrimitiveArray<i128>>()
1551 .unwrap_unchecked();
1552 match arr.get_unchecked(index) {
1553 None => AnyValue::Null,
1554 Some(v) => AnyValue::Int128(v),
1555 }
1556 },
1557 ArrowDataType::UInt8 => {
1558 let arr = self
1559 .as_any()
1560 .downcast_ref::<PrimitiveArray<u8>>()
1561 .unwrap_unchecked();
1562 match arr.get_unchecked(index) {
1563 None => AnyValue::Null,
1564 Some(v) => AnyValue::UInt8(v),
1565 }
1566 },
1567 ArrowDataType::UInt16 => {
1568 let arr = self
1569 .as_any()
1570 .downcast_ref::<PrimitiveArray<u16>>()
1571 .unwrap_unchecked();
1572 match arr.get_unchecked(index) {
1573 None => AnyValue::Null,
1574 Some(v) => AnyValue::UInt16(v),
1575 }
1576 },
1577 ArrowDataType::UInt32 => {
1578 let arr = self
1579 .as_any()
1580 .downcast_ref::<PrimitiveArray<u32>>()
1581 .unwrap_unchecked();
1582 match arr.get_unchecked(index) {
1583 None => AnyValue::Null,
1584 Some(v) => AnyValue::UInt32(v),
1585 }
1586 },
1587 ArrowDataType::UInt64 => {
1588 let arr = self
1589 .as_any()
1590 .downcast_ref::<PrimitiveArray<u64>>()
1591 .unwrap_unchecked();
1592 match arr.get_unchecked(index) {
1593 None => AnyValue::Null,
1594 Some(v) => AnyValue::UInt64(v),
1595 }
1596 },
1597 ArrowDataType::UInt128 => {
1598 let arr = self
1599 .as_any()
1600 .downcast_ref::<PrimitiveArray<u128>>()
1601 .unwrap_unchecked();
1602 match arr.get_unchecked(index) {
1603 None => AnyValue::Null,
1604 Some(v) => AnyValue::UInt128(v),
1605 }
1606 },
1607 ArrowDataType::Float16 => {
1608 let arr = self
1609 .as_any()
1610 .downcast_ref::<PrimitiveArray<pf16>>()
1611 .unwrap_unchecked();
1612 match arr.get_unchecked(index) {
1613 None => AnyValue::Null,
1614 Some(v) => AnyValue::Float16(v),
1615 }
1616 },
1617 ArrowDataType::Float32 => {
1618 let arr = self
1619 .as_any()
1620 .downcast_ref::<PrimitiveArray<f32>>()
1621 .unwrap_unchecked();
1622 match arr.get_unchecked(index) {
1623 None => AnyValue::Null,
1624 Some(v) => AnyValue::Float32(v),
1625 }
1626 },
1627 ArrowDataType::Float64 => {
1628 let arr = self
1629 .as_any()
1630 .downcast_ref::<PrimitiveArray<f64>>()
1631 .unwrap_unchecked();
1632 match arr.get_unchecked(index) {
1633 None => AnyValue::Null,
1634 Some(v) => AnyValue::Float64(v),
1635 }
1636 },
1637 ArrowDataType::Boolean => {
1638 let arr = self
1639 .as_any()
1640 .downcast_ref::<BooleanArray>()
1641 .unwrap_unchecked();
1642 match arr.get_unchecked(index) {
1643 None => AnyValue::Null,
1644 Some(v) => AnyValue::Boolean(v),
1645 }
1646 },
1647 ArrowDataType::LargeUtf8 => {
1648 let arr = self
1649 .as_any()
1650 .downcast_ref::<LargeStringArray>()
1651 .unwrap_unchecked();
1652 match arr.get_unchecked(index) {
1653 None => AnyValue::Null,
1654 Some(v) => AnyValue::String(v),
1655 }
1656 },
1657 _ => unimplemented!(),
1658 }
1659 }
1660}
1661
1662impl<K: NumericNative> From<K> for AnyValue<'static> {
1663 fn from(value: K) -> Self {
1664 unsafe {
1665 match K::PRIMITIVE {
1666 PrimitiveType::Int8 => AnyValue::Int8(NumCast::from(value).unwrap_unchecked()),
1667 PrimitiveType::Int16 => AnyValue::Int16(NumCast::from(value).unwrap_unchecked()),
1668 PrimitiveType::Int32 => AnyValue::Int32(NumCast::from(value).unwrap_unchecked()),
1669 PrimitiveType::Int64 => AnyValue::Int64(NumCast::from(value).unwrap_unchecked()),
1670 PrimitiveType::Int128 => AnyValue::Int128(NumCast::from(value).unwrap_unchecked()),
1671 PrimitiveType::UInt8 => AnyValue::UInt8(NumCast::from(value).unwrap_unchecked()),
1672 PrimitiveType::UInt16 => AnyValue::UInt16(NumCast::from(value).unwrap_unchecked()),
1673 PrimitiveType::UInt32 => AnyValue::UInt32(NumCast::from(value).unwrap_unchecked()),
1674 PrimitiveType::UInt64 => AnyValue::UInt64(NumCast::from(value).unwrap_unchecked()),
1675 PrimitiveType::UInt128 => {
1676 AnyValue::UInt128(NumCast::from(value).unwrap_unchecked())
1677 },
1678 PrimitiveType::Float16 => {
1679 AnyValue::Float16(NumCast::from(value).unwrap_unchecked())
1680 },
1681 PrimitiveType::Float32 => {
1682 AnyValue::Float32(NumCast::from(value).unwrap_unchecked())
1683 },
1684 PrimitiveType::Float64 => {
1685 AnyValue::Float64(NumCast::from(value).unwrap_unchecked())
1686 },
1687 _ => unreachable!(),
1689 }
1690 }
1691 }
1692}
1693
1694impl<'a> From<&'a [u8]> for AnyValue<'a> {
1695 fn from(value: &'a [u8]) -> Self {
1696 AnyValue::Binary(value)
1697 }
1698}
1699
1700impl<'a> From<&'a str> for AnyValue<'a> {
1701 fn from(value: &'a str) -> Self {
1702 AnyValue::String(value)
1703 }
1704}
1705
1706impl From<bool> for AnyValue<'static> {
1707 fn from(value: bool) -> Self {
1708 AnyValue::Boolean(value)
1709 }
1710}
1711
#[cfg(test)]
mod test {
    #[cfg(feature = "dtype-categorical")]
    use super::*;

    /// Checks that `DataType::from_arrow_dtype` maps each listed Arrow dtype to the
    /// expected Polars dtype.
    #[test]
    #[cfg(feature = "dtype-categorical")]
    fn test_arrow_dtypes_to_polars() {
        // Small builders for the repetitive (arrow dtype, expected dtype) pairs.
        let duration = |arrow_unit: ArrowTimeUnit, unit: TimeUnit| {
            (ArrowDataType::Duration(arrow_unit), DataType::Duration(unit))
        };
        let naive_timestamp = |arrow_unit: ArrowTimeUnit, unit: TimeUnit| {
            (
                ArrowDataType::Timestamp(arrow_unit, None),
                DataType::Datetime(unit, None),
            )
        };
        let time64 = |unit: ArrowTimeUnit| (ArrowDataType::Time64(unit), DataType::Time);
        let time32 = |unit: ArrowTimeUnit| (ArrowDataType::Time32(unit), DataType::Time);
        let float_item = || {
            Box::new(ArrowField::new(
                LIST_VALUES_NAME,
                ArrowDataType::Float64,
                true,
            ))
        };
        let float_list = || DataType::List(DataType::Float64.into());

        let cases = [
            duration(ArrowTimeUnit::Nanosecond, TimeUnit::Nanoseconds),
            duration(ArrowTimeUnit::Millisecond, TimeUnit::Milliseconds),
            (
                ArrowDataType::Date64,
                DataType::Datetime(TimeUnit::Milliseconds, None),
            ),
            naive_timestamp(ArrowTimeUnit::Nanosecond, TimeUnit::Nanoseconds),
            naive_timestamp(ArrowTimeUnit::Microsecond, TimeUnit::Microseconds),
            naive_timestamp(ArrowTimeUnit::Millisecond, TimeUnit::Milliseconds),
            naive_timestamp(ArrowTimeUnit::Second, TimeUnit::Milliseconds),
            (
                ArrowDataType::Timestamp(ArrowTimeUnit::Second, Some(PlSmallStr::EMPTY)),
                DataType::Datetime(TimeUnit::Milliseconds, None),
            ),
            (ArrowDataType::LargeUtf8, DataType::String),
            (ArrowDataType::Utf8, DataType::String),
            (ArrowDataType::LargeBinary, DataType::Binary),
            (ArrowDataType::Binary, DataType::Binary),
            time64(ArrowTimeUnit::Nanosecond),
            time64(ArrowTimeUnit::Millisecond),
            time64(ArrowTimeUnit::Microsecond),
            time64(ArrowTimeUnit::Second),
            time32(ArrowTimeUnit::Nanosecond),
            time32(ArrowTimeUnit::Millisecond),
            time32(ArrowTimeUnit::Microsecond),
            time32(ArrowTimeUnit::Second),
            (ArrowDataType::List(float_item()), float_list()),
            (ArrowDataType::LargeList(float_item()), float_list()),
        ];

        for (arrow_dtype, expected) in cases {
            let converted = DataType::from_arrow_dtype(&arrow_dtype);

            assert_eq!(expected, converted);
        }
    }
}