polars_core/series/
any_value.rs

1use std::fmt::Write;
2
3use arrow::bitmap::MutableBitmap;
4
5use crate::chunked_array::builder::{AnonymousOwnedListBuilder, get_list_builder};
6use crate::prelude::*;
7use crate::utils::any_values_to_supertype;
8
9impl<'a, T: AsRef<[AnyValue<'a>]>> NamedFrom<T, [AnyValue<'a>]> for Series {
10    /// Construct a new [`Series`] from a collection of [`AnyValue`].
11    ///
12    /// # Panics
13    ///
14    /// Panics if the values do not all share the same data type (with the exception
15    /// of [`DataType::Null`], which is always allowed).
16    ///
17    /// [`AnyValue`]: crate::datatypes::AnyValue
18    fn new(name: PlSmallStr, values: T) -> Self {
19        let values = values.as_ref();
20        Series::from_any_values(name, values, true).expect("data types of values should match")
21    }
22}
23
24impl Series {
25    /// Construct a new [`Series`] from a slice of AnyValues.
26    ///
27    /// The data type of the resulting Series is determined by the `values`
28    /// and the `strict` parameter:
29    /// - If `strict` is `true`, the data type is equal to the data type of the
30    ///   first non-null value. If any other non-null values do not match this
31    ///   data type, an error is raised.
32    /// - If `strict` is `false`, the data type is the supertype of the `values`.
33    ///   An error is returned if no supertype can be determined.
34    ///   **WARNING**: A full pass over the values is required to determine the supertype.
35    /// - If no values were passed, the resulting data type is `Null`.
36    pub fn from_any_values(
37        name: PlSmallStr,
38        values: &[AnyValue],
39        strict: bool,
40    ) -> PolarsResult<Self> {
41        fn get_first_non_null_dtype(values: &[AnyValue]) -> DataType {
42            let mut all_flat_null = true;
43            let first_non_null = values.iter().find(|av| {
44                if !av.is_null() {
45                    all_flat_null = false
46                };
47                !av.is_nested_null()
48            });
49            match first_non_null {
50                Some(av) => av.dtype(),
51                None => {
52                    if all_flat_null {
53                        DataType::Null
54                    } else {
55                        // Second pass to check for the nested null value that
56                        // toggled `all_flat_null` to false, e.g. a List(Null).
57                        let first_nested_null = values.iter().find(|av| !av.is_null()).unwrap();
58                        first_nested_null.dtype()
59                    }
60                },
61            }
62        }
63        let dtype = if strict {
64            get_first_non_null_dtype(values)
65        } else {
66            // Currently does not work correctly for Decimal because equality is not implemented.
67            any_values_to_supertype(values)?
68        };
69
70        // TODO: Remove this when Decimal data type equality is implemented.
71        #[cfg(feature = "dtype-decimal")]
72        if dtype.is_decimal() {
73            let dtype = DataType::Decimal(None, None);
74            return Self::from_any_values_and_dtype(name, values, &dtype, strict);
75        }
76
77        Self::from_any_values_and_dtype(name, values, &dtype, strict)
78    }
79
80    /// Construct a new [`Series`] with the given `dtype` from a slice of AnyValues.
81    ///
82    /// If `strict` is `true`, an error is returned if the values do not match the given
83    /// data type. If `strict` is `false`, values that do not match the given data type
84    /// are cast. If casting is not possible, the values are set to null instead.
85    pub fn from_any_values_and_dtype(
86        name: PlSmallStr,
87        values: &[AnyValue],
88        dtype: &DataType,
89        strict: bool,
90    ) -> PolarsResult<Self> {
91        if values.is_empty() {
92            return Ok(Self::new_empty(name, dtype));
93        }
94
95        let mut s = match dtype {
96            #[cfg(feature = "dtype-i8")]
97            DataType::Int8 => any_values_to_integer::<Int8Type>(values, strict)?.into_series(),
98            #[cfg(feature = "dtype-i16")]
99            DataType::Int16 => any_values_to_integer::<Int16Type>(values, strict)?.into_series(),
100            DataType::Int32 => any_values_to_integer::<Int32Type>(values, strict)?.into_series(),
101            DataType::Int64 => any_values_to_integer::<Int64Type>(values, strict)?.into_series(),
102            #[cfg(feature = "dtype-i128")]
103            DataType::Int128 => any_values_to_integer::<Int128Type>(values, strict)?.into_series(),
104            #[cfg(feature = "dtype-u8")]
105            DataType::UInt8 => any_values_to_integer::<UInt8Type>(values, strict)?.into_series(),
106            #[cfg(feature = "dtype-u16")]
107            DataType::UInt16 => any_values_to_integer::<UInt16Type>(values, strict)?.into_series(),
108            DataType::UInt32 => any_values_to_integer::<UInt32Type>(values, strict)?.into_series(),
109            DataType::UInt64 => any_values_to_integer::<UInt64Type>(values, strict)?.into_series(),
110            DataType::Float32 => any_values_to_f32(values, strict)?.into_series(),
111            DataType::Float64 => any_values_to_f64(values, strict)?.into_series(),
112            DataType::Boolean => any_values_to_bool(values, strict)?.into_series(),
113            DataType::String => any_values_to_string(values, strict)?.into_series(),
114            DataType::Binary => any_values_to_binary(values, strict)?.into_series(),
115            #[cfg(feature = "dtype-date")]
116            DataType::Date => any_values_to_date(values, strict)?.into_series(),
117            #[cfg(feature = "dtype-time")]
118            DataType::Time => any_values_to_time(values, strict)?.into_series(),
119            #[cfg(feature = "dtype-datetime")]
120            DataType::Datetime(tu, tz) => {
121                any_values_to_datetime(values, *tu, (*tz).clone(), strict)?.into_series()
122            },
123            #[cfg(feature = "dtype-duration")]
124            DataType::Duration(tu) => any_values_to_duration(values, *tu, strict)?.into_series(),
125            #[cfg(feature = "dtype-categorical")]
126            dt @ DataType::Categorical(_, _) => any_values_to_categorical(values, dt, strict)?,
127            #[cfg(feature = "dtype-categorical")]
128            dt @ DataType::Enum(_, _) => any_values_to_enum(values, dt, strict)?,
129            #[cfg(feature = "dtype-decimal")]
130            DataType::Decimal(precision, scale) => {
131                any_values_to_decimal(values, *precision, *scale, strict)?.into_series()
132            },
133            DataType::List(inner) => any_values_to_list(values, inner, strict)?.into_series(),
134            #[cfg(feature = "dtype-array")]
135            DataType::Array(inner, size) => any_values_to_array(values, inner, strict, *size)?
136                .into_series()
137                .cast(&DataType::Array(inner.clone(), *size))?,
138            #[cfg(feature = "dtype-struct")]
139            DataType::Struct(fields) => any_values_to_struct(values, fields, strict)?,
140            #[cfg(feature = "object")]
141            DataType::Object(_) => any_values_to_object(values)?,
142            DataType::Null => Series::new_null(PlSmallStr::EMPTY, values.len()),
143            dt => {
144                polars_bail!(
145                    InvalidOperation:
146                    "constructing a Series with data type {dt:?} from AnyValues is not supported"
147                )
148            },
149        };
150        s.rename(name);
151        Ok(s)
152    }
153}
154
155fn any_values_to_primitive_nonstrict<T: PolarsNumericType>(values: &[AnyValue]) -> ChunkedArray<T> {
156    values
157        .iter()
158        .map(|av| av.extract::<T::Native>())
159        .collect_trusted()
160}
161
162fn any_values_to_integer<T: PolarsIntegerType>(
163    values: &[AnyValue],
164    strict: bool,
165) -> PolarsResult<ChunkedArray<T>> {
166    fn any_values_to_integer_strict<T: PolarsIntegerType>(
167        values: &[AnyValue],
168    ) -> PolarsResult<ChunkedArray<T>> {
169        let mut builder = PrimitiveChunkedBuilder::<T>::new(PlSmallStr::EMPTY, values.len());
170        for av in values {
171            match &av {
172                av if av.is_integer() => {
173                    let opt_val = av.extract::<T::Native>();
174                    let val = match opt_val {
175                        Some(v) => v,
176                        None => return Err(invalid_value_error(&T::get_dtype(), av)),
177                    };
178                    builder.append_value(val)
179                },
180                AnyValue::Null => builder.append_null(),
181                av => return Err(invalid_value_error(&T::get_dtype(), av)),
182            }
183        }
184        Ok(builder.finish())
185    }
186
187    if strict {
188        any_values_to_integer_strict::<T>(values)
189    } else {
190        Ok(any_values_to_primitive_nonstrict::<T>(values))
191    }
192}
193
194fn any_values_to_f32(values: &[AnyValue], strict: bool) -> PolarsResult<Float32Chunked> {
195    fn any_values_to_f32_strict(values: &[AnyValue]) -> PolarsResult<Float32Chunked> {
196        let mut builder =
197            PrimitiveChunkedBuilder::<Float32Type>::new(PlSmallStr::EMPTY, values.len());
198        for av in values {
199            match av {
200                AnyValue::Float32(i) => builder.append_value(*i),
201                AnyValue::Null => builder.append_null(),
202                av => return Err(invalid_value_error(&DataType::Float32, av)),
203            }
204        }
205        Ok(builder.finish())
206    }
207    if strict {
208        any_values_to_f32_strict(values)
209    } else {
210        Ok(any_values_to_primitive_nonstrict::<Float32Type>(values))
211    }
212}
213fn any_values_to_f64(values: &[AnyValue], strict: bool) -> PolarsResult<Float64Chunked> {
214    fn any_values_to_f64_strict(values: &[AnyValue]) -> PolarsResult<Float64Chunked> {
215        let mut builder =
216            PrimitiveChunkedBuilder::<Float64Type>::new(PlSmallStr::EMPTY, values.len());
217        for av in values {
218            match av {
219                AnyValue::Float64(i) => builder.append_value(*i),
220                AnyValue::Float32(i) => builder.append_value(*i as f64),
221                AnyValue::Null => builder.append_null(),
222                av => return Err(invalid_value_error(&DataType::Float64, av)),
223            }
224        }
225        Ok(builder.finish())
226    }
227    if strict {
228        any_values_to_f64_strict(values)
229    } else {
230        Ok(any_values_to_primitive_nonstrict::<Float64Type>(values))
231    }
232}
233
234fn any_values_to_bool(values: &[AnyValue], strict: bool) -> PolarsResult<BooleanChunked> {
235    let mut builder = BooleanChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
236    for av in values {
237        match av {
238            AnyValue::Boolean(b) => builder.append_value(*b),
239            AnyValue::Null => builder.append_null(),
240            av => {
241                if strict {
242                    return Err(invalid_value_error(&DataType::Boolean, av));
243                }
244                match av.cast(&DataType::Boolean) {
245                    AnyValue::Boolean(b) => builder.append_value(b),
246                    _ => builder.append_null(),
247                }
248            },
249        }
250    }
251    Ok(builder.finish())
252}
253
254fn any_values_to_string(values: &[AnyValue], strict: bool) -> PolarsResult<StringChunked> {
255    fn any_values_to_string_strict(values: &[AnyValue]) -> PolarsResult<StringChunked> {
256        let mut builder = StringChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
257        for av in values {
258            match av {
259                AnyValue::String(s) => builder.append_value(s),
260                AnyValue::StringOwned(s) => builder.append_value(s),
261                AnyValue::Null => builder.append_null(),
262                av => return Err(invalid_value_error(&DataType::String, av)),
263            }
264        }
265        Ok(builder.finish())
266    }
267    fn any_values_to_string_nonstrict(values: &[AnyValue]) -> StringChunked {
268        let mut builder = StringChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
269        let mut owned = String::new(); // Amortize allocations.
270        for av in values {
271            match av {
272                AnyValue::String(s) => builder.append_value(s),
273                AnyValue::StringOwned(s) => builder.append_value(s),
274                AnyValue::Null => builder.append_null(),
275                AnyValue::Binary(_) | AnyValue::BinaryOwned(_) => builder.append_null(),
276                av => {
277                    owned.clear();
278                    write!(owned, "{av}").unwrap();
279                    builder.append_value(&owned);
280                },
281            }
282        }
283        builder.finish()
284    }
285    if strict {
286        any_values_to_string_strict(values)
287    } else {
288        Ok(any_values_to_string_nonstrict(values))
289    }
290}
291
292fn any_values_to_binary(values: &[AnyValue], strict: bool) -> PolarsResult<BinaryChunked> {
293    fn any_values_to_binary_strict(values: &[AnyValue]) -> PolarsResult<BinaryChunked> {
294        let mut builder = BinaryChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
295        for av in values {
296            match av {
297                AnyValue::Binary(s) => builder.append_value(*s),
298                AnyValue::BinaryOwned(s) => builder.append_value(&**s),
299                AnyValue::Null => builder.append_null(),
300                av => return Err(invalid_value_error(&DataType::Binary, av)),
301            }
302        }
303        Ok(builder.finish())
304    }
305    fn any_values_to_binary_nonstrict(values: &[AnyValue]) -> BinaryChunked {
306        values
307            .iter()
308            .map(|av| match av {
309                AnyValue::Binary(b) => Some(*b),
310                AnyValue::BinaryOwned(b) => Some(&**b),
311                AnyValue::String(s) => Some(s.as_bytes()),
312                AnyValue::StringOwned(s) => Some(s.as_str().as_bytes()),
313                _ => None,
314            })
315            .collect_trusted()
316    }
317    if strict {
318        any_values_to_binary_strict(values)
319    } else {
320        Ok(any_values_to_binary_nonstrict(values))
321    }
322}
323
/// Convert AnyValues to a [`DateChunked`].
///
/// `Date` values and nulls are taken as they are. Any other value is an error in
/// strict mode; in non-strict mode it is cast to `Date`, becoming null when the cast
/// does not produce a date.
#[cfg(feature = "dtype-date")]
fn any_values_to_date(values: &[AnyValue], strict: bool) -> PolarsResult<DateChunked> {
    // Values are collected in an Int32 builder and converted to Date at the end.
    let mut builder = PrimitiveChunkedBuilder::<Int32Type>::new(PlSmallStr::EMPTY, values.len());
    for av in values {
        match av {
            AnyValue::Null => builder.append_null(),
            AnyValue::Date(days) => builder.append_value(*days),
            other if strict => return Err(invalid_value_error(&DataType::Date, other)),
            other => match other.cast(&DataType::Date) {
                AnyValue::Date(days) => builder.append_value(days),
                _ => builder.append_null(),
            },
        }
    }
    let physical = builder.finish();
    Ok(physical.into())
}
344
/// Convert AnyValues to a [`TimeChunked`].
///
/// `Time` values and nulls are taken as they are. Any other value is an error in
/// strict mode; in non-strict mode it is cast to `Time`, becoming null when the cast
/// does not produce a time.
#[cfg(feature = "dtype-time")]
fn any_values_to_time(values: &[AnyValue], strict: bool) -> PolarsResult<TimeChunked> {
    // Values are collected in an Int64 builder and converted to Time at the end.
    let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(PlSmallStr::EMPTY, values.len());
    for av in values {
        match av {
            AnyValue::Null => builder.append_null(),
            AnyValue::Time(t) => builder.append_value(*t),
            other if strict => return Err(invalid_value_error(&DataType::Time, other)),
            other => match other.cast(&DataType::Time) {
                AnyValue::Time(t) => builder.append_value(t),
                _ => builder.append_null(),
            },
        }
    }
    let physical = builder.finish();
    Ok(physical.into())
}
365
/// Convert AnyValues to a [`DatetimeChunked`] with the given time unit and time zone.
///
/// Datetime values whose time unit equals `time_unit`, and nulls, are taken as they
/// are (the time zone of the value is not inspected). Any other value is an error in
/// strict mode; in non-strict mode it is cast to the target data type, becoming null
/// when the cast does not produce a datetime.
#[cfg(feature = "dtype-datetime")]
fn any_values_to_datetime(
    values: &[AnyValue],
    time_unit: TimeUnit,
    time_zone: Option<TimeZone>,
    strict: bool,
) -> PolarsResult<DatetimeChunked> {
    let target_dtype = DataType::Datetime(time_unit, time_zone.clone());
    let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(PlSmallStr::EMPTY, values.len());
    for av in values {
        match av {
            AnyValue::Datetime(ts, tu, _) | AnyValue::DatetimeOwned(ts, tu, _)
                if *tu == time_unit =>
            {
                builder.append_value(*ts)
            },
            AnyValue::Null => builder.append_null(),
            other if strict => return Err(invalid_value_error(&target_dtype, other)),
            other => match other.cast(&target_dtype) {
                AnyValue::Datetime(ts, _, _) | AnyValue::DatetimeOwned(ts, _, _) => {
                    builder.append_value(ts)
                },
                _ => builder.append_null(),
            },
        }
    }
    let physical = builder.finish();
    Ok(physical.into_datetime(time_unit, time_zone))
}
394
/// Convert AnyValues to a [`DurationChunked`] with the given time unit.
///
/// Duration values whose time unit equals `time_unit`, and nulls, are taken as they
/// are. Any other value is an error in strict mode; in non-strict mode it is cast to
/// the target data type, becoming null when the cast does not produce a duration.
#[cfg(feature = "dtype-duration")]
fn any_values_to_duration(
    values: &[AnyValue],
    time_unit: TimeUnit,
    strict: bool,
) -> PolarsResult<DurationChunked> {
    let target_dtype = DataType::Duration(time_unit);
    let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(PlSmallStr::EMPTY, values.len());
    for av in values {
        match av {
            AnyValue::Duration(d, tu) if *tu == time_unit => builder.append_value(*d),
            AnyValue::Null => builder.append_null(),
            other if strict => return Err(invalid_value_error(&target_dtype, other)),
            other => match other.cast(&target_dtype) {
                AnyValue::Duration(d, _) => builder.append_value(d),
                _ => builder.append_null(),
            },
        }
    }
    let physical = builder.finish();
    Ok(physical.into_duration(time_unit))
}
420
/// Convert AnyValues to a categorical [`Series`].
///
/// Strings are appended directly; enum and categorical values are appended through the
/// string looked up in their own rev-map. Nulls stay null. Any other value is an error
/// in strict mode. In non-strict mode binary values become null and every remaining
/// value is rendered through its `Display` implementation.
///
/// # Panics
///
/// Panics if `dtype` is not `DataType::Categorical`.
#[cfg(feature = "dtype-categorical")]
fn any_values_to_categorical(
    values: &[AnyValue],
    dtype: &DataType,
    strict: bool,
) -> PolarsResult<Series> {
    let DataType::Categorical(_, ordering) = dtype else {
        panic!("any_values_to_categorical with dtype={dtype:?}")
    };

    let mut builder = CategoricalChunkedBuilder::new(PlSmallStr::EMPTY, values.len(), *ordering);

    // One reusable buffer for formatted values, to amortize allocations.
    let mut scratch = String::new();
    for av in values {
        match av {
            AnyValue::String(s) => builder.append_value(s),
            AnyValue::StringOwned(s) => builder.append_value(s),

            AnyValue::Enum(idx, rev, _) => builder.append_value(rev.get(*idx)),
            AnyValue::EnumOwned(idx, rev, _) => builder.append_value(rev.get(*idx)),

            AnyValue::Categorical(idx, rev, _) => builder.append_value(rev.get(*idx)),
            AnyValue::CategoricalOwned(idx, rev, _) => builder.append_value(rev.get(*idx)),

            AnyValue::Null => builder.append_null(),

            av if strict => return Err(invalid_value_error(&DataType::String, av)),

            // Everything below is only reached in non-strict mode.
            AnyValue::Binary(_) | AnyValue::BinaryOwned(_) => builder.append_null(),
            av => {
                scratch.clear();
                write!(scratch, "{av}").unwrap();
                builder.append_value(&scratch);
            },
        }
    }

    Ok(builder.finish().into_series())
}
465
/// Convert AnyValues to an enum [`Series`].
///
/// Strings are appended by value, enum values together with their own rev-map, and
/// categorical values through the string looked up in their rev-map. Nulls stay null.
/// Any other value is an error in strict mode. In non-strict mode binary values become
/// null and every remaining value is rendered through its `Display` implementation
/// before being appended as a string.
///
/// # Errors
///
/// Returns an error when `dtype` carries no rev-map (not yet implemented), when a
/// value is not accepted in strict mode, or when the builder rejects an appended value.
///
/// # Panics
///
/// Panics if `dtype` is not `DataType::Enum`.
#[cfg(feature = "dtype-categorical")]
fn any_values_to_enum(values: &[AnyValue], dtype: &DataType, strict: bool) -> PolarsResult<Series> {
    use self::enum_::EnumChunkedBuilder;

    let (rev, ordering) = match dtype {
        DataType::Enum(rev, ordering) => (rev.clone(), ordering),
        // Name this function (not `any_values_to_categorical`) so the panic points at
        // the right call site.
        _ => panic!("any_values_to_enum with dtype={dtype:?}"),
    };

    let Some(rev) = rev else {
        polars_bail!(nyi = "Not yet possible to create enum series without a rev-map");
    };

    let mut builder =
        EnumChunkedBuilder::new(PlSmallStr::EMPTY, values.len(), rev, *ordering, strict);

    let mut owned = String::new(); // Amortize allocations.
    for av in values {
        match av {
            AnyValue::String(s) => builder.append_str(s)?,
            AnyValue::StringOwned(s) => builder.append_str(s)?,

            AnyValue::Enum(s, rev, _) => builder.append_enum(*s, rev)?,
            AnyValue::EnumOwned(s, rev, _) => builder.append_enum(*s, rev)?,

            AnyValue::Categorical(s, rev, _) => builder.append_str(rev.get(*s))?,
            AnyValue::CategoricalOwned(s, rev, _) => builder.append_str(rev.get(*s))?,

            // Binary data has no string rendering; in non-strict mode it becomes null.
            // In strict mode it falls through to the error arm below.
            AnyValue::Binary(_) | AnyValue::BinaryOwned(_) if !strict => builder.append_null(),
            AnyValue::Null => builder.append_null(),

            av => {
                if strict {
                    return Err(invalid_value_error(&DataType::String, av));
                }

                owned.clear();
                write!(owned, "{av}").unwrap();
                builder.append_str(&owned)?
            },
        };
    }

    let ca = builder.finish();

    Ok(ca.into_series())
}
513
/// Convert AnyValues to a [`DecimalChunked`].
///
/// When `scale` is `None`, the scale is inferred as the largest scale among the decimal
/// values. Decimal values with a scale equal to the target scale are appended as they
/// are; decimals with a smaller scale are cast to the target scale, also in strict
/// mode. Nulls stay null. Any other value (including decimals with a larger scale) is
/// an error in strict mode; in non-strict mode it is cast to the target data type,
/// becoming null when the cast fails.
#[cfg(feature = "dtype-decimal")]
fn any_values_to_decimal(
    values: &[AnyValue],
    precision: Option<usize>,
    scale: Option<usize>, // If None, we're inferring the scale.
    strict: bool,
) -> PolarsResult<DecimalChunked> {
    /// Get the maximum scale among AnyValues
    fn infer_scale(
        values: &[AnyValue],
        precision: Option<usize>,
        strict: bool,
    ) -> PolarsResult<usize> {
        let mut max_scale: usize = 0;
        for av in values {
            match av {
                AnyValue::Decimal(_, av_scale) => max_scale = max_scale.max(*av_scale),
                AnyValue::Null => {},
                other if strict => {
                    let target_dtype = DataType::Decimal(precision, None);
                    return Err(invalid_value_error(&target_dtype, other));
                },
                // Non-decimal values do not take part in scale inference.
                _ => {},
            }
        }
        Ok(max_scale)
    }

    let scale = match scale {
        Some(given) => given,
        None => infer_scale(values, precision, strict)?,
    };
    let target_dtype = DataType::Decimal(precision, Some(scale));

    let mut builder = PrimitiveChunkedBuilder::<Int128Type>::new(PlSmallStr::EMPTY, values.len());
    for av in values {
        match av {
            // Same scale: the underlying integer can be used directly.
            AnyValue::Decimal(v, s) if *s == scale => builder.append_value(*v),
            // Smaller scale: rescale. We do want to support different scales even in
            // 'strict' mode.
            AnyValue::Decimal(_, s) if *s < scale => match av.strict_cast(&target_dtype) {
                Some(AnyValue::Decimal(i, _)) => builder.append_value(i),
                _ => builder.append_null(),
            },
            AnyValue::Null => builder.append_null(),
            other if strict => return Err(invalid_value_error(&target_dtype, other)),
            // TODO: Precision check, else set to null
            other => match other.strict_cast(&target_dtype) {
                Some(AnyValue::Decimal(i, _)) => builder.append_value(i),
                _ => builder.append_null(),
            },
        };
    }

    // Build the array and do a precision check if needed.
    let physical = builder.finish();
    physical.into_decimal(precision, scale)
}
581
582fn any_values_to_list(
583    avs: &[AnyValue],
584    inner_type: &DataType,
585    strict: bool,
586) -> PolarsResult<ListChunked> {
587    // GB:
588    // Lord forgive for the sins I have committed in this function. The amount of strange
589    // exceptions that need to happen for this to work are insane and I feel like I am going crazy.
590    //
591    // This function is essentially a copy of the `<ListChunked as FromIterator>` where it does not
592    // sample the datatype from the first element and instead we give it explicitly. This allows
593    // this function to properly assign a datatype if `avs` starts with a `null` value. Previously,
594    // this was solved by assigning the `dtype` again afterwards, but why? We should not link the
595    // implementation of these functions. We still need to assign the dtype of the ListArray and
596    // such, anyways.
597    //
598    // Then, `collect_ca_with_dtype` does not possess the necessary exceptions shown in this
599    // function to use that. I have tried adding the exceptions there and it broke other things. I
600    // really do feel like this is the simplest solution.
601
602    let mut valid = true;
603    let capacity = avs.len();
604
605    let ca = match inner_type {
606        // AnyValues with empty lists in python can create
607        // Series of an unknown dtype.
608        // We use the anonymousbuilder without a dtype
609        // the empty arrays is then not added (we add an extra offset instead)
610        // the next non-empty series then must have the correct dtype.
611        DataType::Null => {
612            let mut builder = AnonymousOwnedListBuilder::new(PlSmallStr::EMPTY, capacity, None);
613            for av in avs {
614                match av {
615                    AnyValue::List(b) => builder.append_series(b)?,
616                    AnyValue::Null => builder.append_null(),
617                    _ => {
618                        valid = false;
619                        builder.append_null();
620                    },
621                }
622            }
623            builder.finish()
624        },
625
626        #[cfg(feature = "object")]
627        DataType::Object(_) => polars_bail!(nyi = "Nested object types"),
628
629        _ => {
630            let list_inner_type = match inner_type {
631                // Categoricals may not have a revmap yet. We just give them an empty one here and
632                // the list builder takes care of the rest.
633                #[cfg(feature = "dtype-categorical")]
634                DataType::Categorical(None, ordering) => {
635                    DataType::Categorical(Some(Arc::new(RevMapping::default())), *ordering)
636                },
637
638                _ => inner_type.clone(),
639            };
640
641            let mut builder =
642                get_list_builder(&list_inner_type, capacity * 5, capacity, PlSmallStr::EMPTY);
643
644            for av in avs {
645                match av {
646                    AnyValue::List(b) => match b.cast(inner_type) {
647                        Ok(casted) => {
648                            if casted.null_count() != b.null_count() {
649                                valid = !strict;
650                            }
651                            builder.append_series(&casted)?;
652                        },
653                        Err(_) => {
654                            valid = false;
655                            for _ in 0..b.len() {
656                                builder.append_null();
657                            }
658                        },
659                    },
660                    AnyValue::Null => builder.append_null(),
661                    _ => {
662                        valid = false;
663                        builder.append_null()
664                    },
665                }
666            }
667
668            builder.finish()
669        },
670    };
671
672    if strict && !valid {
673        polars_bail!(SchemaMismatch: "unexpected value while building Series of type {:?}", DataType::List(Box::new(inner_type.clone())));
674    }
675
676    Ok(ca)
677}
678
/// Convert AnyValues to an [`ArrayChunked`] with the given inner data type and width.
///
/// `List` and `Array` values contribute their (rechunked) first chunk; when their data
/// type differs from a non-null `inner_type` they are cast first, and replaced by an
/// all-null series of the same length if the cast fails. Nulls stay null. Any other
/// value becomes null and marks the input as invalid.
///
/// # Errors
///
/// Returns a `SchemaMismatch` error in strict mode when a value was invalid, and in
/// either mode when the width of the result differs from `width`.
#[cfg(feature = "dtype-array")]
fn any_values_to_array(
    avs: &[AnyValue],
    inner_type: &DataType,
    strict: bool,
    width: usize,
) -> PolarsResult<ArrayChunked> {
    /// First chunk of `s`, rechunking first when there is more than one chunk.
    fn to_arr(s: &Series) -> Option<ArrayRef> {
        let first_chunk = if s.chunks().len() > 1 {
            let rechunked = s.rechunk();
            rechunked.chunks()[0].clone()
        } else {
            s.chunks()[0].clone()
        };
        Some(first_chunk)
    }

    let target_dtype = DataType::Array(Box::new(inner_type.clone()), width);

    // A Null inner type is handled downstream: the builder will choose the first
    // non null type, so the values are passed through without casting.
    let inner_is_null = inner_type == &DataType::Null;

    let mut valid = true;
    #[allow(unused_mut)]
    let mut out: ArrayChunked = avs
        .iter()
        .map(|av| match av {
            AnyValue::List(b) | AnyValue::Array(b, _) => {
                if inner_is_null || b.dtype() == inner_type {
                    to_arr(b)
                } else {
                    // Make sure that wrongly inferred AnyValues don't deviate from
                    // the datatype.
                    let casted = b
                        .cast(inner_type)
                        .unwrap_or_else(|_| Series::full_null(b.name().clone(), b.len(), inner_type));
                    to_arr(&casted)
                }
            },
            AnyValue::Null => None,
            _ => {
                valid = false;
                None
            },
        })
        .collect_ca_with_dtype(PlSmallStr::EMPTY, target_dtype.clone());

    if strict && !valid {
        polars_bail!(SchemaMismatch: "unexpected value while building Series of type {:?}", target_dtype);
    }
    polars_ensure!(
        out.width() == width,
        SchemaMismatch: "got mixed size array widths where width {} was expected", width
    );

    // Ensure the logical type is correct for nested types.
    #[cfg(feature = "dtype-struct")]
    if !inner_is_null && out.inner_dtype().is_nested() {
        // NOTE(review): presumably sound because `out` was collected with
        // `target_dtype` above — confirm against `set_dtype`'s safety contract.
        unsafe {
            out.set_dtype(target_dtype.clone());
        };
    }

    Ok(out)
}
754
/// Pick the value belonging to `field` out of one struct value and push it onto
/// `field_avs`.
///
/// `av_fields` and `av_values` describe the struct value itself, `fields` is the target
/// schema and `field_index` is the position of `field` within `fields`. Exactly one
/// value is pushed per call; `AnyValue::Null` is pushed when the field is not found.
///
/// # Panics
///
/// Panics if a field found by name in `av_fields` has no value at the same position in
/// `av_values`.
#[cfg(feature = "dtype-struct")]
fn _any_values_to_struct<'a>(
    av_fields: &[Field],
    av_values: &[AnyValue<'a>],
    field_index: usize,
    field: &Field,
    fields: &[Field],
    field_avs: &mut Vec<AnyValue<'a>>,
) {
    // TODO: Optimize.

    // When the value has exactly the fields of the target schema, in the same order,
    // the value can be fetched by index.
    let same_layout = fields.len() == av_fields.len()
        && fields.iter().zip(av_fields.iter()).all(|(l, r)| l == r);

    let picked = if same_layout {
        av_values
            .get(field_index)
            .cloned()
            .unwrap_or(AnyValue::Null)
    } else {
        // Fields are missing or differ, so search for the field by name.
        match av_fields
            .iter()
            .position(|av_fld| av_fld.name == field.name)
        {
            Some(i) => av_values[i].clone(),
            None => AnyValue::Null,
        }
    };
    field_avs.push(picked);
}
797
/// Build a struct [`Series`] from a slice of [`AnyValue`]s, producing one
/// field series per entry of `fields`.
///
/// For every field, one value per input row is gathered: struct rows
/// (`StructOwned` / `Struct`) contribute through `_any_values_to_struct`,
/// every other row contributes [`AnyValue::Null`]. The gathered values are
/// then turned into a field series. Fields declared with dtype `Null` go
/// through dtype inference (`Series::from_any_values`); all other fields are
/// built with their declared dtype.
///
/// If at least one non-struct row was encountered, an outer validity mask is
/// attached in which exactly the `AnyValue::Null` rows are marked invalid.
/// Other non-struct rows remain valid, with all of their fields null.
///
/// # Errors
///
/// Propagates any error raised while building a field series or while
/// assembling the `StructChunked`.
#[cfg(feature = "dtype-struct")]
fn any_values_to_struct(
    values: &[AnyValue],
    fields: &[Field],
    strict: bool,
) -> PolarsResult<Series> {
    let n_rows = values.len();

    // A struct without fields has no columns to build; short-circuit.
    if fields.is_empty() {
        let empty = StructChunked::from_series(PlSmallStr::EMPTY, n_rows, [].iter())?;
        return Ok(empty.into_series());
    }

    let mut saw_non_struct = false;
    // The physical series backing each struct field.
    let mut columns = Vec::with_capacity(fields.len());
    // Scratch buffer holding the current field's values; reused for every field.
    let mut column_avs = Vec::with_capacity(n_rows);

    for (field_idx, field) in fields.iter().enumerate() {
        column_avs.clear();

        for row in values {
            match row {
                AnyValue::StructOwned(payload) => {
                    let (row_values, row_fields) = (&payload.0, &payload.1);
                    _any_values_to_struct(
                        row_fields,
                        row_values,
                        field_idx,
                        field,
                        fields,
                        &mut column_avs,
                    );
                },
                AnyValue::Struct(_, _, row_fields) => {
                    let row_values = row._iter_struct_av().collect::<Vec<_>>();
                    _any_values_to_struct(
                        row_fields,
                        &row_values,
                        field_idx,
                        field,
                        fields,
                        &mut column_avs,
                    );
                },
                _ => {
                    // Not a struct value: the field is null for this row, and
                    // the outer validity has to be computed afterwards.
                    saw_non_struct = true;
                    column_avs.push(AnyValue::Null);
                },
            }
        }

        let name = field.name().clone();
        let column = match field.dtype {
            // No dtype known for this field: let inference decide.
            DataType::Null => Series::from_any_values(name, &column_avs, strict)?,
            _ => Series::from_any_values_and_dtype(name, &column_avs, &field.dtype, strict)?,
        };
        columns.push(column);
    }

    let mut out = StructChunked::from_series(PlSmallStr::EMPTY, n_rows, columns.iter())?;

    if saw_non_struct {
        // Start from an all-valid mask and clear the bit of every null row.
        let mut validity = MutableBitmap::new();
        validity.extend_constant(n_rows, true);

        let null_rows = values
            .iter()
            .enumerate()
            .filter_map(|(idx, v)| matches!(v, AnyValue::Null).then_some(idx));
        for idx in null_rows {
            // SAFETY: `idx` comes from enumerating `values`, so it is below
            // `n_rows`, the number of bits the mask was just extended by.
            unsafe { validity.set_unchecked(idx, false) }
        }

        out.set_outer_validity(Some(validity.freeze()));
    }

    Ok(out.into_series())
}
863
/// Build an object [`Series`] from a slice of [`AnyValue`]s.
///
/// `AnyValue::Object` values are appended as-is and `AnyValue::Null` becomes a
/// null entry. Every other value is first passed through the object converter
/// obtained from the object registry and the converted value is appended.
#[cfg(feature = "object")]
fn any_values_to_object(values: &[AnyValue]) -> PolarsResult<Series> {
    use crate::chunked_array::object::registry::{get_object_builder, get_object_converter};

    // The converter is fetched up front, before the builder is created.
    let to_object = get_object_converter();
    let mut builder = get_object_builder(PlSmallStr::EMPTY, values.len());

    for value in values {
        match value {
            AnyValue::Null => builder.append_null(),
            AnyValue::Object(object) => builder.append_value(object.as_any()),
            other => {
                // Users (e.g. from Python) may send mixed types; those are
                // converted, which only works if a global converter was set.
                let converted = to_object(other.as_borrowed());
                builder.append_value(&*converted)
            },
        }
    }

    Ok(builder.to_series())
}
884
885fn invalid_value_error(dtype: &DataType, value: &AnyValue) -> PolarsError {
886    polars_err!(
887        SchemaMismatch:
888        "unexpected value while building Series of type {:?}; found value of type {:?}: {}",
889        dtype,
890        value.dtype(),
891        value
892    )
893}