polars_core/utils/
series.rs

1use std::rc::Rc;
2
3use polars_compute::find_validity_mismatch::find_validity_mismatch;
4use polars_compute::gather::take_unchecked;
5
6use crate::prelude::*;
7use crate::series::amortized_iter::AmortSeries;
8
9/// A utility that allocates an [`AmortSeries`]. The applied function can then use that
10/// series container to save heap allocations and swap arrow arrays.
11pub fn with_unstable_series<F, T>(dtype: &DataType, f: F) -> T
12where
13    F: Fn(&mut AmortSeries) -> T,
14{
15    let container = Series::full_null(PlSmallStr::EMPTY, 0, dtype);
16    let mut us = AmortSeries::new(Rc::new(container));
17
18    f(&mut us)
19}
20
21pub fn is_deprecated_cast(input_dtype: &DataType, output_dtype: &DataType) -> bool {
22    use DataType as D;
23
24    #[allow(clippy::single_match)]
25    match (input_dtype, output_dtype) {
26        #[cfg(feature = "dtype-struct")]
27        (D::Struct(l_fields), D::Struct(r_fields)) => {
28            l_fields.len() != r_fields.len()
29                || l_fields
30                    .iter()
31                    .zip(r_fields.iter())
32                    .any(|(l, r)| l.name() != r.name() || is_deprecated_cast(l.dtype(), r.dtype()))
33        },
34        (D::List(input_dtype), D::List(output_dtype)) => {
35            is_deprecated_cast(input_dtype, output_dtype)
36        },
37        #[cfg(feature = "dtype-array")]
38        (D::Array(input_dtype, _), D::Array(output_dtype, _)) => {
39            is_deprecated_cast(input_dtype, output_dtype)
40        },
41        #[cfg(feature = "dtype-array")]
42        (D::List(input_dtype), D::Array(output_dtype, _))
43        | (D::Array(input_dtype, _), D::List(output_dtype)) => {
44            is_deprecated_cast(input_dtype, output_dtype)
45        },
46        _ => false,
47    }
48}
49
50pub fn handle_casting_failures(input: &Series, output: &Series) -> PolarsResult<()> {
51    // @Hack to deal with deprecated cast
52    // @2.0
53    if is_deprecated_cast(input.dtype(), output.dtype()) {
54        return Ok(());
55    }
56
57    let mut idxs = Vec::new();
58    input.find_validity_mismatch(output, &mut idxs);
59
60    if idxs.is_empty() {
61        return Ok(());
62    }
63
64    let num_failures = idxs.len();
65    let failures = input.take_slice(&idxs[..num_failures.min(10)])?;
66
67    let additional_info = match (input.dtype(), output.dtype()) {
68        (DataType::String, DataType::Date | DataType::Datetime(_, _)) => {
69            "\n\nYou might want to try:\n\
70            - setting `strict=False` to set values that cannot be converted to `null`\n\
71            - using `str.strptime`, `str.to_date`, or `str.to_datetime` and providing a format string"
72        },
73        #[cfg(feature = "dtype-categorical")]
74        (DataType::String, DataType::Enum(_, _)) => {
75            "\n\nEnsure that all values in the input column are present in the categories of the enum datatype."
76        },
77        _ if failures.len() < num_failures => {
78            "\n\nDid not show all failed cases as there were too many."
79        },
80        _ => "",
81    };
82
83    polars_bail!(
84        InvalidOperation:
85        "conversion from `{}` to `{}` failed in column '{}' for {} out of {} values: {}{}",
86        input.dtype(),
87        output.dtype(),
88        output.name(),
89        num_failures,
90        input.len(),
91        failures.fmt_list(),
92        additional_info,
93    )
94}
95
96pub fn handle_array_casting_failures(input: &dyn Array, output: &dyn Array) -> PolarsResult<()> {
97    let mut idxs = Vec::new();
98    find_validity_mismatch(input, output, &mut idxs);
99    if idxs.is_empty() {
100        return Ok(());
101    }
102
103    let num_failures = idxs.len();
104    let failures = PrimitiveArray::with_slice(&idxs[..num_failures.min(10)], |idxs| unsafe {
105        take_unchecked(input, &idxs)
106    });
107
108    polars_bail!(
109        InvalidOperation:
110        "conversion from `{}` to `{}` failed for {} out of {} values: {}",
111        DataType::from_arrow(input.dtype(), None),
112        DataType::from_arrow(output.dtype(), None),
113        num_failures,
114        input.len(),
115        Series::try_from((PlSmallStr::EMPTY, failures))?,
116    )
117}