polars_ops/series/ops/
strings.rs

1use std::borrow::Cow;
2
3use arrow::array::builder::StaticArrayBuilder;
4use arrow::array::{Array, Utf8ViewArrayBuilder};
5use arrow::datatypes::ArrowDataType;
6use polars_core::prelude::{Column, DataType, IntoColumn, StringChunked};
7use polars_core::scalar::Scalar;
8use polars_error::{PolarsResult, polars_ensure};
9use polars_utils::pl_str::PlSmallStr;
10
/// Returns the textual form of an optional string value: the string itself when present,
/// or the literal `"null"` when the value is missing.
#[inline(always)]
fn opt_str_to_string(s: Option<&str>) -> &str {
    match s {
        Some(value) => value,
        None => "null",
    }
}
15
16pub fn str_format(cs: &mut [Column], format: &str, insertions: &[usize]) -> PolarsResult<Column> {
17    assert_eq!(cs.len(), insertions.len());
18    assert!(!cs.is_empty()); // Checked at IR construction
19
20    let output_name = cs[0].name().clone();
21    let mut output_length = 1;
22    for c in cs.iter() {
23        if c.len() != 1 {
24            polars_ensure!(
25                output_length == 1 || output_length == c.len(),
26                length_mismatch = "format",
27                output_length,
28                c.len()
29            );
30            output_length = c.len();
31        }
32    }
33
34    let mut num_scalar_inputs = 0;
35    for c in cs.iter_mut() {
36        *c = c.cast(&DataType::String)?;
37        num_scalar_inputs += usize::from(c.len() == 1);
38    }
39
40    let mut format = Cow::Borrowed(format);
41    let mut insertions = Cow::Borrowed(insertions);
42
43    // Fill in any constants into the format string.
44    if num_scalar_inputs > 0 {
45        let mut filled_format = String::new();
46        filled_format.push_str(&format[..*insertions.first().unwrap()]);
47        insertions = Cow::Owned(
48            cs.iter()
49                .enumerate()
50                .filter_map(|(i, c)| {
51                    let v = if c.len() == 1 {
52                        filled_format.push_str(opt_str_to_string(c.str().unwrap().get(0)));
53                        None
54                    } else {
55                        Some(filled_format.len())
56                    };
57
58                    let s = if i == cs.len() - 1 {
59                        &format[insertions[i]..]
60                    } else {
61                        &format[insertions[i]..insertions[i + 1]]
62                    };
63                    filled_format.push_str(s);
64
65                    v
66                })
67                .collect(),
68        );
69        format = filled_format.into();
70    }
71
72    let format = format.as_ref();
73    let insertions = insertions.as_ref();
74
75    // If the format string is constant.
76    if num_scalar_inputs == cs.len() {
77        let sc = Scalar::from(PlSmallStr::from_str(format));
78        return Ok(Column::new_scalar(output_name, sc, output_length));
79    }
80
81    let mut builder = Utf8ViewArrayBuilder::new(ArrowDataType::Utf8View);
82    builder.reserve(output_length);
83
84    let mut arrays = cs
85        .iter()
86        .filter(|c| c.len() != 1)
87        .map(|c| {
88            let ca = c.str().unwrap();
89            let mut iter = ca.downcast_iter();
90            let arr = iter.next().unwrap();
91            (iter, arr, 0)
92        })
93        .collect::<Vec<_>>();
94
95    // @Performance. There is some smarter stuff that can be done with views and stuff. Don't think
96    // it is worth the complexity.
97
98    // Amortize the format string allocation.
99    let mut s = String::new();
100    for i in 0..output_length {
101        s.clear();
102        s.push_str(&format[..insertions[0]]);
103
104        for (j, (iter, arr, elem_idx)) in arrays.iter_mut().enumerate() {
105            s.push_str(opt_str_to_string(arr.get(*elem_idx)));
106            let start = insertions[j];
107            let end = insertions.get(j + 1).copied().unwrap_or(format.len());
108            s.push_str(&format[start..end]);
109
110            *elem_idx += 1;
111            if i + 1 != output_length && *elem_idx == arr.len() {
112                *arr = iter.next().unwrap();
113            }
114        }
115
116        builder.push_value_ignore_validity(&s);
117    }
118
119    let array = builder.freeze().to_boxed();
120    Ok(unsafe { StringChunked::from_chunks(output_name, vec![array]) }.into_column())
121}