// polars_ops/series/ops/strings.rs
use std::borrow::Cow;
2
3use arrow::array::builder::StaticArrayBuilder;
4use arrow::array::{Array, Utf8ViewArrayBuilder};
5use arrow::datatypes::ArrowDataType;
6use polars_core::prelude::{Column, DataType, IntoColumn, StringChunked};
7use polars_core::scalar::Scalar;
8use polars_error::{PolarsResult, polars_ensure};
9use polars_utils::pl_str::PlSmallStr;
10
/// Returns the text to render for an optional string value.
///
/// A present value is returned unchanged; an absent value is rendered as the
/// literal `"null"`.
#[inline(always)]
fn opt_str_to_string(s: Option<&str>) -> &str {
    let Some(text) = s else {
        return "null";
    };
    text
}
15
16pub fn str_format(cs: &mut [Column], format: &str, insertions: &[usize]) -> PolarsResult<Column> {
17 assert_eq!(cs.len(), insertions.len());
18 assert!(!cs.is_empty()); let output_name = cs[0].name().clone();
21 let mut output_length = 1;
22 for c in cs.iter() {
23 if c.len() != 1 {
24 polars_ensure!(
25 output_length == 1 || output_length == c.len(),
26 length_mismatch = "format",
27 output_length,
28 c.len()
29 );
30 output_length = c.len();
31 }
32 }
33
34 let mut validity = None;
35 let mut num_scalar_inputs = 0;
36 for c in cs.iter_mut() {
37 if let Some(c_validity) = c.rechunk_validity() {
38 if c.null_count() == c.len() {
40 return Ok(Column::full_null(
41 output_name,
42 output_length,
43 &DataType::String,
44 ));
45 }
46
47 match &mut validity {
48 v @ None => *v = Some(c_validity),
49 Some(v) => *v = arrow::bitmap::and(v, &c_validity),
50 }
51 }
52
53 *c = c.cast(&DataType::String)?;
54 num_scalar_inputs += usize::from(c.len() == 1);
55 }
56
57 let mut format = Cow::Borrowed(format);
58 let mut insertions = Cow::Borrowed(insertions);
59
60 if num_scalar_inputs > 0 {
62 let mut filled_format = String::new();
63 filled_format.push_str(&format[..*insertions.first().unwrap()]);
64 insertions = Cow::Owned(
65 cs.iter()
66 .enumerate()
67 .filter_map(|(i, c)| {
68 let v = if c.len() == 1 {
69 filled_format.push_str(opt_str_to_string(c.str().unwrap().get(0)));
70 None
71 } else {
72 Some(filled_format.len())
73 };
74
75 let s = if i == cs.len() - 1 {
76 &format[insertions[i]..]
77 } else {
78 &format[insertions[i]..insertions[i + 1]]
79 };
80 filled_format.push_str(s);
81
82 v
83 })
84 .collect(),
85 );
86 format = filled_format.into();
87 }
88
89 let format = format.as_ref();
90 let insertions = insertions.as_ref();
91
92 if num_scalar_inputs == cs.len() {
94 let sc = Scalar::from(PlSmallStr::from_str(format));
95 return Ok(Column::new_scalar(output_name, sc, output_length));
96 }
97
98 let mut builder = Utf8ViewArrayBuilder::new(ArrowDataType::Utf8View);
99 builder.reserve(output_length);
100
101 let mut arrays = cs
102 .iter()
103 .filter(|c| c.len() != 1)
104 .map(|c| {
105 let ca = c.str().unwrap();
106 let mut iter = ca.downcast_iter();
107 let arr = iter.next().unwrap();
108 (iter, arr, 0)
109 })
110 .collect::<Vec<_>>();
111
112 let mut s = String::new();
117 for i in 0..output_length {
118 if validity
119 .as_ref()
120 .is_some_and(|v| !unsafe { v.get_bit_unchecked(i) })
121 {
122 unsafe { builder.push_inline_view_ignore_validity(Default::default()) };
123
124 for (iter, arr, elem_idx) in arrays.iter_mut() {
125 *elem_idx += 1;
126 if i + 1 != output_length && *elem_idx == arr.len() {
127 *arr = iter.next().unwrap();
128 *elem_idx = 0;
129 }
130 }
131
132 continue;
133 }
134
135 s.clear();
136 s.push_str(&format[..insertions[0]]);
137
138 for (j, (iter, arr, elem_idx)) in arrays.iter_mut().enumerate() {
139 s.push_str(opt_str_to_string(arr.get(*elem_idx)));
140 let start = insertions[j];
141 let end = insertions.get(j + 1).copied().unwrap_or(format.len());
142 s.push_str(&format[start..end]);
143
144 *elem_idx += 1;
145 if i + 1 != output_length && *elem_idx == arr.len() {
146 *arr = iter.next().unwrap();
147 *elem_idx = 0;
148 }
149 }
150
151 builder.push_value_ignore_validity(&s);
152 }
153
154 let array = builder.freeze().with_validity(validity).to_boxed();
155 Ok(unsafe { StringChunked::from_chunks(output_name, vec![array]) }.into_column())
156}