polars_ops/chunked_array/binary/
namespace.rs1#[cfg(feature = "binary_encoding")]
2use std::borrow::Cow;
3
4#[cfg(feature = "binary_encoding")]
5use arrow::array::Array;
6#[cfg(feature = "binary_encoding")]
7use base64::Engine as _;
8#[cfg(feature = "binary_encoding")]
9use base64::engine::general_purpose;
10use memchr::memmem::find;
11use polars_compute::cast::{binview_to_fixed_size_list_dyn, binview_to_primitive_dyn};
12use polars_compute::size::binary_size_bytes;
13use polars_core::prelude::arity::{broadcast_binary_elementwise_values, unary_elementwise_values};
14
15use super::*;
16
17pub trait BinaryNameSpaceImpl: AsBinary {
18 fn bin_slice(&self, offset: &Column, length: &Column) -> PolarsResult<BinaryChunked> {
23 let ca = self.as_binary();
24 let offset = offset.cast(&DataType::Int64)?;
25 let length = length.strict_cast(&DataType::UInt64)?;
26
27 Ok(super::slice::slice(ca, offset.i64()?, length.u64()?))
28 }
29 fn bin_head(&self, n: &Column) -> PolarsResult<BinaryChunked> {
34 let ca = self.as_binary();
35 let n = n.strict_cast(&DataType::Int64)?;
36
37 super::slice::head(ca, n.i64()?)
38 }
39
40 fn bin_tail(&self, n: &Column) -> PolarsResult<BinaryChunked> {
45 let ca = self.as_binary();
46 let n = n.strict_cast(&DataType::Int64)?;
47
48 super::slice::tail(ca, n.i64()?)
49 }
50
51 fn contains(&self, lit: &[u8]) -> BooleanChunked {
53 let ca = self.as_binary();
54 let f = |s: &[u8]| find(s, lit).is_some();
55 unary_elementwise_values(ca, f)
56 }
57
58 fn contains_chunked(&self, lit: &BinaryChunked) -> PolarsResult<BooleanChunked> {
59 let ca = self.as_binary();
60 Ok(match lit.len() {
61 1 => match lit.get(0) {
62 Some(lit) => ca.contains(lit),
63 None => BooleanChunked::full_null(ca.name().clone(), ca.len()),
64 },
65 _ => {
66 polars_ensure!(
67 ca.len() == lit.len() || ca.len() == 1,
68 length_mismatch = "bin.contains",
69 ca.len(),
70 lit.len()
71 );
72 broadcast_binary_elementwise_values(ca, lit, |src, lit| find(src, lit).is_some())
73 },
74 })
75 }
76
77 fn ends_with(&self, sub: &[u8]) -> BooleanChunked {
79 let ca = self.as_binary();
80 let f = |s: &[u8]| s.ends_with(sub);
81 ca.apply_nonnull_values_generic(DataType::Boolean, f)
82 }
83
84 fn starts_with(&self, sub: &[u8]) -> BooleanChunked {
86 let ca = self.as_binary();
87 let f = |s: &[u8]| s.starts_with(sub);
88 ca.apply_nonnull_values_generic(DataType::Boolean, f)
89 }
90
91 fn starts_with_chunked(&self, prefix: &BinaryChunked) -> PolarsResult<BooleanChunked> {
92 let ca = self.as_binary();
93 Ok(match prefix.len() {
94 1 => match prefix.get(0) {
95 Some(s) => self.starts_with(s),
96 None => BooleanChunked::full_null(ca.name().clone(), ca.len()),
97 },
98 _ => {
99 polars_ensure!(
100 ca.len() == prefix.len() || ca.len() == 1,
101 length_mismatch = "bin.starts_with",
102 ca.len(),
103 prefix.len()
104 );
105 broadcast_binary_elementwise_values(ca, prefix, |s, sub| s.starts_with(sub))
106 },
107 })
108 }
109
110 fn ends_with_chunked(&self, suffix: &BinaryChunked) -> PolarsResult<BooleanChunked> {
111 let ca = self.as_binary();
112 Ok(match suffix.len() {
113 1 => match suffix.get(0) {
114 Some(s) => self.ends_with(s),
115 None => BooleanChunked::full_null(ca.name().clone(), ca.len()),
116 },
117 _ => {
118 polars_ensure!(
119 ca.len() == suffix.len() || ca.len() == 1,
120 length_mismatch = "bin.ends_with",
121 ca.len(),
122 suffix.len()
123 );
124 broadcast_binary_elementwise_values(ca, suffix, |s, sub| s.ends_with(sub))
125 },
126 })
127 }
128
129 fn size_bytes(&self) -> UInt32Chunked {
131 let ca = self.as_binary();
132 ca.apply_kernel_cast(&binary_size_bytes)
133 }
134
135 #[cfg(feature = "binary_encoding")]
136 fn hex_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {
137 let ca = self.as_binary();
138 if strict {
139 ca.try_apply_nonnull_values_generic(|s| {
140 hex::decode(s).map_err(|_| {
141 polars_err!(
142 ComputeError:
143 "invalid `hex` encoding found; try setting `strict=false` to ignore"
144 )
145 })
146 })
147 } else {
148 Ok(ca.apply(|opt_s| opt_s.and_then(|s| hex::decode(s).ok().map(Cow::Owned))))
149 }
150 }
151
152 #[cfg(feature = "binary_encoding")]
153 fn hex_encode(&self) -> Series {
154 let ca = self.as_binary();
155 unsafe {
156 ca.apply_values(|s| hex::encode(s).into_bytes().into())
157 .cast_unchecked(&DataType::String)
158 .unwrap()
159 }
160 }
161
162 #[cfg(feature = "binary_encoding")]
163 fn base64_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {
164 let ca = self.as_binary();
165 if strict {
166 ca.try_apply_nonnull_values_generic(|s| {
167 general_purpose::STANDARD.decode(s).map_err(|_e| {
168 polars_err!(
169 ComputeError:
170 "invalid `base64` encoding found; try setting `strict=false` to ignore"
171 )
172 })
173 })
174 } else {
175 Ok(ca.apply(|opt_s| {
176 opt_s.and_then(|s| general_purpose::STANDARD.decode(s).ok().map(Cow::Owned))
177 }))
178 }
179 }
180
181 #[cfg(feature = "binary_encoding")]
182 fn base64_encode(&self) -> Series {
183 let ca = self.as_binary();
184 unsafe {
185 ca.apply_values(|s| general_purpose::STANDARD.encode(s).into_bytes().into())
186 .cast_unchecked(&DataType::String)
187 .unwrap()
188 }
189 }
190
191 #[cfg(feature = "binary_encoding")]
192 fn reinterpret(&self, dtype: &DataType, is_little_endian: bool) -> PolarsResult<Series> {
193 unsafe {
194 Ok(Series::from_chunks_and_dtype_unchecked(
195 self.as_binary().name().clone(),
196 self._reinterpret_inner(dtype, is_little_endian)?,
197 dtype,
198 ))
199 }
200 }
201
202 #[cfg(feature = "binary_encoding")]
203 fn _reinterpret_inner(
204 &self,
205 dtype: &DataType,
206 is_little_endian: bool,
207 ) -> PolarsResult<Vec<Box<dyn Array>>> {
208 use polars_core::with_match_physical_numeric_polars_type;
209
210 let ca = self.as_binary();
211
212 match dtype {
213 dtype if dtype.is_primitive_numeric() || dtype.is_temporal() => {
214 let dtype = dtype.to_physical();
215 let arrow_data_type = dtype
216 .to_arrow(CompatLevel::newest())
217 .underlying_physical_type();
218 with_match_physical_numeric_polars_type!(dtype, |$T| {
219 unsafe {
220 ca.chunks().iter().map(|chunk| {
221 binview_to_primitive_dyn::<<$T as PolarsNumericType>::Native>(
222 &**chunk,
223 &arrow_data_type,
224 is_little_endian,
225 )
226 }).collect()
227 }
228 })
229 },
230 #[cfg(feature = "dtype-array")]
231 DataType::Array(inner_dtype, array_width)
232 if inner_dtype.is_primitive_numeric() || inner_dtype.is_temporal() =>
233 {
234 let inner_dtype = inner_dtype.to_physical();
235 let result: Vec<ArrayRef> = with_match_physical_numeric_polars_type!(inner_dtype, |$T| {
236 unsafe {
237 ca.chunks().iter().map(|chunk| {
238 binview_to_fixed_size_list_dyn::<<$T as PolarsNumericType>::Native>(
239 &**chunk,
240 *array_width,
241 is_little_endian
242 )
243 }).collect::<Result<Vec<ArrayRef>, _>>()
244 }
245 })?;
246 Ok(result)
247 },
248 _ => Err(
249 polars_err!(InvalidOperation: "unsupported data type {:?} in reinterpret. Only numeric or temporal types, or Arrays of those, are allowed.", dtype),
250 ),
251 }
252 }
253}
254
255impl BinaryNameSpaceImpl for BinaryChunked {}