polars_ops/chunked_array/binary/
namespace.rs1#[cfg(feature = "binary_encoding")]
2use std::borrow::Cow;
3
4#[cfg(feature = "binary_encoding")]
5use arrow::array::Array;
6#[cfg(feature = "binary_encoding")]
7use base64::Engine as _;
8#[cfg(feature = "binary_encoding")]
9use base64::engine::general_purpose;
10use memchr::memmem::find;
11use polars_compute::cast::{binview_to_fixed_size_list_dyn, binview_to_primitive_dyn};
12use polars_compute::size::binary_size_bytes;
13use polars_core::prelude::arity::{broadcast_binary_elementwise_values, unary_elementwise_values};
14
15use super::*;
16
17pub trait BinaryNameSpaceImpl: AsBinary {
18 fn contains(&self, lit: &[u8]) -> BooleanChunked {
20 let ca = self.as_binary();
21 let f = |s: &[u8]| find(s, lit).is_some();
22 unary_elementwise_values(ca, f)
23 }
24
25 fn contains_chunked(&self, lit: &BinaryChunked) -> PolarsResult<BooleanChunked> {
26 let ca = self.as_binary();
27 Ok(match lit.len() {
28 1 => match lit.get(0) {
29 Some(lit) => ca.contains(lit),
30 None => BooleanChunked::full_null(ca.name().clone(), ca.len()),
31 },
32 _ => {
33 polars_ensure!(
34 ca.len() == lit.len() || ca.len() == 1,
35 length_mismatch = "bin.contains",
36 ca.len(),
37 lit.len()
38 );
39 broadcast_binary_elementwise_values(ca, lit, |src, lit| find(src, lit).is_some())
40 },
41 })
42 }
43
44 fn ends_with(&self, sub: &[u8]) -> BooleanChunked {
46 let ca = self.as_binary();
47 let f = |s: &[u8]| s.ends_with(sub);
48 ca.apply_nonnull_values_generic(DataType::Boolean, f)
49 }
50
51 fn starts_with(&self, sub: &[u8]) -> BooleanChunked {
53 let ca = self.as_binary();
54 let f = |s: &[u8]| s.starts_with(sub);
55 ca.apply_nonnull_values_generic(DataType::Boolean, f)
56 }
57
58 fn starts_with_chunked(&self, prefix: &BinaryChunked) -> PolarsResult<BooleanChunked> {
59 let ca = self.as_binary();
60 Ok(match prefix.len() {
61 1 => match prefix.get(0) {
62 Some(s) => self.starts_with(s),
63 None => BooleanChunked::full_null(ca.name().clone(), ca.len()),
64 },
65 _ => {
66 polars_ensure!(
67 ca.len() == prefix.len() || ca.len() == 1,
68 length_mismatch = "bin.starts_with",
69 ca.len(),
70 prefix.len()
71 );
72 broadcast_binary_elementwise_values(ca, prefix, |s, sub| s.starts_with(sub))
73 },
74 })
75 }
76
77 fn ends_with_chunked(&self, suffix: &BinaryChunked) -> PolarsResult<BooleanChunked> {
78 let ca = self.as_binary();
79 Ok(match suffix.len() {
80 1 => match suffix.get(0) {
81 Some(s) => self.ends_with(s),
82 None => BooleanChunked::full_null(ca.name().clone(), ca.len()),
83 },
84 _ => {
85 polars_ensure!(
86 ca.len() == suffix.len() || ca.len() == 1,
87 length_mismatch = "bin.ends_with",
88 ca.len(),
89 suffix.len()
90 );
91 broadcast_binary_elementwise_values(ca, suffix, |s, sub| s.ends_with(sub))
92 },
93 })
94 }
95
96 fn size_bytes(&self) -> UInt32Chunked {
98 let ca = self.as_binary();
99 ca.apply_kernel_cast(&binary_size_bytes)
100 }
101
102 #[cfg(feature = "binary_encoding")]
103 fn hex_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {
104 let ca = self.as_binary();
105 if strict {
106 ca.try_apply_nonnull_values_generic(|s| {
107 hex::decode(s).map_err(|_| {
108 polars_err!(
109 ComputeError:
110 "invalid `hex` encoding found; try setting `strict=false` to ignore"
111 )
112 })
113 })
114 } else {
115 Ok(ca.apply(|opt_s| opt_s.and_then(|s| hex::decode(s).ok().map(Cow::Owned))))
116 }
117 }
118
119 #[cfg(feature = "binary_encoding")]
120 fn hex_encode(&self) -> Series {
121 let ca = self.as_binary();
122 unsafe {
123 ca.apply_values(|s| hex::encode(s).into_bytes().into())
124 .cast_unchecked(&DataType::String)
125 .unwrap()
126 }
127 }
128
129 #[cfg(feature = "binary_encoding")]
130 fn base64_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {
131 let ca = self.as_binary();
132 if strict {
133 ca.try_apply_nonnull_values_generic(|s| {
134 general_purpose::STANDARD.decode(s).map_err(|_e| {
135 polars_err!(
136 ComputeError:
137 "invalid `base64` encoding found; try setting `strict=false` to ignore"
138 )
139 })
140 })
141 } else {
142 Ok(ca.apply(|opt_s| {
143 opt_s.and_then(|s| general_purpose::STANDARD.decode(s).ok().map(Cow::Owned))
144 }))
145 }
146 }
147
148 #[cfg(feature = "binary_encoding")]
149 fn base64_encode(&self) -> Series {
150 let ca = self.as_binary();
151 unsafe {
152 ca.apply_values(|s| general_purpose::STANDARD.encode(s).into_bytes().into())
153 .cast_unchecked(&DataType::String)
154 .unwrap()
155 }
156 }
157
158 #[cfg(feature = "binary_encoding")]
159 fn reinterpret(&self, dtype: &DataType, is_little_endian: bool) -> PolarsResult<Series> {
160 unsafe {
161 Ok(Series::from_chunks_and_dtype_unchecked(
162 self.as_binary().name().clone(),
163 self._reinterpret_inner(dtype, is_little_endian)?,
164 dtype,
165 ))
166 }
167 }
168
169 #[cfg(feature = "binary_encoding")]
170 fn _reinterpret_inner(
171 &self,
172 dtype: &DataType,
173 is_little_endian: bool,
174 ) -> PolarsResult<Vec<Box<dyn Array>>> {
175 use polars_core::with_match_physical_numeric_polars_type;
176
177 let ca = self.as_binary();
178
179 match dtype {
180 dtype if dtype.is_primitive_numeric() || dtype.is_temporal() => {
181 let dtype = dtype.to_physical();
182 let arrow_data_type = dtype
183 .to_arrow(CompatLevel::newest())
184 .underlying_physical_type();
185 with_match_physical_numeric_polars_type!(dtype, |$T| {
186 unsafe {
187 ca.chunks().iter().map(|chunk| {
188 binview_to_primitive_dyn::<<$T as PolarsNumericType>::Native>(
189 &**chunk,
190 &arrow_data_type,
191 is_little_endian,
192 )
193 }).collect()
194 }
195 })
196 },
197 #[cfg(feature = "dtype-array")]
198 DataType::Array(inner_dtype, array_width)
199 if inner_dtype.is_primitive_numeric() || inner_dtype.is_temporal() =>
200 {
201 let inner_dtype = inner_dtype.to_physical();
202 let result: Vec<ArrayRef> = with_match_physical_numeric_polars_type!(inner_dtype, |$T| {
203 unsafe {
204 ca.chunks().iter().map(|chunk| {
205 binview_to_fixed_size_list_dyn::<<$T as PolarsNumericType>::Native>(
206 &**chunk,
207 *array_width,
208 is_little_endian
209 )
210 }).collect::<Result<Vec<ArrayRef>, _>>()
211 }
212 })?;
213 Ok(result)
214 },
215 _ => Err(
216 polars_err!(InvalidOperation: "unsupported data type {:?} in reinterpret. Only numeric or temporal types, or Arrays of those, are allowed.", dtype),
217 ),
218 }
219 }
220}
221
222impl BinaryNameSpaceImpl for BinaryChunked {}