polars_ops/chunked_array/binary/
namespace.rs
1#[cfg(feature = "binary_encoding")]
2use std::borrow::Cow;
3
4use arrow::with_match_primitive_type;
5#[cfg(feature = "binary_encoding")]
6use base64::Engine as _;
7#[cfg(feature = "binary_encoding")]
8use base64::engine::general_purpose;
9use memchr::memmem::find;
10use polars_compute::size::binary_size_bytes;
11use polars_core::prelude::arity::{broadcast_binary_elementwise_values, unary_elementwise_values};
12
13use super::cast_binary_to_numerical::cast_binview_to_primitive_dyn;
14use super::*;
15
16pub trait BinaryNameSpaceImpl: AsBinary {
17 fn contains(&self, lit: &[u8]) -> BooleanChunked {
19 let ca = self.as_binary();
20 let f = |s: &[u8]| find(s, lit).is_some();
21 unary_elementwise_values(ca, f)
22 }
23
24 fn contains_chunked(&self, lit: &BinaryChunked) -> PolarsResult<BooleanChunked> {
25 let ca = self.as_binary();
26 Ok(match lit.len() {
27 1 => match lit.get(0) {
28 Some(lit) => ca.contains(lit),
29 None => BooleanChunked::full_null(ca.name().clone(), ca.len()),
30 },
31 _ => {
32 polars_ensure!(
33 ca.len() == lit.len() || ca.len() == 1,
34 length_mismatch = "bin.contains",
35 ca.len(),
36 lit.len()
37 );
38 broadcast_binary_elementwise_values(ca, lit, |src, lit| find(src, lit).is_some())
39 },
40 })
41 }
42
43 fn ends_with(&self, sub: &[u8]) -> BooleanChunked {
45 let ca = self.as_binary();
46 let f = |s: &[u8]| s.ends_with(sub);
47 ca.apply_nonnull_values_generic(DataType::Boolean, f)
48 }
49
50 fn starts_with(&self, sub: &[u8]) -> BooleanChunked {
52 let ca = self.as_binary();
53 let f = |s: &[u8]| s.starts_with(sub);
54 ca.apply_nonnull_values_generic(DataType::Boolean, f)
55 }
56
57 fn starts_with_chunked(&self, prefix: &BinaryChunked) -> PolarsResult<BooleanChunked> {
58 let ca = self.as_binary();
59 Ok(match prefix.len() {
60 1 => match prefix.get(0) {
61 Some(s) => self.starts_with(s),
62 None => BooleanChunked::full_null(ca.name().clone(), ca.len()),
63 },
64 _ => {
65 polars_ensure!(
66 ca.len() == prefix.len() || ca.len() == 1,
67 length_mismatch = "bin.starts_with",
68 ca.len(),
69 prefix.len()
70 );
71 broadcast_binary_elementwise_values(ca, prefix, |s, sub| s.starts_with(sub))
72 },
73 })
74 }
75
76 fn ends_with_chunked(&self, suffix: &BinaryChunked) -> PolarsResult<BooleanChunked> {
77 let ca = self.as_binary();
78 Ok(match suffix.len() {
79 1 => match suffix.get(0) {
80 Some(s) => self.ends_with(s),
81 None => BooleanChunked::full_null(ca.name().clone(), ca.len()),
82 },
83 _ => {
84 polars_ensure!(
85 ca.len() == suffix.len() || ca.len() == 1,
86 length_mismatch = "bin.ends_with",
87 ca.len(),
88 suffix.len()
89 );
90 broadcast_binary_elementwise_values(ca, suffix, |s, sub| s.ends_with(sub))
91 },
92 })
93 }
94
95 fn size_bytes(&self) -> UInt32Chunked {
97 let ca = self.as_binary();
98 ca.apply_kernel_cast(&binary_size_bytes)
99 }
100
/// Decode each value from its hexadecimal text form into raw bytes.
///
/// * `strict = true`: the first value that is not valid hex aborts the
///   operation with a `ComputeError`.
/// * `strict = false`: values that fail to decode map to `None`, as do
///   values that were already `None`.
#[cfg(feature = "binary_encoding")]
fn hex_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {
    let ca = self.as_binary();

    // Lenient mode: swallow decode failures instead of reporting them.
    if !strict {
        let decoded = ca.apply(|value| match value {
            Some(bytes) => hex::decode(bytes).ok().map(Cow::Owned),
            None => None,
        });
        return Ok(decoded);
    }

    ca.try_apply_nonnull_values_generic(|bytes| {
        hex::decode(bytes).map_err(|_| {
            polars_err!(
                ComputeError:
                "invalid `hex` encoding found; try setting `strict=false` to ignore"
            )
        })
    })
}
117
/// Encode each value as hexadecimal text and return it as a `String` series.
///
/// # Panics
/// Panics if the unchecked cast to `DataType::String` returns an error.
#[cfg(feature = "binary_encoding")]
fn hex_encode(&self) -> Series {
    let encoded = self
        .as_binary()
        .apply_values(|bytes| hex::encode(bytes).into_bytes().into());

    // SAFETY: every value was obtained from a `String` (`hex::encode`) via
    // `into_bytes`, so the bytes are valid UTF-8 and may be viewed as strings.
    let as_string = unsafe { encoded.cast_unchecked(&DataType::String) };
    as_string.unwrap()
}
127
/// Decode each value from base64 text into raw bytes using the
/// `general_purpose::STANDARD` engine.
///
/// * `strict = true`: the first value that is not valid base64 aborts the
///   operation with a `ComputeError`.
/// * `strict = false`: values that fail to decode map to `None`, as do
///   values that were already `None`.
#[cfg(feature = "binary_encoding")]
fn base64_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {
    let ca = self.as_binary();

    // Lenient mode: swallow decode failures instead of reporting them.
    if !strict {
        let decoded = ca.apply(|value| match value {
            Some(bytes) => general_purpose::STANDARD.decode(bytes).ok().map(Cow::Owned),
            None => None,
        });
        return Ok(decoded);
    }

    ca.try_apply_nonnull_values_generic(|bytes| {
        general_purpose::STANDARD.decode(bytes).map_err(|_e| {
            polars_err!(
                ComputeError:
                "invalid `base64` encoding found; try setting `strict=false` to ignore"
            )
        })
    })
}
146
/// Encode each value as base64 text with the `general_purpose::STANDARD`
/// engine and return it as a `String` series.
///
/// # Panics
/// Panics if the unchecked cast to `DataType::String` returns an error.
#[cfg(feature = "binary_encoding")]
fn base64_encode(&self) -> Series {
    let encoded = self
        .as_binary()
        .apply_values(|bytes| general_purpose::STANDARD.encode(bytes).into_bytes().into());

    // SAFETY: every value was obtained from a `String` (the encoder output)
    // via `into_bytes`, so the bytes are valid UTF-8 and may be viewed as strings.
    let as_string = unsafe { encoded.cast_unchecked(&DataType::String) };
    as_string.unwrap()
}
156
/// Convert the bytes of each value into a value of the numeric type `dtype`.
///
/// `dtype` is first mapped to its Arrow type. Only Arrow types whose
/// physical representation is primitive are accepted; every chunk is then
/// converted with `cast_binview_to_primitive_dyn` and the chunks are
/// reassembled into a series named after `self` with dtype `dtype`.
///
/// `is_little_endian` is forwarded unchanged to the conversion helper
/// (presumably selecting the byte order of the input — confirm against the
/// helper).
///
/// # Errors
/// * `InvalidOperation` when `dtype` is not backed by a primitive type.
/// * Any error returned by `cast_binview_to_primitive_dyn` for a chunk.
#[cfg(feature = "binary_encoding")]
// The method is a `from_*` name that still takes `self`, which is what the
// `wrong_self_convention` lint flags; the lint is silenced on purpose here.
#[allow(clippy::wrong_self_convention)]
fn from_buffer(&self, dtype: &DataType, is_little_endian: bool) -> PolarsResult<Series> {
    let ca = self.as_binary();
    let arrow_type = dtype.to_arrow(CompatLevel::newest());

    if let arrow::datatypes::PhysicalType::Primitive(ty) = arrow_type.to_physical_type() {
        with_match_primitive_type!(ty, |$T| {
            // SAFETY: NOTE(review) — relies on `cast_binview_to_primitive_dyn`
            // returning arrays of `arrow_type`, which was derived from
            // `dtype`; confirm against the helper.
            unsafe {
                let converted = ca
                    .chunks()
                    .iter()
                    .map(|chunk| {
                        cast_binview_to_primitive_dyn::<$T>(
                            &**chunk,
                            &arrow_type,
                            is_little_endian,
                        )
                    })
                    .collect::<PolarsResult<Vec<_>>>()?;
                Ok(Series::from_chunks_and_dtype_unchecked(
                    ca.name().clone(),
                    converted,
                    dtype
                ))
            }
        })
    } else {
        Err(
            polars_err!(InvalidOperation:"unsupported data type in from_buffer. Only numerical types are allowed."),
        )
    }
}
186}
187
188impl BinaryNameSpaceImpl for BinaryChunked {}