polars_ops/chunked_array/binary/
namespace.rs1#[cfg(feature = "binary_encoding")]
2use std::borrow::Cow;
3
4use arrow::with_match_primitive_type;
5#[cfg(feature = "binary_encoding")]
6use base64::Engine as _;
7#[cfg(feature = "binary_encoding")]
8use base64::engine::general_purpose;
9use memchr::memmem::find;
10use polars_compute::size::binary_size_bytes;
11use polars_core::prelude::arity::{broadcast_binary_elementwise_values, unary_elementwise_values};
12
13use super::cast_binary_to_numerical::cast_binview_to_primitive_dyn;
14use super::*;
15
16pub trait BinaryNameSpaceImpl: AsBinary {
17 fn contains(&self, lit: &[u8]) -> BooleanChunked {
19 let ca = self.as_binary();
20 let f = |s: &[u8]| find(s, lit).is_some();
21 unary_elementwise_values(ca, f)
22 }
23
24 fn contains_chunked(&self, lit: &BinaryChunked) -> BooleanChunked {
25 let ca = self.as_binary();
26 match lit.len() {
27 1 => match lit.get(0) {
28 Some(lit) => ca.contains(lit),
29 None => BooleanChunked::full_null(ca.name().clone(), ca.len()),
30 },
31 _ => broadcast_binary_elementwise_values(ca, lit, |src, lit| find(src, lit).is_some()),
32 }
33 }
34
35 fn ends_with(&self, sub: &[u8]) -> BooleanChunked {
37 let ca = self.as_binary();
38 let f = |s: &[u8]| s.ends_with(sub);
39 ca.apply_nonnull_values_generic(DataType::Boolean, f)
40 }
41
42 fn starts_with(&self, sub: &[u8]) -> BooleanChunked {
44 let ca = self.as_binary();
45 let f = |s: &[u8]| s.starts_with(sub);
46 ca.apply_nonnull_values_generic(DataType::Boolean, f)
47 }
48
49 fn starts_with_chunked(&self, prefix: &BinaryChunked) -> BooleanChunked {
50 let ca = self.as_binary();
51 match prefix.len() {
52 1 => match prefix.get(0) {
53 Some(s) => self.starts_with(s),
54 None => BooleanChunked::full_null(ca.name().clone(), ca.len()),
55 },
56 _ => broadcast_binary_elementwise_values(ca, prefix, |s, sub| s.starts_with(sub)),
57 }
58 }
59
60 fn ends_with_chunked(&self, suffix: &BinaryChunked) -> BooleanChunked {
61 let ca = self.as_binary();
62 match suffix.len() {
63 1 => match suffix.get(0) {
64 Some(s) => self.ends_with(s),
65 None => BooleanChunked::full_null(ca.name().clone(), ca.len()),
66 },
67 _ => broadcast_binary_elementwise_values(ca, suffix, |s, sub| s.ends_with(sub)),
68 }
69 }
70
71 fn size_bytes(&self) -> UInt32Chunked {
73 let ca = self.as_binary();
74 ca.apply_kernel_cast(&binary_size_bytes)
75 }
76
77 #[cfg(feature = "binary_encoding")]
78 fn hex_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {
79 let ca = self.as_binary();
80 if strict {
81 ca.try_apply_nonnull_values_generic(|s| {
82 hex::decode(s).map_err(|_| {
83 polars_err!(
84 ComputeError:
85 "invalid `hex` encoding found; try setting `strict=false` to ignore"
86 )
87 })
88 })
89 } else {
90 Ok(ca.apply(|opt_s| opt_s.and_then(|s| hex::decode(s).ok().map(Cow::Owned))))
91 }
92 }
93
94 #[cfg(feature = "binary_encoding")]
95 fn hex_encode(&self) -> Series {
96 let ca = self.as_binary();
97 unsafe {
98 ca.apply_values(|s| hex::encode(s).into_bytes().into())
99 .cast_unchecked(&DataType::String)
100 .unwrap()
101 }
102 }
103
104 #[cfg(feature = "binary_encoding")]
105 fn base64_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {
106 let ca = self.as_binary();
107 if strict {
108 ca.try_apply_nonnull_values_generic(|s| {
109 general_purpose::STANDARD.decode(s).map_err(|_e| {
110 polars_err!(
111 ComputeError:
112 "invalid `base64` encoding found; try setting `strict=false` to ignore"
113 )
114 })
115 })
116 } else {
117 Ok(ca.apply(|opt_s| {
118 opt_s.and_then(|s| general_purpose::STANDARD.decode(s).ok().map(Cow::Owned))
119 }))
120 }
121 }
122
123 #[cfg(feature = "binary_encoding")]
124 fn base64_encode(&self) -> Series {
125 let ca = self.as_binary();
126 unsafe {
127 ca.apply_values(|s| general_purpose::STANDARD.encode(s).into_bytes().into())
128 .cast_unchecked(&DataType::String)
129 .unwrap()
130 }
131 }
132
133 #[cfg(feature = "binary_encoding")]
134 #[allow(clippy::wrong_self_convention)]
135 fn from_buffer(&self, dtype: &DataType, is_little_endian: bool) -> PolarsResult<Series> {
136 let ca = self.as_binary();
137 let arrow_type = dtype.to_arrow(CompatLevel::newest());
138
139 match arrow_type.to_physical_type() {
140 arrow::datatypes::PhysicalType::Primitive(ty) => {
141 with_match_primitive_type!(ty, |$T| {
142 unsafe {
143 Ok(Series::from_chunks_and_dtype_unchecked(
144 ca.name().clone(),
145 ca.chunks().iter().map(|chunk| {
146 cast_binview_to_primitive_dyn::<$T>(
147 &**chunk,
148 &arrow_type,
149 is_little_endian,
150 )
151 }).collect::<PolarsResult<Vec<_>>>()?,
152 dtype
153 ))
154 }
155 })
156 },
157 _ => Err(
158 polars_err!(InvalidOperation:"unsupported data type in from_buffer. Only numerical types are allowed."),
159 ),
160 }
161 }
162}
163
// `BinaryChunked` uses the trait's default method bodies unchanged; the impl
// block is intentionally empty.
impl BinaryNameSpaceImpl for BinaryChunked {}