polars_ops/chunked_array/binary/
namespace.rs

1#[cfg(feature = "binary_encoding")]
2use std::borrow::Cow;
3
4#[cfg(feature = "binary_encoding")]
5use arrow::array::Array;
6#[cfg(feature = "binary_encoding")]
7use base64::Engine as _;
8#[cfg(feature = "binary_encoding")]
9use base64::engine::general_purpose;
10use memchr::memmem::find;
11use polars_compute::cast::{binview_to_fixed_size_list_dyn, binview_to_primitive_dyn};
12use polars_compute::size::binary_size_bytes;
13use polars_core::prelude::arity::{broadcast_binary_elementwise_values, unary_elementwise_values};
14
15use super::*;
16
17pub trait BinaryNameSpaceImpl: AsBinary {
18    /// Slice the binary values.
19    ///
20    /// Determines a slice starting from `offset` and with length `length` of each of the elements.
21    /// `offset` can be negative, in which case the start counts from the end of the bytes.
22    fn bin_slice(&self, offset: &Column, length: &Column) -> PolarsResult<BinaryChunked> {
23        let ca = self.as_binary();
24        let offset = offset.cast(&DataType::Int64)?;
25        let length = length.strict_cast(&DataType::UInt64)?;
26
27        Ok(super::slice::slice(ca, offset.i64()?, length.u64()?))
28    }
29    /// Slice the first `n` bytes of the binary value.
30    ///
31    /// Determines a slice starting at the beginning of the binary data up to offset `n` of each
32    /// element. `n` can be negative, in which case the slice ends `n` bytes from the end.
33    fn bin_head(&self, n: &Column) -> PolarsResult<BinaryChunked> {
34        let ca = self.as_binary();
35        let n = n.strict_cast(&DataType::Int64)?;
36
37        super::slice::head(ca, n.i64()?)
38    }
39
40    /// Slice the last `n` bytes of the binary value.
41    ///
42    /// Determines a slice starting at offset `n` of each element. `n` can be
43    /// negative, in which case the slice begins `n` bytes from the start.
44    fn bin_tail(&self, n: &Column) -> PolarsResult<BinaryChunked> {
45        let ca = self.as_binary();
46        let n = n.strict_cast(&DataType::Int64)?;
47
48        super::slice::tail(ca, n.i64()?)
49    }
50
51    /// Check if binary contains given literal
52    fn contains(&self, lit: &[u8]) -> BooleanChunked {
53        let ca = self.as_binary();
54        let f = |s: &[u8]| find(s, lit).is_some();
55        unary_elementwise_values(ca, f)
56    }
57
58    fn contains_chunked(&self, lit: &BinaryChunked) -> PolarsResult<BooleanChunked> {
59        let ca = self.as_binary();
60        Ok(match lit.len() {
61            1 => match lit.get(0) {
62                Some(lit) => ca.contains(lit),
63                None => BooleanChunked::full_null(ca.name().clone(), ca.len()),
64            },
65            _ => {
66                polars_ensure!(
67                    ca.len() == lit.len() || ca.len() == 1,
68                    length_mismatch = "bin.contains",
69                    ca.len(),
70                    lit.len()
71                );
72                broadcast_binary_elementwise_values(ca, lit, |src, lit| find(src, lit).is_some())
73            },
74        })
75    }
76
77    /// Check if strings ends with a substring
78    fn ends_with(&self, sub: &[u8]) -> BooleanChunked {
79        let ca = self.as_binary();
80        let f = |s: &[u8]| s.ends_with(sub);
81        ca.apply_nonnull_values_generic(DataType::Boolean, f)
82    }
83
84    /// Check if strings starts with a substring
85    fn starts_with(&self, sub: &[u8]) -> BooleanChunked {
86        let ca = self.as_binary();
87        let f = |s: &[u8]| s.starts_with(sub);
88        ca.apply_nonnull_values_generic(DataType::Boolean, f)
89    }
90
91    fn starts_with_chunked(&self, prefix: &BinaryChunked) -> PolarsResult<BooleanChunked> {
92        let ca = self.as_binary();
93        Ok(match prefix.len() {
94            1 => match prefix.get(0) {
95                Some(s) => self.starts_with(s),
96                None => BooleanChunked::full_null(ca.name().clone(), ca.len()),
97            },
98            _ => {
99                polars_ensure!(
100                    ca.len() == prefix.len() || ca.len() == 1,
101                    length_mismatch = "bin.starts_with",
102                    ca.len(),
103                    prefix.len()
104                );
105                broadcast_binary_elementwise_values(ca, prefix, |s, sub| s.starts_with(sub))
106            },
107        })
108    }
109
110    fn ends_with_chunked(&self, suffix: &BinaryChunked) -> PolarsResult<BooleanChunked> {
111        let ca = self.as_binary();
112        Ok(match suffix.len() {
113            1 => match suffix.get(0) {
114                Some(s) => self.ends_with(s),
115                None => BooleanChunked::full_null(ca.name().clone(), ca.len()),
116            },
117            _ => {
118                polars_ensure!(
119                    ca.len() == suffix.len() || ca.len() == 1,
120                    length_mismatch = "bin.ends_with",
121                    ca.len(),
122                    suffix.len()
123                );
124                broadcast_binary_elementwise_values(ca, suffix, |s, sub| s.ends_with(sub))
125            },
126        })
127    }
128
129    /// Get the size of the binary values in bytes.
130    fn size_bytes(&self) -> UInt32Chunked {
131        let ca = self.as_binary();
132        ca.apply_kernel_cast(&binary_size_bytes)
133    }
134
135    #[cfg(feature = "binary_encoding")]
136    fn hex_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {
137        let ca = self.as_binary();
138        if strict {
139            ca.try_apply_nonnull_values_generic(|s| {
140                hex::decode(s).map_err(|_| {
141                    polars_err!(
142                        ComputeError:
143                        "invalid `hex` encoding found; try setting `strict=false` to ignore"
144                    )
145                })
146            })
147        } else {
148            Ok(ca.apply(|opt_s| opt_s.and_then(|s| hex::decode(s).ok().map(Cow::Owned))))
149        }
150    }
151
152    #[cfg(feature = "binary_encoding")]
153    fn hex_encode(&self) -> Series {
154        let ca = self.as_binary();
155        unsafe {
156            ca.apply_values(|s| hex::encode(s).into_bytes().into())
157                .cast_unchecked(&DataType::String)
158                .unwrap()
159        }
160    }
161
162    #[cfg(feature = "binary_encoding")]
163    fn base64_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {
164        let ca = self.as_binary();
165        if strict {
166            ca.try_apply_nonnull_values_generic(|s| {
167                general_purpose::STANDARD.decode(s).map_err(|_e| {
168                    polars_err!(
169                        ComputeError:
170                        "invalid `base64` encoding found; try setting `strict=false` to ignore"
171                    )
172                })
173            })
174        } else {
175            Ok(ca.apply(|opt_s| {
176                opt_s.and_then(|s| general_purpose::STANDARD.decode(s).ok().map(Cow::Owned))
177            }))
178        }
179    }
180
181    #[cfg(feature = "binary_encoding")]
182    fn base64_encode(&self) -> Series {
183        let ca = self.as_binary();
184        unsafe {
185            ca.apply_values(|s| general_purpose::STANDARD.encode(s).into_bytes().into())
186                .cast_unchecked(&DataType::String)
187                .unwrap()
188        }
189    }
190
191    #[cfg(feature = "binary_encoding")]
192    fn reinterpret(&self, dtype: &DataType, is_little_endian: bool) -> PolarsResult<Series> {
193        unsafe {
194            Ok(Series::from_chunks_and_dtype_unchecked(
195                self.as_binary().name().clone(),
196                self._reinterpret_inner(dtype, is_little_endian)?,
197                dtype,
198            ))
199        }
200    }
201
202    #[cfg(feature = "binary_encoding")]
203    fn _reinterpret_inner(
204        &self,
205        dtype: &DataType,
206        is_little_endian: bool,
207    ) -> PolarsResult<Vec<Box<dyn Array>>> {
208        use polars_core::with_match_physical_numeric_polars_type;
209
210        let ca = self.as_binary();
211
212        match dtype {
213            dtype if dtype.is_primitive_numeric() || dtype.is_temporal() => {
214                let dtype = dtype.to_physical();
215                let arrow_data_type = dtype
216                    .to_arrow(CompatLevel::newest())
217                    .underlying_physical_type();
218                with_match_physical_numeric_polars_type!(dtype, |$T| {
219                    unsafe {
220                        ca.chunks().iter().map(|chunk| {
221                            binview_to_primitive_dyn::<<$T as PolarsNumericType>::Native>(
222                                &**chunk,
223                                &arrow_data_type,
224                                is_little_endian,
225                            )
226                        }).collect()
227                    }
228                })
229            },
230            #[cfg(feature = "dtype-array")]
231            DataType::Array(inner_dtype, array_width)
232                if inner_dtype.is_primitive_numeric() || inner_dtype.is_temporal() =>
233            {
234                let inner_dtype = inner_dtype.to_physical();
235                let result: Vec<ArrayRef> = with_match_physical_numeric_polars_type!(inner_dtype, |$T| {
236                    unsafe {
237                        ca.chunks().iter().map(|chunk| {
238                            binview_to_fixed_size_list_dyn::<<$T as PolarsNumericType>::Native>(
239                                &**chunk,
240                                *array_width,
241                                is_little_endian
242                            )
243                        }).collect::<Result<Vec<ArrayRef>, _>>()
244                    }
245                })?;
246                Ok(result)
247            },
248            _ => Err(
249                polars_err!(InvalidOperation: "unsupported data type {:?} in reinterpret. Only numeric or temporal types, or Arrays of those, are allowed.", dtype),
250            ),
251        }
252    }
253}
254
255impl BinaryNameSpaceImpl for BinaryChunked {}