polars_core/chunked_array/ops/
bit_repr.rs

1use arrow::buffer::Buffer;
2use polars_error::feature_gated;
3
4use crate::prelude::*;
5use crate::series::BitRepr;
6
7/// Reinterprets the type of a [`ChunkedArray`]. T and U must have the same size
8/// and alignment.
9fn reinterpret_chunked_array<T: PolarsNumericType, U: PolarsNumericType>(
10    ca: &ChunkedArray<T>,
11) -> ChunkedArray<U> {
12    assert!(size_of::<T::Native>() == size_of::<U::Native>());
13    assert!(align_of::<T::Native>() == align_of::<U::Native>());
14
15    let chunks = ca.downcast_iter().map(|array| {
16        let buf = array.values().clone();
17        let reinterpreted_buf = Buffer::try_transmute::<U::Native>(buf).unwrap();
18        PrimitiveArray::from_data_default(reinterpreted_buf, array.validity().cloned())
19    });
20
21    ChunkedArray::from_chunk_iter(ca.name().clone(), chunks)
22}
23
24/// Reinterprets the type of a [`ListChunked`]. T and U must have the same size
25/// and alignment.
26fn reinterpret_list_chunked<T: PolarsNumericType, U: PolarsNumericType>(
27    ca: &ListChunked,
28) -> ListChunked {
29    assert!(size_of::<T::Native>() == size_of::<U::Native>());
30    assert!(align_of::<T::Native>() == align_of::<U::Native>());
31
32    let chunks = ca.downcast_iter().map(|array| {
33        let inner_arr = array
34            .values()
35            .as_any()
36            .downcast_ref::<PrimitiveArray<T::Native>>()
37            .unwrap();
38        let reinterpreted_buf =
39            Buffer::try_transmute::<U::Native>(inner_arr.values().clone()).unwrap();
40        let pa =
41            PrimitiveArray::from_data_default(reinterpreted_buf, inner_arr.validity().cloned());
42        LargeListArray::new(
43            DataType::List(Box::new(U::get_static_dtype())).to_arrow(CompatLevel::newest()),
44            array.offsets().clone(),
45            pa.to_boxed(),
46            array.validity().cloned(),
47        )
48    });
49
50    ListChunked::from_chunk_iter(ca.name().clone(), chunks)
51}
52
#[cfg(all(feature = "dtype-i16", feature = "dtype-u16"))]
impl Reinterpret for Int16Chunked {
    /// Already signed: returns a clone of `self` as a [`Series`].
    fn reinterpret_signed(&self) -> Series {
        let unchanged = self.clone();
        unchanged.into_series()
    }

    /// Reinterprets the values as `UInt16` and returns them as a [`Series`].
    fn reinterpret_unsigned(&self) -> Series {
        let unsigned: UInt16Chunked = reinterpret_chunked_array(self);
        unsigned.into_series()
    }
}
63
#[cfg(all(feature = "dtype-u16", feature = "dtype-i16"))]
impl Reinterpret for UInt16Chunked {
    /// Reinterprets the values as `Int16` and returns them as a [`Series`].
    fn reinterpret_signed(&self) -> Series {
        let signed: Int16Chunked = reinterpret_chunked_array(self);
        signed.into_series()
    }

    /// Already unsigned: returns a clone of `self` as a [`Series`].
    fn reinterpret_unsigned(&self) -> Series {
        let unchanged = self.clone();
        unchanged.into_series()
    }
}
74
#[cfg(all(feature = "dtype-i8", feature = "dtype-u8"))]
impl Reinterpret for Int8Chunked {
    /// Already signed: returns a clone of `self` as a [`Series`].
    fn reinterpret_signed(&self) -> Series {
        let unchanged = self.clone();
        unchanged.into_series()
    }

    /// Reinterprets the values as `UInt8` and returns them as a [`Series`].
    fn reinterpret_unsigned(&self) -> Series {
        let unsigned: UInt8Chunked = reinterpret_chunked_array(self);
        unsigned.into_series()
    }
}
85
#[cfg(all(feature = "dtype-u8", feature = "dtype-i8"))]
impl Reinterpret for UInt8Chunked {
    /// Reinterprets the values as `Int8` and returns them as a [`Series`].
    fn reinterpret_signed(&self) -> Series {
        let signed: Int8Chunked = reinterpret_chunked_array(self);
        signed.into_series()
    }

    /// Already unsigned: returns a clone of `self` as a [`Series`].
    fn reinterpret_unsigned(&self) -> Series {
        let unchanged = self.clone();
        unchanged.into_series()
    }
}
96
97impl<T> ToBitRepr for ChunkedArray<T>
98where
99    T: PolarsNumericType,
100{
101    fn to_bit_repr(&self) -> BitRepr {
102        match size_of::<T::Native>() {
103            16 => {
104                feature_gated!("dtype-u128", {
105                    if matches!(self.dtype(), DataType::UInt128) {
106                        let ca: &UInt128Chunked = self.as_any().downcast_ref().unwrap();
107                        return BitRepr::U128(ca.clone());
108                    }
109
110                    BitRepr::U128(reinterpret_chunked_array(self))
111                })
112            },
113
114            8 => {
115                if matches!(self.dtype(), DataType::UInt64) {
116                    let ca: &UInt64Chunked = self.as_any().downcast_ref().unwrap();
117                    return BitRepr::U64(ca.clone());
118                }
119
120                BitRepr::U64(reinterpret_chunked_array(self))
121            },
122
123            4 => {
124                if matches!(self.dtype(), DataType::UInt32) {
125                    let ca: &UInt32Chunked = self.as_any().downcast_ref().unwrap();
126                    return BitRepr::U32(ca.clone());
127                }
128
129                BitRepr::U32(reinterpret_chunked_array(self))
130            },
131
132            2 => {
133                if matches!(self.dtype(), DataType::UInt16) {
134                    let ca: &UInt16Chunked = self.as_any().downcast_ref().unwrap();
135                    return BitRepr::U16(ca.clone());
136                }
137
138                BitRepr::U16(reinterpret_chunked_array(self))
139            },
140
141            1 => {
142                if matches!(self.dtype(), DataType::UInt8) {
143                    let ca: &UInt8Chunked = self.as_any().downcast_ref().unwrap();
144                    return BitRepr::U8(ca.clone());
145                }
146
147                BitRepr::U8(reinterpret_chunked_array(self))
148            },
149
150            _ => unreachable!(),
151        }
152    }
153}
154
155impl Reinterpret for UInt64Chunked {
156    fn reinterpret_signed(&self) -> Series {
157        reinterpret_chunked_array::<_, Int64Type>(self).into_series()
158    }
159
160    fn reinterpret_unsigned(&self) -> Series {
161        self.clone().into_series()
162    }
163}
164
165impl Reinterpret for Int64Chunked {
166    fn reinterpret_signed(&self) -> Series {
167        self.clone().into_series()
168    }
169
170    fn reinterpret_unsigned(&self) -> Series {
171        reinterpret_chunked_array::<_, UInt64Type>(self).into_series()
172    }
173}
174
175impl Reinterpret for UInt32Chunked {
176    fn reinterpret_signed(&self) -> Series {
177        reinterpret_chunked_array::<_, Int32Type>(self).into_series()
178    }
179
180    fn reinterpret_unsigned(&self) -> Series {
181        self.clone().into_series()
182    }
183}
184
185impl Reinterpret for Int32Chunked {
186    fn reinterpret_signed(&self) -> Series {
187        self.clone().into_series()
188    }
189
190    fn reinterpret_unsigned(&self) -> Series {
191        reinterpret_chunked_array::<_, UInt32Type>(self).into_series()
192    }
193}
194
195impl Reinterpret for Float32Chunked {
196    fn reinterpret_signed(&self) -> Series {
197        reinterpret_chunked_array::<_, Int32Type>(self).into_series()
198    }
199
200    fn reinterpret_unsigned(&self) -> Series {
201        reinterpret_chunked_array::<_, UInt32Type>(self).into_series()
202    }
203}
204
205impl Reinterpret for ListChunked {
206    fn reinterpret_signed(&self) -> Series {
207        match self.inner_dtype() {
208            DataType::Float32 => reinterpret_list_chunked::<Float32Type, Int32Type>(self),
209            DataType::Float64 => reinterpret_list_chunked::<Float64Type, Int64Type>(self),
210            _ => unimplemented!(),
211        }
212        .into_series()
213    }
214
215    fn reinterpret_unsigned(&self) -> Series {
216        match self.inner_dtype() {
217            DataType::Float32 => reinterpret_list_chunked::<Float32Type, UInt32Type>(self),
218            DataType::Float64 => reinterpret_list_chunked::<Float64Type, UInt64Type>(self),
219            _ => unimplemented!(),
220        }
221        .into_series()
222    }
223}
224
225impl Reinterpret for Float64Chunked {
226    fn reinterpret_signed(&self) -> Series {
227        reinterpret_chunked_array::<_, Int64Type>(self).into_series()
228    }
229
230    fn reinterpret_unsigned(&self) -> Series {
231        reinterpret_chunked_array::<_, UInt64Type>(self).into_series()
232    }
233}
234
235impl UInt64Chunked {
236    #[doc(hidden)]
237    pub fn _reinterpret_float(&self) -> Float64Chunked {
238        reinterpret_chunked_array(self)
239    }
240}
241impl UInt32Chunked {
242    #[doc(hidden)]
243    pub fn _reinterpret_float(&self) -> Float32Chunked {
244        reinterpret_chunked_array(self)
245    }
246}
247
248/// Used to save compilation paths. Use carefully. Although this is safe,
249/// if misused it can lead to incorrect results.
250impl Float32Chunked {
251    pub fn apply_as_ints<F>(&self, f: F) -> Series
252    where
253        F: Fn(&Series) -> Series,
254    {
255        let BitRepr::U32(s) = self.to_bit_repr() else {
256            unreachable!()
257        };
258        let s = s.into_series();
259        let out = f(&s);
260        let out = out.u32().unwrap();
261        out._reinterpret_float().into()
262    }
263}
264impl Float64Chunked {
265    pub fn apply_as_ints<F>(&self, f: F) -> Series
266    where
267        F: Fn(&Series) -> Series,
268    {
269        let BitRepr::U64(s) = self.to_bit_repr() else {
270            unreachable!()
271        };
272        let s = s.into_series();
273        let out = f(&s);
274        let out = out.u64().unwrap();
275        out._reinterpret_float().into()
276    }
277}