polars_core/chunked_array/ops/
bit_repr.rs

1use arrow::buffer::Buffer;
2
3use crate::prelude::*;
4use crate::series::BitRepr;
5
6/// Reinterprets the type of a [`ChunkedArray`]. T and U must have the same size
7/// and alignment.
8fn reinterpret_chunked_array<T: PolarsNumericType, U: PolarsNumericType>(
9    ca: &ChunkedArray<T>,
10) -> ChunkedArray<U> {
11    assert!(size_of::<T::Native>() == size_of::<U::Native>());
12    assert!(align_of::<T::Native>() == align_of::<U::Native>());
13
14    let chunks = ca.downcast_iter().map(|array| {
15        let buf = array.values().clone();
16        // SAFETY: we checked that the size and alignment matches.
17        #[allow(clippy::transmute_undefined_repr)]
18        let reinterpreted_buf =
19            unsafe { std::mem::transmute::<Buffer<T::Native>, Buffer<U::Native>>(buf) };
20        PrimitiveArray::from_data_default(reinterpreted_buf, array.validity().cloned())
21    });
22
23    ChunkedArray::from_chunk_iter(ca.name().clone(), chunks)
24}
25
/// Builds a [`ListChunked`] with inner type `U` from a list whose inner values
/// are primitives of `T::Native`.
///
/// Offsets and both validity levels (list and inner values) are cloned over
/// unchanged; only the inner value buffer is relabelled.
///
/// # Panics
///
/// Panics if `T::Native` and `U::Native` differ in size or alignment, or if a
/// chunk's inner values are not a `PrimitiveArray<T::Native>`.
#[cfg(feature = "reinterpret")]
fn reinterpret_list_chunked<T: PolarsNumericType, U: PolarsNumericType>(
    ca: &ListChunked,
) -> ListChunked {
    assert!(size_of::<T::Native>() == size_of::<U::Native>());
    assert!(align_of::<T::Native>() == align_of::<U::Native>());

    let chunks = ca.downcast_iter().map(|list_arr| {
        let src_inner = list_arr
            .values()
            .as_any()
            .downcast_ref::<PrimitiveArray<T::Native>>()
            .unwrap();
        let src_values = src_inner.values().clone();
        // SAFETY: the asserts above guarantee both native types share size
        // and alignment.
        #[allow(clippy::transmute_undefined_repr)]
        let dst_values =
            unsafe { std::mem::transmute::<Buffer<T::Native>, Buffer<U::Native>>(src_values) };
        let dst_inner =
            PrimitiveArray::from_data_default(dst_values, src_inner.validity().cloned());

        // The outer list dtype has to name the new inner type.
        let list_dtype = DataType::List(Box::new(U::get_dtype())).to_arrow(CompatLevel::newest());
        LargeListArray::new(
            list_dtype,
            list_arr.offsets().clone(),
            dst_inner.to_boxed(),
            list_arr.validity().cloned(),
        )
    });

    ListChunked::from_chunk_iter(ca.name().clone(), chunks)
}
58
#[cfg(all(feature = "reinterpret", feature = "dtype-i16", feature = "dtype-u16"))]
impl Reinterpret for Int16Chunked {
    /// Already signed: returns a clone of `self` as a [`Series`].
    fn reinterpret_signed(&self) -> Series {
        let unchanged = self.clone();
        unchanged.into_series()
    }

    /// Returns the same values reinterpreted as `UInt16`.
    fn reinterpret_unsigned(&self) -> Series {
        let unsigned: UInt16Chunked = reinterpret_chunked_array(self);
        unsigned.into_series()
    }
}
69
#[cfg(all(feature = "reinterpret", feature = "dtype-u16", feature = "dtype-i16"))]
impl Reinterpret for UInt16Chunked {
    /// Returns the same values reinterpreted as `Int16`.
    fn reinterpret_signed(&self) -> Series {
        let signed: Int16Chunked = reinterpret_chunked_array(self);
        signed.into_series()
    }

    /// Already unsigned: returns a clone of `self` as a [`Series`].
    fn reinterpret_unsigned(&self) -> Series {
        let unchanged = self.clone();
        unchanged.into_series()
    }
}
80
#[cfg(all(feature = "reinterpret", feature = "dtype-i8", feature = "dtype-u8"))]
impl Reinterpret for Int8Chunked {
    /// Already signed: returns a clone of `self` as a [`Series`].
    fn reinterpret_signed(&self) -> Series {
        let unchanged = self.clone();
        unchanged.into_series()
    }

    /// Returns the same values reinterpreted as `UInt8`.
    fn reinterpret_unsigned(&self) -> Series {
        let unsigned: UInt8Chunked = reinterpret_chunked_array(self);
        unsigned.into_series()
    }
}
91
#[cfg(all(feature = "reinterpret", feature = "dtype-u8", feature = "dtype-i8"))]
impl Reinterpret for UInt8Chunked {
    /// Returns the same values reinterpreted as `Int8`.
    fn reinterpret_signed(&self) -> Series {
        let signed: Int8Chunked = reinterpret_chunked_array(self);
        signed.into_series()
    }

    /// Already unsigned: returns a clone of `self` as a [`Series`].
    fn reinterpret_unsigned(&self) -> Series {
        let unchanged = self.clone();
        unchanged.into_series()
    }
}
102
103impl<T> ToBitRepr for ChunkedArray<T>
104where
105    T: PolarsNumericType,
106{
107    fn to_bit_repr(&self) -> BitRepr {
108        let is_large = size_of::<T::Native>() == 8;
109
110        if is_large {
111            if matches!(self.dtype(), DataType::UInt64) {
112                let ca = self.clone();
113                // Convince the compiler we are this type. This keeps flags.
114                return BitRepr::Large(unsafe {
115                    std::mem::transmute::<ChunkedArray<T>, UInt64Chunked>(ca)
116                });
117            }
118
119            BitRepr::Large(reinterpret_chunked_array(self))
120        } else {
121            BitRepr::Small(if size_of::<T::Native>() == 4 {
122                if matches!(self.dtype(), DataType::UInt32) {
123                    let ca = self.clone();
124                    // Convince the compiler we are this type. This preserves flags.
125                    return BitRepr::Small(unsafe {
126                        std::mem::transmute::<ChunkedArray<T>, UInt32Chunked>(ca)
127                    });
128                }
129
130                reinterpret_chunked_array(self)
131            } else {
132                // SAFETY: an unchecked cast to uint32 (which has no invariants) is
133                // always sound.
134                unsafe {
135                    self.cast_unchecked(&DataType::UInt32)
136                        .unwrap()
137                        .u32()
138                        .unwrap()
139                        .clone()
140                }
141            })
142        }
143    }
144}
145
#[cfg(feature = "reinterpret")]
impl Reinterpret for UInt64Chunked {
    /// Returns the same values reinterpreted as `Int64`.
    fn reinterpret_signed(&self) -> Series {
        reinterpret_chunked_array::<_, Int64Type>(self).into_series()
    }

    /// Already unsigned: returns a clone of `self` as a [`Series`].
    fn reinterpret_unsigned(&self) -> Series {
        let unchanged = self.clone();
        unchanged.into_series()
    }
}
#[cfg(feature = "reinterpret")]
impl Reinterpret for Int64Chunked {
    /// Already signed: returns a clone of `self` as a [`Series`].
    fn reinterpret_signed(&self) -> Series {
        let unchanged = self.clone();
        unchanged.into_series()
    }

    /// Returns the unsigned 64-bit representation obtained via `to_bit_repr`.
    fn reinterpret_unsigned(&self) -> Series {
        match self.to_bit_repr() {
            BitRepr::Large(bits) => bits.into_series(),
            // An 8-byte native type always takes the `Large` branch.
            _ => unreachable!(),
        }
    }
}
170
#[cfg(feature = "reinterpret")]
impl Reinterpret for UInt32Chunked {
    /// Returns the same values reinterpreted as `Int32`.
    fn reinterpret_signed(&self) -> Series {
        reinterpret_chunked_array::<_, Int32Type>(self).into_series()
    }

    /// Already unsigned: returns a clone of `self` as a [`Series`].
    fn reinterpret_unsigned(&self) -> Series {
        let unchanged = self.clone();
        unchanged.into_series()
    }
}
182
#[cfg(feature = "reinterpret")]
impl Reinterpret for Int32Chunked {
    /// Already signed: returns a clone of `self` as a [`Series`].
    fn reinterpret_signed(&self) -> Series {
        let unchanged = self.clone();
        unchanged.into_series()
    }

    /// Returns the unsigned 32-bit representation obtained via `to_bit_repr`.
    fn reinterpret_unsigned(&self) -> Series {
        match self.to_bit_repr() {
            BitRepr::Small(bits) => bits.into_series(),
            // A 4-byte native type always takes the `Small` branch.
            _ => unreachable!(),
        }
    }
}
196
#[cfg(feature = "reinterpret")]
impl Reinterpret for Float32Chunked {
    /// Returns the float values reinterpreted as `Int32`.
    fn reinterpret_signed(&self) -> Series {
        let signed: Int32Chunked = reinterpret_chunked_array(self);
        signed.into_series()
    }

    /// Returns the float values reinterpreted as `UInt32`.
    fn reinterpret_unsigned(&self) -> Series {
        let unsigned: UInt32Chunked = reinterpret_chunked_array(self);
        unsigned.into_series()
    }
}
207
#[cfg(feature = "reinterpret")]
impl Reinterpret for ListChunked {
    /// Reinterprets a list of `Float32` / `Float64` as a list of
    /// `Int32` / `Int64`.
    ///
    /// # Panics
    ///
    /// Panics with `unimplemented!()` for any other inner dtype.
    fn reinterpret_signed(&self) -> Series {
        let inner = self.inner_dtype();
        let reinterpreted: ListChunked = match inner {
            DataType::Float32 => reinterpret_list_chunked::<Float32Type, Int32Type>(self),
            DataType::Float64 => reinterpret_list_chunked::<Float64Type, Int64Type>(self),
            _ => unimplemented!(),
        };
        reinterpreted.into_series()
    }

    /// Reinterprets a list of `Float32` / `Float64` as a list of
    /// `UInt32` / `UInt64`.
    ///
    /// # Panics
    ///
    /// Panics with `unimplemented!()` for any other inner dtype.
    fn reinterpret_unsigned(&self) -> Series {
        let inner = self.inner_dtype();
        let reinterpreted: ListChunked = match inner {
            DataType::Float32 => reinterpret_list_chunked::<Float32Type, UInt32Type>(self),
            DataType::Float64 => reinterpret_list_chunked::<Float64Type, UInt64Type>(self),
            _ => unimplemented!(),
        };
        reinterpreted.into_series()
    }
}
228
#[cfg(feature = "reinterpret")]
impl Reinterpret for Float64Chunked {
    /// Returns the float values reinterpreted as `Int64`.
    fn reinterpret_signed(&self) -> Series {
        let signed: Int64Chunked = reinterpret_chunked_array(self);
        signed.into_series()
    }

    /// Returns the float values reinterpreted as `UInt64`.
    fn reinterpret_unsigned(&self) -> Series {
        let unsigned: UInt64Chunked = reinterpret_chunked_array(self);
        unsigned.into_series()
    }
}
239
240impl UInt64Chunked {
241    #[doc(hidden)]
242    pub fn _reinterpret_float(&self) -> Float64Chunked {
243        reinterpret_chunked_array(self)
244    }
245}
246impl UInt32Chunked {
247    #[doc(hidden)]
248    pub fn _reinterpret_float(&self) -> Float32Chunked {
249        reinterpret_chunked_array(self)
250    }
251}
252
253/// Used to save compilation paths. Use carefully. Although this is safe,
254/// if misused it can lead to incorrect results.
255impl Float32Chunked {
256    pub fn apply_as_ints<F>(&self, f: F) -> Series
257    where
258        F: Fn(&Series) -> Series,
259    {
260        let BitRepr::Small(s) = self.to_bit_repr() else {
261            unreachable!()
262        };
263        let s = s.into_series();
264        let out = f(&s);
265        let out = out.u32().unwrap();
266        out._reinterpret_float().into()
267    }
268}
269impl Float64Chunked {
270    pub fn apply_as_ints<F>(&self, f: F) -> Series
271    where
272        F: Fn(&Series) -> Series,
273    {
274        let BitRepr::Large(s) = self.to_bit_repr() else {
275            unreachable!()
276        };
277        let s = s.into_series();
278        let out = f(&s);
279        let out = out.u64().unwrap();
280        out._reinterpret_float().into()
281    }
282}