polars_core/chunked_array/array/
mod.rs

1//! Special fixed-size-list utility methods
2
3mod iterator;
4
5use std::borrow::Cow;
6
7use either::Either;
8
9use crate::prelude::*;
10
11impl ArrayChunked {
12    /// Get the inner data type of the fixed size list.
13    pub fn inner_dtype(&self) -> &DataType {
14        match self.dtype() {
15            DataType::Array(dt, _size) => dt.as_ref(),
16            _ => unreachable!(),
17        }
18    }
19
20    /// # Panics
21    /// Panics if the physical representation of `dtype` differs the physical
22    /// representation of the existing inner `dtype`.
23    pub fn set_inner_dtype(&mut self, dtype: DataType) {
24        assert_eq!(dtype.to_physical(), self.inner_dtype().to_physical());
25        let width = self.width();
26        let field = Arc::make_mut(&mut self.field);
27        field.coerce(DataType::Array(Box::new(dtype), width));
28    }
29
30    pub fn width(&self) -> usize {
31        match self.dtype() {
32            DataType::Array(_dt, size) => *size,
33            _ => unreachable!(),
34        }
35    }
36
37    /// # Safety
38    /// The caller must ensure that the logical type given fits the physical type of the array.
39    pub unsafe fn to_logical(&mut self, inner_dtype: DataType) {
40        debug_assert_eq!(&inner_dtype.to_physical(), self.inner_dtype());
41        let width = self.width();
42        let fld = Arc::make_mut(&mut self.field);
43        fld.coerce(DataType::Array(Box::new(inner_dtype), width))
44    }
45
46    /// Convert the datatype of the array into the physical datatype.
47    pub fn to_physical_repr(&self) -> Cow<'_, ArrayChunked> {
48        let Cow::Owned(physical_repr) = self.get_inner().to_physical_repr() else {
49            return Cow::Borrowed(self);
50        };
51
52        let chunk_len_validity_iter =
53            if physical_repr.chunks().len() == 1 && self.chunks().len() > 1 {
54                // Physical repr got rechunked, rechunk our validity as well.
55                Either::Left(std::iter::once((self.len(), self.rechunk_validity())))
56            } else {
57                // No rechunking, expect the same number of chunks.
58                assert_eq!(self.chunks().len(), physical_repr.chunks().len());
59                Either::Right(
60                    self.chunks()
61                        .iter()
62                        .map(|c| (c.len(), c.validity().cloned())),
63                )
64            };
65
66        let width = self.width();
67        let chunks: Vec<_> = chunk_len_validity_iter
68            .zip(physical_repr.into_chunks())
69            .map(|((len, validity), values)| {
70                FixedSizeListArray::new(
71                    ArrowDataType::FixedSizeList(
72                        Box::new(ArrowField::new(
73                            LIST_VALUES_NAME,
74                            values.dtype().clone(),
75                            true,
76                        )),
77                        width,
78                    ),
79                    len,
80                    values,
81                    validity,
82                )
83                .to_boxed()
84            })
85            .collect();
86
87        let name = self.name().clone();
88        let dtype = DataType::Array(Box::new(self.inner_dtype().to_physical()), width);
89        Cow::Owned(unsafe { ArrayChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype) })
90    }
91
92    /// Convert a non-logical [`ArrayChunked`] back into a logical [`ArrayChunked`] without casting.
93    ///
94    /// # Safety
95    ///
96    /// This can lead to invalid memory access in downstream code.
97    pub unsafe fn from_physical_unchecked(&self, to_inner_dtype: DataType) -> PolarsResult<Self> {
98        debug_assert!(!self.inner_dtype().is_logical());
99
100        let chunks = self
101            .downcast_iter()
102            .map(|chunk| chunk.values())
103            .cloned()
104            .collect();
105
106        let inner = unsafe {
107            Series::from_chunks_and_dtype_unchecked(PlSmallStr::EMPTY, chunks, self.inner_dtype())
108        };
109        let inner = unsafe { inner.from_physical_unchecked(&to_inner_dtype) }?;
110
111        let chunks: Vec<_> = self
112            .downcast_iter()
113            .zip(inner.into_chunks())
114            .map(|(chunk, values)| {
115                FixedSizeListArray::new(
116                    ArrowDataType::FixedSizeList(
117                        Box::new(ArrowField::new(
118                            LIST_VALUES_NAME,
119                            values.dtype().clone(),
120                            true,
121                        )),
122                        self.width(),
123                    ),
124                    chunk.len(),
125                    values,
126                    chunk.validity().cloned(),
127                )
128                .to_boxed()
129            })
130            .collect();
131
132        let name = self.name().clone();
133        let dtype = DataType::Array(Box::new(to_inner_dtype), self.width());
134        Ok(unsafe { Self::from_chunks_and_dtype_unchecked(name, chunks, dtype) })
135    }
136
137    /// Get the inner values as `Series`
138    pub fn get_inner(&self) -> Series {
139        let chunks: Vec<_> = self.downcast_iter().map(|c| c.values().clone()).collect();
140
141        // SAFETY: Data type of arrays matches because they are chunks from the same array.
142        unsafe {
143            Series::from_chunks_and_dtype_unchecked(self.name().clone(), chunks, self.inner_dtype())
144        }
145    }
146
147    /// Ignore the list indices and apply `func` to the inner type as [`Series`].
148    pub fn apply_to_inner(
149        &self,
150        func: &dyn Fn(Series) -> PolarsResult<Series>,
151    ) -> PolarsResult<ArrayChunked> {
152        // Rechunk or the generated Series will have wrong length.
153        let ca = self.rechunk();
154        let arr = ca.downcast_as_array();
155
156        // SAFETY:
157        // Inner dtype is passed correctly
158        let elements = unsafe {
159            Series::from_chunks_and_dtype_unchecked(
160                self.name().clone(),
161                vec![arr.values().clone()],
162                ca.inner_dtype(),
163            )
164        };
165
166        let expected_len = elements.len();
167        let out: Series = func(elements)?;
168        polars_ensure!(
169            out.len() == expected_len,
170            ComputeError: "the function should apply element-wise, it removed elements instead"
171        );
172        let out = out.rechunk();
173        let values = out.chunks()[0].clone();
174
175        let inner_dtype = FixedSizeListArray::default_datatype(values.dtype().clone(), ca.width());
176        let arr = FixedSizeListArray::new(inner_dtype, arr.len(), values, arr.validity().cloned());
177
178        Ok(unsafe {
179            ArrayChunked::from_chunks_and_dtype_unchecked(
180                self.name().clone(),
181                vec![arr.into_boxed()],
182                DataType::Array(Box::new(out.dtype().clone()), self.width()),
183            )
184        })
185    }
186
187    /// Recurse nested types until we are at the leaf array.
188    pub fn get_leaf_array(&self) -> Series {
189        let mut current = self.get_inner();
190        while let Some(child_array) = current.try_array() {
191            current = child_array.get_inner();
192        }
193        current
194    }
195}