polars_core/chunked_array/array/
mod.rs

1//! Special fixed-size-list utility methods
2
3mod iterator;
4
5use std::borrow::Cow;
6
7use either::Either;
8
9use crate::prelude::*;
10
11impl ArrayChunked {
12    /// Get the inner data type of the fixed size list.
13    pub fn inner_dtype(&self) -> &DataType {
14        match self.dtype() {
15            DataType::Array(dt, _size) => dt.as_ref(),
16            _ => unreachable!(),
17        }
18    }
19
20    pub fn width(&self) -> usize {
21        match self.dtype() {
22            DataType::Array(_dt, size) => *size,
23            _ => unreachable!(),
24        }
25    }
26
27    /// # Safety
28    /// The caller must ensure that the logical type given fits the physical type of the array.
29    pub unsafe fn to_logical(&mut self, inner_dtype: DataType) {
30        debug_assert_eq!(&inner_dtype.to_physical(), self.inner_dtype());
31        let width = self.width();
32        let fld = Arc::make_mut(&mut self.field);
33        fld.coerce(DataType::Array(Box::new(inner_dtype), width))
34    }
35
36    /// Convert the datatype of the array into the physical datatype.
37    pub fn to_physical_repr(&self) -> Cow<ArrayChunked> {
38        let Cow::Owned(physical_repr) = self.get_inner().to_physical_repr() else {
39            return Cow::Borrowed(self);
40        };
41
42        let chunk_len_validity_iter =
43            if physical_repr.chunks().len() == 1 && self.chunks().len() > 1 {
44                // Physical repr got rechunked, rechunk our validity as well.
45                Either::Left(std::iter::once((self.len(), self.rechunk_validity())))
46            } else {
47                // No rechunking, expect the same number of chunks.
48                assert_eq!(self.chunks().len(), physical_repr.chunks().len());
49                Either::Right(
50                    self.chunks()
51                        .iter()
52                        .map(|c| (c.len(), c.validity().cloned())),
53                )
54            };
55
56        let width = self.width();
57        let chunks: Vec<_> = chunk_len_validity_iter
58            .zip(physical_repr.into_chunks())
59            .map(|((len, validity), values)| {
60                FixedSizeListArray::new(
61                    ArrowDataType::FixedSizeList(
62                        Box::new(ArrowField::new(
63                            PlSmallStr::from_static("item"),
64                            values.dtype().clone(),
65                            true,
66                        )),
67                        width,
68                    ),
69                    len,
70                    values,
71                    validity,
72                )
73                .to_boxed()
74            })
75            .collect();
76
77        let name = self.name().clone();
78        let dtype = DataType::Array(Box::new(self.inner_dtype().to_physical()), width);
79        Cow::Owned(unsafe { ArrayChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype) })
80    }
81
82    /// Convert a non-logical [`ArrayChunked`] back into a logical [`ArrayChunked`] without casting.
83    ///
84    /// # Safety
85    ///
86    /// This can lead to invalid memory access in downstream code.
87    pub unsafe fn from_physical_unchecked(&self, to_inner_dtype: DataType) -> PolarsResult<Self> {
88        debug_assert!(!self.inner_dtype().is_logical());
89
90        let chunks = self
91            .downcast_iter()
92            .map(|chunk| chunk.values())
93            .cloned()
94            .collect();
95
96        let inner = unsafe {
97            Series::from_chunks_and_dtype_unchecked(PlSmallStr::EMPTY, chunks, self.inner_dtype())
98        };
99        let inner = unsafe { inner.from_physical_unchecked(&to_inner_dtype) }?;
100
101        let chunks: Vec<_> = self
102            .downcast_iter()
103            .zip(inner.into_chunks())
104            .map(|(chunk, values)| {
105                FixedSizeListArray::new(
106                    ArrowDataType::FixedSizeList(
107                        Box::new(ArrowField::new(
108                            PlSmallStr::from_static("item"),
109                            values.dtype().clone(),
110                            true,
111                        )),
112                        self.width(),
113                    ),
114                    chunk.len(),
115                    values,
116                    chunk.validity().cloned(),
117                )
118                .to_boxed()
119            })
120            .collect();
121
122        let name = self.name().clone();
123        let dtype = DataType::Array(Box::new(to_inner_dtype), self.width());
124        Ok(unsafe { Self::from_chunks_and_dtype_unchecked(name, chunks, dtype) })
125    }
126
127    /// Get the inner values as `Series`
128    pub fn get_inner(&self) -> Series {
129        let chunks: Vec<_> = self.downcast_iter().map(|c| c.values().clone()).collect();
130
131        // SAFETY: Data type of arrays matches because they are chunks from the same array.
132        unsafe {
133            Series::from_chunks_and_dtype_unchecked(self.name().clone(), chunks, self.inner_dtype())
134        }
135    }
136
137    /// Ignore the list indices and apply `func` to the inner type as [`Series`].
138    pub fn apply_to_inner(
139        &self,
140        func: &dyn Fn(Series) -> PolarsResult<Series>,
141    ) -> PolarsResult<ArrayChunked> {
142        // Rechunk or the generated Series will have wrong length.
143        let ca = self.rechunk();
144        let field = self
145            .inner_dtype()
146            .to_arrow_field(PlSmallStr::from_static("item"), CompatLevel::newest());
147
148        let chunks = ca.downcast_iter().map(|arr| {
149            let elements = unsafe {
150                Series::_try_from_arrow_unchecked_with_md(
151                    self.name().clone(),
152                    vec![(*arr.values()).clone()],
153                    &field.dtype,
154                    field.metadata.as_deref(),
155                )
156                .unwrap()
157            };
158
159            let expected_len = elements.len();
160            let out: Series = func(elements)?;
161            polars_ensure!(
162                out.len() == expected_len,
163                ComputeError: "the function should apply element-wise, it removed elements instead"
164            );
165            let out = out.rechunk();
166            let values = out.chunks()[0].clone();
167
168            let inner_dtype = FixedSizeListArray::default_datatype(
169                out.dtype().to_arrow(CompatLevel::newest()),
170                ca.width(),
171            );
172            let arr =
173                FixedSizeListArray::new(inner_dtype, arr.len(), values, arr.validity().cloned());
174            Ok(arr)
175        });
176
177        ArrayChunked::try_from_chunk_iter(self.name().clone(), chunks)
178    }
179
180    /// Recurse nested types until we are at the leaf array.
181    pub fn get_leaf_array(&self) -> Series {
182        let mut current = self.get_inner();
183        while let Some(child_array) = current.try_array() {
184            current = child_array.get_inner();
185        }
186        current
187    }
188}