polars_core/chunked_array/
from.rs
1use arrow::compute::concatenate::concatenate_unchecked;
2
3use super::*;
4
5#[allow(clippy::all)]
6fn from_chunks_list_dtype(chunks: &mut Vec<ArrayRef>, dtype: DataType) -> DataType {
7 let dtype = if let Some(arr) = chunks.get(0) {
9 DataType::from_arrow_dtype(arr.dtype())
10 } else {
11 dtype
12 };
13
14 match dtype {
15 #[cfg(feature = "dtype-categorical")]
16 DataType::List(inner)
20 if matches!(
21 *inner,
22 DataType::Categorical(None, _) | DataType::Enum(None, _)
23 ) =>
24 {
25 let array = concatenate_unchecked(chunks).unwrap();
26 let list_arr = array.as_any().downcast_ref::<ListArray<i64>>().unwrap();
27 let values_arr = list_arr.values();
28 let cat = unsafe {
29 Series::_try_from_arrow_unchecked(
30 PlSmallStr::EMPTY,
31 vec![values_arr.clone()],
32 values_arr.dtype(),
33 )
34 .unwrap()
35 };
36
37 let arrow_dtype = ListArray::<i64>::default_datatype(ArrowDataType::UInt32);
40 let new_array = ListArray::new(
41 arrow_dtype,
42 list_arr.offsets().clone(),
43 cat.array_ref(0).clone(),
44 list_arr.validity().cloned(),
45 );
46 chunks.clear();
47 chunks.push(Box::new(new_array));
48 DataType::List(Box::new(cat.dtype().clone()))
49 },
50 #[cfg(all(feature = "dtype-array", feature = "dtype-categorical"))]
51 DataType::Array(inner, width)
52 if matches!(
53 *inner,
54 DataType::Categorical(None, _) | DataType::Enum(None, _)
55 ) =>
56 {
57 let array = concatenate_unchecked(chunks).unwrap();
58 let list_arr = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
59 let values_arr = list_arr.values();
60 let cat = unsafe {
61 Series::_try_from_arrow_unchecked(
62 PlSmallStr::EMPTY,
63 vec![values_arr.clone()],
64 values_arr.dtype(),
65 )
66 .unwrap()
67 };
68
69 let arrow_dtype = FixedSizeListArray::default_datatype(ArrowDataType::UInt32, width);
72 let new_array = FixedSizeListArray::new(
73 arrow_dtype,
74 values_arr.len(),
75 cat.array_ref(0).clone(),
76 list_arr.validity().cloned(),
77 );
78 chunks.clear();
79 chunks.push(Box::new(new_array));
80 DataType::Array(Box::new(cat.dtype().clone()), width)
81 },
82 _ => dtype,
83 }
84}
85
86impl<T, A> From<A> for ChunkedArray<T>
87where
88 T: PolarsDataType<Array = A>,
89 A: Array,
90{
91 fn from(arr: A) -> Self {
92 Self::with_chunk(PlSmallStr::EMPTY, arr)
93 }
94}
95
96impl<T> ChunkedArray<T>
97where
98 T: PolarsDataType,
99{
100 pub fn with_chunk<A>(name: PlSmallStr, arr: A) -> Self
101 where
102 A: Array,
103 T: PolarsDataType<Array = A>,
104 {
105 unsafe { Self::from_chunks(name, vec![Box::new(arr)]) }
106 }
107
108 pub fn with_chunk_like<A>(ca: &Self, arr: A) -> Self
109 where
110 A: Array,
111 T: PolarsDataType<Array = A>,
112 {
113 Self::from_chunk_iter_like(ca, std::iter::once(arr))
114 }
115
116 pub fn from_chunk_iter<I>(name: PlSmallStr, iter: I) -> Self
117 where
118 I: IntoIterator,
119 T: PolarsDataType<Array = <I as IntoIterator>::Item>,
120 <I as IntoIterator>::Item: Array,
121 {
122 let chunks = iter
123 .into_iter()
124 .map(|x| Box::new(x) as Box<dyn Array>)
125 .collect();
126 unsafe { Self::from_chunks(name, chunks) }
127 }
128
129 pub fn from_chunk_iter_like<I>(ca: &Self, iter: I) -> Self
130 where
131 I: IntoIterator,
132 T: PolarsDataType<Array = <I as IntoIterator>::Item>,
133 <I as IntoIterator>::Item: Array,
134 {
135 let chunks = iter
136 .into_iter()
137 .map(|x| Box::new(x) as Box<dyn Array>)
138 .collect();
139 unsafe {
140 Self::from_chunks_and_dtype_unchecked(ca.name().clone(), chunks, ca.dtype().clone())
141 }
142 }
143
144 pub fn try_from_chunk_iter<I, A, E>(name: PlSmallStr, iter: I) -> Result<Self, E>
145 where
146 I: IntoIterator<Item = Result<A, E>>,
147 T: PolarsDataType<Array = A>,
148 A: Array,
149 {
150 let chunks: Result<_, _> = iter
151 .into_iter()
152 .map(|x| Ok(Box::new(x?) as Box<dyn Array>))
153 .collect();
154 unsafe { Ok(Self::from_chunks(name, chunks?)) }
155 }
156
157 pub(crate) fn from_chunk_iter_and_field<I>(field: Arc<Field>, chunks: I) -> Self
158 where
159 I: IntoIterator,
160 T: PolarsDataType<Array = <I as IntoIterator>::Item>,
161 <I as IntoIterator>::Item: Array,
162 {
163 assert_eq!(
164 std::mem::discriminant(&T::get_dtype()),
165 std::mem::discriminant(&field.dtype)
166 );
167
168 let mut length = 0;
169 let mut null_count = 0;
170 let chunks = chunks
171 .into_iter()
172 .map(|x| {
173 length += x.len();
174 null_count += x.null_count();
175 Box::new(x) as Box<dyn Array>
176 })
177 .collect();
178
179 unsafe { ChunkedArray::new_with_dims(field, chunks, length, null_count) }
180 }
181
182 pub unsafe fn from_chunks(name: PlSmallStr, mut chunks: Vec<ArrayRef>) -> Self {
187 let dtype = match T::get_dtype() {
188 dtype @ DataType::List(_) => from_chunks_list_dtype(&mut chunks, dtype),
189 #[cfg(feature = "dtype-array")]
190 dtype @ DataType::Array(_, _) => from_chunks_list_dtype(&mut chunks, dtype),
191 #[cfg(feature = "dtype-struct")]
192 dtype @ DataType::Struct(_) => from_chunks_list_dtype(&mut chunks, dtype),
193 dt => dt,
194 };
195 Self::from_chunks_and_dtype(name, chunks, dtype)
196 }
197
198 pub unsafe fn with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
201 ChunkedArray::new_with_compute_len(self.field.clone(), chunks)
202 }
203
204 pub unsafe fn from_chunks_and_dtype(
210 name: PlSmallStr,
211 chunks: Vec<ArrayRef>,
212 dtype: DataType,
213 ) -> Self {
214 #[cfg(debug_assertions)]
217 {
218 if !chunks.is_empty() && !chunks[0].is_empty() && dtype.is_primitive() {
219 assert_eq!(chunks[0].dtype(), &dtype.to_arrow(CompatLevel::newest()))
220 }
221 }
222
223 Self::from_chunks_and_dtype_unchecked(name, chunks, dtype)
224 }
225
226 pub(crate) unsafe fn from_chunks_and_dtype_unchecked(
232 name: PlSmallStr,
233 chunks: Vec<ArrayRef>,
234 dtype: DataType,
235 ) -> Self {
236 let field = Arc::new(Field::new(name, dtype));
237 ChunkedArray::new_with_compute_len(field, chunks)
238 }
239
240 pub fn full_null_like(ca: &Self, length: usize) -> Self {
241 let chunks = std::iter::once(T::Array::full_null(
242 length,
243 ca.dtype().to_arrow(CompatLevel::newest()),
244 ));
245 Self::from_chunk_iter_like(ca, chunks)
246 }
247}
248
249impl<T> ChunkedArray<T>
250where
251 T: PolarsNumericType,
252{
253 pub fn from_vec(name: PlSmallStr, v: Vec<T::Native>) -> Self {
255 Self::with_chunk(name, to_primitive::<T>(v, None))
256 }
257
258 pub fn from_vec_validity(
260 name: PlSmallStr,
261 values: Vec<T::Native>,
262 buffer: Option<Bitmap>,
263 ) -> Self {
264 let arr = to_array::<T>(values, buffer);
265 ChunkedArray::new_with_compute_len(Arc::new(Field::new(name, T::get_dtype())), vec![arr])
266 }
267
268 pub unsafe fn mmap_slice(name: PlSmallStr, values: &[T::Native]) -> Self {
274 Self::with_chunk(name, arrow::ffi::mmap::slice(values))
275 }
276}
277
278impl BooleanChunked {
279 pub unsafe fn mmap_slice(name: PlSmallStr, values: &[u8], offset: usize, len: usize) -> Self {
285 let arr = arrow::ffi::mmap::bitmap(values, offset, len).unwrap();
286 Self::with_chunk(name, arr)
287 }
288
289 pub fn from_bitmap(name: PlSmallStr, bitmap: Bitmap) -> Self {
290 Self::with_chunk(
291 name,
292 BooleanArray::new(ArrowDataType::Boolean, bitmap, None),
293 )
294 }
295}
296
297impl<'a, T> From<&'a ChunkedArray<T>> for Vec<Option<T::Physical<'a>>>
298where
299 T: PolarsDataType,
300{
301 fn from(ca: &'a ChunkedArray<T>) -> Self {
302 let mut out = Vec::with_capacity(ca.len());
303 for arr in ca.downcast_iter() {
304 out.extend(arr.iter())
305 }
306 out
307 }
308}
309impl From<StringChunked> for Vec<Option<String>> {
310 fn from(ca: StringChunked) -> Self {
311 ca.iter().map(|opt| opt.map(|s| s.to_string())).collect()
312 }
313}
314
315impl From<BooleanChunked> for Vec<Option<bool>> {
316 fn from(ca: BooleanChunked) -> Self {
317 let mut out = Vec::with_capacity(ca.len());
318 for arr in ca.downcast_iter() {
319 out.extend(arr.iter())
320 }
321 out
322 }
323}