polars_core/chunked_array/
ndarray.rs1use ndarray::prelude::*;
2use rayon::prelude::*;
3#[cfg(feature = "serde")]
4use serde::{Deserialize, Serialize};
5
6use crate::POOL;
7use crate::prelude::*;
8
/// Memory layout requested for a 2-D array produced from a `DataFrame`.
///
/// The default is [`IndexOrder::Fortran`].
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum IndexOrder {
    /// Row-major order: the values of one row are adjacent in memory
    /// (ndarray's "standard layout").
    C,
    /// Column-major order: the values of one column are adjacent in memory.
    #[default]
    Fortran,
}
16
17impl<T> ChunkedArray<T>
18where
19 T: PolarsNumericType,
20{
21 pub fn to_ndarray(&self) -> PolarsResult<ArrayView1<T::Native>> {
24 let slice = self.cont_slice()?;
25 Ok(aview1(slice))
26 }
27}
28
29impl ListChunked {
30 pub fn to_ndarray<N>(&self) -> PolarsResult<Array2<N::Native>>
32 where
33 N: PolarsNumericType,
34 {
35 polars_ensure!(
36 self.null_count() == 0,
37 ComputeError: "creation of ndarray with null values is not supported"
38 );
39
40 let mut iter = self.into_no_null_iter();
42 let series = iter
43 .next()
44 .ok_or_else(|| polars_err!(NoData: "unable to create ndarray of empty ListChunked"))?;
45
46 let width = series.len();
47 let mut row_idx = 0;
48 let mut ndarray = ndarray::Array::uninit((self.len(), width));
49
50 let series = series.cast(&N::get_dtype())?;
51 let ca = series.unpack::<N>()?;
52 let a = ca.to_ndarray()?;
53 let mut row = ndarray.slice_mut(s![row_idx, ..]);
54 a.assign_to(&mut row);
55 row_idx += 1;
56
57 for series in iter {
58 polars_ensure!(
59 series.len() == width,
60 ShapeMismatch: "unable to create a 2-D array, series have different lengths"
61 );
62 let series = series.cast(&N::get_dtype())?;
63 let ca = series.unpack::<N>()?;
64 let a = ca.to_ndarray()?;
65 let mut row = ndarray.slice_mut(s![row_idx, ..]);
66 a.assign_to(&mut row);
67 row_idx += 1;
68 }
69
70 debug_assert_eq!(row_idx, self.len());
71 unsafe { Ok(ndarray.assume_init()) }
74 }
75}
76
77impl DataFrame {
78 pub fn to_ndarray<N>(&self, ordering: IndexOrder) -> PolarsResult<Array2<N::Native>>
100 where
101 N: PolarsNumericType,
102 {
103 let shape = self.shape();
104 let height = self.height();
105 let mut membuf = Vec::with_capacity(shape.0 * shape.1);
106 let ptr = membuf.as_ptr() as usize;
107
108 let columns = self.get_columns();
109 POOL.install(|| {
110 columns.par_iter().enumerate().try_for_each(|(col_idx, s)| {
111 let s = s.as_materialized_series().cast(&N::get_dtype())?;
112 let s = match s.dtype() {
113 DataType::Float32 => {
114 let ca = s.f32().unwrap();
115 ca.none_to_nan().into_series()
116 },
117 DataType::Float64 => {
118 let ca = s.f64().unwrap();
119 ca.none_to_nan().into_series()
120 },
121 _ => s,
122 };
123 polars_ensure!(
124 s.null_count() == 0,
125 ComputeError: "creation of ndarray with null values is not supported"
126 );
127 let ca = s.unpack::<N>()?;
128
129 let mut chunk_offset = 0;
130 for arr in ca.downcast_iter() {
131 let vals = arr.values();
132
133 match ordering {
140 IndexOrder::C => unsafe {
141 let num_cols = columns.len();
142 let mut offset =
143 (ptr as *mut N::Native).add(col_idx + chunk_offset * num_cols);
144 for v in vals.iter() {
145 *offset = *v;
146 offset = offset.add(num_cols);
147 }
148 },
149 IndexOrder::Fortran => unsafe {
150 let offset_ptr =
151 (ptr as *mut N::Native).add(col_idx * height + chunk_offset);
152 let buf = std::slice::from_raw_parts_mut(offset_ptr, vals.len());
156 buf.copy_from_slice(vals)
157 },
158 }
159 chunk_offset += vals.len();
160 }
161
162 Ok(())
163 })
164 })?;
165
166 unsafe {
169 membuf.set_len(shape.0 * shape.1);
170 }
171 match ordering {
174 IndexOrder::C => Ok(Array2::from_shape_vec((shape.0, shape.1), membuf).unwrap()),
175 IndexOrder::Fortran => {
176 let ndarr = Array2::from_shape_vec((shape.1, shape.0), membuf).unwrap();
177 Ok(ndarr.reversed_axes())
178 },
179 }
180 }
181}
182
#[cfg(test)]
mod test {
    use super::*;

    /// Builds a `Float64` list array holding one sub-list per entry of `rows`.
    fn float_list(rows: &[&[f64]]) -> ListChunked {
        let mut builder = ListPrimitiveChunkedBuilder::<Float64Type>::new(
            PlSmallStr::EMPTY,
            10,
            10,
            DataType::Float64,
        );
        for &row in rows {
            builder.append_opt_slice(Some(row));
        }
        builder.finish()
    }

    /// Builds the 3x2 frame shared by the `DataFrame` layout tests.
    fn sample_frame() -> PolarsResult<DataFrame> {
        df!["a"=> [1.0, 2.0, 3.0],
            "b" => [2.0, 3.0, 4.0]
        ]
    }

    #[test]
    fn test_ndarray_from_ca() -> PolarsResult<()> {
        // A numeric chunked array becomes a 1-D view.
        let ca = Float64Chunked::new(PlSmallStr::EMPTY, &[1.0, 2.0, 3.0]);
        let view = ca.to_ndarray()?;
        assert_eq!(view, ArrayView1::from(&[1.0, 2.0, 3.0]));

        // Equal-length sub-lists become the rows of a 2-D array.
        let regular = float_list(&[&[1.0, 2.0, 3.0], &[2.0, 4.0, 5.0], &[6.0, 7.0, 8.0]]);
        let matrix = regular.to_ndarray::<Float64Type>()?;
        let expected = array![[1.0, 2.0, 3.0], [2.0, 4.0, 5.0], [6.0, 7.0, 8.0]];
        assert_eq!(matrix, expected);

        // Sub-lists of different lengths cannot form a 2-D array.
        let ragged = float_list(&[&[1.0, 2.0, 3.0], &[2.0], &[6.0, 7.0, 8.0]]);
        assert!(ragged.to_ndarray::<Float64Type>().is_err());
        Ok(())
    }

    #[test]
    fn test_ndarray_from_df_order_fortran() -> PolarsResult<()> {
        let frame = sample_frame()?;

        let matrix = frame.to_ndarray::<Float64Type>(IndexOrder::Fortran)?;
        let expected = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]];
        // Column-major output is not ndarray's standard (row-major) layout.
        assert!(!matrix.is_standard_layout());
        assert_eq!(matrix, expected);

        Ok(())
    }

    #[test]
    fn test_ndarray_from_df_order_c() -> PolarsResult<()> {
        let frame = sample_frame()?;

        let matrix = frame.to_ndarray::<Float64Type>(IndexOrder::C)?;
        let expected = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]];
        assert!(matrix.is_standard_layout());
        assert_eq!(matrix, expected);

        Ok(())
    }
}