polars_core/chunked_array/
ndarray.rs

1use ndarray::prelude::*;
2use rayon::prelude::*;
3#[cfg(feature = "serde")]
4use serde::{Deserialize, Serialize};
5
6use crate::POOL;
7use crate::prelude::*;
8
/// Memory layout requested for a two-dimensional array produced from tabular data.
#[derive(Clone, Copy, Debug, Default)]
#[derive(PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum IndexOrder {
    /// Row-major layout: the values of one row are adjacent in memory.
    C,
    /// Column-major layout: the values of one column are adjacent in memory.
    ///
    /// This is the default ordering.
    #[default]
    Fortran,
}
16
17impl<T> ChunkedArray<T>
18where
19    T: PolarsNumericType,
20{
21    /// If data is aligned in a single chunk and has no Null values a zero copy view is returned
22    /// as an [ndarray]
23    pub fn to_ndarray(&self) -> PolarsResult<ArrayView1<T::Native>> {
24        let slice = self.cont_slice()?;
25        Ok(aview1(slice))
26    }
27}
28
29impl ListChunked {
30    /// If all nested [`Series`] have the same length, a 2 dimensional [`ndarray::Array`] is returned.
31    pub fn to_ndarray<N>(&self) -> PolarsResult<Array2<N::Native>>
32    where
33        N: PolarsNumericType,
34    {
35        polars_ensure!(
36            self.null_count() == 0,
37            ComputeError: "creation of ndarray with null values is not supported"
38        );
39
40        // first iteration determine the size
41        let mut iter = self.into_no_null_iter();
42        let series = iter
43            .next()
44            .ok_or_else(|| polars_err!(NoData: "unable to create ndarray of empty ListChunked"))?;
45
46        let width = series.len();
47        let mut row_idx = 0;
48        let mut ndarray = ndarray::Array::uninit((self.len(), width));
49
50        let series = series.cast(&N::get_dtype())?;
51        let ca = series.unpack::<N>()?;
52        let a = ca.to_ndarray()?;
53        let mut row = ndarray.slice_mut(s![row_idx, ..]);
54        a.assign_to(&mut row);
55        row_idx += 1;
56
57        for series in iter {
58            polars_ensure!(
59                series.len() == width,
60                ShapeMismatch: "unable to create a 2-D array, series have different lengths"
61            );
62            let series = series.cast(&N::get_dtype())?;
63            let ca = series.unpack::<N>()?;
64            let a = ca.to_ndarray()?;
65            let mut row = ndarray.slice_mut(s![row_idx, ..]);
66            a.assign_to(&mut row);
67            row_idx += 1;
68        }
69
70        debug_assert_eq!(row_idx, self.len());
71        // SAFETY:
72        // We have assigned to every row and element of the array
73        unsafe { Ok(ndarray.assume_init()) }
74    }
75}
76
77impl DataFrame {
78    /// Create a 2D [`ndarray::Array`] from this [`DataFrame`]. This requires all columns in the
79    /// [`DataFrame`] to be non-null and numeric. They will be cast to the same data type
80    /// (if they aren't already).
81    ///
82    /// For floating point data we implicitly convert `None` to `NaN` without failure.
83    ///
84    /// ```rust
85    /// use polars_core::prelude::*;
86    /// let a = UInt32Chunked::new("a".into(), &[1, 2, 3]).into_column();
87    /// let b = Float64Chunked::new("b".into(), &[10., 8., 6.]).into_column();
88    ///
89    /// let df = DataFrame::new(vec![a, b]).unwrap();
90    /// let ndarray = df.to_ndarray::<Float64Type>(IndexOrder::Fortran).unwrap();
91    /// println!("{:?}", ndarray);
92    /// ```
93    /// Outputs:
94    /// ```text
95    /// [[1.0, 10.0],
96    ///  [2.0, 8.0],
97    ///  [3.0, 6.0]], shape=[3, 2], strides=[1, 3], layout=Ff (0xa), const ndim=2
98    /// ```
99    pub fn to_ndarray<N>(&self, ordering: IndexOrder) -> PolarsResult<Array2<N::Native>>
100    where
101        N: PolarsNumericType,
102    {
103        let shape = self.shape();
104        let height = self.height();
105        let mut membuf = Vec::with_capacity(shape.0 * shape.1);
106        let ptr = membuf.as_ptr() as usize;
107
108        let columns = self.get_columns();
109        POOL.install(|| {
110            columns.par_iter().enumerate().try_for_each(|(col_idx, s)| {
111                let s = s.as_materialized_series().cast(&N::get_dtype())?;
112                let s = match s.dtype() {
113                    DataType::Float32 => {
114                        let ca = s.f32().unwrap();
115                        ca.none_to_nan().into_series()
116                    },
117                    DataType::Float64 => {
118                        let ca = s.f64().unwrap();
119                        ca.none_to_nan().into_series()
120                    },
121                    _ => s,
122                };
123                polars_ensure!(
124                    s.null_count() == 0,
125                    ComputeError: "creation of ndarray with null values is not supported"
126                );
127                let ca = s.unpack::<N>()?;
128
129                let mut chunk_offset = 0;
130                for arr in ca.downcast_iter() {
131                    let vals = arr.values();
132
133                    // Depending on the desired order, we add items to the buffer.
134                    // SAFETY:
135                    // We get parallel access to the vector by offsetting index access accordingly.
136                    // For C-order, we only operate on every num-col-th element, starting from the
137                    // column index. For Fortran-order we only operate on n contiguous elements,
138                    // offset by n * the column index.
139                    match ordering {
140                        IndexOrder::C => unsafe {
141                            let num_cols = columns.len();
142                            let mut offset =
143                                (ptr as *mut N::Native).add(col_idx + chunk_offset * num_cols);
144                            for v in vals.iter() {
145                                *offset = *v;
146                                offset = offset.add(num_cols);
147                            }
148                        },
149                        IndexOrder::Fortran => unsafe {
150                            let offset_ptr =
151                                (ptr as *mut N::Native).add(col_idx * height + chunk_offset);
152                            // SAFETY:
153                            // this is uninitialized memory, so we must never read from this data
154                            // copy_from_slice does not read
155                            let buf = std::slice::from_raw_parts_mut(offset_ptr, vals.len());
156                            buf.copy_from_slice(vals)
157                        },
158                    }
159                    chunk_offset += vals.len();
160                }
161
162                Ok(())
163            })
164        })?;
165
166        // SAFETY:
167        // we have written all data, so we can now safely set length
168        unsafe {
169            membuf.set_len(shape.0 * shape.1);
170        }
171        // Depending on the desired order, we can either return the array buffer as-is or reverse
172        // the axes.
173        match ordering {
174            IndexOrder::C => Ok(Array2::from_shape_vec((shape.0, shape.1), membuf).unwrap()),
175            IndexOrder::Fortran => {
176                let ndarr = Array2::from_shape_vec((shape.1, shape.0), membuf).unwrap();
177                Ok(ndarr.reversed_axes())
178            },
179        }
180    }
181}
182
#[cfg(test)]
mod test {
    use super::*;

    /// Build a `Float64` list column holding one list per entry of `rows`.
    fn float_list(rows: &[&[f64]]) -> ListChunked {
        let mut builder = ListPrimitiveChunkedBuilder::<Float64Type>::new(
            PlSmallStr::EMPTY,
            10,
            10,
            DataType::Float64,
        );
        for &row in rows {
            builder.append_opt_slice(Some(row));
        }
        builder.finish()
    }

    /// Two float columns of three rows each, shared by the `DataFrame` layout tests.
    fn two_column_frame() -> PolarsResult<DataFrame> {
        df!["a"=> [1.0, 2.0, 3.0],
            "b" => [2.0, 3.0, 4.0]
        ]
    }

    #[test]
    fn test_ndarray_from_ca() -> PolarsResult<()> {
        // A plain numeric array becomes a 1-D view.
        let ca = Float64Chunked::new(PlSmallStr::EMPTY, &[1.0, 2.0, 3.0]);
        let ndarr = ca.to_ndarray()?;
        assert_eq!(ndarr, ArrayView1::from(&[1.0, 2.0, 3.0]));

        // A list column with equally long rows becomes a 2-D array.
        let list = float_list(&[&[1.0, 2.0, 3.0], &[2.0, 4.0, 5.0], &[6.0, 7.0, 8.0]]);
        let ndarr = list.to_ndarray::<Float64Type>()?;
        let expected = array![[1.0, 2.0, 3.0], [2.0, 4.0, 5.0], [6.0, 7.0, 8.0]];
        assert_eq!(ndarr, expected);

        // test list array that is not square
        let ragged = float_list(&[&[1.0, 2.0, 3.0], &[2.0], &[6.0, 7.0, 8.0]]);
        assert!(ragged.to_ndarray::<Float64Type>().is_err());
        Ok(())
    }

    #[test]
    fn test_ndarray_from_df_order_fortran() -> PolarsResult<()> {
        let df = two_column_frame()?;

        let ndarr = df.to_ndarray::<Float64Type>(IndexOrder::Fortran)?;
        let expected = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]];
        assert!(!ndarr.is_standard_layout());
        assert_eq!(ndarr, expected);

        Ok(())
    }

    #[test]
    fn test_ndarray_from_df_order_c() -> PolarsResult<()> {
        let df = two_column_frame()?;

        let ndarr = df.to_ndarray::<Float64Type>(IndexOrder::C)?;
        let expected = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]];
        assert!(ndarr.is_standard_layout());
        assert_eq!(ndarr, expected);

        Ok(())
    }
}