polars_core/frame/row/
dataframe.rs

1use super::*;
2
3impl DataFrame {
4    /// Get a row from a [`DataFrame`]. Use of this is discouraged as it will likely be slow.
5    pub fn get_row(&self, idx: usize) -> PolarsResult<Row<'_>> {
6        let values = self
7            .materialized_column_iter()
8            .map(|s| s.get(idx))
9            .collect::<PolarsResult<Vec<_>>>()?;
10        Ok(Row(values))
11    }
12
13    /// Amortize allocations by reusing a row.
14    /// The caller is responsible to make sure that the row has at least the capacity for the number
15    /// of columns in the [`DataFrame`]
16    pub fn get_row_amortized<'a>(&'a self, idx: usize, row: &mut Row<'a>) -> PolarsResult<()> {
17        for (s, any_val) in self.materialized_column_iter().zip(&mut row.0) {
18            *any_val = s.get(idx)?;
19        }
20        Ok(())
21    }
22
23    /// Amortize allocations by reusing a row.
24    /// The caller is responsible to make sure that the row has at least the capacity for the number
25    /// of columns in the [`DataFrame`]
26    ///
27    /// # Safety
28    /// Does not do any bounds checking.
29    #[inline]
30    pub unsafe fn get_row_amortized_unchecked<'a>(&'a self, idx: usize, row: &mut Row<'a>) {
31        self.materialized_column_iter()
32            .zip(&mut row.0)
33            .for_each(|(s, any_val)| {
34                *any_val = s.get_unchecked(idx);
35            });
36    }
37
38    /// Create a new [`DataFrame`] from rows.
39    ///
40    /// This should only be used when you have row wise data, as this is a lot slower
41    /// than creating the [`Series`] in a columnar fashion
42    pub fn from_rows_and_schema(rows: &[Row], schema: &Schema) -> PolarsResult<Self> {
43        Self::from_rows_iter_and_schema(rows.iter(), schema)
44    }
45
46    /// Create a new [`DataFrame`] from an iterator over rows.
47    ///
48    /// This should only be used when you have row wise data, as this is a lot slower
49    /// than creating the [`Series`] in a columnar fashion.
50    pub fn from_rows_iter_and_schema<'a, I>(mut rows: I, schema: &Schema) -> PolarsResult<Self>
51    where
52        I: Iterator<Item = &'a Row<'a>>,
53    {
54        if schema.is_empty() {
55            let height = rows.count();
56            let columns = Vec::new();
57            return Ok(unsafe { DataFrame::new_unchecked(height, columns) });
58        }
59
60        let capacity = rows.size_hint().0;
61
62        let mut buffers: Vec<_> = schema
63            .iter_values()
64            .map(|dtype| {
65                let buf: AnyValueBuffer = (dtype, capacity).into();
66                buf
67            })
68            .collect();
69
70        let mut expected_len = 0;
71        rows.try_for_each::<_, PolarsResult<()>>(|row| {
72            expected_len += 1;
73            for (value, buf) in row.0.iter().zip(&mut buffers) {
74                buf.add_fallible(value)?
75            }
76            Ok(())
77        })?;
78
79        let v = buffers
80            .into_iter()
81            .zip(schema.iter_names())
82            .map(|(b, name)| {
83                let mut c = b.into_series().into_column();
84                // if the schema adds a column not in the rows, we
85                // fill it with nulls
86                if c.is_empty() {
87                    Column::full_null(name.clone(), expected_len, c.dtype())
88                } else {
89                    c.rename(name.clone());
90                    c
91                }
92            })
93            .collect();
94
95        DataFrame::new(expected_len, v)
96    }
97
98    /// Create a new [`DataFrame`] from an iterator over rows. This should only be used when you have row wise data,
99    /// as this is a lot slower than creating the [`Series`] in a columnar fashion
100    pub fn try_from_rows_iter_and_schema<'a, I>(mut rows: I, schema: &Schema) -> PolarsResult<Self>
101    where
102        I: Iterator<Item = PolarsResult<&'a Row<'a>>>,
103    {
104        let capacity = rows.size_hint().0;
105
106        let mut buffers: Vec<_> = schema
107            .iter_values()
108            .map(|dtype| {
109                let buf: AnyValueBuffer = (dtype, capacity).into();
110                buf
111            })
112            .collect();
113
114        let mut expected_len = 0;
115        rows.try_for_each::<_, PolarsResult<()>>(|row| {
116            expected_len += 1;
117            for (value, buf) in row?.0.iter().zip(&mut buffers) {
118                buf.add_fallible(value)?
119            }
120            Ok(())
121        })?;
122        let v = buffers
123            .into_iter()
124            .zip(schema.iter_names())
125            .map(|(b, name)| {
126                let mut c = b.into_series().into_column();
127                // if the schema adds a column not in the rows, we
128                // fill it with nulls
129                if c.is_empty() {
130                    Column::full_null(name.clone(), expected_len, c.dtype())
131                } else {
132                    c.rename(name.clone());
133                    c
134                }
135            })
136            .collect();
137
138        DataFrame::new(expected_len, v)
139    }
140
141    /// Create a new [`DataFrame`] from rows. This should only be used when you have row wise data,
142    /// as this is a lot slower than creating the [`Series`] in a columnar fashion
143    pub fn from_rows(rows: &[Row]) -> PolarsResult<Self> {
144        let schema = rows_to_schema_first_non_null(rows, Some(50))?;
145        let has_nulls = schema
146            .iter_values()
147            .any(|dtype| matches!(dtype, DataType::Null));
148        polars_ensure!(
149            !has_nulls, ComputeError: "unable to infer row types because of null values"
150        );
151        Self::from_rows_and_schema(rows, &schema)
152    }
153}