polars_core/frame/row/
dataframe.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
use super::*;

impl DataFrame {
    /// Get a row from a [`DataFrame`]. Use of this is discouraged as it will likely be slow.
    pub fn get_row(&self, idx: usize) -> PolarsResult<Row> {
        let values = self
            .materialized_column_iter()
            .map(|s| s.get(idx))
            .collect::<PolarsResult<Vec<_>>>()?;
        Ok(Row(values))
    }

    /// Amortize allocations by reusing a row.
    /// The caller is responsible to make sure that the row has at least the capacity for the number
    /// of columns in the [`DataFrame`]
    pub fn get_row_amortized<'a>(&'a self, idx: usize, row: &mut Row<'a>) -> PolarsResult<()> {
        for (s, any_val) in self.materialized_column_iter().zip(&mut row.0) {
            *any_val = s.get(idx)?;
        }
        Ok(())
    }

    /// Amortize allocations by reusing a row.
    /// The caller is responsible to make sure that the row has at least the capacity for the number
    /// of columns in the [`DataFrame`]
    ///
    /// # Safety
    /// Does not do any bounds checking.
    #[inline]
    pub unsafe fn get_row_amortized_unchecked<'a>(&'a self, idx: usize, row: &mut Row<'a>) {
        self.materialized_column_iter()
            .zip(&mut row.0)
            .for_each(|(s, any_val)| {
                *any_val = s.get_unchecked(idx);
            });
    }

    /// Create a new [`DataFrame`] from rows.
    ///
    /// This should only be used when you have row wise data, as this is a lot slower
    /// than creating the [`Series`] in a columnar fashion
    pub fn from_rows_and_schema(rows: &[Row], schema: &Schema) -> PolarsResult<Self> {
        Self::from_rows_iter_and_schema(rows.iter(), schema)
    }

    /// Create a new [`DataFrame`] from an iterator over rows.
    ///
    /// This should only be used when you have row wise data, as this is a lot slower
    /// than creating the [`Series`] in a columnar fashion.
    pub fn from_rows_iter_and_schema<'a, I>(mut rows: I, schema: &Schema) -> PolarsResult<Self>
    where
        I: Iterator<Item = &'a Row<'a>>,
    {
        if schema.is_empty() {
            let height = rows.count();
            let columns = Vec::new();
            return Ok(unsafe { DataFrame::new_no_checks(height, columns) });
        }

        let capacity = rows.size_hint().0;

        let mut buffers: Vec<_> = schema
            .iter_values()
            .map(|dtype| {
                let buf: AnyValueBuffer = (dtype, capacity).into();
                buf
            })
            .collect();

        let mut expected_len = 0;
        rows.try_for_each::<_, PolarsResult<()>>(|row| {
            expected_len += 1;
            for (value, buf) in row.0.iter().zip(&mut buffers) {
                buf.add_fallible(value)?
            }
            Ok(())
        })?;
        let v = buffers
            .into_iter()
            .zip(schema.iter_names())
            .map(|(b, name)| {
                let mut c = b.into_series().into_column();
                // if the schema adds a column not in the rows, we
                // fill it with nulls
                if c.is_empty() {
                    Column::full_null(name.clone(), expected_len, c.dtype())
                } else {
                    c.rename(name.clone());
                    c
                }
            })
            .collect();
        DataFrame::new(v)
    }

    /// Create a new [`DataFrame`] from an iterator over rows. This should only be used when you have row wise data,
    /// as this is a lot slower than creating the [`Series`] in a columnar fashion
    pub fn try_from_rows_iter_and_schema<'a, I>(mut rows: I, schema: &Schema) -> PolarsResult<Self>
    where
        I: Iterator<Item = PolarsResult<&'a Row<'a>>>,
    {
        let capacity = rows.size_hint().0;

        let mut buffers: Vec<_> = schema
            .iter_values()
            .map(|dtype| {
                let buf: AnyValueBuffer = (dtype, capacity).into();
                buf
            })
            .collect();

        let mut expected_len = 0;
        rows.try_for_each::<_, PolarsResult<()>>(|row| {
            expected_len += 1;
            for (value, buf) in row?.0.iter().zip(&mut buffers) {
                buf.add_fallible(value)?
            }
            Ok(())
        })?;
        let v = buffers
            .into_iter()
            .zip(schema.iter_names())
            .map(|(b, name)| {
                let mut c = b.into_series().into_column();
                // if the schema adds a column not in the rows, we
                // fill it with nulls
                if c.is_empty() {
                    Column::full_null(name.clone(), expected_len, c.dtype())
                } else {
                    c.rename(name.clone());
                    c
                }
            })
            .collect();
        DataFrame::new(v)
    }

    /// Create a new [`DataFrame`] from rows. This should only be used when you have row wise data,
    /// as this is a lot slower than creating the [`Series`] in a columnar fashion
    pub fn from_rows(rows: &[Row]) -> PolarsResult<Self> {
        let schema = rows_to_schema_first_non_null(rows, Some(50))?;
        let has_nulls = schema
            .iter_values()
            .any(|dtype| matches!(dtype, DataType::Null));
        polars_ensure!(
            !has_nulls, ComputeError: "unable to infer row types because of null values"
        );
        Self::from_rows_and_schema(rows, &schema)
    }
}