Skip to main content

polars_ops/frame/
mod.rs

1pub mod gather;
2pub mod join;
3#[cfg(feature = "pivot")]
4pub mod unpivot;
5
6pub use join::*;
7use polars_core::prelude::*;
8#[cfg(feature = "to_dummies")]
9use polars_core::runtime::RAYON;
10#[cfg(feature = "to_dummies")]
11use polars_core::utils::accumulate_dataframes_horizontal;
12#[cfg(feature = "to_dummies")]
13use rayon::prelude::*;
14
15pub trait IntoDf {
16    fn to_df(&self) -> &DataFrame;
17}
18
19impl IntoDf for DataFrame {
20    fn to_df(&self) -> &DataFrame {
21        self
22    }
23}
24
25impl<T: IntoDf> DataFrameOps for T {}
26
27pub trait DataFrameOps: IntoDf {
28    /// Create dummy variables.
29    ///
30    /// # Example
31    ///
32    /// ```ignore
33    ///
34    /// # #[macro_use] extern crate polars_core;
35    /// # fn main() {
36    ///
37    ///  use polars_core::prelude::*;
38    ///
39    ///  let df = df! {
40    ///       "id" => &[1, 2, 3, 1, 2, 3, 1, 1],
41    ///       "type" => &["A", "B", "B", "B", "C", "C", "C", "B"],
42    ///       "code" => &["X1", "X2", "X3", "X3", "X2", "X2", "X1", "X1"]
43    ///   }.unwrap();
44    ///
45    ///   let dummies = df.to_dummies(None, false, false).unwrap();
46    ///   println!("{}", dummies);
47    /// # }
48    /// ```
49    /// Outputs:
50    /// ```text
51    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
52    ///  | id_1 | id_3 | id_2 | type_A | type_B | type_C | code_X1 | code_X2 | code_X3 |
53    ///  | ---  | ---  | ---  | ---    | ---    | ---    | ---     | ---     | ---     |
54    ///  | u8   | u8   | u8   | u8     | u8     | u8     | u8      | u8      | u8      |
55    ///  +======+======+======+========+========+========+=========+=========+=========+
56    ///  | 1    | 0    | 0    | 1      | 0      | 0      | 1       | 0       | 0       |
57    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
58    ///  | 0    | 0    | 1    | 0      | 1      | 0      | 0       | 1       | 0       |
59    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
60    ///  | 0    | 1    | 0    | 0      | 1      | 0      | 0       | 0       | 1       |
61    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
62    ///  | 1    | 0    | 0    | 0      | 1      | 0      | 0       | 0       | 1       |
63    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
64    ///  | 0    | 0    | 1    | 0      | 0      | 1      | 0       | 1       | 0       |
65    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
66    ///  | 0    | 1    | 0    | 0      | 0      | 1      | 0       | 1       | 0       |
67    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
68    ///  | 1    | 0    | 0    | 0      | 0      | 1      | 1       | 0       | 0       |
69    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
70    ///  | 1    | 0    | 0    | 0      | 1      | 0      | 1       | 0       | 0       |
71    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
72    /// ```
73    #[cfg(feature = "to_dummies")]
74    fn to_dummies(
75        &self,
76        separator: Option<&str>,
77        drop_first: bool,
78        drop_nulls: bool,
79    ) -> PolarsResult<DataFrame> {
80        self._to_dummies(None, separator, drop_first, drop_nulls)
81    }
82
83    #[cfg(feature = "to_dummies")]
84    fn columns_to_dummies(
85        &self,
86        columns: Vec<&str>,
87        separator: Option<&str>,
88        drop_first: bool,
89        drop_nulls: bool,
90    ) -> PolarsResult<DataFrame> {
91        self._to_dummies(Some(columns), separator, drop_first, drop_nulls)
92    }
93
94    #[cfg(feature = "to_dummies")]
95    fn _to_dummies(
96        &self,
97        columns: Option<Vec<&str>>,
98        separator: Option<&str>,
99        drop_first: bool,
100        drop_nulls: bool,
101    ) -> PolarsResult<DataFrame> {
102        use crate::series::ToDummies;
103
104        let df = self.to_df();
105
106        let set: PlHashSet<&str> = if let Some(columns) = columns {
107            PlHashSet::from_iter(columns)
108        } else {
109            PlHashSet::from_iter(df.columns().iter().map(|s| s.name().as_str()))
110        };
111
112        let cols = RAYON.install(|| {
113            df.columns()
114                .par_iter()
115                .map(|s| match set.contains(s.name().as_str()) {
116                    true => s
117                        .as_materialized_series()
118                        .to_dummies(separator, drop_first, drop_nulls),
119                    false => Ok(s.clone().into_frame()),
120                })
121                .collect::<PolarsResult<Vec<_>>>()
122        })?;
123
124        accumulate_dataframes_horizontal(cols)
125    }
126}