polars_ops/frame/
mod.rs

1pub mod join;
2#[cfg(feature = "pivot")]
3pub mod pivot;
4
5pub use join::*;
6#[cfg(feature = "to_dummies")]
7use polars_core::POOL;
8use polars_core::prelude::*;
9#[cfg(feature = "to_dummies")]
10use polars_core::utils::accumulate_dataframes_horizontal;
11#[cfg(feature = "to_dummies")]
12use rayon::prelude::*;
13
14pub trait IntoDf {
15    fn to_df(&self) -> &DataFrame;
16}
17
18impl IntoDf for DataFrame {
19    fn to_df(&self) -> &DataFrame {
20        self
21    }
22}
23
24impl<T: IntoDf> DataFrameOps for T {}
25
26pub trait DataFrameOps: IntoDf {
27    /// Create dummy variables.
28    ///
29    /// # Example
30    ///
31    /// ```ignore
32    ///
33    /// # #[macro_use] extern crate polars_core;
34    /// # fn main() {
35    ///
36    ///  use polars_core::prelude::*;
37    ///
38    ///  let df = df! {
39    ///       "id" => &[1, 2, 3, 1, 2, 3, 1, 1],
40    ///       "type" => &["A", "B", "B", "B", "C", "C", "C", "B"],
41    ///       "code" => &["X1", "X2", "X3", "X3", "X2", "X2", "X1", "X1"]
42    ///   }.unwrap();
43    ///
44    ///   let dummies = df.to_dummies(None, false).unwrap();
45    ///   println!("{}", dummies);
46    /// # }
47    /// ```
48    /// Outputs:
49    /// ```text
50    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
51    ///  | id_1 | id_3 | id_2 | type_A | type_B | type_C | code_X1 | code_X2 | code_X3 |
52    ///  | ---  | ---  | ---  | ---    | ---    | ---    | ---     | ---     | ---     |
53    ///  | u8   | u8   | u8   | u8     | u8     | u8     | u8      | u8      | u8      |
54    ///  +======+======+======+========+========+========+=========+=========+=========+
55    ///  | 1    | 0    | 0    | 1      | 0      | 0      | 1       | 0       | 0       |
56    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
57    ///  | 0    | 0    | 1    | 0      | 1      | 0      | 0       | 1       | 0       |
58    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
59    ///  | 0    | 1    | 0    | 0      | 1      | 0      | 0       | 0       | 1       |
60    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
61    ///  | 1    | 0    | 0    | 0      | 1      | 0      | 0       | 0       | 1       |
62    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
63    ///  | 0    | 0    | 1    | 0      | 0      | 1      | 0       | 1       | 0       |
64    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
65    ///  | 0    | 1    | 0    | 0      | 0      | 1      | 0       | 1       | 0       |
66    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
67    ///  | 1    | 0    | 0    | 0      | 0      | 1      | 1       | 0       | 0       |
68    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
69    ///  | 1    | 0    | 0    | 0      | 1      | 0      | 1       | 0       | 0       |
70    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
71    /// ```
72    #[cfg(feature = "to_dummies")]
73    fn to_dummies(&self, separator: Option<&str>, drop_first: bool) -> PolarsResult<DataFrame> {
74        self._to_dummies(None, separator, drop_first)
75    }
76
77    #[cfg(feature = "to_dummies")]
78    fn columns_to_dummies(
79        &self,
80        columns: Vec<&str>,
81        separator: Option<&str>,
82        drop_first: bool,
83    ) -> PolarsResult<DataFrame> {
84        self._to_dummies(Some(columns), separator, drop_first)
85    }
86
87    #[cfg(feature = "to_dummies")]
88    fn _to_dummies(
89        &self,
90        columns: Option<Vec<&str>>,
91        separator: Option<&str>,
92        drop_first: bool,
93    ) -> PolarsResult<DataFrame> {
94        use crate::series::ToDummies;
95
96        let df = self.to_df();
97
98        let set: PlHashSet<&str> = if let Some(columns) = columns {
99            PlHashSet::from_iter(columns)
100        } else {
101            PlHashSet::from_iter(df.iter().map(|s| s.name().as_str()))
102        };
103
104        let cols = POOL.install(|| {
105            df.get_columns()
106                .par_iter()
107                .map(|s| match set.contains(s.name().as_str()) {
108                    true => s.as_materialized_series().to_dummies(separator, drop_first),
109                    false => Ok(s.clone().into_frame()),
110                })
111                .collect::<PolarsResult<Vec<_>>>()
112        })?;
113
114        accumulate_dataframes_horizontal(cols)
115    }
116}