Skip to main content

polars_ops/frame/
mod.rs

1pub mod gather;
2pub mod is_sorted;
3pub mod join;
4#[cfg(feature = "pivot")]
5pub mod unpivot;
6
7pub use join::*;
8use polars_core::prelude::*;
9#[cfg(feature = "to_dummies")]
10use polars_core::runtime::RAYON;
11#[cfg(feature = "to_dummies")]
12use polars_core::utils::accumulate_dataframes_horizontal;
13#[cfg(feature = "to_dummies")]
14use rayon::prelude::*;
15
16pub trait IntoDf {
17    fn to_df(&self) -> &DataFrame;
18}
19
20impl IntoDf for DataFrame {
21    fn to_df(&self) -> &DataFrame {
22        self
23    }
24}
25
26impl<T: IntoDf> DataFrameOps for T {}
27
28pub trait DataFrameOps: IntoDf {
29    /// Create dummy variables.
30    ///
31    /// # Example
32    ///
33    /// ```ignore
34    ///
35    /// # #[macro_use] extern crate polars_core;
36    /// # fn main() {
37    ///
38    ///  use polars_core::prelude::*;
39    ///
40    ///  let df = df! {
41    ///       "id" => &[1, 2, 3, 1, 2, 3, 1, 1],
42    ///       "type" => &["A", "B", "B", "B", "C", "C", "C", "B"],
43    ///       "code" => &["X1", "X2", "X3", "X3", "X2", "X2", "X1", "X1"]
44    ///   }.unwrap();
45    ///
46    ///   let dummies = df.to_dummies(None, false, false).unwrap();
47    ///   println!("{}", dummies);
48    /// # }
49    /// ```
50    /// Outputs:
51    /// ```text
52    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
53    ///  | id_1 | id_3 | id_2 | type_A | type_B | type_C | code_X1 | code_X2 | code_X3 |
54    ///  | ---  | ---  | ---  | ---    | ---    | ---    | ---     | ---     | ---     |
55    ///  | u8   | u8   | u8   | u8     | u8     | u8     | u8      | u8      | u8      |
56    ///  +======+======+======+========+========+========+=========+=========+=========+
57    ///  | 1    | 0    | 0    | 1      | 0      | 0      | 1       | 0       | 0       |
58    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
59    ///  | 0    | 0    | 1    | 0      | 1      | 0      | 0       | 1       | 0       |
60    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
61    ///  | 0    | 1    | 0    | 0      | 1      | 0      | 0       | 0       | 1       |
62    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
63    ///  | 1    | 0    | 0    | 0      | 1      | 0      | 0       | 0       | 1       |
64    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
65    ///  | 0    | 0    | 1    | 0      | 0      | 1      | 0       | 1       | 0       |
66    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
67    ///  | 0    | 1    | 0    | 0      | 0      | 1      | 0       | 1       | 0       |
68    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
69    ///  | 1    | 0    | 0    | 0      | 0      | 1      | 1       | 0       | 0       |
70    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
71    ///  | 1    | 0    | 0    | 0      | 1      | 0      | 1       | 0       | 0       |
72    ///  +------+------+------+--------+--------+--------+---------+---------+---------+
73    /// ```
74    #[cfg(feature = "to_dummies")]
75    fn to_dummies(
76        &self,
77        separator: Option<&str>,
78        drop_first: bool,
79        drop_nulls: bool,
80    ) -> PolarsResult<DataFrame> {
81        self._to_dummies(None, separator, drop_first, drop_nulls)
82    }
83
84    #[cfg(feature = "to_dummies")]
85    fn columns_to_dummies(
86        &self,
87        columns: Vec<&str>,
88        separator: Option<&str>,
89        drop_first: bool,
90        drop_nulls: bool,
91    ) -> PolarsResult<DataFrame> {
92        self._to_dummies(Some(columns), separator, drop_first, drop_nulls)
93    }
94
95    #[cfg(feature = "to_dummies")]
96    fn _to_dummies(
97        &self,
98        columns: Option<Vec<&str>>,
99        separator: Option<&str>,
100        drop_first: bool,
101        drop_nulls: bool,
102    ) -> PolarsResult<DataFrame> {
103        use crate::series::ToDummies;
104
105        let df = self.to_df();
106
107        let set: PlHashSet<&str> = if let Some(columns) = columns {
108            PlHashSet::from_iter(columns)
109        } else {
110            PlHashSet::from_iter(df.columns().iter().map(|s| s.name().as_str()))
111        };
112
113        let cols = RAYON.install(|| {
114            df.columns()
115                .par_iter()
116                .map(|s| match set.contains(s.name().as_str()) {
117                    true => s
118                        .as_materialized_series()
119                        .to_dummies(separator, drop_first, drop_nulls),
120                    false => Ok(s.clone().into_frame()),
121                })
122                .collect::<PolarsResult<Vec<_>>>()
123        })?;
124
125        accumulate_dataframes_horizontal(cols)
126    }
127}