polars_ops/frame/mod.rs
1pub mod join;
2#[cfg(feature = "pivot")]
3pub mod pivot;
4
5pub use join::*;
6#[cfg(feature = "to_dummies")]
7use polars_core::POOL;
8use polars_core::prelude::*;
9#[cfg(feature = "to_dummies")]
10use polars_core::utils::accumulate_dataframes_horizontal;
11#[cfg(feature = "to_dummies")]
12use rayon::prelude::*;
13
14pub trait IntoDf {
15 fn to_df(&self) -> &DataFrame;
16}
17
18impl IntoDf for DataFrame {
19 fn to_df(&self) -> &DataFrame {
20 self
21 }
22}
23
24impl<T: IntoDf> DataFrameOps for T {}
25
26pub trait DataFrameOps: IntoDf {
27 /// Create dummy variables.
28 ///
29 /// # Example
30 ///
31 /// ```ignore
32 ///
33 /// # #[macro_use] extern crate polars_core;
34 /// # fn main() {
35 ///
36 /// use polars_core::prelude::*;
37 ///
38 /// let df = df! {
39 /// "id" => &[1, 2, 3, 1, 2, 3, 1, 1],
40 /// "type" => &["A", "B", "B", "B", "C", "C", "C", "B"],
41 /// "code" => &["X1", "X2", "X3", "X3", "X2", "X2", "X1", "X1"]
42 /// }.unwrap();
43 ///
44 /// let dummies = df.to_dummies(None, false, false).unwrap();
45 /// println!("{}", dummies);
46 /// # }
47 /// ```
48 /// Outputs:
49 /// ```text
50 /// +------+------+------+--------+--------+--------+---------+---------+---------+
51 /// | id_1 | id_3 | id_2 | type_A | type_B | type_C | code_X1 | code_X2 | code_X3 |
52 /// | --- | --- | --- | --- | --- | --- | --- | --- | --- |
53 /// | u8 | u8 | u8 | u8 | u8 | u8 | u8 | u8 | u8 |
54 /// +======+======+======+========+========+========+=========+=========+=========+
55 /// | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 |
56 /// +------+------+------+--------+--------+--------+---------+---------+---------+
57 /// | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 |
58 /// +------+------+------+--------+--------+--------+---------+---------+---------+
59 /// | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
60 /// +------+------+------+--------+--------+--------+---------+---------+---------+
61 /// | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
62 /// +------+------+------+--------+--------+--------+---------+---------+---------+
63 /// | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 |
64 /// +------+------+------+--------+--------+--------+---------+---------+---------+
65 /// | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 |
66 /// +------+------+------+--------+--------+--------+---------+---------+---------+
67 /// | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 |
68 /// +------+------+------+--------+--------+--------+---------+---------+---------+
69 /// | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 |
70 /// +------+------+------+--------+--------+--------+---------+---------+---------+
71 /// ```
72 #[cfg(feature = "to_dummies")]
73 fn to_dummies(
74 &self,
75 separator: Option<&str>,
76 drop_first: bool,
77 drop_nulls: bool,
78 ) -> PolarsResult<DataFrame> {
79 self._to_dummies(None, separator, drop_first, drop_nulls)
80 }
81
82 #[cfg(feature = "to_dummies")]
83 fn columns_to_dummies(
84 &self,
85 columns: Vec<&str>,
86 separator: Option<&str>,
87 drop_first: bool,
88 drop_nulls: bool,
89 ) -> PolarsResult<DataFrame> {
90 self._to_dummies(Some(columns), separator, drop_first, drop_nulls)
91 }
92
93 #[cfg(feature = "to_dummies")]
94 fn _to_dummies(
95 &self,
96 columns: Option<Vec<&str>>,
97 separator: Option<&str>,
98 drop_first: bool,
99 drop_nulls: bool,
100 ) -> PolarsResult<DataFrame> {
101 use crate::series::ToDummies;
102
103 let df = self.to_df();
104
105 let set: PlHashSet<&str> = if let Some(columns) = columns {
106 PlHashSet::from_iter(columns)
107 } else {
108 PlHashSet::from_iter(df.iter().map(|s| s.name().as_str()))
109 };
110
111 let cols = POOL.install(|| {
112 df.get_columns()
113 .par_iter()
114 .map(|s| match set.contains(s.name().as_str()) {
115 true => s
116 .as_materialized_series()
117 .to_dummies(separator, drop_first, drop_nulls),
118 false => Ok(s.clone().into_frame()),
119 })
120 .collect::<PolarsResult<Vec<_>>>()
121 })?;
122
123 accumulate_dataframes_horizontal(cols)
124 }
125}