polars_lazy/frame/
pivot.rs

1//! Module containing implementation of the pivot operation.
2//!
3//! Polars lazy does not implement a pivot because it is impossible to know the schema without
4//! materializing the whole dataset. This makes a pivot quite a terrible operation for performant
//! workflows. An optimization can never be pushed down past a pivot.
6//!
7//! We can do a pivot on an eager `DataFrame` as that is already materialized. The code for the
8//! pivot is here, because we want to be able to pass expressions to the pivot operation.
9//!
10
11use polars_core::frame::group_by::expr::PhysicalAggExpr;
12use polars_core::prelude::*;
13use polars_ops::pivot::PivotAgg;
14
15use crate::physical_plan::exotic::{prepare_eval_expr, prepare_expression_for_context};
16use crate::prelude::*;
17
18struct PivotExpr(Expr);
19
20impl PhysicalAggExpr for PivotExpr {
21    fn evaluate(&self, df: &DataFrame, groups: &GroupPositions) -> PolarsResult<Series> {
22        let state = ExecutionState::new();
23        let dtype = df.get_columns()[0].dtype();
24        let phys_expr = prepare_expression_for_context(
25            PlSmallStr::EMPTY,
26            &self.0,
27            dtype,
28            Context::Aggregation,
29        )?;
30        phys_expr
31            .evaluate_on_groups(df, groups, &state)
32            .map(|mut ac| ac.aggregated().take_materialized_series())
33    }
34
35    fn root_name(&self) -> PolarsResult<&PlSmallStr> {
36        Ok(PlSmallStr::EMPTY_REF)
37    }
38}
39
40pub fn pivot<I0, I1, I2, S0, S1, S2>(
41    df: &DataFrame,
42    on: I0,
43    index: Option<I1>,
44    values: Option<I2>,
45    sort_columns: bool,
46    agg_expr: Option<Expr>,
47    // used as separator/delimiter in generated column names.
48    separator: Option<&str>,
49) -> PolarsResult<DataFrame>
50where
51    I0: IntoIterator<Item = S0>,
52    I1: IntoIterator<Item = S1>,
53    I2: IntoIterator<Item = S2>,
54    S0: Into<PlSmallStr>,
55    S1: Into<PlSmallStr>,
56    S2: Into<PlSmallStr>,
57{
58    // make sure that the root column is replaced
59    let agg_expr = agg_expr.map(|agg_expr| {
60        let expr = prepare_eval_expr(agg_expr);
61        PivotAgg::Expr(Arc::new(PivotExpr(expr)))
62    });
63    polars_ops::pivot::pivot(df, on, index, values, sort_columns, agg_expr, separator)
64}
65
66pub fn pivot_stable<I0, I1, I2, S0, S1, S2>(
67    df: &DataFrame,
68    on: I0,
69    index: Option<I1>,
70    values: Option<I2>,
71    sort_columns: bool,
72    agg_expr: Option<Expr>,
73    // used as separator/delimiter in generated column names.
74    separator: Option<&str>,
75) -> PolarsResult<DataFrame>
76where
77    I0: IntoIterator<Item = S0>,
78    I1: IntoIterator<Item = S1>,
79    I2: IntoIterator<Item = S2>,
80    S0: Into<PlSmallStr>,
81    S1: Into<PlSmallStr>,
82    S2: Into<PlSmallStr>,
83{
84    // make sure that the root column is replaced
85    let agg_expr = agg_expr.map(|agg_expr| {
86        let expr = prepare_eval_expr(agg_expr);
87        PivotAgg::Expr(Arc::new(PivotExpr(expr)))
88    });
89    polars_ops::pivot::pivot_stable(df, on, index, values, sort_columns, agg_expr, separator)
90}