polars_lazy/frame/
pivot.rs

1//! Module containing implementation of the pivot operation.
2//!
3//! Polars lazy does not implement a pivot because it is impossible to know the schema without
4//! materializing the whole dataset. This makes a pivot quite a terrible operation for performant
5//! workflows. An optimization can never be pushed down past a pivot.
6//!
7//! We can do a pivot on an eager `DataFrame` as that is already materialized. The code for the
8//! pivot is here, because we want to be able to pass expressions to the pivot operation.
9//!
10
11use polars_core::frame::group_by::expr::PhysicalAggExpr;
12use polars_core::prelude::*;
13use polars_ops::pivot::PivotAgg;
14
15use crate::physical_plan::exotic::{contains_column_refs, prepare_expression_for_context};
16use crate::prelude::*;
17
18pub struct PivotExpr(Expr);
19
20impl PivotExpr {
21    pub fn from_expr(expr: Expr) -> Self {
22        PivotExpr(expr)
23    }
24}
25
26impl PhysicalAggExpr for PivotExpr {
27    fn evaluate_on_groups(&self, df: &DataFrame, groups: &GroupPositions) -> PolarsResult<Series> {
28        let state = ExecutionState::new();
29        let dtype = df.get_columns()[0].dtype();
30        let phys_expr = prepare_expression_for_context(
31            PlSmallStr::EMPTY,
32            &self.0,
33            dtype,
34            Context::Aggregation,
35        )?;
36        phys_expr
37            .evaluate_on_groups(df, groups, &state)
38            .map(|mut ac| ac.aggregated().take_materialized_series())
39    }
40
41    fn root_name(&self) -> PolarsResult<&PlSmallStr> {
42        Ok(PlSmallStr::EMPTY_REF)
43    }
44}
45
46pub fn pivot<I0, I1, I2, S0, S1, S2>(
47    df: &DataFrame,
48    on: I0,
49    index: Option<I1>,
50    values: Option<I2>,
51    sort_columns: bool,
52    agg_expr: Option<Expr>,
53    // used as separator/delimiter in generated column names.
54    separator: Option<&str>,
55) -> PolarsResult<DataFrame>
56where
57    I0: IntoIterator<Item = S0>,
58    I1: IntoIterator<Item = S1>,
59    I2: IntoIterator<Item = S2>,
60    S0: Into<PlSmallStr>,
61    S1: Into<PlSmallStr>,
62    S2: Into<PlSmallStr>,
63{
64    // we are strict:
65    // agg_expr can only access data as generated by the pivot operation through pl.element()
66    if agg_expr.as_ref().is_some_and(contains_column_refs) {
67        polars_bail!(InvalidOperation: "explicit column references are not allowed in aggregate_function");
68    }
69
70    let agg_expr = agg_expr.map(|ae| PivotAgg(Arc::new(PivotExpr(ae))));
71    polars_ops::pivot::pivot(df, on, index, values, sort_columns, agg_expr, separator)
72}
73
74pub fn pivot_stable<I0, I1, I2, S0, S1, S2>(
75    df: &DataFrame,
76    on: I0,
77    index: Option<I1>,
78    values: Option<I2>,
79    sort_columns: bool,
80    agg_expr: Option<Expr>,
81    // used as separator/delimiter in generated column names.
82    separator: Option<&str>,
83) -> PolarsResult<DataFrame>
84where
85    I0: IntoIterator<Item = S0>,
86    I1: IntoIterator<Item = S1>,
87    I2: IntoIterator<Item = S2>,
88    S0: Into<PlSmallStr>,
89    S1: Into<PlSmallStr>,
90    S2: Into<PlSmallStr>,
91{
92    // we are strict:
93    // agg_expr can only access data as generated by the pivot operation through pl.element()
94    if agg_expr.as_ref().is_some_and(contains_column_refs) {
95        polars_bail!(InvalidOperation: "explicit column references are not allowed in aggregate_function");
96    }
97
98    let agg_expr = agg_expr.map(|ae| PivotAgg(Arc::new(PivotExpr(ae))));
99    polars_ops::pivot::pivot_stable(df, on, index, values, sort_columns, agg_expr, separator)
100}