Struct polars_lazy::frame::LazyFrame  
source · pub struct LazyFrame {
    pub logical_plan: DslPlan,
    /* private fields */
}
Expand description
Lazy abstraction over an eager DataFrame.
It really is an abstraction over a logical plan. The methods of this struct will incrementally
modify a logical plan until output is requested (via collect).
Fields§
§logical_plan: DslPlan
Implementations§
source§impl LazyFrame
 
impl LazyFrame
pub fn scan_from_python_function( schema: Schema, scan_fn: PyObject, pyarrow: bool ) -> Self
python only.
source§impl LazyFrame
 
impl LazyFrame
pub fn set_cached_arena(&self, lp_arena: Arena<IR>, expr_arena: Arena<AExpr>)
pub fn schema_with_arenas( &mut self, lp_arena: &mut Arena<IR>, expr_arena: &mut Arena<AExpr> ) -> PolarsResult<SchemaRef>
source§impl LazyFrame
 
impl LazyFrame
pub fn collect_concurrently(self) -> PolarsResult<InProcessQuery>
source§impl LazyFrame
 
impl LazyFrame
sourcepub fn get_current_optimizations(&self) -> OptState
 
pub fn get_current_optimizations(&self) -> OptState
Get current optimizations.
sourcepub fn with_optimizations(self, opt_state: OptState) -> Self
 
pub fn with_optimizations(self, opt_state: OptState) -> Self
Set allowed optimizations.
sourcepub fn without_optimizations(self) -> Self
 
pub fn without_optimizations(self) -> Self
Turn off all optimizations.
sourcepub fn with_projection_pushdown(self, toggle: bool) -> Self
 
pub fn with_projection_pushdown(self, toggle: bool) -> Self
Toggle projection pushdown optimization.
sourcepub fn with_cluster_with_columns(self, toggle: bool) -> Self
 
pub fn with_cluster_with_columns(self, toggle: bool) -> Self
Toggle cluster with columns optimization.
sourcepub fn with_predicate_pushdown(self, toggle: bool) -> Self
 
pub fn with_predicate_pushdown(self, toggle: bool) -> Self
Toggle predicate pushdown optimization.
sourcepub fn with_type_coercion(self, toggle: bool) -> Self
 
pub fn with_type_coercion(self, toggle: bool) -> Self
Toggle type coercion optimization.
sourcepub fn with_simplify_expr(self, toggle: bool) -> Self
 
pub fn with_simplify_expr(self, toggle: bool) -> Self
Toggle expression simplification optimization on or off.
sourcepub fn with_comm_subplan_elim(self, toggle: bool) -> Self
 Available on crate feature cse only.
pub fn with_comm_subplan_elim(self, toggle: bool) -> Self
cse only. Toggle common subplan elimination optimization on or off.
sourcepub fn with_comm_subexpr_elim(self, toggle: bool) -> Self
 Available on crate feature cse only.
pub fn with_comm_subexpr_elim(self, toggle: bool) -> Self
cse only. Toggle common subexpression elimination optimization on or off.
sourcepub fn with_slice_pushdown(self, toggle: bool) -> Self
 
pub fn with_slice_pushdown(self, toggle: bool) -> Self
Toggle slice pushdown optimization.
sourcepub fn with_streaming(self, toggle: bool) -> Self
 
pub fn with_streaming(self, toggle: bool) -> Self
Run nodes that are capable of doing so on the streaming engine.
sourcepub fn with_row_estimate(self, toggle: bool) -> Self
 
pub fn with_row_estimate(self, toggle: bool) -> Self
Try to estimate the number of rows so that joins can determine which side to keep in memory.
sourcepub fn _with_eager(self, toggle: bool) -> Self
 
pub fn _with_eager(self, toggle: bool) -> Self
Run every node eagerly. This turns off multi-node optimizations.
sourcepub fn describe_plan(&self) -> PolarsResult<String>
 
pub fn describe_plan(&self) -> PolarsResult<String>
Return a String describing the naive (un-optimized) logical plan.
sourcepub fn describe_plan_tree(&self) -> PolarsResult<String>
 
pub fn describe_plan_tree(&self) -> PolarsResult<String>
Return a String describing the naive (un-optimized) logical plan in tree format.
sourcepub fn describe_optimized_plan(&self) -> PolarsResult<String>
 
pub fn describe_optimized_plan(&self) -> PolarsResult<String>
Return a String describing the optimized logical plan.
Returns Err if optimizing the logical plan fails.
sourcepub fn describe_optimized_plan_tree(&self) -> PolarsResult<String>
 
pub fn describe_optimized_plan_tree(&self) -> PolarsResult<String>
Return a String describing the optimized logical plan in tree format.
Returns Err if optimizing the logical plan fails.
sourcepub fn explain(&self, optimized: bool) -> PolarsResult<String>
 
pub fn explain(&self, optimized: bool) -> PolarsResult<String>
Return a String describing the logical plan.
If optimized is true, explains the optimized plan. If optimized is false,
explains the naive, un-optimized plan.
sourcepub fn sort(
    self,
    by: impl IntoVec<SmartString>,
    sort_options: SortMultipleOptions
) -> Self
 
pub fn sort( self, by: impl IntoVec<SmartString>, sort_options: SortMultipleOptions ) -> Self
Add a sort operation to the logical plan.
Sorts the LazyFrame by the column name specified using the provided options.
§Example
Sort DataFrame by ‘sepal_width’ column:
fn sort_by_a(df: DataFrame) -> LazyFrame {
    df.lazy().sort(["sepal_width"], Default::default())
}
Sort by a single column with specific order:
fn sort_with_specific_order(df: DataFrame, descending: bool) -> LazyFrame {
    df.lazy().sort(
        ["sepal_width"],
        SortMultipleOptions::new()
            .with_order_descending(descending)
    )
}
Sort by multiple columns with specifying order for each column:
fn sort_by_multiple_columns_with_specific_order(df: DataFrame) -> LazyFrame {
    df.lazy().sort(
        &["sepal_width", "sepal_length"],
        SortMultipleOptions::new()
            .with_order_descending_multi([false, true])
    )
}
See SortMultipleOptions for more options.
sourcepub fn sort_by_exprs<E: AsRef<[Expr]>>(
    self,
    by_exprs: E,
    sort_options: SortMultipleOptions
) -> Self
 
pub fn sort_by_exprs<E: AsRef<[Expr]>>( self, by_exprs: E, sort_options: SortMultipleOptions ) -> Self
Add a sort operation to the logical plan.
Sorts the LazyFrame by the provided list of expressions, which will be turned into concrete columns before sorting.
See SortMultipleOptions for more options.
§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
/// Sort DataFrame by 'sepal_width' column
fn example(df: DataFrame) -> LazyFrame {
      df.lazy()
        .sort_by_exprs(vec![col("sepal_width")], Default::default())
}pub fn top_k<E: AsRef<[Expr]>>( self, k: IdxSize, by_exprs: E, sort_options: SortMultipleOptions ) -> Self
pub fn bottom_k<E: AsRef<[Expr]>>( self, k: IdxSize, by_exprs: E, sort_options: SortMultipleOptions ) -> Self
sourcepub fn reverse(self) -> Self
 
pub fn reverse(self) -> Self
Reverse the DataFrame from top to bottom.
Row i becomes row number_of_rows - i - 1.
§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example(df: DataFrame) -> LazyFrame {
      df.lazy()
        .reverse()
}sourcepub fn rename<I, J, T, S>(self, existing: I, new: J) -> Self
 
pub fn rename<I, J, T, S>(self, existing: I, new: J) -> Self
Rename columns in the DataFrame.
existing and new are iterables of the same length containing the old and
corresponding new column names. Renaming happens to all existing columns
simultaneously, not iteratively. (In particular, all columns in existing must
already exist in the LazyFrame when rename is called.)
sourcepub fn drop<I, T>(self, columns: I) -> Self
 
pub fn drop<I, T>(self, columns: I) -> Self
Removes columns from the DataFrame. Note that it’s better to only select the columns you need and let the projection pushdown optimize away the unneeded columns.
sourcepub fn shift<E: Into<Expr>>(self, n: E) -> Self
 
pub fn shift<E: Into<Expr>>(self, n: E) -> Self
Shift the values by a given period and fill the parts that will be empty due to this operation
with Nones.
See the method on Series for more info on the shift operation.
sourcepub fn shift_and_fill<E: Into<Expr>, IE: Into<Expr>>(
    self,
    n: E,
    fill_value: IE
) -> Self
 
pub fn shift_and_fill<E: Into<Expr>, IE: Into<Expr>>( self, n: E, fill_value: IE ) -> Self
Shift the values by a given period and fill the parts that will be empty due to this operation
with the result of the fill_value expression.
See the method on Series for more info on the shift operation.
sourcepub fn fill_null<E: Into<Expr>>(self, fill_value: E) -> LazyFrame
 
pub fn fill_null<E: Into<Expr>>(self, fill_value: E) -> LazyFrame
Fill None values in the DataFrame with an expression.
sourcepub fn fill_nan<E: Into<Expr>>(self, fill_value: E) -> LazyFrame
 
pub fn fill_nan<E: Into<Expr>>(self, fill_value: E) -> LazyFrame
Fill NaN values in the DataFrame with an expression.
sourcepub fn cache(self) -> Self
 
pub fn cache(self) -> Self
Caches the result into a new LazyFrame.
This should be used to prevent computations running multiple times.
sourcepub fn cast(self, dtypes: PlHashMap<&str, DataType>, strict: bool) -> Self
 
pub fn cast(self, dtypes: PlHashMap<&str, DataType>, strict: bool) -> Self
Cast named frame columns, resulting in a new LazyFrame with updated dtypes
sourcepub fn cast_all(self, dtype: DataType, strict: bool) -> Self
 
pub fn cast_all(self, dtype: DataType, strict: bool) -> Self
Cast all frame columns to the given dtype, resulting in a new LazyFrame
sourcepub fn fetch(self, n_rows: usize) -> PolarsResult<DataFrame>
 
pub fn fetch(self, n_rows: usize) -> PolarsResult<DataFrame>
Fetch is like a collect operation, but it overwrites the number of rows read by every scan operation. This is a utility that helps debug a query on a smaller number of rows.
Note that the fetch does not guarantee the final number of rows in the DataFrame. Filter, join operations and a lower number of rows available in the scanned file influence the final number of rows.
pub fn optimize( self, lp_arena: &mut Arena<IR>, expr_arena: &mut Arena<AExpr> ) -> PolarsResult<Node>
pub fn to_alp_optimized(self) -> PolarsResult<IRPlan>
pub fn to_alp(self) -> PolarsResult<IRPlan>
pub fn _collect_post_opt<P>(self, post_opt: P) -> PolarsResult<DataFrame>
sourcepub fn collect(self) -> PolarsResult<DataFrame>
 
pub fn collect(self) -> PolarsResult<DataFrame>
Execute all the lazy operations and collect them into a DataFrame.
The query is optimized prior to execution.
§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    df.lazy()
      .group_by([col("foo")])
      .agg([col("bar").sum(), col("ham").mean().alias("avg_ham")])
      .collect()
}sourcepub fn profile(self) -> PolarsResult<(DataFrame, DataFrame)>
 
pub fn profile(self) -> PolarsResult<(DataFrame, DataFrame)>
Profile a LazyFrame.
This will run the query and return a tuple containing the materialized DataFrame and a DataFrame that contains profiling information of each node that is executed.
The units of the timings are microseconds.
sourcepub fn sink_parquet(
    self,
    path: PathBuf,
    options: ParquetWriteOptions
) -> PolarsResult<()>
 Available on crate feature parquet only.
pub fn sink_parquet( self, path: PathBuf, options: ParquetWriteOptions ) -> PolarsResult<()>
parquet only. Stream a query result into a parquet file. This is useful if the final result doesn’t fit into memory. This method will return an error if the query cannot be completely done in a streaming fashion.
sourcepub fn sink_parquet_cloud(
    self,
    uri: String,
    cloud_options: Option<CloudOptions>,
    parquet_options: ParquetWriteOptions
) -> PolarsResult<()>
 Available on crate features cloud_write and parquet only.
pub fn sink_parquet_cloud( self, uri: String, cloud_options: Option<CloudOptions>, parquet_options: ParquetWriteOptions ) -> PolarsResult<()>
cloud_write and parquet only. Stream a query result into a parquet file on an ObjectStore-compatible cloud service. This is useful if the final result doesn’t fit into memory, and where you do not want to write to a local file but to a location in the cloud. This method will return an error if the query cannot be completely done in a streaming fashion.
sourcepub fn sink_ipc(
    self,
    path: PathBuf,
    options: IpcWriterOptions
) -> PolarsResult<()>
 Available on crate feature ipc only.
pub fn sink_ipc( self, path: PathBuf, options: IpcWriterOptions ) -> PolarsResult<()>
ipc only. Stream a query result into an ipc/arrow file. This is useful if the final result doesn’t fit into memory. This method will return an error if the query cannot be completely done in a streaming fashion.
sourcepub fn sink_ipc_cloud(
    self,
    uri: String,
    cloud_options: Option<CloudOptions>,
    ipc_options: IpcWriterOptions
) -> PolarsResult<()>
 Available on crate features cloud_write and ipc only.
pub fn sink_ipc_cloud( self, uri: String, cloud_options: Option<CloudOptions>, ipc_options: IpcWriterOptions ) -> PolarsResult<()>
cloud_write and ipc only. Stream a query result into an ipc/arrow file on an ObjectStore-compatible cloud service. This is useful if the final result doesn’t fit into memory, and where you do not want to write to a local file but to a location in the cloud. This method will return an error if the query cannot be completely done in a streaming fashion.
sourcepub fn sink_csv(
    self,
    path: PathBuf,
    options: CsvWriterOptions
) -> PolarsResult<()>
 Available on crate feature csv only.
pub fn sink_csv( self, path: PathBuf, options: CsvWriterOptions ) -> PolarsResult<()>
csv only. Stream a query result into a csv file. This is useful if the final result doesn’t fit into memory. This method will return an error if the query cannot be completely done in a streaming fashion.
sourcepub fn sink_json(
    self,
    path: PathBuf,
    options: JsonWriterOptions
) -> PolarsResult<()>
 Available on crate feature json only.
pub fn sink_json( self, path: PathBuf, options: JsonWriterOptions ) -> PolarsResult<()>
json only. Stream a query result into a json file. This is useful if the final result doesn’t fit into memory. This method will return an error if the query cannot be completely done in a streaming fashion.
sourcepub fn filter(self, predicate: Expr) -> Self
 
pub fn filter(self, predicate: Expr) -> Self
Filter by some predicate expression.
The expression must yield boolean values.
§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example(df: DataFrame) -> LazyFrame {
      df.lazy()
        .filter(col("sepal_width").is_not_null())
        .select(&[col("sepal_width"), col("sepal_length")])
}sourcepub fn select<E: AsRef<[Expr]>>(self, exprs: E) -> Self
 
pub fn select<E: AsRef<[Expr]>>(self, exprs: E) -> Self
Select (and optionally rename, with alias) columns from the query.
Columns can be selected with col;
if you want to select all columns, use col("*").
§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
/// This function selects column "foo" and column "bar".
/// Column "bar" is renamed to "ham".
fn example(df: DataFrame) -> LazyFrame {
      df.lazy()
        .select(&[col("foo"),
                  col("bar").alias("ham")])
}
/// This function selects all columns except "foo"
fn exclude_a_column(df: DataFrame) -> LazyFrame {
      df.lazy()
        .select(&[col("*").exclude(["foo"])])
}pub fn select_seq<E: AsRef<[Expr]>>(self, exprs: E) -> Self
sourcepub fn group_by<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
    self,
    by: E
) -> LazyGroupBy
 
pub fn group_by<E: AsRef<[IE]>, IE: Into<Expr> + Clone>( self, by: E ) -> LazyGroupBy
Performs a “group-by” on a LazyFrame, producing a LazyGroupBy, which can subsequently be aggregated.
Takes a list of expressions to group on.
§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
use arrow::legacy::prelude::QuantileInterpolOptions;
fn example(df: DataFrame) -> LazyFrame {
      df.lazy()
       .group_by([col("date")])
       .agg([
           col("rain").min().alias("min_rain"),
           col("rain").sum().alias("sum_rain"),
           col("rain").quantile(lit(0.5), QuantileInterpolOptions::Nearest).alias("median_rain"),
       ])
}sourcepub fn rolling<E: AsRef<[Expr]>>(
    self,
    index_column: Expr,
    group_by: E,
    options: RollingGroupOptions
) -> LazyGroupBy
 Available on crate feature dynamic_group_by only.
pub fn rolling<E: AsRef<[Expr]>>( self, index_column: Expr, group_by: E, options: RollingGroupOptions ) -> LazyGroupBy
dynamic_group_by only. Create rolling groups based on a time column.
Also works for index values of type UInt32, UInt64, Int32, or Int64.
Different from a group_by_dynamic, the windows are now determined by the
individual values and are not of constant intervals. For constant intervals use
group_by_dynamic
sourcepub fn group_by_dynamic<E: AsRef<[Expr]>>(
    self,
    index_column: Expr,
    group_by: E,
    options: DynamicGroupOptions
) -> LazyGroupBy
 Available on crate feature dynamic_group_by only.
pub fn group_by_dynamic<E: AsRef<[Expr]>>( self, index_column: Expr, group_by: E, options: DynamicGroupOptions ) -> LazyGroupBy
dynamic_group_by only. Group based on a time value (or index value of type Int32, Int64).
Time windows are calculated and rows are assigned to windows. Different from a normal group_by is that a row can be a member of multiple groups. The time/index window could be seen as a rolling window, with a window size determined by dates/times/values instead of slots in the DataFrame.
A window is defined by:
- every: interval of the window
- period: length of the window
- offset: offset of the window
The group_by argument should be empty [] if you don’t want to combine this
with an ordinary group_by on these keys.
sourcepub fn group_by_stable<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
    self,
    by: E
) -> LazyGroupBy
 
pub fn group_by_stable<E: AsRef<[IE]>, IE: Into<Expr> + Clone>( self, by: E ) -> LazyGroupBy
Similar to group_by, but order of the DataFrame is maintained.
sourcepub fn anti_join<E: Into<Expr>>(
    self,
    other: LazyFrame,
    left_on: E,
    right_on: E
) -> LazyFrame
 Available on crate feature semi_anti_join only.
pub fn anti_join<E: Into<Expr>>( self, other: LazyFrame, left_on: E, right_on: E ) -> LazyFrame
semi_anti_join only. Left anti join this query with another lazy query.
Matches on the values of the expressions left_on and right_on. For more
flexible join logic, see join or
join_builder.
§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn anti_join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
        ldf
        .anti_join(other, col("foo"), col("bar").cast(DataType::String))
}sourcepub fn cross_join(self, other: LazyFrame, suffix: Option<String>) -> LazyFrame
 Available on crate feature cross_join only.
pub fn cross_join(self, other: LazyFrame, suffix: Option<String>) -> LazyFrame
cross_join only. Creates the Cartesian product from both frames, preserving the order of the left keys.
sourcepub fn left_join<E: Into<Expr>>(
    self,
    other: LazyFrame,
    left_on: E,
    right_on: E
) -> LazyFrame
 
pub fn left_join<E: Into<Expr>>( self, other: LazyFrame, left_on: E, right_on: E ) -> LazyFrame
Left outer join this query with another lazy query.
Matches on the values of the expressions left_on and right_on. For more
flexible join logic, see join or
join_builder.
§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn left_join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
        ldf
        .left_join(other, col("foo"), col("bar"))
}sourcepub fn inner_join<E: Into<Expr>>(
    self,
    other: LazyFrame,
    left_on: E,
    right_on: E
) -> LazyFrame
 
pub fn inner_join<E: Into<Expr>>( self, other: LazyFrame, left_on: E, right_on: E ) -> LazyFrame
Inner join this query with another lazy query.
Matches on the values of the expressions left_on and right_on. For more
flexible join logic, see join or
join_builder.
§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn inner_join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
        ldf
        .inner_join(other, col("foo"), col("bar").cast(DataType::String))
}sourcepub fn full_join<E: Into<Expr>>(
    self,
    other: LazyFrame,
    left_on: E,
    right_on: E
) -> LazyFrame
 
pub fn full_join<E: Into<Expr>>( self, other: LazyFrame, left_on: E, right_on: E ) -> LazyFrame
Full outer join this query with another lazy query.
Matches on the values of the expressions left_on and right_on. For more
flexible join logic, see join or
join_builder.
§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn full_join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
        ldf
        .full_join(other, col("foo"), col("bar"))
}sourcepub fn semi_join<E: Into<Expr>>(
    self,
    other: LazyFrame,
    left_on: E,
    right_on: E
) -> LazyFrame
 Available on crate feature semi_anti_join only.
pub fn semi_join<E: Into<Expr>>( self, other: LazyFrame, left_on: E, right_on: E ) -> LazyFrame
semi_anti_join only. Left semi join this query with another lazy query.
Matches on the values of the expressions left_on and right_on. For more
flexible join logic, see join or
join_builder.
§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn semi_join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
        ldf
        .semi_join(other, col("foo"), col("bar").cast(DataType::String))
}sourcepub fn join<E: AsRef<[Expr]>>(
    self,
    other: LazyFrame,
    left_on: E,
    right_on: E,
    args: JoinArgs
) -> LazyFrame
 
pub fn join<E: AsRef<[Expr]>>( self, other: LazyFrame, left_on: E, right_on: E, args: JoinArgs ) -> LazyFrame
Generic function to join two LazyFrames.
join can join on multiple columns, given as two lists of expressions, and with a
JoinType specified by how. Non-joined column names in the right DataFrame
that already exist in this DataFrame are suffixed with "_right". For control
over how columns are renamed and parallelization options, use
join_builder.
Any provided args.slice parameter is not considered, but set by the internal optimizer.
§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
        ldf
        .join(other, [col("foo"), col("bar")], [col("foo"), col("bar")], JoinArgs::new(JoinType::Inner))
}sourcepub fn join_builder(self) -> JoinBuilder
 
pub fn join_builder(self) -> JoinBuilder
Consume self and return a JoinBuilder to customize a join on this LazyFrame.
After the JoinBuilder has been created and set up, calling
finish() on it will give back the LazyFrame
representing the join operation.
sourcepub fn with_column(self, expr: Expr) -> LazyFrame
 
pub fn with_column(self, expr: Expr) -> LazyFrame
Add or replace a column, given as an expression, to a DataFrame.
§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn add_column(df: DataFrame) -> LazyFrame {
    df.lazy()
        .with_column(
            when(col("sepal_length").lt(lit(5.0)))
            .then(lit(10))
            .otherwise(lit(1))
            .alias("new_column_name"),
        )
}sourcepub fn with_columns<E: AsRef<[Expr]>>(self, exprs: E) -> LazyFrame
 
pub fn with_columns<E: AsRef<[Expr]>>(self, exprs: E) -> LazyFrame
Add or replace multiple columns, given as expressions, to a DataFrame.
§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn add_columns(df: DataFrame) -> LazyFrame {
    df.lazy()
        .with_columns(
            vec![lit(10).alias("foo"), lit(100).alias("bar")]
         )
}sourcepub fn with_columns_seq<E: AsRef<[Expr]>>(self, exprs: E) -> LazyFrame
 
pub fn with_columns_seq<E: AsRef<[Expr]>>(self, exprs: E) -> LazyFrame
Add or replace multiple columns to a DataFrame, but evaluate them sequentially.
pub fn with_context<C: AsRef<[LazyFrame]>>(self, contexts: C) -> LazyFrame
sourcepub fn max(self) -> Self
 
pub fn max(self) -> Self
Aggregate all the columns as their maximum values.
Aggregated columns will have the same names as the original columns.
sourcepub fn min(self) -> Self
 
pub fn min(self) -> Self
Aggregate all the columns as their minimum values.
Aggregated columns will have the same names as the original columns.
sourcepub fn sum(self) -> Self
 
pub fn sum(self) -> Self
Aggregate all the columns as their sum values.
Aggregated columns will have the same names as the original columns.
- Boolean columns will sum to a u32 containing the number of trues.
- For integer columns, the ordinary checks for overflow are performed:
if running in debug mode, overflows will panic, whereas in release mode overflows will silently wrap.
- String columns will sum to None.
sourcepub fn mean(self) -> Self
 
pub fn mean(self) -> Self
Aggregate all the columns as their mean values.
- Boolean and integer columns are converted to f64 before computing the mean.
- String columns will have a mean of None.
sourcepub fn median(self) -> Self
 
pub fn median(self) -> Self
Aggregate all the columns as their median values.
- Boolean and integer results are converted to f64. However, they are still susceptible to overflow before this conversion occurs.
- String columns will have a median of None.
sourcepub fn quantile(self, quantile: Expr, interpol: QuantileInterpolOptions) -> Self
 
pub fn quantile(self, quantile: Expr, interpol: QuantileInterpolOptions) -> Self
Aggregate all the columns as their quantile values.
sourcepub fn std(self, ddof: u8) -> Self
 
pub fn std(self, ddof: u8) -> Self
Aggregate all the columns as their standard deviation values.
ddof is the “Delta Degrees of Freedom”; N - ddof will be the denominator when
computing the variance, where N is the number of rows.
In standard statistical practice,
- ddof=1 provides an unbiased estimator of the variance of a hypothetical infinite population.
- ddof=0 provides a maximum likelihood estimate of the variance for normally distributed variables.
The standard deviation computed in this function is the square root of the estimated variance, so even with ddof=1, it will not be an unbiased estimate of the standard deviation per se.
Source: Numpy
sourcepub fn var(self, ddof: u8) -> Self
 
pub fn var(self, ddof: u8) -> Self
Aggregate all the columns as their variance values.
ddof is the “Delta Degrees of Freedom”; N - ddof will be the denominator when
computing the variance, where N is the number of rows.
In standard statistical practice,
- ddof=1 provides an unbiased estimator of the variance of a hypothetical infinite population.
- ddof=0 provides a maximum likelihood estimate of the variance for normally distributed variables.
Source: Numpy
sourcepub fn explode<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
    self,
    columns: E
) -> LazyFrame
 
pub fn explode<E: AsRef<[IE]>, IE: Into<Expr> + Clone>( self, columns: E ) -> LazyFrame
Apply explode operation. See eager explode.
sourcepub fn null_count(self) -> LazyFrame
 
pub fn null_count(self) -> LazyFrame
Aggregate all the columns as the sum of their null value count.
sourcepub fn unique_stable(
    self,
    subset: Option<Vec<String>>,
    keep_strategy: UniqueKeepStrategy
) -> LazyFrame
 
pub fn unique_stable( self, subset: Option<Vec<String>>, keep_strategy: UniqueKeepStrategy ) -> LazyFrame
Drop non-unique rows and maintain the order of kept rows.
subset is an optional Vec of column names to consider for uniqueness; if
None, all columns are considered.
sourcepub fn unique(
    self,
    subset: Option<Vec<String>>,
    keep_strategy: UniqueKeepStrategy
) -> LazyFrame
 
pub fn unique( self, subset: Option<Vec<String>>, keep_strategy: UniqueKeepStrategy ) -> LazyFrame
Drop non-unique rows without maintaining the order of kept rows.
The order of the kept rows may change; to maintain the original row order, use
unique_stable.
subset is an optional Vec of column names to consider for uniqueness; if None,
all columns are considered.
sourcepub fn drop_nulls(self, subset: Option<Vec<Expr>>) -> LazyFrame
 
pub fn drop_nulls(self, subset: Option<Vec<Expr>>) -> LazyFrame
Drop rows containing None.
subset is an optional Vec of expressions selecting the columns to consider for nulls; if None, all
columns are considered.
sourcepub fn slice(self, offset: i64, len: IdxSize) -> LazyFrame
 
pub fn slice(self, offset: i64, len: IdxSize) -> LazyFrame
Slice the DataFrame using an offset (starting row) and a length.
If offset is negative, it is counted from the end of the DataFrame. For
instance, lf.slice(-5, 3) gets three rows, starting at the row fifth from the
end.
If offset and len are such that the slice extends beyond the end of the
DataFrame, the portion between offset and the end will be returned. In this
case, the number of rows in the returned DataFrame will be less than len.
sourcepub fn tail(self, n: IdxSize) -> LazyFrame
 
pub fn tail(self, n: IdxSize) -> LazyFrame
Get the last n rows.
Equivalent to self.slice(-(n as i64), n).
sourcepub fn melt(self, args: MeltArgs) -> LazyFrame
 
pub fn melt(self, args: MeltArgs) -> LazyFrame
Melt the DataFrame from wide to long format.
See MeltArgs for information on how to melt a DataFrame.
sourcepub fn limit(self, n: IdxSize) -> LazyFrame
 
pub fn limit(self, n: IdxSize) -> LazyFrame
Limit the DataFrame to the first n rows.
Note if you don’t want the rows to be scanned, use fetch.
sourcepub fn map<F>(
    self,
    function: F,
    optimizations: AllowedOptimizations,
    schema: Option<Arc<dyn UdfSchema>>,
    name: Option<&'static str>
) -> LazyFrame
 
pub fn map<F>( self, function: F, optimizations: AllowedOptimizations, schema: Option<Arc<dyn UdfSchema>>, name: Option<&'static str> ) -> LazyFrame
Apply a function/closure once the logical plan gets executed.
The function has access to the whole materialized DataFrame at the time it is called.
To apply specific functions to specific columns, use Expr::map in conjunction
with LazyFrame::with_column or with_columns.
§Warning
This can blow up in your face if the schema is changed due to the operation. The optimizer relies on a correct schema.
You can toggle certain optimizations off.
pub fn map_python( self, function: PythonFunction, optimizations: AllowedOptimizations, schema: Option<SchemaRef>, validate_output: bool ) -> LazyFrame
python only.
sourcepub fn with_row_index(self, name: &str, offset: Option<IdxSize>) -> LazyFrame
 
pub fn with_row_index(self, name: &str, offset: Option<IdxSize>) -> LazyFrame
Add a new column at index 0 that counts the rows.
name is the name of the new column. offset is where to start counting from; if
None, it is set to 0.
§Warning
This can have a negative effect on query performance. This may for instance block predicate pushdown optimization.
sourcepub fn unnest<I: IntoIterator<Item = S>, S: AsRef<str>>(self, cols: I) -> Self
 Available on crate feature dtype-struct only.
pub fn unnest<I: IntoIterator<Item = S>, S: AsRef<str>>(self, cols: I) -> Self
dtype-struct only. Unnest the given Struct columns: the fields of the Struct type will be
inserted as columns.
pub fn merge_sorted( self, other: LazyFrame, key: &str ) -> PolarsResult<LazyFrame>
merge_sorted only.
source§impl LazyFrame
 
impl LazyFrame
pub fn anonymous_scan( function: Arc<dyn AnonymousScan>, args: ScanArgsAnonymous ) -> PolarsResult<Self>
source§impl LazyFrame
 
impl LazyFrame
sourcepub fn scan_ipc(path: impl AsRef<Path>, args: ScanArgsIpc) -> PolarsResult<Self>
 Available on crate feature ipc only.
pub fn scan_ipc(path: impl AsRef<Path>, args: ScanArgsIpc) -> PolarsResult<Self>
ipc only. Create a LazyFrame directly from an ipc scan.
pub fn scan_ipc_files( paths: Arc<[PathBuf]>, args: ScanArgsIpc ) -> PolarsResult<Self>
ipc only.
source§impl LazyFrame
 
impl LazyFrame
sourcepub fn scan_parquet(
    path: impl AsRef<Path>,
    args: ScanArgsParquet
) -> PolarsResult<Self>
 Available on crate feature parquet only.
pub fn scan_parquet( path: impl AsRef<Path>, args: ScanArgsParquet ) -> PolarsResult<Self>
parquet only. Create a LazyFrame directly from a parquet scan.
sourcepub fn scan_parquet_files(
    paths: Arc<[PathBuf]>,
    args: ScanArgsParquet
) -> PolarsResult<Self>
 Available on crate feature parquet only.
pub fn scan_parquet_files( paths: Arc<[PathBuf]>, args: ScanArgsParquet ) -> PolarsResult<Self>
parquet only. Create a LazyFrame directly from a parquet scan.
Trait Implementations§
source§impl From<LazyGroupBy> for LazyFrame
 
impl From<LazyGroupBy> for LazyFrame
source§fn from(lgb: LazyGroupBy) -> Self
 
fn from(lgb: LazyGroupBy) -> Self
Auto Trait Implementations§
impl !Freeze for LazyFrame
impl !RefUnwindSafe for LazyFrame
impl Send for LazyFrame
impl Sync for LazyFrame
impl Unpin for LazyFrame
impl !UnwindSafe for LazyFrame
Blanket Implementations§
source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
 
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
source§fn borrow_mut(&mut self) -> &mut T
 
fn borrow_mut(&mut self) -> &mut T
source§impl<T> IntoEither for T
 
impl<T> IntoEither for T
source§fn into_either(self, into_left: bool) -> Either<Self, Self>
 
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
 
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more