pub struct GroupBy<'a> {
pub df: &'a DataFrame,
/* private fields */
}
Expand description
Returned by a group_by operation on a DataFrame. This struct supports several aggregations.
Unless stated otherwise, the examples in this struct are performed on the following DataFrame:
use polars_core::prelude::*;
let dates = &[
"2020-08-21",
"2020-08-21",
"2020-08-22",
"2020-08-23",
"2020-08-22",
];
// date format
let fmt = "%Y-%m-%d";
// create date series
let s0 = DateChunked::parse_from_str_slice("date".into(), dates, fmt)
.into_series();
// create temperature series
let s1 = Series::new("temp".into(), [20, 10, 7, 9, 1]);
// create rain series
let s2 = Series::new("rain".into(), [0.2, 0.1, 0.3, 0.1, 0.01]);
// create a new DataFrame
let df = DataFrame::new(vec![s0, s1, s2]).unwrap();
println!("{:?}", df);
Outputs:
+------------+------+------+
| date | temp | rain |
| --- | --- | --- |
| Date | i32 | f64 |
+============+======+======+
| 2020-08-21 | 20 | 0.2 |
+------------+------+------+
| 2020-08-21 | 10 | 0.1 |
+------------+------+------+
| 2020-08-22 | 7 | 0.3 |
+------------+------+------+
| 2020-08-23 | 9 | 0.1 |
+------------+------+------+
| 2020-08-22 | 1 | 0.01 |
+------------+------+------+
Fields§
§df: &'a DataFrame
Implementations§
Source§impl<'a> GroupBy<'a>
impl<'a> GroupBy<'a>
pub fn new( df: &'a DataFrame, by: Vec<Column>, groups: GroupPositions, selected_agg: Option<Vec<PlSmallStr>>, ) -> GroupBy<'a>
Sourcepub fn select<I, S>(self, selection: I) -> GroupBy<'a>
pub fn select<I, S>(self, selection: I) -> GroupBy<'a>
Select the column(s) that should be aggregated. You can select a single column or a slice of columns.
Note that making a selection with this method is not required. If you skip it, all columns (except for the keys) will be selected for aggregation.
Sourcepub fn get_groups(&self) -> &GroupPositions
pub fn get_groups(&self) -> &GroupPositions
Get the internal representation of the GroupBy operation.
The Vec returned contains:
(first_idx, Vec<indexes>)
where the second value in the tuple is a vector with all matching indexes.
Sourcepub unsafe fn get_groups_mut(&mut self) -> &mut GroupPositions
pub unsafe fn get_groups_mut(&mut self) -> &mut GroupPositions
Get the internal representation of the GroupBy operation.
The Vec returned contains:
(first_idx, Vec<indexes>)
where the second value in the tuple is a vector with all matching indexes.
§Safety
Groups should always be in bounds of the DataFrame held by this GroupBy.
If you mutate them, you must uphold that invariant.
pub fn take_groups(self) -> GroupPositions
pub fn take_groups_mut(&mut self) -> GroupPositions
pub fn keys_sliced(&self, slice: Option<(i64, usize)>) -> Vec<Column>
pub fn keys(&self) -> Vec<Column>
Sourcepub fn mean(&self) -> Result<DataFrame, PolarsError>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn mean(&self) -> Result<DataFrame, PolarsError>
Aggregate grouped series and compute the mean per group.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["date"])?.select(["temp", "rain"]).mean()
}
Returns:
+------------+-----------+-----------+
| date | temp_mean | rain_mean |
| --- | --- | --- |
| Date | f64 | f64 |
+============+===========+===========+
| 2020-08-23 | 9 | 0.1 |
+------------+-----------+-----------+
| 2020-08-22 | 4 | 0.155 |
+------------+-----------+-----------+
| 2020-08-21 | 15 | 0.15 |
+------------+-----------+-----------+
Source pub fn sum(&self) -> Result<DataFrame, PolarsError>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn sum(&self) -> Result<DataFrame, PolarsError>
Aggregate grouped series and compute the sum per group.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["date"])?.select(["temp"]).sum()
}
Returns:
+------------+----------+
| date | temp_sum |
| --- | --- |
| Date | i32 |
+============+==========+
| 2020-08-23 | 9 |
+------------+----------+
| 2020-08-22 | 8 |
+------------+----------+
| 2020-08-21 | 30 |
+------------+----------+
Source pub fn min(&self) -> Result<DataFrame, PolarsError>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn min(&self) -> Result<DataFrame, PolarsError>
Aggregate grouped series and compute the minimal value per group.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["date"])?.select(["temp"]).min()
}
Returns:
+------------+----------+
| date | temp_min |
| --- | --- |
| Date | i32 |
+============+==========+
| 2020-08-23 | 9 |
+------------+----------+
| 2020-08-22 | 1 |
+------------+----------+
| 2020-08-21 | 10 |
+------------+----------+
Source pub fn max(&self) -> Result<DataFrame, PolarsError>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn max(&self) -> Result<DataFrame, PolarsError>
Aggregate grouped series and compute the maximum value per group.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["date"])?.select(["temp"]).max()
}
Returns:
+------------+----------+
| date | temp_max |
| --- | --- |
| Date | i32 |
+============+==========+
| 2020-08-23 | 9 |
+------------+----------+
| 2020-08-22 | 7 |
+------------+----------+
| 2020-08-21 | 20 |
+------------+----------+
Source pub fn first(&self) -> Result<DataFrame, PolarsError>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn first(&self) -> Result<DataFrame, PolarsError>
Aggregate grouped Series and find the first value per group.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["date"])?.select(["temp"]).first()
}
Returns:
+------------+------------+
| date | temp_first |
| --- | --- |
| Date | i32 |
+============+============+
| 2020-08-23 | 9 |
+------------+------------+
| 2020-08-22 | 7 |
+------------+------------+
| 2020-08-21 | 20 |
+------------+------------+
Source pub fn last(&self) -> Result<DataFrame, PolarsError>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn last(&self) -> Result<DataFrame, PolarsError>
Aggregate grouped Series and return the last value per group.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["date"])?.select(["temp"]).last()
}
Returns:
+------------+------------+
| date | temp_last |
| --- | --- |
| Date | i32 |
+============+============+
| 2020-08-23 | 9 |
+------------+------------+
| 2020-08-22 | 1 |
+------------+------------+
| 2020-08-21 | 10 |
+------------+------------+
Source pub fn n_unique(&self) -> Result<DataFrame, PolarsError>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn n_unique(&self) -> Result<DataFrame, PolarsError>
Aggregate grouped Series by counting the number of unique values.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["date"])?.select(["temp"]).n_unique()
}
Returns:
+------------+---------------+
| date | temp_n_unique |
| --- | --- |
| Date | u32 |
+============+===============+
| 2020-08-23 | 1 |
+------------+---------------+
| 2020-08-22 | 2 |
+------------+---------------+
| 2020-08-21 | 2 |
+------------+---------------+
Source pub fn quantile(
&self,
quantile: f64,
method: QuantileMethod,
) -> Result<DataFrame, PolarsError>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn quantile( &self, quantile: f64, method: QuantileMethod, ) -> Result<DataFrame, PolarsError>
Sourcepub fn median(&self) -> Result<DataFrame, PolarsError>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn median(&self) -> Result<DataFrame, PolarsError>
Sourcepub fn var(&self, ddof: u8) -> Result<DataFrame, PolarsError>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn var(&self, ddof: u8) -> Result<DataFrame, PolarsError>
Aggregate grouped Series and determine the variance per group.
Sourcepub fn std(&self, ddof: u8) -> Result<DataFrame, PolarsError>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn std(&self, ddof: u8) -> Result<DataFrame, PolarsError>
Aggregate grouped Series and determine the standard deviation per group.
Sourcepub fn count(&self) -> Result<DataFrame, PolarsError>
pub fn count(&self) -> Result<DataFrame, PolarsError>
Aggregate grouped series and compute the number of values per group.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["date"])?.select(["temp"]).count()
}
Returns:
+------------+------------+
| date | temp_count |
| --- | --- |
| Date | u32 |
+============+============+
| 2020-08-23 | 1 |
+------------+------------+
| 2020-08-22 | 2 |
+------------+------------+
| 2020-08-21 | 2 |
+------------+------------+
Source pub fn groups(&self) -> Result<DataFrame, PolarsError>
pub fn groups(&self) -> Result<DataFrame, PolarsError>
Get the group_by group indexes.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["date"])?.groups()
}
Returns:
+--------------+------------+
| date | groups |
| --- | --- |
| Date(days) | list [u32] |
+==============+============+
| 2020-08-23 | "[3]" |
+--------------+------------+
| 2020-08-22 | "[2, 4]" |
+--------------+------------+
| 2020-08-21 | "[0, 1]" |
+--------------+------------+
Source pub fn agg_list(&self) -> Result<DataFrame, PolarsError>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn agg_list(&self) -> Result<DataFrame, PolarsError>
Aggregate the groups of the group_by operation into lists.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
// GroupBy and aggregate to Lists
df.group_by(["date"])?.select(["temp"]).agg_list()
}
Returns:
+------------+------------------------+
| date | temp_agg_list |
| --- | --- |
| Date | list [i32] |
+============+========================+
| 2020-08-23 | "[Some(9)]" |
+------------+------------------------+
| 2020-08-22 | "[Some(7), Some(1)]" |
+------------+------------------------+
| 2020-08-21 | "[Some(20), Some(10)]" |
+------------+------------------------+
Source pub fn par_apply<F>(&self, f: F) -> Result<DataFrame, PolarsError>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn par_apply<F>(&self, f: F) -> Result<DataFrame, PolarsError>
Apply a closure over the groups as a new DataFrame in parallel.
Sourcepub fn apply<F>(&self, f: F) -> Result<DataFrame, PolarsError>
pub fn apply<F>(&self, f: F) -> Result<DataFrame, PolarsError>
Apply a closure over the groups as a new DataFrame.
pub fn sliced(self, slice: Option<(i64, usize)>) -> GroupBy<'a>
Trait Implementations§
Auto Trait Implementations§
impl<'a> Freeze for GroupBy<'a>
impl<'a> !RefUnwindSafe for GroupBy<'a>
impl<'a> Send for GroupBy<'a>
impl<'a> Sync for GroupBy<'a>
impl<'a> Unpin for GroupBy<'a>
impl<'a> !UnwindSafe for GroupBy<'a>
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
§impl<T> Instrument for T
impl<T> Instrument for T
§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self> ⓘ
fn into_either(self, into_left: bool) -> Either<Self, Self> ⓘ
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> ⓘ
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> ⓘ
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more