pub struct DataFrame { /* private fields */ }
A contiguous growable collection of Series
that have the same length.
§Use declarations
All the common tools can be found in crate::prelude (or in polars::prelude).
use polars_core::prelude::*; // if the crate polars-core is used directly
// use polars::prelude::*; if the crate polars is used
§Initialization
§Default
A DataFrame
can be initialized empty:
let df = DataFrame::default();
assert!(df.is_empty());
§Wrapping a Vec<Series>
A DataFrame
is built upon a Vec<Series>
where the Series
have the same length.
let s1 = Column::new("Fruit".into(), ["Apple", "Apple", "Pear"]);
let s2 = Column::new("Color".into(), ["Red", "Yellow", "Green"]);
let df: PolarsResult<DataFrame> = DataFrame::new(vec![s1, s2]);
§Using a macro
The df!
macro is a convenient method:
let df: PolarsResult<DataFrame> = df!("Fruit" => ["Apple", "Apple", "Pear"],
"Color" => ["Red", "Yellow", "Green"]);
§Using a CSV file
See the polars_io::csv::CsvReader
.
§Indexing
§By a number
The Index<usize>
is implemented for the DataFrame
.
let df = df!("Fruit" => ["Apple", "Apple", "Pear"],
"Color" => ["Red", "Yellow", "Green"])?;
assert_eq!(df[0], Column::new("Fruit".into(), &["Apple", "Apple", "Pear"]));
assert_eq!(df[1], Column::new("Color".into(), &["Red", "Yellow", "Green"]));
§By a Series name
let df = df!("Fruit" => ["Apple", "Apple", "Pear"],
"Color" => ["Red", "Yellow", "Green"])?;
assert_eq!(df["Fruit"], Column::new("Fruit".into(), &["Apple", "Apple", "Pear"]));
assert_eq!(df["Color"], Column::new("Color".into(), &["Red", "Yellow", "Green"]));
Implementations§
Source§impl DataFrame
impl DataFrame
Sourcepub fn to_ndarray<N>(
&self,
ordering: IndexOrder,
) -> Result<ArrayBase<OwnedRepr<<N as PolarsNumericType>::Native>, Dim<[usize; 2]>>, PolarsError>where
N: PolarsNumericType,
pub fn to_ndarray<N>(
&self,
ordering: IndexOrder,
) -> Result<ArrayBase<OwnedRepr<<N as PolarsNumericType>::Native>, Dim<[usize; 2]>>, PolarsError>where
N: PolarsNumericType,
Create a 2D ndarray::Array
from this DataFrame
. This requires all columns in the
DataFrame
to be non-null and numeric. They will be cast to the same data type
(if they aren’t already).
For floating point data we implicitly convert None
to NaN
without failure.
use polars_core::prelude::*;
let a = UInt32Chunked::new("a".into(), &[1, 2, 3]).into_column();
let b = Float64Chunked::new("b".into(), &[10., 8., 6.]).into_column();
let df = DataFrame::new(vec![a, b]).unwrap();
let ndarray = df.to_ndarray::<Float64Type>(IndexOrder::Fortran).unwrap();
println!("{:?}", ndarray);
Outputs:
[[1.0, 10.0],
[2.0, 8.0],
[3.0, 6.0]], shape=[3, 2], strides=[1, 3], layout=Ff (0xa), const ndim=2
Source§impl DataFrame
impl DataFrame
Sourcepub fn sample_n(
&self,
n: &Series,
with_replacement: bool,
shuffle: bool,
seed: Option<u64>,
) -> Result<DataFrame, PolarsError>
pub fn sample_n( &self, n: &Series, with_replacement: bool, shuffle: bool, seed: Option<u64>, ) -> Result<DataFrame, PolarsError>
Sample n datapoints from this DataFrame
.
pub fn sample_n_literal( &self, n: usize, with_replacement: bool, shuffle: bool, seed: Option<u64>, ) -> Result<DataFrame, PolarsError>
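For illustration, a minimal sketch (not from the upstream docs; the data and seed are made up) of sampling with sample_n_literal:
let df = df!("x" => [1, 2, 3, 4, 5])?;
// Sample 3 rows without replacement and without shuffling, with a fixed seed.
let sampled = df.sample_n_literal(3, false, false, Some(42))?;
assert_eq!(sampled.height(), 3);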
Source§impl DataFrame
impl DataFrame
pub fn into_struct(self, name: PlSmallStr) -> ChunkedArray<StructType>
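A minimal, illustrative sketch (not from the upstream docs) of packing all columns into a single struct column:
let df = df!("a" => [1, 2], "b" => ["x", "y"])?;
// The resulting struct ChunkedArray has one entry per row of the original DataFrame.
let s = df.into_struct("s".into());
assert_eq!(s.len(), 2);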
Source§impl DataFrame
impl DataFrame
pub fn split_chunks(&mut self) -> impl Iterator<Item = DataFrame>
pub fn split_chunks_by_n(self, n: usize, parallel: bool) -> Vec<DataFrame>
Source§impl DataFrame
impl DataFrame
pub fn explode_impl( &self, columns: Vec<Column>, ) -> Result<DataFrame, PolarsError>
Sourcepub fn explode<I, S>(&self, columns: I) -> Result<DataFrame, PolarsError>
pub fn explode<I, S>(&self, columns: I) -> Result<DataFrame, PolarsError>
Explode DataFrame
to long format by exploding a column with Lists.
§Example
let s0 = Series::new("a".into(), &[1i64, 2, 3]);
let s1 = Series::new("b".into(), &[1i64, 1, 1]);
let s2 = Series::new("c".into(), &[2i64, 2, 2]);
let list = Series::new("foo".into(), &[s0, s1, s2]);
let s0 = Series::new("B".into(), [1, 2, 3]);
let s1 = Series::new("C".into(), [1, 1, 1]);
let df = DataFrame::new(vec![list, s0, s1])?;
let exploded = df.explode(["foo"])?;
println!("{:?}", df);
println!("{:?}", exploded);
Outputs:
+-------------+-----+-----+
| foo | B | C |
| --- | --- | --- |
| list [i64] | i32 | i32 |
+=============+=====+=====+
| "[1, 2, 3]" | 1 | 1 |
+-------------+-----+-----+
| "[1, 1, 1]" | 2 | 1 |
+-------------+-----+-----+
| "[2, 2, 2]" | 3 | 1 |
+-------------+-----+-----+
+-----+-----+-----+
| foo | B | C |
| --- | --- | --- |
| i64 | i32 | i32 |
+=====+=====+=====+
| 1 | 1 | 1 |
+-----+-----+-----+
| 2 | 1 | 1 |
+-----+-----+-----+
| 3 | 1 | 1 |
+-----+-----+-----+
| 1 | 2 | 1 |
+-----+-----+-----+
| 1 | 2 | 1 |
+-----+-----+-----+
| 1 | 2 | 1 |
+-----+-----+-----+
| 2 | 3 | 1 |
+-----+-----+-----+
| 2 | 3 | 1 |
+-----+-----+-----+
| 2 | 3 | 1 |
+-----+-----+-----+
Source§impl DataFrame
impl DataFrame
pub fn group_by_with_series( &self, by: Vec<Column>, multithreaded: bool, sorted: bool, ) -> Result<GroupBy<'_>, PolarsError>
Sourcepub fn group_by<I, S>(&self, by: I) -> Result<GroupBy<'_>, PolarsError>
pub fn group_by<I, S>(&self, by: I) -> Result<GroupBy<'_>, PolarsError>
Group DataFrame using a Series column.
§Example
use polars_core::prelude::*;
fn group_by_sum(df: &DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["column_name"])?
.select(["agg_column_name"])
.sum()
}
Sourcepub fn group_by_stable<I, S>(&self, by: I) -> Result<GroupBy<'_>, PolarsError>
pub fn group_by_stable<I, S>(&self, by: I) -> Result<GroupBy<'_>, PolarsError>
Group DataFrame using a Series column. The groups are ordered by their smallest row index.
Source§impl DataFrame
impl DataFrame
Sourcepub unsafe fn hstack_mut_unchecked(
&mut self,
columns: &[Column],
) -> &mut DataFrame
pub unsafe fn hstack_mut_unchecked( &mut self, columns: &[Column], ) -> &mut DataFrame
Sourcepub fn hstack_mut(
&mut self,
columns: &[Column],
) -> Result<&mut DataFrame, PolarsError>
pub fn hstack_mut( &mut self, columns: &[Column], ) -> Result<&mut DataFrame, PolarsError>
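A minimal sketch (not from the upstream docs) of adding a column in place with hstack_mut:
let mut df = df!("a" => [1, 2, 3])?;
let b = Column::new("b".into(), [4, 5, 6]);
// Appends the column in place; errors on a length mismatch or duplicate name.
df.hstack_mut(&[b])?;
assert_eq!(df.shape(), (3, 2));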
Source§impl DataFrame
impl DataFrame
Sourcepub fn get_row(&self, idx: usize) -> Result<Row<'_>, PolarsError>
pub fn get_row(&self, idx: usize) -> Result<Row<'_>, PolarsError>
Get a row from a DataFrame
. Use of this is discouraged as it will likely be slow.
Sourcepub fn get_row_amortized<'a>(
&'a self,
idx: usize,
row: &mut Row<'a>,
) -> Result<(), PolarsError>
pub fn get_row_amortized<'a>( &'a self, idx: usize, row: &mut Row<'a>, ) -> Result<(), PolarsError>
Amortize allocations by reusing a row.
The caller is responsible for making sure that the row has at least the capacity for the number of columns in the DataFrame.
Sourcepub unsafe fn get_row_amortized_unchecked<'a>(
&'a self,
idx: usize,
row: &mut Row<'a>,
)
pub unsafe fn get_row_amortized_unchecked<'a>( &'a self, idx: usize, row: &mut Row<'a>, )
Sourcepub fn from_rows_and_schema(
rows: &[Row<'_>],
schema: &Schema<DataType>,
) -> Result<DataFrame, PolarsError>
pub fn from_rows_and_schema( rows: &[Row<'_>], schema: &Schema<DataType>, ) -> Result<DataFrame, PolarsError>
Sourcepub fn from_rows_iter_and_schema<'a, I>(
rows: I,
schema: &Schema<DataType>,
) -> Result<DataFrame, PolarsError>
pub fn from_rows_iter_and_schema<'a, I>( rows: I, schema: &Schema<DataType>, ) -> Result<DataFrame, PolarsError>
Sourcepub fn try_from_rows_iter_and_schema<'a, I>(
rows: I,
schema: &Schema<DataType>,
) -> Result<DataFrame, PolarsError>
pub fn try_from_rows_iter_and_schema<'a, I>( rows: I, schema: &Schema<DataType>, ) -> Result<DataFrame, PolarsError>
Source§impl DataFrame
impl DataFrame
pub fn transpose( &mut self, keep_names_as: Option<&str>, new_col_names: Option<Either<String, Vec<String>>>, ) -> Result<DataFrame, PolarsError>
Sourcepub fn transpose_impl(
&mut self,
keep_names_as: Option<&str>,
new_col_names: Option<Either<PlSmallStr, Vec<PlSmallStr>>>,
) -> Result<DataFrame, PolarsError>
pub fn transpose_impl( &mut self, keep_names_as: Option<&str>, new_col_names: Option<Either<PlSmallStr, Vec<PlSmallStr>>>, ) -> Result<DataFrame, PolarsError>
Transpose a DataFrame. This is a very expensive operation.
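A minimal, illustrative sketch (not from the upstream docs), keeping the default column naming:
let mut df = df!("a" => [1, 2], "b" => [3, 4])?;
// Rows become columns; the original column names are not kept here.
let transposed = df.transpose(None, None)?;
assert_eq!(transposed.shape(), (2, 2));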
Source§impl DataFrame
impl DataFrame
pub fn materialized_column_iter(&self) -> impl ExactSizeIterator
pub fn par_materialized_column_iter( &self, ) -> impl ParallelIterator<Item = &Series>
Sourcepub fn estimated_size(&self) -> usize
pub fn estimated_size(&self) -> usize
Returns an estimation of the total (heap) allocated size of the DataFrame
in bytes.
§Implementation
This estimation is the sum of the sizes of its buffers and validity bitmaps, including nested arrays.
Multiple arrays may share buffers and bitmaps. Therefore, the size of 2 arrays is not the
sum of the sizes computed from this function. In particular, StructArray
’s size is an upper bound.
When an array is sliced, its allocated size remains constant because the buffer is unchanged. However, this function will yield a smaller number. This is because it returns the visible size of the buffer, not its total capacity.
FFI buffers are included in this estimation.
pub fn _apply_columns(&self, func: &dyn Fn(&Column) -> Column) -> Vec<Column>
pub fn _apply_columns_par( &self, func: &(dyn Fn(&Column) -> Column + Sync + Send), ) -> Vec<Column>
Sourcepub fn new(columns: Vec<Column>) -> Result<DataFrame, PolarsError>
pub fn new(columns: Vec<Column>) -> Result<DataFrame, PolarsError>
Create a DataFrame from a Vector of Series.
§Example
let s0 = Column::new("days".into(), [0, 1, 2].as_ref());
let s1 = Column::new("temp".into(), [22.1, 19.9, 7.].as_ref());
let df = DataFrame::new(vec![s0, s1])?;
Sourcepub fn new_with_broadcast(
columns: Vec<Column>,
) -> Result<DataFrame, PolarsError>
pub fn new_with_broadcast( columns: Vec<Column>, ) -> Result<DataFrame, PolarsError>
Converts a sequence of columns into a DataFrame, broadcasting length-1 columns to match the other columns.
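A minimal sketch (not from the upstream docs) showing the broadcast of a length-1 column:
let a = Column::new("a".into(), [1, 2, 3]);
let b = Column::new("b".into(), [10]); // length-1, broadcast to length 3
let df = DataFrame::new_with_broadcast(vec![a, b])?;
assert_eq!(df.shape(), (3, 2));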
Sourcepub fn new_with_broadcast_len(
columns: Vec<Column>,
broadcast_len: usize,
) -> Result<DataFrame, PolarsError>
pub fn new_with_broadcast_len( columns: Vec<Column>, broadcast_len: usize, ) -> Result<DataFrame, PolarsError>
Converts a sequence of columns into a DataFrame, broadcasting length-1 columns to broadcast_len.
Sourcepub unsafe fn new_with_broadcast_no_namecheck(
columns: Vec<Column>,
broadcast_len: usize,
) -> Result<DataFrame, PolarsError>
pub unsafe fn new_with_broadcast_no_namecheck( columns: Vec<Column>, broadcast_len: usize, ) -> Result<DataFrame, PolarsError>
Converts a sequence of columns into a DataFrame, broadcasting length-1 columns to match the other columns.
§Safety
Does not check that the column names are unique (which they must be).
Sourcepub const fn empty() -> DataFrame
pub const fn empty() -> DataFrame
Creates an empty DataFrame
usable in a compile time context (such as static initializers).
§Example
use polars_core::prelude::DataFrame;
static EMPTY: DataFrame = DataFrame::empty();
Sourcepub fn empty_with_schema(schema: &Schema<DataType>) -> DataFrame
pub fn empty_with_schema(schema: &Schema<DataType>) -> DataFrame
Create an empty DataFrame
with empty columns as per the schema
.
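A minimal, illustrative sketch (not from the upstream docs):
let schema = Schema::from_iter(vec![
    Field::new("x".into(), DataType::Int64),
    Field::new("y".into(), DataType::String),
]);
let df = DataFrame::empty_with_schema(&schema);
assert_eq!(df.shape(), (0, 2));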
Sourcepub fn empty_with_arrow_schema(schema: &Schema<Field>) -> DataFrame
pub fn empty_with_arrow_schema(schema: &Schema<Field>) -> DataFrame
Create an empty DataFrame
with empty columns as per the schema
.
Sourcepub fn full_null(schema: &Schema<DataType>, height: usize) -> DataFrame
pub fn full_null(schema: &Schema<DataType>, height: usize) -> DataFrame
Create a new DataFrame
with the given schema, only containing nulls.
Sourcepub fn pop(&mut self) -> Option<Column>
pub fn pop(&mut self) -> Option<Column>
Removes the last Series
from the DataFrame
and returns it, or None
if it is empty.
§Example
let s1 = Column::new("Ocean".into(), ["Atlantic", "Indian"]);
let s2 = Column::new("Area (km²)".into(), [106_460_000, 70_560_000]);
let mut df = DataFrame::new(vec![s1.clone(), s2.clone()])?;
assert_eq!(df.pop(), Some(s2));
assert_eq!(df.pop(), Some(s1));
assert_eq!(df.pop(), None);
assert!(df.is_empty());
Sourcepub fn with_row_index(
&self,
name: PlSmallStr,
offset: Option<u32>,
) -> Result<DataFrame, PolarsError>
pub fn with_row_index( &self, name: PlSmallStr, offset: Option<u32>, ) -> Result<DataFrame, PolarsError>
Add a new column at index 0 that counts the rows.
§Example
let df1: DataFrame = df!("Name" => ["James", "Mary", "John", "Patricia"])?;
assert_eq!(df1.shape(), (4, 1));
let df2: DataFrame = df1.with_row_index("Id".into(), None)?;
assert_eq!(df2.shape(), (4, 2));
println!("{}", df2);
Output:
shape: (4, 2)
+-----+----------+
| Id | Name |
| --- | --- |
| u32 | str |
+=====+==========+
| 0 | James |
+-----+----------+
| 1 | Mary |
+-----+----------+
| 2 | John |
+-----+----------+
| 3 | Patricia |
+-----+----------+
Sourcepub fn with_row_index_mut(
&mut self,
name: PlSmallStr,
offset: Option<u32>,
) -> &mut DataFrame
pub fn with_row_index_mut( &mut self, name: PlSmallStr, offset: Option<u32>, ) -> &mut DataFrame
Add a row index column in place.
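A minimal sketch (not from the upstream docs); the column name is made up:
let mut df = df!("Name" => ["James", "Mary"])?;
// Adds a u32 row index column named "Id" at index 0, starting at 0.
df.with_row_index_mut("Id".into(), None);
assert_eq!(df.get_column_names(), &["Id", "Name"]);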
Sourcepub unsafe fn new_no_checks_height_from_first(columns: Vec<Column>) -> DataFrame
pub unsafe fn new_no_checks_height_from_first(columns: Vec<Column>) -> DataFrame
Create a new DataFrame
but does not check the length or duplicate occurrence of the
Series
.
Calculates the height from the first column or 0
if no columns are given.
§Safety
It is the caller's responsibility to uphold the contract of all Series having an equal length and a unique name; if not, this may panic down the line.
Sourcepub unsafe fn new_no_checks(height: usize, columns: Vec<Column>) -> DataFrame
pub unsafe fn new_no_checks(height: usize, columns: Vec<Column>) -> DataFrame
Create a new DataFrame
but does not check the length or duplicate occurrence of the
Series
.
It is advised to use DataFrame::new in favor of this method.
§Safety
It is the caller's responsibility to uphold the contract of all Series having an equal length and a unique name; if not, this may panic down the line.
Sourcepub const unsafe fn _new_no_checks_impl(
height: usize,
columns: Vec<Column>,
) -> DataFrame
pub const unsafe fn _new_no_checks_impl( height: usize, columns: Vec<Column>, ) -> DataFrame
This will not panic even in debug mode - there are some (rare) use cases where a DataFrame is temporarily constructed containing duplicates for dispatching to functions. A DataFrame constructed with this method is generally highly unsafe and should not be long-lived.
Sourcepub unsafe fn new_no_length_checks(
columns: Vec<Column>,
) -> Result<DataFrame, PolarsError>
pub unsafe fn new_no_length_checks( columns: Vec<Column>, ) -> Result<DataFrame, PolarsError>
Create a new DataFrame
but does not check the length of the Series; it only checks for duplicates.
It is advised to use DataFrame::new in favor of this method.
§Safety
It is the caller's responsibility to uphold the contract of all Series having an equal length; if not, this may panic down the line.
Sourcepub fn shrink_to_fit(&mut self)
pub fn shrink_to_fit(&mut self)
Shrink the capacity of this DataFrame to fit its length.
Sourcepub fn as_single_chunk(&mut self) -> &mut DataFrame
pub fn as_single_chunk(&mut self) -> &mut DataFrame
Aggregate all the chunks in the DataFrame to a single chunk.
Sourcepub fn as_single_chunk_par(&mut self) -> &mut DataFrame
pub fn as_single_chunk_par(&mut self) -> &mut DataFrame
Aggregate all the chunks in the DataFrame to a single chunk in parallel. This may lead to more peak memory consumption.
Sourcepub fn rechunk_mut(&mut self)
pub fn rechunk_mut(&mut self)
Rechunks all columns to only have a single chunk.
Sourcepub fn rechunk_to_record_batch(
self,
compat_level: CompatLevel,
) -> RecordBatchT<Box<dyn Array>>
pub fn rechunk_to_record_batch( self, compat_level: CompatLevel, ) -> RecordBatchT<Box<dyn Array>>
Rechunks all columns to only have a single chunk and turns it into a RecordBatchT.
Sourcepub fn should_rechunk(&self) -> bool
pub fn should_rechunk(&self) -> bool
Returns true if the chunks of the columns do not align and re-chunking should be done.
Sourcepub fn align_chunks_par(&mut self) -> &mut DataFrame
pub fn align_chunks_par(&mut self) -> &mut DataFrame
Ensure all the chunks in the DataFrame
are aligned.
pub fn align_chunks(&mut self) -> &mut DataFrame
Sourcepub fn schema(&self) -> Schema<DataType>
pub fn schema(&self) -> Schema<DataType>
Get the DataFrame
schema.
§Example
let df: DataFrame = df!("Thing" => ["Observable universe", "Human stupidity"],
"Diameter (m)" => [8.8e26, f64::INFINITY])?;
let f1: Field = Field::new("Thing".into(), DataType::String);
let f2: Field = Field::new("Diameter (m)".into(), DataType::Float64);
let sc: Schema = Schema::from_iter(vec![f1, f2]);
assert_eq!(df.schema(), sc);
Sourcepub fn get_columns(&self) -> &[Column]
pub fn get_columns(&self) -> &[Column]
Sourcepub unsafe fn get_columns_mut(&mut self) -> &mut Vec<Column>
pub unsafe fn get_columns_mut(&mut self) -> &mut Vec<Column>
Get mutable access to the underlying columns.
§Safety
The caller must ensure the length of all Series
remains equal to height
or
DataFrame::set_height
is called afterwards with the appropriate height
.
Sourcepub fn clear_columns(&mut self)
pub fn clear_columns(&mut self)
Remove all the columns in the DataFrame
but keep the height
.
Sourcepub unsafe fn column_extend_unchecked(
&mut self,
iter: impl IntoIterator<Item = Column>,
)
pub unsafe fn column_extend_unchecked( &mut self, iter: impl IntoIterator<Item = Column>, )
Extend the columns without checking for name collisions or height.
§Safety
The caller needs to ensure that:
- Column names are unique within the resulting DataFrame.
- The length of each appended column matches the height of the DataFrame. For DataFrames with no columns (ZCDFs), it is important that the height is set afterwards with DataFrame::set_height.
Sourcepub fn take_columns(self) -> Vec<Column>
pub fn take_columns(self) -> Vec<Column>
Take ownership of the underlying columns vec.
Sourcepub fn iter(&self) -> impl ExactSizeIterator
pub fn iter(&self) -> impl ExactSizeIterator
Iterator over the columns as Series
.
§Example
let s1 = Column::new("Name".into(), ["Pythagoras' theorem", "Shannon entropy"]);
let s2 = Column::new("Formula".into(), ["a²+b²=c²", "H=-Σ[P(x)log|P(x)|]"]);
let df: DataFrame = DataFrame::new(vec![s1.clone(), s2.clone()])?;
let mut iterator = df.iter();
assert_eq!(iterator.next(), Some(s1.as_materialized_series()));
assert_eq!(iterator.next(), Some(s2.as_materialized_series()));
assert_eq!(iterator.next(), None);
Sourcepub fn get_column_names(&self) -> Vec<&PlSmallStr>
pub fn get_column_names(&self) -> Vec<&PlSmallStr>
§Example
let df: DataFrame = df!("Language" => ["Rust", "Python"],
"Designer" => ["Graydon Hoare", "Guido van Rossum"])?;
assert_eq!(df.get_column_names(), &["Language", "Designer"]);
Sourcepub fn get_column_names_owned(&self) -> Vec<PlSmallStr>
pub fn get_column_names_owned(&self) -> Vec<PlSmallStr>
Get the Vec<PlSmallStr>
representing the column names.
pub fn get_column_names_str(&self) -> Vec<&str>
Sourcepub fn set_column_names<I, S>(&mut self, names: I) -> Result<(), PolarsError>
pub fn set_column_names<I, S>(&mut self, names: I) -> Result<(), PolarsError>
Set the column names.
§Example
let mut df: DataFrame = df!("Mathematical set" => ["ℕ", "ℤ", "𝔻", "ℚ", "ℝ", "ℂ"])?;
df.set_column_names(["Set"])?;
assert_eq!(df.get_column_names(), &["Set"]);
Sourcepub fn first_col_n_chunks(&self) -> usize
pub fn first_col_n_chunks(&self) -> usize
The number of chunks for the first column.
Sourcepub fn max_n_chunks(&self) -> usize
pub fn max_n_chunks(&self) -> usize
The highest number of chunks for any column.
Sourcepub fn fields(&self) -> Vec<Field>
pub fn fields(&self) -> Vec<Field>
Get a reference to the schema fields of the DataFrame
.
§Example
let earth: DataFrame = df!("Surface type" => ["Water", "Land"],
"Fraction" => [0.708, 0.292])?;
let f1: Field = Field::new("Surface type".into(), DataType::String);
let f2: Field = Field::new("Fraction".into(), DataType::Float64);
assert_eq!(earth.fields(), &[f1, f2]);
Sourcepub fn shape(&self) -> (usize, usize)
pub fn shape(&self) -> (usize, usize)
Get (height, width) of the DataFrame
.
§Example
let df0: DataFrame = DataFrame::default();
let df1: DataFrame = df!("1" => [1, 2, 3, 4, 5])?;
let df2: DataFrame = df!("1" => [1, 2, 3, 4, 5],
"2" => [1, 2, 3, 4, 5])?;
assert_eq!(df0.shape(), (0 ,0));
assert_eq!(df1.shape(), (5, 1));
assert_eq!(df2.shape(), (5, 2));
Sourcepub fn width(&self) -> usize
pub fn width(&self) -> usize
Get the width of the DataFrame
which is the number of columns.
§Example
let df0: DataFrame = DataFrame::default();
let df1: DataFrame = df!("Series 1" => [0; 0])?;
let df2: DataFrame = df!("Series 1" => [0; 0],
"Series 2" => [0; 0])?;
assert_eq!(df0.width(), 0);
assert_eq!(df1.width(), 1);
assert_eq!(df2.width(), 2);
Sourcepub fn height(&self) -> usize
pub fn height(&self) -> usize
Get the height of the DataFrame
which is the number of rows.
§Example
let df0: DataFrame = DataFrame::default();
let df1: DataFrame = df!("Currency" => ["€", "$"])?;
let df2: DataFrame = df!("Currency" => ["€", "$", "¥", "£", "₿"])?;
assert_eq!(df0.height(), 0);
assert_eq!(df1.height(), 2);
assert_eq!(df2.height(), 5);
Sourcepub unsafe fn set_height(&mut self, height: usize)
pub unsafe fn set_height(&mut self, height: usize)
Sourcepub fn hstack(&self, columns: &[Column]) -> Result<DataFrame, PolarsError>
pub fn hstack(&self, columns: &[Column]) -> Result<DataFrame, PolarsError>
Add multiple Series
to a DataFrame
.
The added Series
are required to have the same length.
§Example
let df1: DataFrame = df!("Element" => ["Copper", "Silver", "Gold"])?;
let s1 = Column::new("Proton".into(), [29, 47, 79]);
let s2 = Column::new("Electron".into(), [29, 47, 79]);
let df2: DataFrame = df1.hstack(&[s1, s2])?;
assert_eq!(df2.shape(), (3, 3));
println!("{}", df2);
Output:
shape: (3, 3)
+---------+--------+----------+
| Element | Proton | Electron |
| --- | --- | --- |
| str | i32 | i32 |
+=========+========+==========+
| Copper | 29 | 29 |
+---------+--------+----------+
| Silver | 47 | 47 |
+---------+--------+----------+
| Gold | 79 | 79 |
+---------+--------+----------+
Sourcepub fn vstack(&self, other: &DataFrame) -> Result<DataFrame, PolarsError>
pub fn vstack(&self, other: &DataFrame) -> Result<DataFrame, PolarsError>
Concatenate a DataFrame
to this DataFrame
and return as newly allocated DataFrame
.
If many vstack
operations are done, it is recommended to call DataFrame::align_chunks_par
.
§Example
let df1: DataFrame = df!("Element" => ["Copper", "Silver", "Gold"],
"Melting Point (K)" => [1357.77, 1234.93, 1337.33])?;
let df2: DataFrame = df!("Element" => ["Platinum", "Palladium"],
"Melting Point (K)" => [2041.4, 1828.05])?;
let df3: DataFrame = df1.vstack(&df2)?;
assert_eq!(df3.shape(), (5, 2));
println!("{}", df3);
Output:
shape: (5, 2)
+-----------+-------------------+
| Element | Melting Point (K) |
| --- | --- |
| str | f64 |
+===========+===================+
| Copper | 1357.77 |
+-----------+-------------------+
| Silver | 1234.93 |
+-----------+-------------------+
| Gold | 1337.33 |
+-----------+-------------------+
| Platinum | 2041.4 |
+-----------+-------------------+
| Palladium | 1828.05 |
+-----------+-------------------+
Sourcepub fn vstack_mut(
&mut self,
other: &DataFrame,
) -> Result<&mut DataFrame, PolarsError>
pub fn vstack_mut( &mut self, other: &DataFrame, ) -> Result<&mut DataFrame, PolarsError>
Concatenate a DataFrame
to this DataFrame
If many vstack
operations are done, it is recommended to call DataFrame::align_chunks_par
.
§Example
let mut df1: DataFrame = df!("Element" => ["Copper", "Silver", "Gold"],
"Melting Point (K)" => [1357.77, 1234.93, 1337.33])?;
let df2: DataFrame = df!("Element" => ["Platinum", "Palladium"],
"Melting Point (K)" => [2041.4, 1828.05])?;
df1.vstack_mut(&df2)?;
assert_eq!(df1.shape(), (5, 2));
println!("{}", df1);
Output:
shape: (5, 2)
+-----------+-------------------+
| Element | Melting Point (K) |
| --- | --- |
| str | f64 |
+===========+===================+
| Copper | 1357.77 |
+-----------+-------------------+
| Silver | 1234.93 |
+-----------+-------------------+
| Gold | 1337.33 |
+-----------+-------------------+
| Platinum | 2041.4 |
+-----------+-------------------+
| Palladium | 1828.05 |
+-----------+-------------------+
Sourcepub fn vstack_mut_unchecked(&mut self, other: &DataFrame)
pub fn vstack_mut_unchecked(&mut self, other: &DataFrame)
Concatenate a DataFrame
to this DataFrame
If many vstack
operations are done, it is recommended to call DataFrame::align_chunks_par
.
§Panics
Panics if the schemas don’t match.
Sourcepub fn extend(&mut self, other: &DataFrame) -> Result<(), PolarsError>
pub fn extend(&mut self, other: &DataFrame) -> Result<(), PolarsError>
Extend the memory backed by this DataFrame
with the values from other
.
Different from vstack, which adds the chunks from other to the chunks of this DataFrame, extend appends the data from other to the underlying memory locations and thus may cause a reallocation.
If this does not cause a reallocation, the resulting data structure will not have any extra chunks and thus will yield faster queries.
Prefer extend over vstack when you want to do a query after a single append. For instance, during online operations where you add n rows and rerun a query.
Prefer vstack over extend when you want to append many times before doing a query. For instance, when you read in multiple files and want to store them in a single DataFrame. In the latter case, finish the sequence of append operations with a rechunk.
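A minimal, illustrative sketch (not from the upstream docs):
let mut df1 = df!("x" => [1, 2, 3])?;
let df2 = df!("x" => [4, 5])?;
// Appends the rows of df2 into df1's existing buffers where possible.
df1.extend(&df2)?;
assert_eq!(df1.height(), 5);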
Sourcepub fn drop_in_place(&mut self, name: &str) -> Result<Column, PolarsError>
pub fn drop_in_place(&mut self, name: &str) -> Result<Column, PolarsError>
Remove a column by name and return the column removed.
§Example
let mut df: DataFrame = df!("Animal" => ["Tiger", "Lion", "Great auk"],
"IUCN" => ["Endangered", "Vulnerable", "Extinct"])?;
let s1: PolarsResult<Column> = df.drop_in_place("Average weight");
assert!(s1.is_err());
let s2: Column = df.drop_in_place("Animal")?;
assert_eq!(s2, Column::new("Animal".into(), &["Tiger", "Lion", "Great auk"]));
Sourcepub fn drop_nulls<S>(
&self,
subset: Option<&[S]>,
) -> Result<DataFrame, PolarsError>
pub fn drop_nulls<S>( &self, subset: Option<&[S]>, ) -> Result<DataFrame, PolarsError>
Return a new DataFrame
where all null values are dropped.
§Example
let df1: DataFrame = df!("Country" => ["Malta", "Liechtenstein", "North Korea"],
"Tax revenue (% GDP)" => [Some(32.7), None, None])?;
assert_eq!(df1.shape(), (3, 2));
let df2: DataFrame = df1.drop_nulls::<String>(None)?;
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);
Output:
shape: (1, 2)
+---------+---------------------+
| Country | Tax revenue (% GDP) |
| --- | --- |
| str | f64 |
+=========+=====================+
| Malta | 32.7 |
+---------+---------------------+
Sourcepub fn drop_many_amortized(
&self,
names: &HashSet<PlSmallStr, RandomState>,
) -> DataFrame
pub fn drop_many_amortized( &self, names: &HashSet<PlSmallStr, RandomState>, ) -> DataFrame
Drop columns that are in names
without allocating a HashSet
.
Sourcepub fn insert_column<S>(
&mut self,
index: usize,
column: S,
) -> Result<&mut DataFrame, PolarsError>where
S: IntoColumn,
pub fn insert_column<S>(
&mut self,
index: usize,
column: S,
) -> Result<&mut DataFrame, PolarsError>where
S: IntoColumn,
Insert a new column at a given index.
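A minimal sketch (not from the upstream docs) of inserting a column between two existing ones:
let mut df = df!("a" => [1, 2], "c" => [5, 6])?;
let b = Column::new("b".into(), [3, 4]);
df.insert_column(1, b)?;
assert_eq!(df.get_column_names(), &["a", "b", "c"]);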
Sourcepub fn with_column<C>(
&mut self,
column: C,
) -> Result<&mut DataFrame, PolarsError>where
C: IntoColumn,
pub fn with_column<C>(
&mut self,
column: C,
) -> Result<&mut DataFrame, PolarsError>where
C: IntoColumn,
Add a new column to this DataFrame
or replace an existing one.
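A minimal, illustrative sketch (not from the upstream docs):
let mut df = df!("a" => [1, 2, 3])?;
// Add a new column ...
df.with_column(Column::new("b".into(), [4, 5, 6]))?;
// ... and replace an existing one by reusing its name.
df.with_column(Column::new("a".into(), [7, 8, 9]))?;
assert_eq!(df.shape(), (3, 2));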
Sourcepub unsafe fn with_column_unchecked(&mut self, column: Column) -> &mut DataFrame
pub unsafe fn with_column_unchecked(&mut self, column: Column) -> &mut DataFrame
pub fn _add_series( &mut self, series: Vec<Series>, schema: &Schema<DataType>, ) -> Result<(), PolarsError>
pub fn _add_columns( &mut self, columns: Vec<Column>, schema: &Schema<DataType>, ) -> Result<(), PolarsError>
Sourcepub fn with_column_and_schema<C>(
&mut self,
column: C,
schema: &Schema<DataType>,
) -> Result<&mut DataFrame, PolarsError>where
C: IntoColumn,
pub fn with_column_and_schema<C>(
&mut self,
column: C,
schema: &Schema<DataType>,
) -> Result<&mut DataFrame, PolarsError>where
C: IntoColumn,
Add a new column to this DataFrame
or replace an existing one.
Uses an existing schema to amortize lookups.
If the schema is incorrect, we will fallback to linear search.
Note: Schema can be both input or output_schema
Sourcepub fn select_at_idx(&self, idx: usize) -> Option<&Column>
pub fn select_at_idx(&self, idx: usize) -> Option<&Column>
Select a Series
by index.
§Example
let df: DataFrame = df!("Star" => ["Sun", "Betelgeuse", "Sirius A", "Sirius B"],
"Absolute magnitude" => [4.83, -5.85, 1.42, 11.18])?;
let s1: Option<&Column> = df.select_at_idx(0);
let s2 = Column::new("Star".into(), ["Sun", "Betelgeuse", "Sirius A", "Sirius B"]);
assert_eq!(s1, Some(&s2));
Sourcepub fn select_by_range<R>(&self, range: R) -> Result<DataFrame, PolarsError>where
R: RangeBounds<usize>,
pub fn select_by_range<R>(&self, range: R) -> Result<DataFrame, PolarsError>where
R: RangeBounds<usize>,
Sourcepub fn get_column_index(&self, name: &str) -> Option<usize>
pub fn get_column_index(&self, name: &str) -> Option<usize>
Get column index of a Series
by name.
§Example
let df: DataFrame = df!("Name" => ["Player 1", "Player 2", "Player 3"],
"Health" => [100, 200, 500],
"Mana" => [250, 100, 0],
"Strength" => [30, 150, 300])?;
assert_eq!(df.get_column_index("Name"), Some(0));
assert_eq!(df.get_column_index("Health"), Some(1));
assert_eq!(df.get_column_index("Mana"), Some(2));
assert_eq!(df.get_column_index("Strength"), Some(3));
assert_eq!(df.get_column_index("Haste"), None);
Sourcepub fn try_get_column_index(&self, name: &str) -> Result<usize, PolarsError>
pub fn try_get_column_index(&self, name: &str) -> Result<usize, PolarsError>
Get column index of a Series
by name.
Sourcepub fn column(&self, name: &str) -> Result<&Column, PolarsError>
pub fn column(&self, name: &str) -> Result<&Column, PolarsError>
Select a single column by name.
§Example
let s1 = Column::new("Password".into(), ["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]);
let s2 = Column::new("Robustness".into(), ["Weak", "Strong"]);
let df: DataFrame = DataFrame::new(vec![s1.clone(), s2])?;
assert_eq!(df.column("Password")?, &s1);
Sourcepub fn columns<I, S>(&self, names: I) -> Result<Vec<&Column>, PolarsError>
pub fn columns<I, S>(&self, names: I) -> Result<Vec<&Column>, PolarsError>
Select multiple columns by name.
§Example
let df: DataFrame = df!("Latin name" => ["Oncorhynchus kisutch", "Salmo salar"],
"Max weight (kg)" => [16.0, 35.89])?;
let sv: Vec<&Column> = df.columns(["Latin name", "Max weight (kg)"])?;
assert_eq!(&df[0], sv[0]);
assert_eq!(&df[1], sv[1]);
Sourcepub fn select<I, S>(&self, selection: I) -> Result<DataFrame, PolarsError>
pub fn select<I, S>(&self, selection: I) -> Result<DataFrame, PolarsError>
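For illustration, a minimal sketch (not from the upstream docs) of selecting a subset of columns by name:
let df = df!("a" => [1, 2], "b" => [3, 4], "c" => [5, 6])?;
let selected = df.select(["a", "c"])?;
assert_eq!(selected.get_column_names(), &["a", "c"]);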
pub fn _select_impl( &self, cols: &[PlSmallStr], ) -> Result<DataFrame, PolarsError>
pub fn _select_impl_unchecked( &self, cols: &[PlSmallStr], ) -> Result<DataFrame, PolarsError>
Sourcepub fn select_with_schema<I, S>(
&self,
selection: I,
schema: &Arc<Schema<DataType>>,
) -> Result<DataFrame, PolarsError>
pub fn select_with_schema<I, S>( &self, selection: I, schema: &Arc<Schema<DataType>>, ) -> Result<DataFrame, PolarsError>
Select with a known schema. The schema names must match the column names of this DataFrame.
Sourcepub fn select_with_schema_unchecked<I, S>(
&self,
selection: I,
schema: &Schema<DataType>,
) -> Result<DataFrame, PolarsError>
pub fn select_with_schema_unchecked<I, S>( &self, selection: I, schema: &Schema<DataType>, ) -> Result<DataFrame, PolarsError>
Select with a known schema without checking for duplicates in selection
.
The schema names must match the column names of this DataFrame.
Sourcepub fn _select_with_schema_impl(
&self,
cols: &[PlSmallStr],
schema: &Schema<DataType>,
check_duplicates: bool,
) -> Result<DataFrame, PolarsError>
pub fn _select_with_schema_impl( &self, cols: &[PlSmallStr], schema: &Schema<DataType>, check_duplicates: bool, ) -> Result<DataFrame, PolarsError>
- The schema names must match the column names of this DataFrame.
pub fn select_physical<I, S>( &self, selection: I, ) -> Result<DataFrame, PolarsError>
Sourcepub fn select_columns(
&self,
selection: impl IntoVec<PlSmallStr>,
) -> Result<Vec<Column>, PolarsError>
pub fn select_columns( &self, selection: impl IntoVec<PlSmallStr>, ) -> Result<Vec<Column>, PolarsError>
Select column(s) from this DataFrame
and return them into a Vec
.
§Example
let df: DataFrame = df!("Name" => ["Methane", "Ethane", "Propane"],
"Carbon" => [1, 2, 3],
"Hydrogen" => [4, 6, 8])?;
let sv: Vec<Column> = df.select_columns(["Carbon", "Hydrogen"])?;
assert_eq!(df["Carbon"], sv[0]);
assert_eq!(df["Hydrogen"], sv[1]);
Sourcepub fn filter(
&self,
mask: &ChunkedArray<BooleanType>,
) -> Result<DataFrame, PolarsError>
pub fn filter( &self, mask: &ChunkedArray<BooleanType>, ) -> Result<DataFrame, PolarsError>
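For illustration, a minimal sketch (not from the upstream docs) of filtering with a boolean mask:
let df = df!("a" => [1, 2, 3, 4])?;
// Keep only the rows where "a" is greater than 2.
let mask = df.column("a")?.as_materialized_series().gt(2)?;
let filtered = df.filter(&mask)?;
assert_eq!(filtered.height(), 2);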
Sourcepub fn _filter_seq(
&self,
mask: &ChunkedArray<BooleanType>,
) -> Result<DataFrame, PolarsError>
pub fn _filter_seq( &self, mask: &ChunkedArray<BooleanType>, ) -> Result<DataFrame, PolarsError>
Same as filter
but does not parallelize.
Sourcepub fn take(
&self,
indices: &ChunkedArray<UInt32Type>,
) -> Result<DataFrame, PolarsError>
pub fn take( &self, indices: &ChunkedArray<UInt32Type>, ) -> Result<DataFrame, PolarsError>
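For illustration, a minimal sketch (not from the upstream docs) of gathering rows by index:
let df = df!("a" => ["w", "x", "y", "z"])?;
let idx = UInt32Chunked::new("idx".into(), &[0u32, 3]);
// Gathers rows 0 and 3; out-of-bounds indices result in an error.
let taken = df.take(&idx)?;
assert_eq!(taken.height(), 2);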
Sourcepub unsafe fn take_unchecked(&self, idx: &ChunkedArray<UInt32Type>) -> DataFrame
pub unsafe fn take_unchecked(&self, idx: &ChunkedArray<UInt32Type>) -> DataFrame
§Safety
The indices must be in-bounds.
Sourcepub unsafe fn take_unchecked_impl(
&self,
idx: &ChunkedArray<UInt32Type>,
allow_threads: bool,
) -> DataFrame
pub unsafe fn take_unchecked_impl( &self, idx: &ChunkedArray<UInt32Type>, allow_threads: bool, ) -> DataFrame
§Safety
The indices must be in-bounds.
Sourcepub unsafe fn take_slice_unchecked(&self, idx: &[u32]) -> DataFrame
pub unsafe fn take_slice_unchecked(&self, idx: &[u32]) -> DataFrame
§Safety
The indices must be in-bounds.
Sourcepub unsafe fn take_slice_unchecked_impl(
&self,
idx: &[u32],
allow_threads: bool,
) -> DataFrame
pub unsafe fn take_slice_unchecked_impl( &self, idx: &[u32], allow_threads: bool, ) -> DataFrame
§Safety
The indices must be in-bounds.
Sourcepub fn rename(
&mut self,
column: &str,
name: PlSmallStr,
) -> Result<&mut DataFrame, PolarsError>
pub fn rename( &mut self, column: &str, name: PlSmallStr, ) -> Result<&mut DataFrame, PolarsError>
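A minimal, illustrative sketch (not from the upstream docs):
let mut df = df!("old" => [1, 2, 3])?;
// Renames the column "old" to "new"; errors if "old" does not exist.
df.rename("old", "new".into())?;
assert_eq!(df.get_column_names(), &["new"]);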
Sourcepub fn sort_in_place(
&mut self,
by: impl IntoVec<PlSmallStr>,
sort_options: SortMultipleOptions,
) -> Result<&mut DataFrame, PolarsError>
pub fn sort_in_place( &mut self, by: impl IntoVec<PlSmallStr>, sort_options: SortMultipleOptions, ) -> Result<&mut DataFrame, PolarsError>
Sort DataFrame
in place.
See DataFrame::sort
for more instruction.
Sourcepub fn _to_metadata(&self) -> DataFrame
pub fn _to_metadata(&self) -> DataFrame
Create a DataFrame
that has fields for all the known runtime metadata for each column.
This dataframe does not necessarily have a specified schema and may be changed at any point. It is primarily used for debugging.
Sourcepub fn sort(
&self,
by: impl IntoVec<PlSmallStr>,
sort_options: SortMultipleOptions,
) -> Result<DataFrame, PolarsError>
pub fn sort( &self, by: impl IntoVec<PlSmallStr>, sort_options: SortMultipleOptions, ) -> Result<DataFrame, PolarsError>
Return a sorted clone of this DataFrame
.
In many cases the output chunks will be contiguous in memory, but this is not guaranteed.
§Example
Sort by a single column with default options:
fn sort_by_sepal_width(df: &DataFrame) -> PolarsResult<DataFrame> {
df.sort(["sepal_width"], Default::default())
}
Sort by a single column with specific order:
fn sort_with_specific_order(df: &DataFrame, descending: bool) -> PolarsResult<DataFrame> {
df.sort(
["sepal_width"],
SortMultipleOptions::new()
.with_order_descending(descending)
)
}
Sort by multiple columns, specifying the order for each column:
fn sort_by_multiple_columns_with_specific_order(df: &DataFrame) -> PolarsResult<DataFrame> {
df.sort(
["sepal_width", "sepal_length"],
SortMultipleOptions::new()
.with_order_descending_multi([false, true])
)
}
See SortMultipleOptions
for more options.
Also see DataFrame::sort_in_place
.
Sourcepub fn replace<S>(
&mut self,
column: &str,
new_col: S,
) -> Result<&mut DataFrame, PolarsError>where
S: IntoSeries,
pub fn replace<S>(
&mut self,
column: &str,
new_col: S,
) -> Result<&mut DataFrame, PolarsError>where
S: IntoSeries,
Replace a column with a Series
.
§Example
let mut df: DataFrame = df!("Country" => ["United States", "China"],
"Area (km²)" => [9_833_520, 9_596_961])?;
let s: Series = Series::new("Country".into(), ["USA", "PRC"]);
assert!(df.replace("Nation", s.clone()).is_err());
assert!(df.replace("Country", s).is_ok());
Sourcepub fn replace_or_add<S>(
&mut self,
column: PlSmallStr,
new_col: S,
) -> Result<&mut DataFrame, PolarsError>where
S: IntoSeries,
pub fn replace_or_add<S>(
&mut self,
column: PlSmallStr,
new_col: S,
) -> Result<&mut DataFrame, PolarsError>where
S: IntoSeries,
Replace or update a column. The difference between this method and DataFrame::with_column
is that now the value of column: &str
determines the name of the column and not the name
of the Series
passed to this method.
Sourcepub fn replace_column<C>(
&mut self,
index: usize,
new_column: C,
) -> Result<&mut DataFrame, PolarsError>where
C: IntoColumn,
pub fn replace_column<C>(
&mut self,
index: usize,
new_column: C,
) -> Result<&mut DataFrame, PolarsError>where
C: IntoColumn,
Replace column at index idx
with a Series
.
§Example
let s0 = Series::new("foo".into(), ["ham", "spam", "egg"]);
let s1 = Series::new("ascii".into(), [70, 79, 79]);
let mut df = DataFrame::new(vec![s0, s1])?;
// Add 32 to get lowercase ascii values
df.replace_column(1, df.select_at_idx(1).unwrap() + 32);
Sourcepub fn apply<F, C>(
&mut self,
name: &str,
f: F,
) -> Result<&mut DataFrame, PolarsError>
pub fn apply<F, C>( &mut self, name: &str, f: F, ) -> Result<&mut DataFrame, PolarsError>
Apply a closure to a column. This is the recommended way to do in place modification.
§Example
let s0 = Column::new("foo".into(), ["ham", "spam", "egg"]);
let s1 = Column::new("names".into(), ["Jean", "Claude", "van"]);
let mut df = DataFrame::new(vec![s0, s1])?;
fn str_to_len(str_val: &Column) -> Column {
str_val.str()
.unwrap()
.into_iter()
.map(|opt_name: Option<&str>| {
opt_name.map(|name: &str| name.len() as u32)
})
.collect::<UInt32Chunked>()
.into_column()
}
// Replace the names column by the length of the names.
df.apply("names", str_to_len);
Results in:
+--------+-------+
| foo | names |
| --- | --- |
| str | u32 |
+========+=======+
| "ham" | 4 |
+--------+-------+
| "spam" | 6 |
+--------+-------+
| "egg" | 3 |
+--------+-------+
Sourcepub fn apply_at_idx<F, C>(
&mut self,
idx: usize,
f: F,
) -> Result<&mut DataFrame, PolarsError>
pub fn apply_at_idx<F, C>( &mut self, idx: usize, f: F, ) -> Result<&mut DataFrame, PolarsError>
Apply a closure to a column at index idx
. This is the recommended way to do in place
modification.
§Example
let s0 = Column::new("foo".into(), ["ham", "spam", "egg"]);
let s1 = Column::new("ascii".into(), [70, 79, 79]);
let mut df = DataFrame::new(vec![s0, s1])?;
// Add 32 to get lowercase ascii values
df.apply_at_idx(1, |s| s + 32);
Results in:
+--------+-------+
| foo | ascii |
| --- | --- |
| str | i32 |
+========+=======+
| "ham" | 102 |
+--------+-------+
| "spam" | 111 |
+--------+-------+
| "egg" | 111 |
+--------+-------+
Sourcepub fn try_apply_at_idx<F, C>(
&mut self,
idx: usize,
f: F,
) -> Result<&mut DataFrame, PolarsError>
pub fn try_apply_at_idx<F, C>( &mut self, idx: usize, f: F, ) -> Result<&mut DataFrame, PolarsError>
Apply a closure that may fail to a column at index idx
. This is the recommended way to do in place
modification.
§Example
This is the idiomatic way to replace some values in a column of a DataFrame given a range of indexes.
let s0 = Column::new("foo".into(), ["ham", "spam", "egg", "bacon", "quack"]);
let s1 = Column::new("values".into(), [1, 2, 3, 4, 5]);
let mut df = DataFrame::new(vec![s0, s1])?;
let idx = vec![0, 1, 4];
df.try_apply("foo", |c| {
c.str()?
.scatter_with(idx, |opt_val| opt_val.map(|string| format!("{}-is-modified", string)))
});
Results in:
+---------------------+--------+
| foo | values |
| --- | --- |
| str | i32 |
+=====================+========+
| "ham-is-modified" | 1 |
+---------------------+--------+
| "spam-is-modified" | 2 |
+---------------------+--------+
| "egg" | 3 |
+---------------------+--------+
| "bacon" | 4 |
+---------------------+--------+
| "quack-is-modified" | 5 |
+---------------------+--------+
Sourcepub fn try_apply<F, C>(
&mut self,
column: &str,
f: F,
) -> Result<&mut DataFrame, PolarsError>
pub fn try_apply<F, C>( &mut self, column: &str, f: F, ) -> Result<&mut DataFrame, PolarsError>
Apply a closure that may fail to a column. This is the recommended way to do in place modification.
§Example
This is the idiomatic way to replace some values in a column of a DataFrame given a boolean mask.
let s0 = Column::new("foo".into(), ["ham", "spam", "egg", "bacon", "quack"]);
let s1 = Column::new("values".into(), [1, 2, 3, 4, 5]);
let mut df = DataFrame::new(vec![s0, s1])?;
// create a mask
let values = df.column("values")?.as_materialized_series();
let mask = values.lt_eq(1)? | values.gt_eq(5_i32)?;
df.try_apply("foo", |c| {
c.str()?
.set(&mask, Some("not_within_bounds"))
});
Results in:
+---------------------+--------+
| foo | values |
| --- | --- |
| str | i32 |
+=====================+========+
| "not_within_bounds" | 1 |
+---------------------+--------+
| "spam" | 2 |
+---------------------+--------+
| "egg" | 3 |
+---------------------+--------+
| "bacon" | 4 |
+---------------------+--------+
| "not_within_bounds" | 5 |
+---------------------+--------+
Sourcepub fn slice(&self, offset: i64, length: usize) -> DataFrame
pub fn slice(&self, offset: i64, length: usize) -> DataFrame
Slice the DataFrame
along the rows.
§Example
let df: DataFrame = df!("Fruit" => ["Apple", "Grape", "Grape", "Fig", "Fig"],
"Color" => ["Green", "Red", "White", "White", "Red"])?;
let sl: DataFrame = df.slice(2, 3);
assert_eq!(sl.shape(), (3, 2));
println!("{}", sl);
Output:
shape: (3, 2)
+-------+-------+
| Fruit | Color |
| --- | --- |
| str | str |
+=======+=======+
| Grape | White |
+-------+-------+
| Fig | White |
+-------+-------+
| Fig | Red |
+-------+-------+
Sourcepub fn split_at(&self, offset: i64) -> (DataFrame, DataFrame)
pub fn split_at(&self, offset: i64) -> (DataFrame, DataFrame)
Split DataFrame
at the given offset
.
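A minimal, illustrative sketch (not from the upstream docs):
let df = df!("x" => [1, 2, 3, 4, 5])?;
// Split after the first two rows.
let (head, tail) = df.split_at(2);
assert_eq!(head.height(), 2);
assert_eq!(tail.height(), 3);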
pub fn clear(&self) -> DataFrame
pub fn slice_par(&self, offset: i64, length: usize) -> DataFrame
pub fn _slice_and_realloc(&self, offset: i64, length: usize) -> DataFrame
Sourcepub fn head(&self, length: Option<usize>) -> DataFrame
pub fn head(&self, length: Option<usize>) -> DataFrame
Get the head of the DataFrame
.
§Example
let countries: DataFrame =
df!("Rank by GDP (2021)" => [1, 2, 3, 4, 5],
"Continent" => ["North America", "Asia", "Asia", "Europe", "Europe"],
"Country" => ["United States", "China", "Japan", "Germany", "United Kingdom"],
"Capital" => ["Washington", "Beijing", "Tokyo", "Berlin", "London"])?;
assert_eq!(countries.shape(), (5, 4));
println!("{}", countries.head(Some(3)));
Output:
shape: (3, 4)
+--------------------+---------------+---------------+------------+
| Rank by GDP (2021) | Continent | Country | Capital |
| --- | --- | --- | --- |
| i32 | str | str | str |
+====================+===============+===============+============+
| 1 | North America | United States | Washington |
+--------------------+---------------+---------------+------------+
| 2 | Asia | China | Beijing |
+--------------------+---------------+---------------+------------+
| 3 | Asia | Japan | Tokyo |
+--------------------+---------------+---------------+------------+
Sourcepub fn tail(&self, length: Option<usize>) -> DataFrame
pub fn tail(&self, length: Option<usize>) -> DataFrame
Get the tail of the DataFrame
.
§Example
let countries: DataFrame =
df!("Rank (2021)" => [105, 106, 107, 108, 109],
"Apple Price (€/kg)" => [0.75, 0.70, 0.70, 0.65, 0.52],
"Country" => ["Kosovo", "Moldova", "North Macedonia", "Syria", "Turkey"])?;
assert_eq!(countries.shape(), (5, 3));
println!("{}", countries.tail(Some(2)));
Output:
shape: (2, 3)
+-------------+--------------------+---------+
| Rank (2021) | Apple Price (€/kg) | Country |
| --- | --- | --- |
| i32 | f64 | str |
+=============+====================+=========+
| 108 | 0.65 | Syria |
+-------------+--------------------+---------+
| 109 | 0.52 | Turkey |
+-------------+--------------------+---------+
Sourcepub fn iter_chunks(
&self,
compat_level: CompatLevel,
parallel: bool,
) -> RecordBatchIter<'_> ⓘ
pub fn iter_chunks( &self, compat_level: CompatLevel, parallel: bool, ) -> RecordBatchIter<'_> ⓘ
Iterator over the rows in this DataFrame
as Arrow RecordBatches.
§Panics
Panics if the DataFrame
that is passed is not rechunked.
This responsibility is left to the caller as we don’t want to take mutable references here, but we also don’t want to rechunk here, as this operation is costly and would benefit the caller as well.
Sourcepub fn iter_chunks_physical(&self) -> PhysRecordBatchIter<'_> ⓘ
pub fn iter_chunks_physical(&self) -> PhysRecordBatchIter<'_> ⓘ
Iterator over the rows in this DataFrame
as Arrow RecordBatches as physical values.
§Panics
Panics if the DataFrame
that is passed is not rechunked.
This responsibility is left to the caller as we don’t want to take mutable references here, but we also don’t want to rechunk here, as this operation is costly and would benefit the caller as well.
Sourcepub fn shift(&self, periods: i64) -> DataFrame
pub fn shift(&self, periods: i64) -> DataFrame
Shift the values by a given period and fill the parts that will be empty due to this operation
with Nones
.
See the method on Series for more info on the shift
operation.
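A minimal, illustrative sketch (not from the upstream docs):
let df = df!("a" => [1, 2, 3])?;
// Shift values down by one; the first slot becomes null.
let shifted = df.shift(1);
assert_eq!(shifted.column("a")?.null_count(), 1);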
Sourcepub fn fill_null(
&self,
strategy: FillNullStrategy,
) -> Result<DataFrame, PolarsError>
pub fn fill_null( &self, strategy: FillNullStrategy, ) -> Result<DataFrame, PolarsError>
Replace None values with one of the following strategies:
- Forward fill (replace None with the previous value)
- Backward fill (replace None with the next value)
- Mean fill (replace None with the mean of the whole array)
- Min fill (replace None with the minimum of the whole array)
- Max fill (replace None with the maximum of the whole array)
See the method on Series for more info on the fill_null
operation.
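A minimal sketch (not from the upstream docs) using the mean strategy:
let df = df!("a" => [Some(1.0), None, Some(3.0)])?;
// Replace missing values with the mean of the column.
let filled = df.fill_null(FillNullStrategy::Mean)?;
assert_eq!(filled.column("a")?.null_count(), 0);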
Sourcepub fn pipe<F, B>(self, f: F) -> Result<B, PolarsError>
pub fn pipe<F, B>(self, f: F) -> Result<B, PolarsError>
Pipe different functions/closure operations that work on a DataFrame together.
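A minimal sketch (not from the upstream docs); the helper function name is made up:
fn add_row_index(df: DataFrame) -> PolarsResult<DataFrame> {
    df.with_row_index("idx".into(), None)
}
let df = df!("a" => [1, 2, 3])?;
let piped = df.pipe(add_row_index)?;
assert_eq!(piped.width(), 2);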
Sourcepub fn pipe_mut<F, B>(&mut self, f: F) -> Result<B, PolarsError>
pub fn pipe_mut<F, B>(&mut self, f: F) -> Result<B, PolarsError>
Pipe different functions/closure operations that work on a DataFrame together.
Sourcepub fn pipe_with_args<F, B, Args>(
self,
f: F,
args: Args,
) -> Result<B, PolarsError>
pub fn pipe_with_args<F, B, Args>( self, f: F, args: Args, ) -> Result<B, PolarsError>
Pipe different functions/closure operations that work on a DataFrame together.
Sourcepub fn unique_stable(
&self,
subset: Option<&[String]>,
keep: UniqueKeepStrategy,
slice: Option<(i64, usize)>,
) -> Result<DataFrame, PolarsError>
Available on crate feature algorithm_group_by
only.
pub fn unique_stable( &self, subset: Option<&[String]>, keep: UniqueKeepStrategy, slice: Option<(i64, usize)>, ) -> Result<DataFrame, PolarsError>
Drop duplicate rows from a DataFrame.
This fails when there is a column of type List in DataFrame
Stable means that the order is maintained. This has a higher cost than an unstable distinct.
§Example
let df = df! {
"flt" => [1., 1., 2., 2., 3., 3.],
"int" => [1, 1, 2, 2, 3, 3, ],
"str" => ["a", "a", "b", "b", "c", "c"]
}?;
println!("{}", df.unique_stable(None, UniqueKeepStrategy::First, None)?);
Returns
+-----+-----+-----+
| flt | int | str |
| --- | --- | --- |
| f64 | i32 | str |
+=====+=====+=====+
| 1 | 1 | "a" |
+-----+-----+-----+
| 2 | 2 | "b" |
+-----+-----+-----+
| 3 | 3 | "c" |
+-----+-----+-----+
Sourcepub fn unique<I, S>(
&self,
subset: Option<&[String]>,
keep: UniqueKeepStrategy,
slice: Option<(i64, usize)>,
) -> Result<DataFrame, PolarsError>
Available on crate feature algorithm_group_by
only.
pub fn unique<I, S>( &self, subset: Option<&[String]>, keep: UniqueKeepStrategy, slice: Option<(i64, usize)>, ) -> Result<DataFrame, PolarsError>
Unstable distinct. See DataFrame::unique_stable.
pub fn unique_impl( &self, maintain_order: bool, subset: Option<Vec<PlSmallStr>>, keep: UniqueKeepStrategy, slice: Option<(i64, usize)>, ) -> Result<DataFrame, PolarsError>
Available on crate feature algorithm_group_by only.
Sourcepub fn is_unique(&self) -> Result<ChunkedArray<BooleanType>, PolarsError>
Available on crate feature algorithm_group_by
only.
pub fn is_unique(&self) -> Result<ChunkedArray<BooleanType>, PolarsError>
Available on crate feature algorithm_group_by only.
Sourcepub fn is_duplicated(&self) -> Result<ChunkedArray<BooleanType>, PolarsError>
Available on crate feature algorithm_group_by
only.
pub fn is_duplicated(&self) -> Result<ChunkedArray<BooleanType>, PolarsError>
Available on crate feature algorithm_group_by only.
Sourcepub fn null_count(&self) -> DataFrame
pub fn null_count(&self) -> DataFrame
Create a new DataFrame
that shows the null counts per column.
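A minimal, illustrative sketch (not from the upstream docs):
let df = df!("a" => [Some(1.0), None, Some(3.0)],
             "b" => [Some(1.0), Some(2.0), Some(3.0)])?;
// One row; each column holds its null count as a u32.
let counts = df.null_count();
assert_eq!(counts.shape(), (1, 2));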
Sourcepub fn get_supertype(&self) -> Option<Result<DataType, PolarsError>>
pub fn get_supertype(&self) -> Option<Result<DataType, PolarsError>>
Get the supertype of the columns in this DataFrame
Sourcepub fn partition_by<I, S>(
&self,
cols: I,
include_key: bool,
) -> Result<Vec<DataFrame>, PolarsError>
Available on crate feature partition_by
only.
pub fn partition_by<I, S>( &self, cols: I, include_key: bool, ) -> Result<Vec<DataFrame>, PolarsError>
Available on crate feature partition_by only.
Split into multiple DataFrames partitioned by groups.
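A minimal sketch (not from the upstream docs); the column names are made up:
let df = df!("group" => ["a", "a", "b"], "value" => [1, 2, 3])?;
// One DataFrame per distinct value of "group"; include_key keeps the key column.
let parts = df.partition_by(["group"], true)?;
assert_eq!(parts.len(), 2);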
Sourcepub fn partition_by_stable<I, S>(
&self,
cols: I,
include_key: bool,
) -> Result<Vec<DataFrame>, PolarsError>
Available on crate feature partition_by
only.
pub fn partition_by_stable<I, S>( &self, cols: I, include_key: bool, ) -> Result<Vec<DataFrame>, PolarsError>
Available on crate feature partition_by only.
Split into multiple DataFrames partitioned by groups. The order of the groups is maintained.
Sourcepub fn unnest<I>(&self, cols: I) -> Result<DataFrame, PolarsError>where
I: IntoVec<PlSmallStr>,
Available on crate feature dtype-struct
only.
pub fn unnest<I>(&self, cols: I) -> Result<DataFrame, PolarsError>where
I: IntoVec<PlSmallStr>,
Available on crate feature dtype-struct only.
Unnest the given Struct columns. This means that the fields of the Struct type will be inserted as columns.
pub fn append_record_batch( &mut self, rb: RecordBatchT<Box<dyn Array>>, ) -> Result<(), PolarsError>
Source§impl DataFrame
impl DataFrame
Sourcepub fn schema_equal(&self, other: &DataFrame) -> Result<(), PolarsError>
pub fn schema_equal(&self, other: &DataFrame) -> Result<(), PolarsError>
Check if the DataFrames’ schemas are equal.
Sourcepub fn equals(&self, other: &DataFrame) -> bool
pub fn equals(&self, other: &DataFrame) -> bool
Check if DataFrames are equal. Note that None == None evaluates to false.
§Example
let df1: DataFrame = df!("Atomic number" => &[1, 51, 300],
"Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
let df2: DataFrame = df!("Atomic number" => &[1, 51, 300],
"Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
assert!(!df1.equals(&df2));
Sourcepub fn equals_missing(&self, other: &DataFrame) -> bool
pub fn equals_missing(&self, other: &DataFrame) -> bool
Check if all values in DataFrame
s are equal where None == None
evaluates to true
.
§Example
let df1: DataFrame = df!("Atomic number" => &[1, 51, 300],
"Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
let df2: DataFrame = df!("Atomic number" => &[1, 51, 300],
"Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
assert!(df1.equals_missing(&df2));
Trait Implementations§
Source§impl AsofJoinBy for DataFrame
impl AsofJoinBy for DataFrame
Source§fn join_asof_by<I, S>(
&self,
other: &DataFrame,
left_on: &str,
right_on: &str,
left_by: I,
right_by: I,
strategy: AsofStrategy,
tolerance: Option<AnyValue<'static>>,
) -> Result<DataFrame, PolarsError>
fn join_asof_by<I, S>( &self, other: &DataFrame, left_on: &str, right_on: &str, left_by: I, right_by: I, strategy: AsofStrategy, tolerance: Option<AnyValue<'static>>, ) -> Result<DataFrame, PolarsError>
by.
Source§impl Container for DataFrame
impl Container for DataFrame
fn slice(&self, offset: i64, len: usize) -> DataFrame
fn split_at(&self, offset: i64) -> (DataFrame, DataFrame)
fn len(&self) -> usize
fn iter_chunks(&self) -> impl Iterator<Item = DataFrame>
fn n_chunks(&self) -> usize
fn chunk_lengths(&self) -> impl Iterator<Item = usize>
Source§impl CrossJoin for DataFrame
impl CrossJoin for DataFrame
fn cross_join_dfs( &self, other: &DataFrame, slice: Option<(i64, usize)>, parallel: bool, ) -> Result<(DataFrame, DataFrame), PolarsError>
Source§fn cross_join(
&self,
other: &DataFrame,
suffix: Option<PlSmallStr>,
slice: Option<(i64, usize)>,
) -> Result<DataFrame, PolarsError>
fn cross_join( &self, other: &DataFrame, suffix: Option<PlSmallStr>, slice: Option<(i64, usize)>, ) -> Result<DataFrame, PolarsError>
Source§impl DataFrameJoinOps for DataFrame
impl DataFrameJoinOps for DataFrame
Source§fn join(
&self,
other: &DataFrame,
left_on: impl IntoIterator<Item = impl Into<PlSmallStr>>,
right_on: impl IntoIterator<Item = impl Into<PlSmallStr>>,
args: JoinArgs,
) -> Result<DataFrame, PolarsError>
fn join( &self, other: &DataFrame, left_on: impl IntoIterator<Item = impl Into<PlSmallStr>>, right_on: impl IntoIterator<Item = impl Into<PlSmallStr>>, args: JoinArgs, ) -> Result<DataFrame, PolarsError>
Source§fn inner_join(
&self,
other: &DataFrame,
left_on: impl IntoIterator<Item = impl Into<PlSmallStr>>,
right_on: impl IntoIterator<Item = impl Into<PlSmallStr>>,
) -> Result<DataFrame, PolarsError>
fn inner_join( &self, other: &DataFrame, left_on: impl IntoIterator<Item = impl Into<PlSmallStr>>, right_on: impl IntoIterator<Item = impl Into<PlSmallStr>>, ) -> Result<DataFrame, PolarsError>
Source§fn left_join(
&self,
other: &DataFrame,
left_on: impl IntoIterator<Item = impl Into<PlSmallStr>>,
right_on: impl IntoIterator<Item = impl Into<PlSmallStr>>,
) -> Result<DataFrame, PolarsError>
fn left_join( &self, other: &DataFrame, left_on: impl IntoIterator<Item = impl Into<PlSmallStr>>, right_on: impl IntoIterator<Item = impl Into<PlSmallStr>>, ) -> Result<DataFrame, PolarsError>
Source§fn full_join(
&self,
other: &DataFrame,
left_on: impl IntoIterator<Item = impl Into<PlSmallStr>>,
right_on: impl IntoIterator<Item = impl Into<PlSmallStr>>,
) -> Result<DataFrame, PolarsError>
fn full_join( &self, other: &DataFrame, left_on: impl IntoIterator<Item = impl Into<PlSmallStr>>, right_on: impl IntoIterator<Item = impl Into<PlSmallStr>>, ) -> Result<DataFrame, PolarsError>
Source§impl<'de> Deserialize<'de> for DataFrame
impl<'de> Deserialize<'de> for DataFrame
Source§fn deserialize<D>(
deserializer: D,
) -> Result<DataFrame, <D as Deserializer<'de>>::Error>where
D: Deserializer<'de>,
fn deserialize<D>(
deserializer: D,
) -> Result<DataFrame, <D as Deserializer<'de>>::Error>where
D: Deserializer<'de>,
Source§impl FromIterator<Column> for DataFrame
impl FromIterator<Column> for DataFrame
Source§impl FromIterator<Series> for DataFrame
impl FromIterator<Series> for DataFrame
Source§impl JoinDispatch for DataFrame
impl JoinDispatch for DataFrame
Source§unsafe fn create_left_df_chunked(
&self,
chunk_ids: &[ChunkId],
left_join: bool,
was_sliced: bool,
) -> DataFrame
unsafe fn create_left_df_chunked( &self, chunk_ids: &[ChunkId], left_join: bool, was_sliced: bool, ) -> DataFrame
Available on crate feature chunked_ids only.
Source§unsafe fn _create_left_df_from_slice(
&self,
join_tuples: &[u32],
left_join: bool,
was_sliced: bool,
sorted_tuple_idx: bool,
) -> DataFrame
unsafe fn _create_left_df_from_slice( &self, join_tuples: &[u32], left_join: bool, was_sliced: bool, sorted_tuple_idx: bool, ) -> DataFrame
Source§unsafe fn _finish_anti_semi_join(
&self,
idx: &[u32],
slice: Option<(i64, usize)>,
) -> DataFrame
unsafe fn _finish_anti_semi_join( &self, idx: &[u32], slice: Option<(i64, usize)>, ) -> DataFrame
Available on crate feature semi_anti_join only.
Source§fn _semi_anti_join_from_series(
&self,
s_left: &Series,
s_right: &Series,
slice: Option<(i64, usize)>,
anti: bool,
join_nulls: bool,
) -> Result<DataFrame, PolarsError>
fn _semi_anti_join_from_series( &self, s_left: &Series, s_right: &Series, slice: Option<(i64, usize)>, anti: bool, join_nulls: bool, ) -> Result<DataFrame, PolarsError>
Available on crate feature semi_anti_join only.
fn _full_join_from_series( &self, other: &DataFrame, s_left: &Series, s_right: &Series, args: JoinArgs, ) -> Result<DataFrame, PolarsError>
Source§impl MinMaxHorizontal for DataFrame
impl MinMaxHorizontal for DataFrame
Source§fn min_horizontal(&self) -> Result<Option<Column>, PolarsError>
fn min_horizontal(&self) -> Result<Option<Column>, PolarsError>
Source§fn max_horizontal(&self) -> Result<Option<Column>, PolarsError>
fn max_horizontal(&self) -> Result<Option<Column>, PolarsError>
Source§impl PolarsTemporalGroupby for DataFrame
impl PolarsTemporalGroupby for DataFrame
fn rolling( &self, group_by: Vec<Column>, options: &RollingGroupOptions, ) -> Result<(Column, Vec<Column>, GroupsProxy), PolarsError>
fn group_by_dynamic( &self, group_by: Vec<Column>, options: &DynamicGroupOptions, ) -> Result<(Column, Vec<Column>, GroupsProxy), PolarsError>
Source§impl PolarsUpsample for DataFrame
impl PolarsUpsample for DataFrame
Source§fn upsample<I>(
&self,
by: I,
time_column: &str,
every: Duration,
) -> Result<DataFrame, PolarsError>where
I: IntoVec<PlSmallStr>,
fn upsample<I>(
&self,
by: I,
time_column: &str,
every: Duration,
) -> Result<DataFrame, PolarsError>where
I: IntoVec<PlSmallStr>,
Source§fn upsample_stable<I>(
&self,
by: I,
time_column: &str,
every: Duration,
) -> Result<DataFrame, PolarsError>where
I: IntoVec<PlSmallStr>,
fn upsample_stable<I>(
&self,
by: I,
time_column: &str,
every: Duration,
) -> Result<DataFrame, PolarsError>where
I: IntoVec<PlSmallStr>,
Source§impl Serialize for DataFrame
impl Serialize for DataFrame
Source§fn serialize<S>(
&self,
serializer: S,
) -> Result<<S as Serializer>::Ok, <S as Serializer>::Error>where
S: Serializer,
fn serialize<S>(
&self,
serializer: S,
) -> Result<<S as Serializer>::Ok, <S as Serializer>::Error>where
S: Serializer,
Source§impl SumMeanHorizontal for DataFrame
impl SumMeanHorizontal for DataFrame
Source§fn sum_horizontal(
&self,
null_strategy: NullStrategy,
) -> Result<Option<Column>, PolarsError>
fn sum_horizontal( &self, null_strategy: NullStrategy, ) -> Result<Option<Column>, PolarsError>
Source§fn mean_horizontal(
&self,
null_strategy: NullStrategy,
) -> Result<Option<Column>, PolarsError>
fn mean_horizontal( &self, null_strategy: NullStrategy, ) -> Result<Option<Column>, PolarsError>
Source§impl TakeChunked for DataFrame
impl TakeChunked for DataFrame
Source§impl TakeChunkedHorPar for DataFrame
impl TakeChunkedHorPar for DataFrame
Source§impl TryExtend<RecordBatchT<Box<dyn Array>>> for DataFrame
impl TryExtend<RecordBatchT<Box<dyn Array>>> for DataFrame
Source§fn try_extend<I>(&mut self, iter: I) -> Result<(), PolarsError>where
I: IntoIterator<Item = RecordBatchT<Box<dyn Array>>>,
fn try_extend<I>(&mut self, iter: I) -> Result<(), PolarsError>where
I: IntoIterator<Item = RecordBatchT<Box<dyn Array>>>,
Extend::extend.
Source§impl TryExtend<Result<RecordBatchT<Box<dyn Array>>, PolarsError>> for DataFrame
impl TryExtend<Result<RecordBatchT<Box<dyn Array>>, PolarsError>> for DataFrame
Source§fn try_extend<I>(&mut self, iter: I) -> Result<(), PolarsError>
fn try_extend<I>(&mut self, iter: I) -> Result<(), PolarsError>
Extend::extend.
Source§impl TryFrom<StructArray> for DataFrame
impl TryFrom<StructArray> for DataFrame
Source§type Error = PolarsError
type Error = PolarsError
Source§fn try_from(arr: StructArray) -> Result<DataFrame, PolarsError>
fn try_from(arr: StructArray) -> Result<DataFrame, PolarsError>
impl AsofJoin for DataFrame
Auto Trait Implementations§
impl !Freeze for DataFrame
impl !RefUnwindSafe for DataFrame
impl Send for DataFrame
impl Sync for DataFrame
impl Unpin for DataFrame
impl !UnwindSafe for DataFrame
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<T> DataFrameOps for Twhere
T: IntoDf,
impl<T> DataFrameOps for Twhere
T: IntoDf,
Source§fn to_dummies(
&self,
separator: Option<&str>,
drop_first: bool,
) -> Result<DataFrame, PolarsError>
fn to_dummies( &self, separator: Option<&str>, drop_first: bool, ) -> Result<DataFrame, PolarsError>
Available on crate feature to_dummies only.
§impl<T> Instrument for T
impl<T> Instrument for T
§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self> ⓘ
fn into_either(self, into_left: bool) -> Either<Self, Self> ⓘ
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> ⓘ
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> ⓘ
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
§impl<T> Pointable for T
impl<T> Pointable for T
§impl<T> ToCompactString for Twhere
T: Display,
impl<T> ToCompactString for Twhere
T: Display,
§fn try_to_compact_string(&self) -> Result<CompactString, ToCompactStringError>
fn try_to_compact_string(&self) -> Result<CompactString, ToCompactStringError>
ToCompactString::to_compact_string(). Read more
§fn to_compact_string(&self) -> CompactString
fn to_compact_string(&self) -> CompactString
CompactString. Read more