pub struct DataFrame { /* private fields */ }Expand description
A contiguous growable collection of Columns that have the same length.
§Use declarations
All the common tools can be found in crate::prelude (or in polars::prelude).
use polars_core::prelude::*; // if the crate polars-core is used directly
// use polars::prelude::*; if the crate polars is used§Initialization
§Default
A DataFrame can be initialized empty:
let df = DataFrame::empty();
assert_eq!(df.shape(), (0, 0));§Wrapping a Vec<Column>
A DataFrame is built upon a Vec<Column> where all the Columns have the same length.
let s1 = Column::new("Fruit".into(), ["Apple", "Apple", "Pear"]);
let s2 = Column::new("Color".into(), ["Red", "Yellow", "Green"]);
let df: PolarsResult<DataFrame> = DataFrame::new_infer_height(vec![s1, s2]);§Using a macro
The df! macro is a convenient method:
let df: PolarsResult<DataFrame> = df!("Fruit" => ["Apple", "Apple", "Pear"],
"Color" => ["Red", "Yellow", "Green"]);§Using a CSV file
See the polars_io::csv::CsvReader.
§Indexing
§By a number
The Index<usize> is implemented for the DataFrame.
let df = df!("Fruit" => ["Apple", "Apple", "Pear"],
"Color" => ["Red", "Yellow", "Green"])?;
assert_eq!(df[0], Column::new("Fruit".into(), &["Apple", "Apple", "Pear"]));
assert_eq!(df[1], Column::new("Color".into(), &["Red", "Yellow", "Green"]));§By a Series name
let df = df!("Fruit" => ["Apple", "Apple", "Pear"],
"Color" => ["Red", "Yellow", "Green"])?;
assert_eq!(df["Fruit"], Column::new("Fruit".into(), &["Apple", "Apple", "Pear"]));
assert_eq!(df["Color"], Column::new("Color".into(), &["Red", "Yellow", "Green"]));Implementations§
Source§impl DataFrame
impl DataFrame
Sourcepub fn to_ndarray<N>(
&self,
ordering: IndexOrder,
) -> Result<ArrayBase<OwnedRepr<<N as PolarsNumericType>::Native>, Dim<[usize; 2]>>, PolarsError>where
N: PolarsNumericType,
pub fn to_ndarray<N>(
&self,
ordering: IndexOrder,
) -> Result<ArrayBase<OwnedRepr<<N as PolarsNumericType>::Native>, Dim<[usize; 2]>>, PolarsError>where
N: PolarsNumericType,
Create a 2D ndarray::Array from this DataFrame. This requires all columns in the
DataFrame to be non-null and numeric. They will be cast to the same data type
(if they aren’t already).
For floating point data we implicitly convert None to NaN without failure.
use polars_core::prelude::*;
let a = UInt32Chunked::new("a".into(), &[1, 2, 3]).into_column();
let b = Float64Chunked::new("b".into(), &[10., 8., 6.]).into_column();
let df = DataFrame::new_infer_height(vec![a, b]).unwrap();
let ndarray = df.to_ndarray::<Float64Type>(IndexOrder::Fortran).unwrap();
println!("{:?}", ndarray);Outputs:
[[1.0, 10.0],
[2.0, 8.0],
[3.0, 6.0]], shape=[3, 2], strides=[1, 3], layout=Ff (0xa), const ndim=2Source§impl DataFrame
impl DataFrame
Sourcepub fn sample_n(
&self,
n: &Series,
with_replacement: bool,
shuffle: bool,
seed: Option<u64>,
) -> Result<DataFrame, PolarsError>
pub fn sample_n( &self, n: &Series, with_replacement: bool, shuffle: bool, seed: Option<u64>, ) -> Result<DataFrame, PolarsError>
Sample n datapoints from this DataFrame.
pub fn sample_n_literal( &self, n: usize, with_replacement: bool, shuffle: bool, seed: Option<u64>, ) -> Result<DataFrame, PolarsError>
Source§impl DataFrame
impl DataFrame
pub fn into_struct(self, name: PlSmallStr) -> ChunkedArray<StructType>
Source§impl DataFrame
impl DataFrame
pub fn split_chunks(&mut self) -> impl Iterator<Item = DataFrame>
pub fn split_chunks_by_n(self, n: usize, parallel: bool) -> Vec<DataFrame>
Sourcepub fn rechunk_to_arrow(&self, compat_level: CompatLevel) -> Vec<Box<dyn Array>>
pub fn rechunk_to_arrow(&self, compat_level: CompatLevel) -> Vec<Box<dyn Array>>
Convert the columns of this DataFrame to arrow arrays.
Sourcepub fn rechunk_into_arrow(
self,
compat_level: CompatLevel,
) -> Vec<Box<dyn Array>>
pub fn rechunk_into_arrow( self, compat_level: CompatLevel, ) -> Vec<Box<dyn Array>>
Convert the columns of this DataFrame to arrow arrays.
Source§impl DataFrame
impl DataFrame
Sourcepub const fn empty() -> DataFrame
pub const fn empty() -> DataFrame
Creates an empty DataFrame usable in a compile time context (such as static initializers).
§Example
use polars_core::prelude::DataFrame;
static EMPTY: DataFrame = DataFrame::empty();pub const fn empty_with_height(height: usize) -> DataFrame
pub fn new( height: usize, columns: Vec<Column>, ) -> Result<DataFrame, PolarsError>
Sourcepub fn new_infer_height(columns: Vec<Column>) -> Result<DataFrame, PolarsError>
pub fn new_infer_height(columns: Vec<Column>) -> Result<DataFrame, PolarsError>
Height is sourced from the first column.
Sourcepub unsafe fn new_unchecked_infer_height(columns: Vec<Column>) -> DataFrame
pub unsafe fn new_unchecked_infer_height(columns: Vec<Column>) -> DataFrame
Sourcepub const unsafe fn _new_unchecked_impl(
height: usize,
columns: Vec<Column>,
) -> DataFrame
pub const unsafe fn _new_unchecked_impl( height: usize, columns: Vec<Column>, ) -> DataFrame
This will not panic even in debug mode - there are some (rare) use cases where a DataFrame is temporarily constructed containing duplicates for dispatching to functions. A DataFrame constructed with this method is generally highly unsafe and should not be long-lived.
Sourcepub fn new_with_broadcast(
height: usize,
columns: Vec<Column>,
) -> Result<DataFrame, PolarsError>
pub fn new_with_broadcast( height: usize, columns: Vec<Column>, ) -> Result<DataFrame, PolarsError>
Broadcasts unit-length columns to height. Errors if a column has a length that is neither 1
nor equal to height.
Sourcepub fn new_infer_broadcast(
columns: Vec<Column>,
) -> Result<DataFrame, PolarsError>
pub fn new_infer_broadcast( columns: Vec<Column>, ) -> Result<DataFrame, PolarsError>
Infers the height from the first non-unit-length column, or uses 1 if no such column is found.
Sourcepub unsafe fn new_unchecked_with_broadcast(
height: usize,
columns: Vec<Column>,
) -> Result<DataFrame, PolarsError>
pub unsafe fn new_unchecked_with_broadcast( height: usize, columns: Vec<Column>, ) -> Result<DataFrame, PolarsError>
Sourcepub unsafe fn new_unchecked_infer_broadcast(
columns: Vec<Column>,
) -> Result<DataFrame, PolarsError>
pub unsafe fn new_unchecked_infer_broadcast( columns: Vec<Column>, ) -> Result<DataFrame, PolarsError>
Sourcepub fn empty_with_schema(schema: &Schema<DataType>) -> DataFrame
pub fn empty_with_schema(schema: &Schema<DataType>) -> DataFrame
Create a DataFrame with 0 height and columns as per the schema.
Sourcepub fn empty_with_arc_schema(schema: Arc<Schema<DataType>>) -> DataFrame
pub fn empty_with_arc_schema(schema: Arc<Schema<DataType>>) -> DataFrame
Create an empty DataFrame with empty columns as per the schema.
Sourcepub unsafe fn set_height(&mut self, height: usize) -> &mut DataFrame
pub unsafe fn set_height(&mut self, height: usize) -> &mut DataFrame
Sourcepub fn shape(&self) -> (usize, usize)
pub fn shape(&self) -> (usize, usize)
Get (height, width) of the DataFrame.
§Example
let df0: DataFrame = DataFrame::empty();
let df1: DataFrame = df!("1" => [1, 2, 3, 4, 5])?;
let df2: DataFrame = df!("1" => [1, 2, 3, 4, 5],
"2" => [1, 2, 3, 4, 5])?;
assert_eq!(df0.shape(), (0 ,0));
assert_eq!(df1.shape(), (5, 1));
assert_eq!(df2.shape(), (5, 2));Sourcepub fn shape_has_zero(&self) -> bool
pub fn shape_has_zero(&self) -> bool
Returns true if the DataFrame has 0 width or 0 height.
pub fn columns(&self) -> &[Column]
pub fn into_columns(self) -> Vec<Column>
Sourcepub unsafe fn columns_mut(&mut self) -> &mut Vec<Column>
pub unsafe fn columns_mut(&mut self) -> &mut Vec<Column>
§Safety
The caller must ensure the length of all Columns remains equal to self.height, or
that DataFrame::set_height is called afterwards with the new height.
Sourcepub unsafe fn columns_mut_retain_schema(&mut self) -> &mut Vec<Column>
pub unsafe fn columns_mut_retain_schema(&mut self) -> &mut Vec<Column>
§Safety
Adheres to all safety requirements of DataFrame::columns_mut, and that the list of column
names remains unchanged.
pub fn cached_schema(&self) -> Option<&Arc<Schema<DataType>>>
Sourcepub unsafe fn set_schema(
&mut self,
schema: Arc<Schema<DataType>>,
) -> &mut DataFrame
pub unsafe fn set_schema( &mut self, schema: Arc<Schema<DataType>>, ) -> &mut DataFrame
Sourcepub unsafe fn with_schema(self, schema: Arc<Schema<DataType>>) -> DataFrame
pub unsafe fn with_schema(self, schema: Arc<Schema<DataType>>) -> DataFrame
Sourcepub unsafe fn set_opt_schema(
&mut self,
schema: Option<Arc<Schema<DataType>>>,
) -> &mut DataFrame
pub unsafe fn set_opt_schema( &mut self, schema: Option<Arc<Schema<DataType>>>, ) -> &mut DataFrame
Sourcepub unsafe fn set_schema_from(&mut self, from: &DataFrame) -> &mut DataFrame
pub unsafe fn set_schema_from(&mut self, from: &DataFrame) -> &mut DataFrame
Clones the cached schema of the `from` DataFrame into self.cached_schema, if there is one.
§Safety
Schema must match the columns in self.
Sourcepub unsafe fn with_schema_from(self, from: &DataFrame) -> DataFrame
pub unsafe fn with_schema_from(self, from: &DataFrame) -> DataFrame
Clones the cached schema of the `from` DataFrame into self.cached_schema, if there is one.
§Safety
Schema must match the columns in self.
Source§impl DataFrame
impl DataFrame
pub fn explode_impl( &self, columns: Vec<Column>, options: ExplodeOptions, ) -> Result<DataFrame, PolarsError>
Sourcepub fn explode<I, S>(
&self,
columns: I,
options: ExplodeOptions,
) -> Result<DataFrame, PolarsError>
pub fn explode<I, S>( &self, columns: I, options: ExplodeOptions, ) -> Result<DataFrame, PolarsError>
Explode DataFrame to long format by exploding a column with Lists.
§Example
let s0 = Series::new("a".into(), &[1i64, 2, 3]);
let s1 = Series::new("b".into(), &[1i64, 1, 1]);
let s2 = Series::new("c".into(), &[2i64, 2, 2]);
let list = Series::new("foo", &[s0, s1, s2]);
let s0 = Series::new("B".into(), [1, 2, 3]);
let s1 = Series::new("C".into(), [1, 1, 1]);
let df = DataFrame::new_infer_height(vec![list, s0, s1])?;
let exploded = df.explode(["foo"])?;
println!("{:?}", df);
println!("{:?}", exploded);Outputs:
+-------------+-----+-----+
| foo | B | C |
| --- | --- | --- |
| list [i64] | i32 | i32 |
+=============+=====+=====+
| "[1, 2, 3]" | 1 | 1 |
+-------------+-----+-----+
| "[1, 1, 1]" | 2 | 1 |
+-------------+-----+-----+
| "[2, 2, 2]" | 3 | 1 |
+-------------+-----+-----+
+-----+-----+-----+
| foo | B | C |
| --- | --- | --- |
| i64 | i32 | i32 |
+=====+=====+=====+
| 1 | 1 | 1 |
+-----+-----+-----+
| 2 | 1 | 1 |
+-----+-----+-----+
| 3 | 1 | 1 |
+-----+-----+-----+
| 1 | 2 | 1 |
+-----+-----+-----+
| 1 | 2 | 1 |
+-----+-----+-----+
| 1 | 2 | 1 |
+-----+-----+-----+
| 2 | 3 | 1 |
+-----+-----+-----+
| 2 | 3 | 1 |
+-----+-----+-----+
| 2 | 3 | 1 |
+-----+-----+-----+Source§impl DataFrame
impl DataFrame
pub fn group_by_with_series( &self, by: Vec<Column>, multithreaded: bool, sorted: bool, ) -> Result<GroupBy<'_>, PolarsError>
Sourcepub fn group_by<I, S>(&self, by: I) -> Result<GroupBy<'_>, PolarsError>
pub fn group_by<I, S>(&self, by: I) -> Result<GroupBy<'_>, PolarsError>
Group DataFrame using a Series column.
§Example
use polars_core::prelude::*;
fn group_by_sum(df: &DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["column_name"])?
.select(["agg_column_name"])
.sum()
}Sourcepub fn group_by_stable<I, S>(&self, by: I) -> Result<GroupBy<'_>, PolarsError>
pub fn group_by_stable<I, S>(&self, by: I) -> Result<GroupBy<'_>, PolarsError>
Group DataFrame using a Series column. The groups are ordered by their smallest row index.
Source§impl DataFrame
impl DataFrame
Sourcepub unsafe fn hstack_mut_unchecked(
&mut self,
columns: &[Column],
) -> &mut DataFrame
pub unsafe fn hstack_mut_unchecked( &mut self, columns: &[Column], ) -> &mut DataFrame
Add columns horizontally.
§Safety
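The caller must ensure that the resulting DataFrame has no duplicate column names and that all columns have the same height.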
Note: If self is empty, self.height will always be overridden by the height of the first
column in columns.
Note that on a debug build this will panic on duplicates / height mismatch.
Sourcepub fn hstack_mut(
&mut self,
columns: &[Column],
) -> Result<&mut DataFrame, PolarsError>
pub fn hstack_mut( &mut self, columns: &[Column], ) -> Result<&mut DataFrame, PolarsError>
Add multiple Column to a DataFrame.
Errors if the resulting DataFrame columns have duplicate names or unequal heights.
Note: If self is empty, self.height will always be overridden by the height of the first
column in columns.
§Example
fn stack(df: &mut DataFrame, columns: &[Column]) {
df.hstack_mut(columns);
}Source§impl DataFrame
impl DataFrame
Sourcepub fn get_row(&self, idx: usize) -> Result<Row<'_>, PolarsError>
pub fn get_row(&self, idx: usize) -> Result<Row<'_>, PolarsError>
Get a row from a DataFrame. Use of this is discouraged as it will likely be slow.
Sourcepub fn get_row_amortized<'a>(
&'a self,
idx: usize,
row: &mut Row<'a>,
) -> Result<(), PolarsError>
pub fn get_row_amortized<'a>( &'a self, idx: usize, row: &mut Row<'a>, ) -> Result<(), PolarsError>
Amortize allocations by reusing a row.
The caller is responsible for making sure that the row has at least enough capacity for the number
of columns in the DataFrame.
Sourcepub unsafe fn get_row_amortized_unchecked<'a>(
&'a self,
idx: usize,
row: &mut Row<'a>,
)
pub unsafe fn get_row_amortized_unchecked<'a>( &'a self, idx: usize, row: &mut Row<'a>, )
Sourcepub fn from_rows_and_schema(
rows: &[Row<'_>],
schema: &Schema<DataType>,
) -> Result<DataFrame, PolarsError>
pub fn from_rows_and_schema( rows: &[Row<'_>], schema: &Schema<DataType>, ) -> Result<DataFrame, PolarsError>
Sourcepub fn from_rows_iter_and_schema<'a, I>(
rows: I,
schema: &Schema<DataType>,
) -> Result<DataFrame, PolarsError>
pub fn from_rows_iter_and_schema<'a, I>( rows: I, schema: &Schema<DataType>, ) -> Result<DataFrame, PolarsError>
Sourcepub fn try_from_rows_iter_and_schema<'a, I>(
rows: I,
schema: &Schema<DataType>,
) -> Result<DataFrame, PolarsError>
pub fn try_from_rows_iter_and_schema<'a, I>( rows: I, schema: &Schema<DataType>, ) -> Result<DataFrame, PolarsError>
Source§impl DataFrame
impl DataFrame
pub fn transpose( &mut self, keep_names_as: Option<&str>, new_col_names: Option<Either<String, Vec<String>>>, ) -> Result<DataFrame, PolarsError>
Sourcepub fn transpose_impl(
&mut self,
keep_names_as: Option<&str>,
new_col_names: Option<Either<PlSmallStr, Vec<PlSmallStr>>>,
) -> Result<DataFrame, PolarsError>
pub fn transpose_impl( &mut self, keep_names_as: Option<&str>, new_col_names: Option<Either<PlSmallStr, Vec<PlSmallStr>>>, ) -> Result<DataFrame, PolarsError>
Transpose a DataFrame. This is a very expensive operation.
Source§impl DataFrame
impl DataFrame
pub fn materialized_column_iter(&self) -> impl ExactSizeIterator
Sourcepub fn estimated_size(&self) -> usize
pub fn estimated_size(&self) -> usize
Returns an estimation of the total (heap) allocated size of the DataFrame in bytes.
§Implementation
This estimation is the sum of the size of its buffers, validity, including nested arrays.
Multiple arrays may share buffers and bitmaps. Therefore, the size of 2 arrays is not the
sum of the sizes computed from this function. In particular, StructArray’s size is an upper bound.
When an array is sliced, its allocated size remains constant because the buffer is unchanged. However, this function will yield a smaller number. This is because this function returns the visible size of the buffer, not its total capacity.
FFI buffers are included in this estimation.
pub fn try_apply_columns( &self, func: impl Fn(&Column) -> Result<Column, PolarsError> + Send + Sync, ) -> Result<Vec<Column>, PolarsError>
pub fn apply_columns( &self, func: impl Fn(&Column) -> Column + Send + Sync, ) -> Vec<Column>
pub fn try_apply_columns_par( &self, func: impl Fn(&Column) -> Result<Column, PolarsError> + Send + Sync, ) -> Result<Vec<Column>, PolarsError>
pub fn apply_columns_par( &self, func: impl Fn(&Column) -> Column + Send + Sync, ) -> Vec<Column>
pub fn new_from_index(&self, index: usize, height: usize) -> DataFrame
Sourcepub fn full_null(schema: &Schema<DataType>, height: usize) -> DataFrame
pub fn full_null(schema: &Schema<DataType>, height: usize) -> DataFrame
Create a new DataFrame with the given schema, only containing nulls.
Sourcepub fn ensure_matches_schema(
&mut self,
schema: &Schema<DataType>,
) -> Result<(), PolarsError>
pub fn ensure_matches_schema( &mut self, schema: &Schema<DataType>, ) -> Result<(), PolarsError>
Ensure this DataFrame matches the given schema. Casts null columns to the expected schema if necessary (but nothing else).
Sourcepub fn with_row_index(
&self,
name: PlSmallStr,
offset: Option<u32>,
) -> Result<DataFrame, PolarsError>
pub fn with_row_index( &self, name: PlSmallStr, offset: Option<u32>, ) -> Result<DataFrame, PolarsError>
Add a new column at index 0 that counts the rows.
§Example
let df1: DataFrame = df!("Name" => ["James", "Mary", "John", "Patricia"])?;
assert_eq!(df1.shape(), (4, 1));
let df2: DataFrame = df1.with_row_index("Id".into(), None)?;
assert_eq!(df2.shape(), (4, 2));
println!("{}", df2);
Output:
shape: (4, 2)
+-----+----------+
| Id | Name |
| --- | --- |
| u32 | str |
+=====+==========+
| 0 | James |
+-----+----------+
| 1 | Mary |
+-----+----------+
| 2 | John |
+-----+----------+
| 3 | Patricia |
+-----+----------+Sourcepub unsafe fn with_row_index_mut(
&mut self,
name: PlSmallStr,
offset: Option<u32>,
) -> &mut DataFrame
pub unsafe fn with_row_index_mut( &mut self, name: PlSmallStr, offset: Option<u32>, ) -> &mut DataFrame
Sourcepub fn shrink_to_fit(&mut self)
pub fn shrink_to_fit(&mut self)
Shrink the capacity of this DataFrame to fit its length.
Sourcepub fn rechunk_mut_par(&mut self) -> &mut DataFrame
pub fn rechunk_mut_par(&mut self) -> &mut DataFrame
Aggregate all the chunks in the DataFrame to a single chunk in parallel. This may lead to more peak memory consumption.
Sourcepub fn rechunk_mut(&mut self) -> &mut DataFrame
pub fn rechunk_mut(&mut self) -> &mut DataFrame
Rechunks all columns to only have a single chunk.
Sourcepub fn should_rechunk(&self) -> bool
pub fn should_rechunk(&self) -> bool
Returns true if the chunks of the columns do not align and re-chunking should be done
Sourcepub fn align_chunks_par(&mut self) -> &mut DataFrame
pub fn align_chunks_par(&mut self) -> &mut DataFrame
Ensure all the chunks in the DataFrame are aligned.
Sourcepub fn align_chunks(&mut self) -> &mut DataFrame
pub fn align_chunks(&mut self) -> &mut DataFrame
Ensure all the chunks in the DataFrame are aligned.
Sourcepub fn get_column_names(&self) -> Vec<&PlSmallStr>
pub fn get_column_names(&self) -> Vec<&PlSmallStr>
§Example
let df: DataFrame = df!("Language" => ["Rust", "Python"],
"Designer" => ["Graydon Hoare", "Guido van Rossum"])?;
assert_eq!(df.get_column_names(), &["Language", "Designer"]);Sourcepub fn get_column_names_owned(&self) -> Vec<PlSmallStr>
pub fn get_column_names_owned(&self) -> Vec<PlSmallStr>
Get the Vec<PlSmallStr> representing the column names.
Sourcepub fn set_column_names<T>(
&mut self,
new_names: &[T],
) -> Result<(), PolarsError>
pub fn set_column_names<T>( &mut self, new_names: &[T], ) -> Result<(), PolarsError>
Set the column names.
§Example
let mut df: DataFrame = df!("Mathematical set" => ["ℕ", "ℤ", "𝔻", "ℚ", "ℝ", "ℂ"])?;
df.set_column_names(&["Set"])?;
assert_eq!(df.get_column_names(), &["Set"]);Sourcepub fn first_col_n_chunks(&self) -> usize
pub fn first_col_n_chunks(&self) -> usize
The number of chunks for the first column.
Sourcepub fn max_n_chunks(&self) -> usize
pub fn max_n_chunks(&self) -> usize
The highest number of chunks for any column.
Sourcepub fn fields(&self) -> Vec<Field>
pub fn fields(&self) -> Vec<Field>
Generate the schema fields of the DataFrame.
§Example
let earth: DataFrame = df!("Surface type" => ["Water", "Land"],
"Fraction" => [0.708, 0.292])?;
let f1: Field = Field::new("Surface type".into(), DataType::String);
let f2: Field = Field::new("Fraction".into(), DataType::Float64);
assert_eq!(earth.fields(), &[f1, f2]);Sourcepub fn hstack(&self, columns: &[Column]) -> Result<DataFrame, PolarsError>
pub fn hstack(&self, columns: &[Column]) -> Result<DataFrame, PolarsError>
Add multiple Columns to a DataFrame.
The added Columns are required to have the same length.
§Example
let df1: DataFrame = df!("Element" => ["Copper", "Silver", "Gold"])?;
let s1 = Column::new("Proton".into(), [29, 47, 79]);
let s2 = Column::new("Electron".into(), [29, 47, 79]);
let df2: DataFrame = df1.hstack(&[s1, s2])?;
assert_eq!(df2.shape(), (3, 3));
println!("{}", df2);Output:
shape: (3, 3)
+---------+--------+----------+
| Element | Proton | Electron |
| --- | --- | --- |
| str | i32 | i32 |
+=========+========+==========+
| Copper | 29 | 29 |
+---------+--------+----------+
| Silver | 47 | 47 |
+---------+--------+----------+
| Gold | 79 | 79 |
+---------+--------+----------+Sourcepub fn vstack(&self, other: &DataFrame) -> Result<DataFrame, PolarsError>
pub fn vstack(&self, other: &DataFrame) -> Result<DataFrame, PolarsError>
Concatenate a DataFrame to this DataFrame and return as newly allocated DataFrame.
If many vstack operations are done, it is recommended to call DataFrame::align_chunks_par.
§Example
let df1: DataFrame = df!("Element" => ["Copper", "Silver", "Gold"],
"Melting Point (K)" => [1357.77, 1234.93, 1337.33])?;
let df2: DataFrame = df!("Element" => ["Platinum", "Palladium"],
"Melting Point (K)" => [2041.4, 1828.05])?;
let df3: DataFrame = df1.vstack(&df2)?;
assert_eq!(df3.shape(), (5, 2));
println!("{}", df3);Output:
shape: (5, 2)
+-----------+-------------------+
| Element | Melting Point (K) |
| --- | --- |
| str | f64 |
+===========+===================+
| Copper | 1357.77 |
+-----------+-------------------+
| Silver | 1234.93 |
+-----------+-------------------+
| Gold | 1337.33 |
+-----------+-------------------+
| Platinum | 2041.4 |
+-----------+-------------------+
| Palladium | 1828.05 |
+-----------+-------------------+Sourcepub fn vstack_mut(
&mut self,
other: &DataFrame,
) -> Result<&mut DataFrame, PolarsError>
pub fn vstack_mut( &mut self, other: &DataFrame, ) -> Result<&mut DataFrame, PolarsError>
Concatenate a DataFrame to this DataFrame
If many vstack operations are done, it is recommended to call DataFrame::align_chunks_par.
§Example
let mut df1: DataFrame = df!("Element" => ["Copper", "Silver", "Gold"],
"Melting Point (K)" => [1357.77, 1234.93, 1337.33])?;
let df2: DataFrame = df!("Element" => ["Platinum", "Palladium"],
"Melting Point (K)" => [2041.4, 1828.05])?;
df1.vstack_mut(&df2)?;
assert_eq!(df1.shape(), (5, 2));
println!("{}", df1);Output:
shape: (5, 2)
+-----------+-------------------+
| Element | Melting Point (K) |
| --- | --- |
| str | f64 |
+===========+===================+
| Copper | 1357.77 |
+-----------+-------------------+
| Silver | 1234.93 |
+-----------+-------------------+
| Gold | 1337.33 |
+-----------+-------------------+
| Platinum | 2041.4 |
+-----------+-------------------+
| Palladium | 1828.05 |
+-----------+-------------------+pub fn vstack_mut_owned( &mut self, other: DataFrame, ) -> Result<&mut DataFrame, PolarsError>
Sourcepub fn vstack_mut_unchecked(&mut self, other: &DataFrame) -> &mut DataFrame
pub fn vstack_mut_unchecked(&mut self, other: &DataFrame) -> &mut DataFrame
Concatenate a DataFrame to this DataFrame
If many vstack operations are done, it is recommended to call DataFrame::align_chunks_par.
§Panics
Panics if the schemas don’t match.
Sourcepub fn vstack_mut_owned_unchecked(&mut self, other: DataFrame) -> &mut DataFrame
pub fn vstack_mut_owned_unchecked(&mut self, other: DataFrame) -> &mut DataFrame
Concatenate a DataFrame to this DataFrame
If many vstack operations are done, it is recommended to call DataFrame::align_chunks_par.
§Panics
Panics if the schemas don’t match.
Sourcepub fn extend(&mut self, other: &DataFrame) -> Result<(), PolarsError>
pub fn extend(&mut self, other: &DataFrame) -> Result<(), PolarsError>
Extend the memory backed by this DataFrame with the values from other.
Different from vstack, which adds the chunks from other to the chunks of this DataFrame,
extend appends the data from other to the underlying memory locations and thus may cause a reallocation.
If this does not cause a reallocation, the resulting data structure will not have any extra chunks and thus will yield faster queries.
Prefer extend over vstack when you want to do a query after a single append. For instance during
online operations where you add n rows and rerun a query.
Prefer vstack over extend when you want to append many times before doing a query. For instance
when you read in multiple files and want to store them in a single DataFrame. In the latter case, finish the sequence
of append operations with a rechunk.
Sourcepub fn drop_in_place(&mut self, name: &str) -> Result<Column, PolarsError>
pub fn drop_in_place(&mut self, name: &str) -> Result<Column, PolarsError>
Remove a column by name and return the column removed.
§Example
let mut df: DataFrame = df!("Animal" => ["Tiger", "Lion", "Great auk"],
"IUCN" => ["Endangered", "Vulnerable", "Extinct"])?;
let s1: PolarsResult<Column> = df.drop_in_place("Average weight");
assert!(s1.is_err());
let s2: Column = df.drop_in_place("Animal")?;
assert_eq!(s2, Column::new("Animal".into(), &["Tiger", "Lion", "Great auk"]));Sourcepub fn drop_nulls<S>(
&self,
subset: Option<&[S]>,
) -> Result<DataFrame, PolarsError>
pub fn drop_nulls<S>( &self, subset: Option<&[S]>, ) -> Result<DataFrame, PolarsError>
Return a new DataFrame where all null values are dropped.
§Example
let df1: DataFrame = df!("Country" => ["Malta", "Liechtenstein", "North Korea"],
"Tax revenue (% GDP)" => [Some(32.7), None, None])?;
assert_eq!(df1.shape(), (3, 2));
let df2: DataFrame = df1.drop_nulls::<String>(None)?;
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);Output:
shape: (1, 2)
+---------+---------------------+
| Country | Tax revenue (% GDP) |
| --- | --- |
| str | f64 |
+=========+=====================+
| Malta | 32.7 |
+---------+---------------------+Sourcepub fn drop_many_amortized(
&self,
names: &HashSet<PlSmallStr, RandomState>,
) -> DataFrame
pub fn drop_many_amortized( &self, names: &HashSet<PlSmallStr, RandomState>, ) -> DataFrame
Drop columns that are in names without allocating a HashSet.
Sourcepub fn insert_column(
&mut self,
index: usize,
column: Column,
) -> Result<&mut DataFrame, PolarsError>
pub fn insert_column( &mut self, index: usize, column: Column, ) -> Result<&mut DataFrame, PolarsError>
Insert a new column at a given index.
Sourcepub fn with_column(
&mut self,
column: Column,
) -> Result<&mut DataFrame, PolarsError>
pub fn with_column( &mut self, column: Column, ) -> Result<&mut DataFrame, PolarsError>
Add a new column to this DataFrame or replace an existing one. Broadcasts unit-length
columns.
Sourcepub unsafe fn push_column_unchecked(&mut self, column: Column) -> &mut DataFrame
pub unsafe fn push_column_unchecked(&mut self, column: Column) -> &mut DataFrame
Sourcepub fn with_columns_mut(
&mut self,
columns: impl IntoIterator<Item = Column>,
output_schema: &Schema<DataType>,
) -> Result<(), PolarsError>
pub fn with_columns_mut( &mut self, columns: impl IntoIterator<Item = Column>, output_schema: &Schema<DataType>, ) -> Result<(), PolarsError>
Add new columns to this DataFrame or replace existing ones.
Broadcasts unit-length columns, and uses an existing schema to amortize lookups.
Sourcepub fn select_at_idx(&self, idx: usize) -> Option<&Column>
pub fn select_at_idx(&self, idx: usize) -> Option<&Column>
Select a Column by index.
§Example
let df: DataFrame = df!("Star" => ["Sun", "Betelgeuse", "Sirius A", "Sirius B"],
"Absolute magnitude" => [4.83, -5.85, 1.42, 11.18])?;
let s1: Option<&Column> = df.select_at_idx(0);
let s2 = Column::new("Star".into(), ["Sun", "Betelgeuse", "Sirius A", "Sirius B"]);
assert_eq!(s1, Some(&s2));Sourcepub fn get_column_index(&self, name: &str) -> Option<usize>
pub fn get_column_index(&self, name: &str) -> Option<usize>
Get the index of a Column by name.
§Example
let df: DataFrame = df!("Name" => ["Player 1", "Player 2", "Player 3"],
"Health" => [100, 200, 500],
"Mana" => [250, 100, 0],
"Strength" => [30, 150, 300])?;
assert_eq!(df.get_column_index("Name"), Some(0));
assert_eq!(df.get_column_index("Health"), Some(1));
assert_eq!(df.get_column_index("Mana"), Some(2));
assert_eq!(df.get_column_index("Strength"), Some(3));
assert_eq!(df.get_column_index("Haste"), None);Sourcepub fn try_get_column_index(&self, name: &str) -> Result<usize, PolarsError>
pub fn try_get_column_index(&self, name: &str) -> Result<usize, PolarsError>
Get the index of a Column by name, returning an error if no column has that name.
Sourcepub fn column(&self, name: &str) -> Result<&Column, PolarsError>
pub fn column(&self, name: &str) -> Result<&Column, PolarsError>
Select a single column by name.
§Example
let s1 = Column::new("Password".into(), ["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]);
let s2 = Column::new("Robustness".into(), ["Weak", "Strong"]);
let df: DataFrame = DataFrame::new_infer_height(vec![s1.clone(), s2])?;
assert_eq!(df.column("Password")?, &s1);Sourcepub fn select<I, S>(&self, names: I) -> Result<DataFrame, PolarsError>
pub fn select<I, S>(&self, names: I) -> Result<DataFrame, PolarsError>
Sourcepub unsafe fn select_unchecked<I, S>(
&self,
names: I,
) -> Result<DataFrame, PolarsError>
pub unsafe fn select_unchecked<I, S>( &self, names: I, ) -> Result<DataFrame, PolarsError>
Sourcepub fn select_to_vec(
&self,
selection: impl IntoIterator<Item = impl AsRef<str>>,
) -> Result<Vec<Column>, PolarsError>
pub fn select_to_vec( &self, selection: impl IntoIterator<Item = impl AsRef<str>>, ) -> Result<Vec<Column>, PolarsError>
Select column(s) from this DataFrame and return them into a Vec.
This does not error on duplicate selections.
§Example
let df: DataFrame = df!("Name" => ["Methane", "Ethane", "Propane"],
"Carbon" => [1, 2, 3],
"Hydrogen" => [4, 6, 8])?;
let sv: Vec<Column> = df.select_to_vec(["Carbon", "Hydrogen"])?;
assert_eq!(df["Carbon"], sv[0]);
assert_eq!(df["Hydrogen"], sv[1]);Sourcepub fn filter(
&self,
mask: &ChunkedArray<BooleanType>,
) -> Result<DataFrame, PolarsError>
pub fn filter( &self, mask: &ChunkedArray<BooleanType>, ) -> Result<DataFrame, PolarsError>
Sourcepub fn filter_seq(
&self,
mask: &ChunkedArray<BooleanType>,
) -> Result<DataFrame, PolarsError>
pub fn filter_seq( &self, mask: &ChunkedArray<BooleanType>, ) -> Result<DataFrame, PolarsError>
Same as filter but does not parallelize.
Sourcepub fn take(
&self,
indices: &ChunkedArray<UInt32Type>,
) -> Result<DataFrame, PolarsError>
pub fn take( &self, indices: &ChunkedArray<UInt32Type>, ) -> Result<DataFrame, PolarsError>
Sourcepub unsafe fn take_unchecked(&self, idx: &ChunkedArray<UInt32Type>) -> DataFrame
pub unsafe fn take_unchecked(&self, idx: &ChunkedArray<UInt32Type>) -> DataFrame
§Safety
The indices must be in-bounds.
Sourcepub unsafe fn gather_group_unchecked(
&self,
group: &GroupsIndicator<'_>,
) -> DataFrame
pub unsafe fn gather_group_unchecked( &self, group: &GroupsIndicator<'_>, ) -> DataFrame
§Safety
The indices must be in-bounds.
Sourcepub unsafe fn take_unchecked_impl(
&self,
idx: &ChunkedArray<UInt32Type>,
allow_threads: bool,
) -> DataFrame
pub unsafe fn take_unchecked_impl( &self, idx: &ChunkedArray<UInt32Type>, allow_threads: bool, ) -> DataFrame
§Safety
The indices must be in-bounds.
Sourcepub unsafe fn take_slice_unchecked(&self, idx: &[u32]) -> DataFrame
pub unsafe fn take_slice_unchecked(&self, idx: &[u32]) -> DataFrame
§Safety
The indices must be in-bounds.
Sourcepub unsafe fn take_slice_unchecked_impl(
&self,
idx: &[u32],
allow_threads: bool,
) -> DataFrame
pub unsafe fn take_slice_unchecked_impl( &self, idx: &[u32], allow_threads: bool, ) -> DataFrame
§Safety
The indices must be in-bounds.
Sourcepub fn rename(
&mut self,
column: &str,
name: PlSmallStr,
) -> Result<&mut DataFrame, PolarsError>
pub fn rename( &mut self, column: &str, name: PlSmallStr, ) -> Result<&mut DataFrame, PolarsError>
pub fn rename_many<'a>( &mut self, renames: impl Iterator<Item = (&'a str, PlSmallStr)>, ) -> Result<&mut DataFrame, PolarsError>
Sourcepub fn sort_in_place(
&mut self,
by: impl IntoIterator<Item = impl AsRef<str>>,
sort_options: SortMultipleOptions,
) -> Result<&mut DataFrame, PolarsError>
pub fn sort_in_place( &mut self, by: impl IntoIterator<Item = impl AsRef<str>>, sort_options: SortMultipleOptions, ) -> Result<&mut DataFrame, PolarsError>
Sort DataFrame in place.
See DataFrame::sort for more information.
Sourcepub fn _to_metadata(&self) -> DataFrame
pub fn _to_metadata(&self) -> DataFrame
Create a DataFrame that has fields for all the known runtime metadata for each column.
This dataframe does not necessarily have a specified schema and may be changed at any point. It is primarily used for debugging.
Sourcepub fn sort(
&self,
by: impl IntoIterator<Item = impl AsRef<str>>,
sort_options: SortMultipleOptions,
) -> Result<DataFrame, PolarsError>
pub fn sort( &self, by: impl IntoIterator<Item = impl AsRef<str>>, sort_options: SortMultipleOptions, ) -> Result<DataFrame, PolarsError>
Return a sorted clone of this DataFrame.
In many cases the output chunks will be contiguous in memory, but this is not guaranteed.
§Example
Sort by a single column with default options:
fn sort_by_sepal_width(df: &DataFrame) -> PolarsResult<DataFrame> {
df.sort(["sepal_width"], Default::default())
}
Sort by a single column with a specific order:
fn sort_with_specific_order(df: &DataFrame, descending: bool) -> PolarsResult<DataFrame> {
df.sort(
["sepal_width"],
SortMultipleOptions::new()
.with_order_descending(descending)
)
}
Sort by multiple columns, specifying the order for each column:
fn sort_by_multiple_columns_with_specific_order(df: &DataFrame) -> PolarsResult<DataFrame> {
df.sort(
["sepal_width", "sepal_length"],
SortMultipleOptions::new()
.with_order_descending_multi([false, true])
)
}
See SortMultipleOptions for more options.
Also see DataFrame::sort_in_place.
Sourcepub fn replace(
&mut self,
column: &str,
new_col: Column,
) -> Result<&mut DataFrame, PolarsError>
pub fn replace( &mut self, column: &str, new_col: Column, ) -> Result<&mut DataFrame, PolarsError>
Replace a column with a Column.
§Example
let mut df: DataFrame = df!("Country" => ["United States", "China"],
"Area (km²)" => [9_833_520, 9_596_961])?;
let s: Column = Column::new("Country".into(), ["USA", "PRC"]);
assert!(df.replace("Nation", s.clone()).is_err());
assert!(df.replace("Country", s).is_ok());Sourcepub fn replace_column(
&mut self,
index: usize,
new_column: Column,
) -> Result<&mut DataFrame, PolarsError>
pub fn replace_column( &mut self, index: usize, new_column: Column, ) -> Result<&mut DataFrame, PolarsError>
Replace the column at position index with the given Column.
§Example
# use polars_core::prelude::*;
let s0 = Series::new("foo".into(), ["ham", "spam", "egg"]);
let s1 = Series::new("ascii".into(), [70, 79, 79]);
let mut df = DataFrame::new_infer_height(vec![s0, s1])?;
// Add 32 to get lowercase ascii values
df.replace_column(1, df.select_at_idx(1).unwrap() + 32);
# Ok::<(), PolarsError>(())Sourcepub fn apply<F, C>(
&mut self,
name: &str,
f: F,
) -> Result<&mut DataFrame, PolarsError>
pub fn apply<F, C>( &mut self, name: &str, f: F, ) -> Result<&mut DataFrame, PolarsError>
Apply a closure to a column. This is the recommended way to do in place modification.
§Example
let s0 = Column::new("foo".into(), ["ham", "spam", "egg"]);
let s1 = Column::new("names".into(), ["Jean", "Claude", "van"]);
let mut df = DataFrame::new_infer_height(vec![s0, s1])?;
fn str_to_len(str_val: &Column) -> Column {
str_val.str()
.unwrap()
.into_iter()
.map(|opt_name: Option<&str>| {
opt_name.map(|name: &str| name.len() as u32)
})
.collect::<UInt32Chunked>()
.into_column()
}
// Replace the names column by the length of the names.
df.apply("names", str_to_len);Results in:
+--------+-------+
| foo | names |
| --- | --- |
| str | u32 |
+========+=======+
| "ham" | 4 |
+--------+-------+
| "spam" | 6 |
+--------+-------+
| "egg" | 3 |
+--------+-------+Sourcepub fn apply_at_idx<F, C>(
&mut self,
idx: usize,
f: F,
) -> Result<&mut DataFrame, PolarsError>
pub fn apply_at_idx<F, C>( &mut self, idx: usize, f: F, ) -> Result<&mut DataFrame, PolarsError>
Apply a closure to a column at index idx. This is the recommended way to do in place
modification.
§Example
let s0 = Column::new("foo".into(), ["ham", "spam", "egg"]);
let s1 = Column::new("ascii".into(), [70, 79, 79]);
let mut df = DataFrame::new_infer_height(vec![s0, s1])?;
// Add 32 to get lowercase ascii values
df.apply_at_idx(1, |s| s + 32);Results in:
+--------+-------+
| foo | ascii |
| --- | --- |
| str | i32 |
+========+=======+
| "ham" | 102 |
+--------+-------+
| "spam" | 111 |
+--------+-------+
| "egg" | 111 |
+--------+-------+Sourcepub fn try_apply_at_idx<F, C>(
&mut self,
idx: usize,
f: F,
) -> Result<&mut DataFrame, PolarsError>
pub fn try_apply_at_idx<F, C>( &mut self, idx: usize, f: F, ) -> Result<&mut DataFrame, PolarsError>
Apply a closure that may fail to a column at index idx. This is the recommended way to do in place
modification.
§Example
This is the idiomatic way to replace some values in a column of a DataFrame given a range of indexes.
let s0 = Column::new("foo".into(), ["ham", "spam", "egg", "bacon", "quack"]);
let s1 = Column::new("values".into(), [1, 2, 3, 4, 5]);
let mut df = DataFrame::new_infer_height(vec![s0, s1])?;
let idx = vec![0, 1, 4];
df.try_apply_at_idx(0, |c| {
c.str()?
.scatter_with(idx, |opt_val| opt_val.map(|string| format!("{}-is-modified", string)))
});Results in:
+---------------------+--------+
| foo | values |
| --- | --- |
| str | i32 |
+=====================+========+
| "ham-is-modified" | 1 |
+---------------------+--------+
| "spam-is-modified" | 2 |
+---------------------+--------+
| "egg" | 3 |
+---------------------+--------+
| "bacon" | 4 |
+---------------------+--------+
| "quack-is-modified" | 5 |
+---------------------+--------+Sourcepub fn try_apply<F, C>(
&mut self,
column: &str,
f: F,
) -> Result<&mut DataFrame, PolarsError>
pub fn try_apply<F, C>( &mut self, column: &str, f: F, ) -> Result<&mut DataFrame, PolarsError>
Apply a closure that may fail to a column. This is the recommended way to do in place modification.
§Example
This is the idiomatic way to replace some values in a column of a DataFrame given a boolean mask.
let s0 = Column::new("foo".into(), ["ham", "spam", "egg", "bacon", "quack"]);
let s1 = Column::new("values".into(), [1, 2, 3, 4, 5]);
let mut df = DataFrame::new_infer_height(vec![s0, s1])?;
// create a mask
let values = df.column("values")?.as_materialized_series();
let mask = values.lt_eq(1)? | values.gt_eq(5_i32)?;
df.try_apply("foo", |c| {
c.str()?
.set(&mask, Some("not_within_bounds"))
});Results in:
+---------------------+--------+
| foo | values |
| --- | --- |
| str | i32 |
+=====================+========+
| "not_within_bounds" | 1 |
+---------------------+--------+
| "spam" | 2 |
+---------------------+--------+
| "egg" | 3 |
+---------------------+--------+
| "bacon" | 4 |
+---------------------+--------+
| "not_within_bounds" | 5 |
+---------------------+--------+Sourcepub fn slice(&self, offset: i64, length: usize) -> DataFrame
pub fn slice(&self, offset: i64, length: usize) -> DataFrame
Slice the DataFrame along the rows.
§Example
let df: DataFrame = df!("Fruit" => ["Apple", "Grape", "Grape", "Fig", "Fig"],
"Color" => ["Green", "Red", "White", "White", "Red"])?;
let sl: DataFrame = df.slice(2, 3);
assert_eq!(sl.shape(), (3, 2));
println!("{}", sl);Output:
shape: (3, 2)
+-------+-------+
| Fruit | Color |
| --- | --- |
| str | str |
+=======+=======+
| Grape | White |
+-------+-------+
| Fig | White |
+-------+-------+
| Fig | Red |
+-------+-------+Sourcepub fn split_at(&self, offset: i64) -> (DataFrame, DataFrame)
pub fn split_at(&self, offset: i64) -> (DataFrame, DataFrame)
Split DataFrame at the given offset.
pub fn clear(&self) -> DataFrame
pub fn slice_par(&self, offset: i64, length: usize) -> DataFrame
pub fn _slice_and_realloc(&self, offset: i64, length: usize) -> DataFrame
Sourcepub fn head(&self, length: Option<usize>) -> DataFrame
pub fn head(&self, length: Option<usize>) -> DataFrame
Get the head of the DataFrame.
§Example
let countries: DataFrame =
df!("Rank by GDP (2021)" => [1, 2, 3, 4, 5],
"Continent" => ["North America", "Asia", "Asia", "Europe", "Europe"],
"Country" => ["United States", "China", "Japan", "Germany", "United Kingdom"],
"Capital" => ["Washington", "Beijing", "Tokyo", "Berlin", "London"])?;
assert_eq!(countries.shape(), (5, 4));
println!("{}", countries.head(Some(3)));Output:
shape: (3, 4)
+--------------------+---------------+---------------+------------+
| Rank by GDP (2021) | Continent | Country | Capital |
| --- | --- | --- | --- |
| i32 | str | str | str |
+====================+===============+===============+============+
| 1 | North America | United States | Washington |
+--------------------+---------------+---------------+------------+
| 2 | Asia | China | Beijing |
+--------------------+---------------+---------------+------------+
| 3 | Asia | Japan | Tokyo |
+--------------------+---------------+---------------+------------+Sourcepub fn tail(&self, length: Option<usize>) -> DataFrame
pub fn tail(&self, length: Option<usize>) -> DataFrame
Get the tail of the DataFrame.
§Example
let countries: DataFrame =
df!("Rank (2021)" => [105, 106, 107, 108, 109],
"Apple Price (€/kg)" => [0.75, 0.70, 0.70, 0.65, 0.52],
"Country" => ["Kosovo", "Moldova", "North Macedonia", "Syria", "Turkey"])?;
assert_eq!(countries.shape(), (5, 3));
println!("{}", countries.tail(Some(2)));Output:
shape: (2, 3)
+-------------+--------------------+---------+
| Rank (2021) | Apple Price (€/kg) | Country |
| --- | --- | --- |
| i32 | f64 | str |
+=============+====================+=========+
| 108 | 0.65 | Syria |
+-------------+--------------------+---------+
| 109 | 0.52 | Turkey |
+-------------+--------------------+---------+Sourcepub fn iter_chunks(
&self,
compat_level: CompatLevel,
parallel: bool,
) -> impl Iterator<Item = RecordBatchT<Box<dyn Array>>>
pub fn iter_chunks( &self, compat_level: CompatLevel, parallel: bool, ) -> impl Iterator<Item = RecordBatchT<Box<dyn Array>>>
Iterator over the rows in this DataFrame as Arrow RecordBatches.
§Panics
Panics if the DataFrame that is passed is not rechunked.
This responsibility is left to the caller as we don’t want to take mutable references here, but we also don’t want to rechunk here, as this operation is costly and would benefit the caller as well.
Sourcepub fn iter_chunks_physical(
&self,
) -> impl Iterator<Item = RecordBatchT<Box<dyn Array>>>
pub fn iter_chunks_physical( &self, ) -> impl Iterator<Item = RecordBatchT<Box<dyn Array>>>
Iterator over the rows in this DataFrame as Arrow RecordBatches as physical values.
§Panics
Panics if the DataFrame that is passed is not rechunked.
This responsibility is left to the caller as we don’t want to take mutable references here, but we also don’t want to rechunk here, as this operation is costly and would benefit the caller as well.
Sourcepub fn shift(&self, periods: i64) -> DataFrame
pub fn shift(&self, periods: i64) -> DataFrame
Shift the values by a given period and fill the parts that will be empty due to this operation
with Nones.
See the method on Series for more info on the shift operation.
Sourcepub fn fill_null(
&self,
strategy: FillNullStrategy,
) -> Result<DataFrame, PolarsError>
pub fn fill_null( &self, strategy: FillNullStrategy, ) -> Result<DataFrame, PolarsError>
Replace None values with one of the following strategies:
- Forward fill (replace None with the previous value)
- Backward fill (replace None with the next value)
- Mean fill (replace None with the mean of the whole array)
- Min fill (replace None with the minimum of the whole array)
- Max fill (replace None with the maximum of the whole array)
See the method on Series for more info on the fill_null operation.
Sourcepub fn pipe<F, B>(self, f: F) -> Result<B, PolarsError>
pub fn pipe<F, B>(self, f: F) -> Result<B, PolarsError>
Pipe different functions/closure operations that work on a DataFrame together.
Sourcepub fn pipe_mut<F, B>(&mut self, f: F) -> Result<B, PolarsError>
pub fn pipe_mut<F, B>(&mut self, f: F) -> Result<B, PolarsError>
Pipe different functions/closure operations that work on a DataFrame together.
Sourcepub fn pipe_with_args<F, B, Args>(
self,
f: F,
args: Args,
) -> Result<B, PolarsError>
pub fn pipe_with_args<F, B, Args>( self, f: F, args: Args, ) -> Result<B, PolarsError>
Pipe different functions/closure operations that work on a DataFrame together.
Sourcepub fn unique_stable(
&self,
subset: Option<&[String]>,
keep: UniqueKeepStrategy,
slice: Option<(i64, usize)>,
) -> Result<DataFrame, PolarsError>
Available on crate feature algorithm_group_by only.
pub fn unique_stable( &self, subset: Option<&[String]>, keep: UniqueKeepStrategy, slice: Option<(i64, usize)>, ) -> Result<DataFrame, PolarsError>
Available on crate feature algorithm_group_by only.
Drop duplicate rows from a DataFrame.
This fails when there is a column of type List in the DataFrame.
Stable means that the order is maintained. This has a higher cost than an unstable distinct.
§Example
let df = df! {
"flt" => [1., 1., 2., 2., 3., 3.],
"int" => [1, 1, 2, 2, 3, 3, ],
"str" => ["a", "a", "b", "b", "c", "c"]
}?;
println!("{}", df.unique_stable(None, UniqueKeepStrategy::First, None)?);Returns
+-----+-----+-----+
| flt | int | str |
| --- | --- | --- |
| f64 | i32 | str |
+=====+=====+=====+
| 1 | 1 | "a" |
+-----+-----+-----+
| 2 | 2 | "b" |
+-----+-----+-----+
| 3 | 3 | "c" |
+-----+-----+-----+Sourcepub fn unique<I, S>(
&self,
subset: Option<&[String]>,
keep: UniqueKeepStrategy,
slice: Option<(i64, usize)>,
) -> Result<DataFrame, PolarsError>
Available on crate feature algorithm_group_by only.
pub fn unique<I, S>( &self, subset: Option<&[String]>, keep: UniqueKeepStrategy, slice: Option<(i64, usize)>, ) -> Result<DataFrame, PolarsError>
Available on crate feature algorithm_group_by only.
Unstable distinct. See DataFrame::unique_stable.
pub fn unique_impl( &self, maintain_order: bool, subset: Option<Vec<PlSmallStr>>, keep: UniqueKeepStrategy, slice: Option<(i64, usize)>, ) -> Result<DataFrame, PolarsError>
algorithm_group_by only.Sourcepub fn is_unique(&self) -> Result<ChunkedArray<BooleanType>, PolarsError>
Available on crate feature algorithm_group_by only.
pub fn is_unique(&self) -> Result<ChunkedArray<BooleanType>, PolarsError>
algorithm_group_by only.Sourcepub fn is_duplicated(&self) -> Result<ChunkedArray<BooleanType>, PolarsError>
Available on crate feature algorithm_group_by only.
pub fn is_duplicated(&self) -> Result<ChunkedArray<BooleanType>, PolarsError>
algorithm_group_by only.Sourcepub fn null_count(&self) -> DataFrame
pub fn null_count(&self) -> DataFrame
Create a new DataFrame that shows the null counts per column.
Sourcepub fn get_supertype(&self) -> Option<Result<DataType, PolarsError>>
pub fn get_supertype(&self) -> Option<Result<DataType, PolarsError>>
Get the supertype of the columns in this DataFrame
Sourcepub fn partition_by<I, S>(
&self,
cols: I,
include_key: bool,
) -> Result<Vec<DataFrame>, PolarsError>
Available on crate feature partition_by only.
pub fn partition_by<I, S>( &self, cols: I, include_key: bool, ) -> Result<Vec<DataFrame>, PolarsError>
Available on crate feature partition_by only.
Split into multiple DataFrames partitioned by groups.
Sourcepub fn partition_by_stable<I, S>(
&self,
cols: I,
include_key: bool,
) -> Result<Vec<DataFrame>, PolarsError>
Available on crate feature partition_by only.
pub fn partition_by_stable<I, S>( &self, cols: I, include_key: bool, ) -> Result<Vec<DataFrame>, PolarsError>
Available on crate feature partition_by only.
Split into multiple DataFrames partitioned by groups. The order of the groups is maintained.
Sourcepub fn unnest(
&self,
cols: impl IntoIterator<Item = impl Into<PlSmallStr>>,
separator: Option<&str>,
) -> Result<DataFrame, PolarsError>
Available on crate feature dtype-struct only.
pub fn unnest( &self, cols: impl IntoIterator<Item = impl Into<PlSmallStr>>, separator: Option<&str>, ) -> Result<DataFrame, PolarsError>
Available on crate feature dtype-struct only.
Unnest the given Struct columns. This means that the fields of the Struct type will be
inserted as columns.
pub fn append_record_batch( &mut self, rb: RecordBatchT<Box<dyn Array>>, ) -> Result<(), PolarsError>
Source§impl DataFrame
impl DataFrame
pub fn serialize_into_writer( &mut self, writer: &mut dyn Write, ) -> Result<(), PolarsError>
pub fn serialize_to_bytes(&mut self) -> Result<Vec<u8>, PolarsError>
pub fn deserialize_from_reader<T>( reader: &mut T, ) -> Result<DataFrame, PolarsError>
Source§impl DataFrame
impl DataFrame
Sourcepub fn schema_equal(&self, other: &DataFrame) -> Result<(), PolarsError>
pub fn schema_equal(&self, other: &DataFrame) -> Result<(), PolarsError>
Check if the schemas of both DataFrames are equal.
Sourcepub fn equals(&self, other: &DataFrame) -> bool
pub fn equals(&self, other: &DataFrame) -> bool
Check if DataFrames are equal. Note that None == None evaluates to false; use equals_missing if None values should compare as equal.
§Example
let df1: DataFrame = df!("Atomic number" => &[1, 51, 300],
"Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
let df2: DataFrame = df!("Atomic number" => &[1, 51, 300],
"Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
assert!(!df1.equals(&df2));Sourcepub fn equals_missing(&self, other: &DataFrame) -> bool
pub fn equals_missing(&self, other: &DataFrame) -> bool
Check if all values in DataFrames are equal where None == None evaluates to true.
§Example
let df1: DataFrame = df!("Atomic number" => &[1, 51, 300],
"Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
let df2: DataFrame = df!("Atomic number" => &[1, 51, 300],
"Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
assert!(df1.equals_missing(&df2));Trait Implementations§
Source§impl AsofJoinBy for DataFrame
impl AsofJoinBy for DataFrame
Source§fn join_asof_by<I, S>(
&self,
other: &DataFrame,
left_on: &str,
right_on: &str,
left_by: I,
right_by: I,
strategy: AsofStrategy,
tolerance: Option<AnyValue<'static>>,
allow_eq: bool,
check_sortedness: bool,
) -> Result<DataFrame, PolarsError>
fn join_asof_by<I, S>( &self, other: &DataFrame, left_on: &str, right_on: &str, left_by: I, right_by: I, strategy: AsofStrategy, tolerance: Option<AnyValue<'static>>, allow_eq: bool, check_sortedness: bool, ) -> Result<DataFrame, PolarsError>
by.Source§impl Container for DataFrame
impl Container for DataFrame
fn slice(&self, offset: i64, len: usize) -> DataFrame
fn split_at(&self, offset: i64) -> (DataFrame, DataFrame)
fn len(&self) -> usize
fn iter_chunks(&self) -> impl Iterator<Item = DataFrame>
fn should_rechunk(&self) -> bool
fn n_chunks(&self) -> usize
fn chunk_lengths(&self) -> impl Iterator<Item = usize>
Source§impl CrossJoin for DataFrame
impl CrossJoin for DataFrame
Source§fn cross_join(
&self,
other: &DataFrame,
suffix: Option<PlSmallStr>,
slice: Option<(i64, usize)>,
maintain_order: MaintainOrderJoin,
) -> Result<DataFrame, PolarsError>
fn cross_join( &self, other: &DataFrame, suffix: Option<PlSmallStr>, slice: Option<(i64, usize)>, maintain_order: MaintainOrderJoin, ) -> Result<DataFrame, PolarsError>
Source§impl DataFrameJoinOps for DataFrame
impl DataFrameJoinOps for DataFrame
Source§fn join(
&self,
other: &DataFrame,
left_on: impl IntoIterator<Item = impl AsRef<str>>,
right_on: impl IntoIterator<Item = impl AsRef<str>>,
args: JoinArgs,
options: Option<JoinTypeOptions>,
) -> Result<DataFrame, PolarsError>
fn join( &self, other: &DataFrame, left_on: impl IntoIterator<Item = impl AsRef<str>>, right_on: impl IntoIterator<Item = impl AsRef<str>>, args: JoinArgs, options: Option<JoinTypeOptions>, ) -> Result<DataFrame, PolarsError>
Source§fn inner_join(
&self,
other: &DataFrame,
left_on: impl IntoIterator<Item = impl AsRef<str>>,
right_on: impl IntoIterator<Item = impl AsRef<str>>,
) -> Result<DataFrame, PolarsError>
fn inner_join( &self, other: &DataFrame, left_on: impl IntoIterator<Item = impl AsRef<str>>, right_on: impl IntoIterator<Item = impl AsRef<str>>, ) -> Result<DataFrame, PolarsError>
Source§fn left_join(
&self,
other: &DataFrame,
left_on: impl IntoIterator<Item = impl AsRef<str>>,
right_on: impl IntoIterator<Item = impl AsRef<str>>,
) -> Result<DataFrame, PolarsError>
fn left_join( &self, other: &DataFrame, left_on: impl IntoIterator<Item = impl AsRef<str>>, right_on: impl IntoIterator<Item = impl AsRef<str>>, ) -> Result<DataFrame, PolarsError>
Source§fn full_join(
&self,
other: &DataFrame,
left_on: impl IntoIterator<Item = impl AsRef<str>>,
right_on: impl IntoIterator<Item = impl AsRef<str>>,
) -> Result<DataFrame, PolarsError>
fn full_join( &self, other: &DataFrame, left_on: impl IntoIterator<Item = impl AsRef<str>>, right_on: impl IntoIterator<Item = impl AsRef<str>>, ) -> Result<DataFrame, PolarsError>
Source§impl<'de> Deserialize<'de> for DataFrame
impl<'de> Deserialize<'de> for DataFrame
Source§fn deserialize<D>(
deserializer: D,
) -> Result<DataFrame, <D as Deserializer<'de>>::Error>where
D: Deserializer<'de>,
fn deserialize<D>(
deserializer: D,
) -> Result<DataFrame, <D as Deserializer<'de>>::Error>where
D: Deserializer<'de>,
Source§impl JoinDispatch for DataFrame
impl JoinDispatch for DataFrame
Source§unsafe fn create_left_df_chunked(
&self,
chunk_ids: &[ChunkId],
left_join: bool,
was_sliced: bool,
) -> DataFrame
unsafe fn create_left_df_chunked( &self, chunk_ids: &[ChunkId], left_join: bool, was_sliced: bool, ) -> DataFrame
chunked_ids only.Source§unsafe fn _create_left_df_from_slice(
&self,
join_tuples: &[u32],
left_join: bool,
was_sliced: bool,
sorted_tuple_idx: bool,
) -> DataFrame
unsafe fn _create_left_df_from_slice( &self, join_tuples: &[u32], left_join: bool, was_sliced: bool, sorted_tuple_idx: bool, ) -> DataFrame
Source§unsafe fn _finish_anti_semi_join(
&self,
idx: &[u32],
slice: Option<(i64, usize)>,
) -> DataFrame
unsafe fn _finish_anti_semi_join( &self, idx: &[u32], slice: Option<(i64, usize)>, ) -> DataFrame
semi_anti_join only.Source§fn _semi_anti_join_from_series(
&self,
s_left: &Series,
s_right: &Series,
slice: Option<(i64, usize)>,
anti: bool,
nulls_equal: bool,
) -> Result<DataFrame, PolarsError>
fn _semi_anti_join_from_series( &self, s_left: &Series, s_right: &Series, slice: Option<(i64, usize)>, anti: bool, nulls_equal: bool, ) -> Result<DataFrame, PolarsError>
semi_anti_join only.fn _full_join_from_series( &self, other: &DataFrame, s_left: &Series, s_right: &Series, args: JoinArgs, ) -> Result<DataFrame, PolarsError>
Source§impl MinMaxHorizontal for DataFrame
impl MinMaxHorizontal for DataFrame
Source§fn min_horizontal(&self) -> Result<Option<Column>, PolarsError>
fn min_horizontal(&self) -> Result<Option<Column>, PolarsError>
Source§fn max_horizontal(&self) -> Result<Option<Column>, PolarsError>
fn max_horizontal(&self) -> Result<Option<Column>, PolarsError>
Source§impl PolarsTemporalGroupby for DataFrame
impl PolarsTemporalGroupby for DataFrame
fn rolling( &self, group_by: Option<Vec<[u32; 2]>>, options: &RollingGroupOptions, ) -> Result<(Column, GroupPositions), PolarsError>
fn group_by_dynamic( &self, group_by: Option<Vec<[u32; 2]>>, options: &DynamicGroupOptions, ) -> Result<(Column, Vec<Column>, GroupPositions), PolarsError>
Source§impl PolarsUpsample for DataFrame
impl PolarsUpsample for DataFrame
Source§fn upsample<I>(
&self,
by: I,
time_column: &str,
every: Duration,
) -> Result<DataFrame, PolarsError>where
I: IntoVec<PlSmallStr>,
fn upsample<I>(
&self,
by: I,
time_column: &str,
every: Duration,
) -> Result<DataFrame, PolarsError>where
I: IntoVec<PlSmallStr>,
Source§fn upsample_stable<I>(
&self,
by: I,
time_column: &str,
every: Duration,
) -> Result<DataFrame, PolarsError>where
I: IntoVec<PlSmallStr>,
fn upsample_stable<I>(
&self,
by: I,
time_column: &str,
every: Duration,
) -> Result<DataFrame, PolarsError>where
I: IntoVec<PlSmallStr>,
Source§impl Serialize for DataFrame
impl Serialize for DataFrame
Source§fn serialize<S>(
&self,
serializer: S,
) -> Result<<S as Serializer>::Ok, <S as Serializer>::Error>where
S: Serializer,
fn serialize<S>(
&self,
serializer: S,
) -> Result<<S as Serializer>::Ok, <S as Serializer>::Error>where
S: Serializer,
Source§impl SumMeanHorizontal for DataFrame
impl SumMeanHorizontal for DataFrame
Source§fn sum_horizontal(
&self,
null_strategy: NullStrategy,
) -> Result<Option<Column>, PolarsError>
fn sum_horizontal( &self, null_strategy: NullStrategy, ) -> Result<Option<Column>, PolarsError>
Source§fn mean_horizontal(
&self,
null_strategy: NullStrategy,
) -> Result<Option<Column>, PolarsError>
fn mean_horizontal( &self, null_strategy: NullStrategy, ) -> Result<Option<Column>, PolarsError>
Source§impl TakeChunked for DataFrame
impl TakeChunked for DataFrame
Source§impl TakeChunkedHorPar for DataFrame
impl TakeChunkedHorPar for DataFrame
Source§impl TryExtend<RecordBatchT<Box<dyn Array>>> for DataFrame
impl TryExtend<RecordBatchT<Box<dyn Array>>> for DataFrame
Source§fn try_extend<I>(&mut self, iter: I) -> Result<(), PolarsError>where
I: IntoIterator<Item = RecordBatchT<Box<dyn Array>>>,
fn try_extend<I>(&mut self, iter: I) -> Result<(), PolarsError>where
I: IntoIterator<Item = RecordBatchT<Box<dyn Array>>>,
Extend::extend.Source§impl TryExtend<Result<RecordBatchT<Box<dyn Array>>, PolarsError>> for DataFrame
impl TryExtend<Result<RecordBatchT<Box<dyn Array>>, PolarsError>> for DataFrame
Source§fn try_extend<I>(&mut self, iter: I) -> Result<(), PolarsError>
fn try_extend<I>(&mut self, iter: I) -> Result<(), PolarsError>
Extend::extend.Source§impl TryFrom<StructArray> for DataFrame
impl TryFrom<StructArray> for DataFrame
Source§type Error = PolarsError
type Error = PolarsError
Source§fn try_from(arr: StructArray) -> Result<DataFrame, PolarsError>
fn try_from(arr: StructArray) -> Result<DataFrame, PolarsError>
impl AsofJoin for DataFrame
Auto Trait Implementations§
impl !Freeze for DataFrame
impl !RefUnwindSafe for DataFrame
impl Send for DataFrame
impl Sync for DataFrame
impl Unpin for DataFrame
impl !UnwindSafe for DataFrame
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<T> DataFrameOps for Twhere
T: IntoDf,
impl<T> DataFrameOps for Twhere
T: IntoDf,
Source§fn to_dummies(
&self,
separator: Option<&str>,
drop_first: bool,
drop_nulls: bool,
) -> Result<DataFrame, PolarsError>
fn to_dummies( &self, separator: Option<&str>, drop_first: bool, drop_nulls: bool, ) -> Result<DataFrame, PolarsError>
to_dummies only.§impl<T> Instrument for T
impl<T> Instrument for T
§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self> ⓘ
fn into_either(self, into_left: bool) -> Either<Self, Self> ⓘ
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read moreSource§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> ⓘ
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> ⓘ
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read moreSource§impl<T> Key for Twhere
T: Clone,
impl<T> Key for Twhere
T: Clone,
§impl<T> Pointable for T
impl<T> Pointable for T
§impl<T> PolicyExt for Twhere
T: ?Sized,
impl<T> PolicyExt for Twhere
T: ?Sized,
§impl<T> ToCompactString for Twhere
T: Display,
impl<T> ToCompactString for Twhere
T: Display,
§fn try_to_compact_string(&self) -> Result<CompactString, ToCompactStringError>
fn try_to_compact_string(&self) -> Result<CompactString, ToCompactStringError>
ToCompactString::to_compact_string()] Read more§fn to_compact_string(&self) -> CompactString
fn to_compact_string(&self) -> CompactString
CompactString]. Read more§impl<T> ToStringFallible for Twhere
T: Display,
impl<T> ToStringFallible for Twhere
T: Display,
§fn try_to_string(&self) -> Result<String, TryReserveError>
fn try_to_string(&self) -> Result<String, TryReserveError>
ToString::to_string, but without panic on OOM.