polars.PartitionBy#

class polars.PartitionBy(
base_path: str | Path,
*,
file_path_provider: Callable[[FileProviderArgs], str | Path | IO[bytes] | IO[str]] | None = None,
key: str | Expr | Sequence[str | Expr] | Mapping[str, Expr] | None = None,
include_key: bool | None = None,
max_rows_per_file: int | None = None,
approximate_bytes_per_file: int | Literal['auto'] | None = 'auto',
)[source]#

Configuration for writing to multiple output files.

Warning

This functionality is currently considered unstable. It may be changed at any point without it being considered a breaking change.

Parameters:
base_path

Base path to write to.

file_path_provider

Callable for custom file output paths.

key

Expressions to partition by.

include_key

Include the partition key expression outputs in the output files.

max_rows_per_file

Maximum number of rows to write for each file. Note that files may have fewer rows than this.

approximate_bytes_per_file

Approximate number of bytes to write to each file. This is measured as the estimated size of the DataFrame in memory.

Examples

Split to multiple files partitioned by year:

>>> pl.LazyFrame({"year": [2026, 2027, 1970], "month": [0, 0, 0]}).sink_parquet(
...     pl.PartitionBy("data/", key="year")
... )  

Split to multiple files based on size:

>>> pl.LazyFrame({"year": [2026, 2027, 1970], "month": [0, 0, 0]}).sink_parquet(
...     pl.PartitionBy(
...         "data/", max_rows_per_file=1000, approximate_bytes_per_file=100_000_000
...     )
... )  

Split to multiple files partitioned by year, with limits on individual file sizes:

>>> pl.LazyFrame({"year": [2026, 2027, 1970], "month": [0, 0, 0]}).sink_parquet(
...     pl.PartitionBy(
...         "data/",
...         key="year",
...         max_rows_per_file=1000,
...         approximate_bytes_per_file=100_000_000,
...     )
... )  
__init__(
base_path: str | Path,
*,
file_path_provider: Callable[[FileProviderArgs], str | Path | IO[bytes] | IO[str]] | None = None,
key: str | Expr | Sequence[str | Expr] | Mapping[str, Expr] | None = None,
include_key: bool | None = None,
max_rows_per_file: int | None = None,
approximate_bytes_per_file: int | Literal['auto'] | None = 'auto',
) -> None[source]#

Methods

__init__(base_path, *[, file_path_provider, ...])