polars.io.parquet.ParquetFieldOverwrites#

class polars.io.parquet.ParquetFieldOverwrites(
*,
name: str | None = None,
children: None | ParquetFieldOverwrites | Sequence[ParquetFieldOverwrites] | Mapping[str, ParquetFieldOverwrites] = None,
field_id: int | None = None,
metadata: Mapping[str, None | str] | None = None,
required: bool | None = None,
)[source]#

Write-option overwrites for individual Parquet fields.

Warning

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Examples

>>> lf = pl.LazyFrame(
...     {
...         "a": [None, 2, 3, 4],
...         "b": [[1, 2, 3], [42], [13], [37]],
...         "c": [
...             {"x": "a", "y": 42},
...             {"x": "b", "y": 13},
...             {"x": "X", "y": 37},
...             {"x": "Y", "y": 15},
...         ],
...     }
... )  
>>> lf.sink_parquet(
...     "./out/parquet",
...     field_overwrites={
...         "a": ParquetFieldOverwrites(metadata={"flat_from_polars": "yes"}),
...         "b": ParquetFieldOverwrites(
...             children=ParquetFieldOverwrites(metadata={"listitem": "yes"}),
...             metadata={"list": "true"},
...         ),
...         "c": ParquetFieldOverwrites(
...             children=[
...                 ParquetFieldOverwrites(name="x", metadata={"md": "yes"}),
...                 ParquetFieldOverwrites(name="y", metadata={"md2": "Yes!"}),
...             ],
...             metadata={"struct": "true"},
...         ),
...     },
... )  
__init__(
*,
name: str | None = None,
children: None | ParquetFieldOverwrites | Sequence[ParquetFieldOverwrites] | Mapping[str, ParquetFieldOverwrites] = None,
field_id: int | None = None,
metadata: Mapping[str, None | str] | None = None,
required: bool | None = None,
) -> None[source]#

Methods

__init__(*[, name, children, field_id, ...])

Attributes

field_id

The field ID used in the Parquet schema

required

Whether the field is required, i.e. not allowed to have missing values

name

Name of the column or field

children

Children of the column or field.

metadata

Arrow metadata added to the field before writing