polars_io/parquet/write/
options.rs

1use polars_parquet::write::{
2    BrotliLevel, CompressionOptions, GzipLevel, StatisticsOptions, ZstdLevel,
3};
4use polars_utils::pl_str::PlSmallStr;
5#[cfg(feature = "serde")]
6use serde::{Deserialize, Serialize};
7
8use super::KeyValueMetadata;
9
10#[derive(Clone, Debug, PartialEq, Eq, Default, Hash)]
11#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
12#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
13pub struct ParquetWriteOptions {
14    /// Data page compression
15    pub compression: ParquetCompression,
16    /// Compute and write column statistics.
17    pub statistics: StatisticsOptions,
18    /// If `None` will be all written to a single row group.
19    pub row_group_size: Option<usize>,
20    /// if `None` will be 1024^2 bytes
21    pub data_page_size: Option<usize>,
22    /// Custom file-level key value metadata
23    pub key_value_metadata: Option<KeyValueMetadata>,
24
25    /// Per-field overwrites for writing properties.
26    pub field_overwrites: Vec<ParquetFieldOverwrites>,
27}
28
29#[derive(Clone, Debug, PartialEq, Eq, Hash)]
30#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
31#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
32pub enum ChildFieldOverwrites {
33    /// Flat datatypes
34    None,
35    /// List / Array
36    ListLike(Box<ParquetFieldOverwrites>),
37    Struct(Vec<ParquetFieldOverwrites>),
38}
39
40#[derive(Clone, Debug, PartialEq, Eq, Hash)]
41#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
42#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
43pub struct MetadataKeyValue {
44    pub key: PlSmallStr,
45    pub value: Option<PlSmallStr>,
46}
47
48#[derive(Clone, Debug, PartialEq, Eq, Hash)]
49#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
50#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
51pub struct ParquetFieldOverwrites {
52    pub name: Option<PlSmallStr>,
53    pub children: ChildFieldOverwrites,
54
55    pub required: Option<bool>,
56    pub field_id: Option<i32>,
57    pub metadata: Option<Vec<MetadataKeyValue>>,
58}
59
60/// The compression strategy to use for writing Parquet files.
61#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
62#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
63#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
64pub enum ParquetCompression {
65    Uncompressed,
66    Snappy,
67    Gzip(Option<GzipLevel>),
68    Brotli(Option<BrotliLevel>),
69    Zstd(Option<ZstdLevel>),
70    Lz4Raw,
71}
72
73impl Default for ParquetCompression {
74    fn default() -> Self {
75        Self::Zstd(None)
76    }
77}
78
79impl From<ParquetCompression> for CompressionOptions {
80    fn from(value: ParquetCompression) -> Self {
81        use ParquetCompression::*;
82        match value {
83            Uncompressed => CompressionOptions::Uncompressed,
84            Snappy => CompressionOptions::Snappy,
85            Gzip(level) => CompressionOptions::Gzip(level),
86            Brotli(level) => CompressionOptions::Brotli(level),
87            Lz4Raw => CompressionOptions::Lz4Raw,
88            Zstd(level) => CompressionOptions::Zstd(level),
89        }
90    }
91}