//! Parquet write options (`polars_io/parquet/write/options.rs`).

1use polars_error::PolarsResult;
2use polars_parquet::write::{
3    BrotliLevel as BrotliLevelParquet, CompressionOptions, GzipLevel as GzipLevelParquet,
4    StatisticsOptions, ZstdLevel as ZstdLevelParquet,
5};
6use polars_utils::pl_str::PlSmallStr;
7#[cfg(feature = "serde")]
8use serde::{Deserialize, Serialize};
9
10use super::KeyValueMetadata;
11
/// File-level options controlling how a Parquet file is written.
#[derive(Clone, Debug, PartialEq, Eq, Default, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct ParquetWriteOptions {
    /// Data page compression.
    pub compression: ParquetCompression,
    /// Which column statistics to compute and write.
    pub statistics: StatisticsOptions,
    /// Maximum number of rows per row group; if `None`, all rows are written
    /// to a single row group.
    pub row_group_size: Option<usize>,
    /// Data page size in bytes; if `None`, defaults to 1024^2 bytes.
    pub data_page_size: Option<usize>,
    /// Custom file-level key value metadata.
    pub key_value_metadata: Option<KeyValueMetadata>,

    /// Per-field overwrites for writing properties.
    pub field_overwrites: Vec<ParquetFieldOverwrites>,
}
30
/// Overwrites for the child field(s) of a nested datatype, mirroring the
/// shape of the column's type.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum ChildFieldOverwrites {
    /// Flat datatypes (no child fields).
    None,
    /// List / Array: a single overwrite for the element field.
    ListLike(Box<ParquetFieldOverwrites>),
    /// Struct: overwrites for the struct's member fields.
    Struct(Vec<ParquetFieldOverwrites>),
}
41
/// A single key-value pair of custom metadata.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct MetadataKeyValue {
    /// Metadata key.
    pub key: PlSmallStr,
    /// Associated value, if any.
    pub value: Option<PlSmallStr>,
}
49
/// Per-field overwrites of Parquet writing properties.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct ParquetFieldOverwrites {
    /// Name of the field to overwrite; presumably `None` addresses an
    /// unnamed child (e.g. a list element) — TODO confirm against resolver.
    pub name: Option<PlSmallStr>,
    /// Overwrites for nested child fields, if the datatype has any.
    pub children: ChildFieldOverwrites,

    /// Whether the field is marked required (non-nullable) in the schema.
    pub required: Option<bool>,
    /// NOTE(review): looks like a Parquet schema `field_id` — confirm.
    pub field_id: Option<i32>,
    /// Custom key-value metadata attached to this field.
    pub metadata: Option<Vec<MetadataKeyValue>>,
}
61
/// The compression strategy to use for writing Parquet files.
///
/// Defaults to `Zstd(None)` (see the `Default` impl below).
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum ParquetCompression {
    Uncompressed,
    Snappy,
    /// Gzip with an optional level; `None` uses the codec default.
    Gzip(Option<GzipLevel>),
    Lzo,
    /// Brotli with an optional level; `None` uses the codec default.
    Brotli(Option<BrotliLevel>),
    /// Zstandard with an optional level; `None` uses the codec default.
    Zstd(Option<ZstdLevel>),
    Lz4Raw,
}
75
76impl Default for ParquetCompression {
77    fn default() -> Self {
78        Self::Zstd(None)
79    }
80}
81
/// A valid Gzip compression level.
///
/// The inner value is private; instances are created via
/// [`GzipLevel::try_new`], which validates the level.
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct GzipLevel(u8);
87
88impl GzipLevel {
89    pub fn try_new(level: u8) -> PolarsResult<Self> {
90        GzipLevelParquet::try_new(level)?;
91        Ok(GzipLevel(level))
92    }
93}
94
/// A valid Brotli compression level.
///
/// The inner value is private; instances are created via
/// [`BrotliLevel::try_new`], which validates the level.
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct BrotliLevel(u32);
100
101impl BrotliLevel {
102    pub fn try_new(level: u32) -> PolarsResult<Self> {
103        BrotliLevelParquet::try_new(level)?;
104        Ok(BrotliLevel(level))
105    }
106}
107
/// A valid Zstandard compression level.
///
/// The inner value is private; instances are created via
/// [`ZstdLevel::try_new`], which validates the level.
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct ZstdLevel(i32);
113
114impl ZstdLevel {
115    pub fn try_new(level: i32) -> PolarsResult<Self> {
116        ZstdLevelParquet::try_new(level)?;
117        Ok(ZstdLevel(level))
118    }
119}
120
121impl From<ParquetCompression> for CompressionOptions {
122    fn from(value: ParquetCompression) -> Self {
123        use ParquetCompression::*;
124        match value {
125            Uncompressed => CompressionOptions::Uncompressed,
126            Snappy => CompressionOptions::Snappy,
127            Gzip(level) => {
128                CompressionOptions::Gzip(level.map(|v| GzipLevelParquet::try_new(v.0).unwrap()))
129            },
130            Lzo => CompressionOptions::Lzo,
131            Brotli(level) => {
132                CompressionOptions::Brotli(level.map(|v| BrotliLevelParquet::try_new(v.0).unwrap()))
133            },
134            Lz4Raw => CompressionOptions::Lz4Raw,
135            Zstd(level) => {
136                CompressionOptions::Zstd(level.map(|v| ZstdLevelParquet::try_new(v.0).unwrap()))
137            },
138        }
139    }
140}