polars_io/parquet/write/
options.rs

1use polars_error::PolarsResult;
2use polars_parquet::write::{
3    BrotliLevel as BrotliLevelParquet, CompressionOptions, GzipLevel as GzipLevelParquet,
4    StatisticsOptions, ZstdLevel as ZstdLevelParquet,
5};
6use polars_utils::pl_str::PlSmallStr;
7#[cfg(feature = "serde")]
8use serde::{Deserialize, Serialize};
9
10use super::KeyValueMetadata;
11
12#[derive(Clone, Debug, PartialEq, Eq, Default, Hash)]
13#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
14pub struct ParquetWriteOptions {
15    /// Data page compression
16    pub compression: ParquetCompression,
17    /// Compute and write column statistics.
18    pub statistics: StatisticsOptions,
19    /// If `None` will be all written to a single row group.
20    pub row_group_size: Option<usize>,
21    /// if `None` will be 1024^2 bytes
22    pub data_page_size: Option<usize>,
23    /// Custom file-level key value metadata
24    pub key_value_metadata: Option<KeyValueMetadata>,
25
26    /// Per-field overwrites for writing properties.
27    pub field_overwrites: Vec<ParquetFieldOverwrites>,
28}
29
30#[derive(Clone, Debug, PartialEq, Eq, Hash)]
31#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
32pub enum ChildFieldOverwrites {
33    /// Flat datatypes
34    None,
35    /// List / Array
36    ListLike(Box<ParquetFieldOverwrites>),
37    Struct(Vec<ParquetFieldOverwrites>),
38}
39
40#[derive(Clone, Debug, PartialEq, Eq, Hash)]
41#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
42pub struct MetadataKeyValue {
43    pub key: PlSmallStr,
44    pub value: Option<PlSmallStr>,
45}
46
47#[derive(Clone, Debug, PartialEq, Eq, Hash)]
48#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
49pub struct ParquetFieldOverwrites {
50    pub name: Option<PlSmallStr>,
51    pub children: ChildFieldOverwrites,
52    pub field_id: Option<i32>,
53    pub metadata: Option<Vec<MetadataKeyValue>>,
54}
55
56/// The compression strategy to use for writing Parquet files.
57#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
58#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
59pub enum ParquetCompression {
60    Uncompressed,
61    Snappy,
62    Gzip(Option<GzipLevel>),
63    Lzo,
64    Brotli(Option<BrotliLevel>),
65    Zstd(Option<ZstdLevel>),
66    Lz4Raw,
67}
68
69impl Default for ParquetCompression {
70    fn default() -> Self {
71        Self::Zstd(None)
72    }
73}
74
/// A valid Gzip compression level.
///
/// The inner `u8` is private; outside this module a value is obtained through
/// `GzipLevel::try_new`, which validates the level first.
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct GzipLevel(u8);
79
80impl GzipLevel {
81    pub fn try_new(level: u8) -> PolarsResult<Self> {
82        GzipLevelParquet::try_new(level)?;
83        Ok(GzipLevel(level))
84    }
85}
86
/// A valid Brotli compression level.
///
/// The inner `u32` is private; outside this module a value is obtained through
/// `BrotliLevel::try_new`, which validates the level first.
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct BrotliLevel(u32);
91
92impl BrotliLevel {
93    pub fn try_new(level: u32) -> PolarsResult<Self> {
94        BrotliLevelParquet::try_new(level)?;
95        Ok(BrotliLevel(level))
96    }
97}
98
/// A valid Zstandard compression level.
///
/// The inner `i32` is private; outside this module a value is obtained through
/// `ZstdLevel::try_new`, which validates the level first.
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ZstdLevel(i32);
103
104impl ZstdLevel {
105    pub fn try_new(level: i32) -> PolarsResult<Self> {
106        ZstdLevelParquet::try_new(level)?;
107        Ok(ZstdLevel(level))
108    }
109}
110
111impl From<ParquetCompression> for CompressionOptions {
112    fn from(value: ParquetCompression) -> Self {
113        use ParquetCompression::*;
114        match value {
115            Uncompressed => CompressionOptions::Uncompressed,
116            Snappy => CompressionOptions::Snappy,
117            Gzip(level) => {
118                CompressionOptions::Gzip(level.map(|v| GzipLevelParquet::try_new(v.0).unwrap()))
119            },
120            Lzo => CompressionOptions::Lzo,
121            Brotli(level) => {
122                CompressionOptions::Brotli(level.map(|v| BrotliLevelParquet::try_new(v.0).unwrap()))
123            },
124            Lz4Raw => CompressionOptions::Lz4Raw,
125            Zstd(level) => {
126                CompressionOptions::Zstd(level.map(|v| ZstdLevelParquet::try_new(v.0).unwrap()))
127            },
128        }
129    }
130}