polars_io/parquet/write/
options.rs

1use polars_error::PolarsResult;
2use polars_parquet::write::{
3    BrotliLevel as BrotliLevelParquet, CompressionOptions, GzipLevel as GzipLevelParquet,
4    StatisticsOptions, ZstdLevel as ZstdLevelParquet,
5};
6#[cfg(feature = "serde")]
7use serde::{Deserialize, Serialize};
8
9#[derive(Copy, Clone, Debug, PartialEq, Eq, Default, Hash)]
10#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
11pub struct ParquetWriteOptions {
12    /// Data page compression
13    pub compression: ParquetCompression,
14    /// Compute and write column statistics.
15    pub statistics: StatisticsOptions,
16    /// If `None` will be all written to a single row group.
17    pub row_group_size: Option<usize>,
18    /// if `None` will be 1024^2 bytes
19    pub data_page_size: Option<usize>,
20}
21
22/// The compression strategy to use for writing Parquet files.
23#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
24#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
25pub enum ParquetCompression {
26    Uncompressed,
27    Snappy,
28    Gzip(Option<GzipLevel>),
29    Lzo,
30    Brotli(Option<BrotliLevel>),
31    Zstd(Option<ZstdLevel>),
32    Lz4Raw,
33}
34
35impl Default for ParquetCompression {
36    fn default() -> Self {
37        Self::Zstd(None)
38    }
39}
40
41/// A valid Gzip compression level.
42#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
43#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
44pub struct GzipLevel(u8);
45
46impl GzipLevel {
47    pub fn try_new(level: u8) -> PolarsResult<Self> {
48        GzipLevelParquet::try_new(level)?;
49        Ok(GzipLevel(level))
50    }
51}
52
53/// A valid Brotli compression level.
54#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
55#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
56pub struct BrotliLevel(u32);
57
58impl BrotliLevel {
59    pub fn try_new(level: u32) -> PolarsResult<Self> {
60        BrotliLevelParquet::try_new(level)?;
61        Ok(BrotliLevel(level))
62    }
63}
64
65/// A valid Zstandard compression level.
66#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
67#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
68pub struct ZstdLevel(i32);
69
70impl ZstdLevel {
71    pub fn try_new(level: i32) -> PolarsResult<Self> {
72        ZstdLevelParquet::try_new(level)?;
73        Ok(ZstdLevel(level))
74    }
75}
76
77impl From<ParquetCompression> for CompressionOptions {
78    fn from(value: ParquetCompression) -> Self {
79        use ParquetCompression::*;
80        match value {
81            Uncompressed => CompressionOptions::Uncompressed,
82            Snappy => CompressionOptions::Snappy,
83            Gzip(level) => {
84                CompressionOptions::Gzip(level.map(|v| GzipLevelParquet::try_new(v.0).unwrap()))
85            },
86            Lzo => CompressionOptions::Lzo,
87            Brotli(level) => {
88                CompressionOptions::Brotli(level.map(|v| BrotliLevelParquet::try_new(v.0).unwrap()))
89            },
90            Lz4Raw => CompressionOptions::Lz4Raw,
91            Zstd(level) => {
92                CompressionOptions::Zstd(level.map(|v| ZstdLevelParquet::try_new(v.0).unwrap()))
93            },
94        }
95    }
96}