polars_io/parquet/write/
options.rs

1use arrow::datatypes::ArrowSchemaRef;
2use polars_core::prelude::CompatLevel;
3use polars_parquet::write::{
4    BrotliLevel, CompressionOptions, GzipLevel, StatisticsOptions, ZstdLevel,
5};
6#[cfg(feature = "serde")]
7use serde::{Deserialize, Serialize};
8
9use super::KeyValueMetadata;
10
11#[derive(Default, Clone, Debug, PartialEq, Hash, Eq)]
12#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
13#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
14pub struct ParquetWriteOptions {
15    /// Data page compression
16    pub compression: ParquetCompression,
17    /// Compute and write column statistics.
18    pub statistics: StatisticsOptions,
19    /// If `None` will be all written to a single row group.
20    pub row_group_size: Option<usize>,
21    /// if `None` will be 1024^2 bytes
22    pub data_page_size: Option<usize>,
23    /// Custom file-level key value metadata
24    pub key_value_metadata: Option<KeyValueMetadata>,
25    pub arrow_schema: Option<ArrowSchemaRef>,
26    #[cfg_attr(feature = "serde", serde(default))]
27    pub compat_level: Option<CompatLevel>,
28}
29
30impl ParquetWriteOptions {
31    pub fn compat_level(&self) -> CompatLevel {
32        self.compat_level.unwrap_or(CompatLevel::oldest())
33    }
34}
35
36/// The compression strategy to use for writing Parquet files.
37#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
38#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
39#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
40pub enum ParquetCompression {
41    Uncompressed,
42    Snappy,
43    Gzip(Option<GzipLevel>),
44    Brotli(Option<BrotliLevel>),
45    Zstd(Option<ZstdLevel>),
46    Lz4Raw,
47}
48
49impl Default for ParquetCompression {
50    fn default() -> Self {
51        Self::Zstd(None)
52    }
53}
54
55impl From<ParquetCompression> for CompressionOptions {
56    fn from(value: ParquetCompression) -> Self {
57        use ParquetCompression::*;
58        match value {
59            Uncompressed => CompressionOptions::Uncompressed,
60            Snappy => CompressionOptions::Snappy,
61            Gzip(level) => CompressionOptions::Gzip(level),
62            Brotli(level) => CompressionOptions::Brotli(level),
63            Lz4Raw => CompressionOptions::Lz4Raw,
64            Zstd(level) => CompressionOptions::Zstd(level),
65        }
66    }
67}