1use polars_error::PolarsResult;
2use polars_parquet::write::{
3 BrotliLevel as BrotliLevelParquet, CompressionOptions, GzipLevel as GzipLevelParquet,
4 StatisticsOptions, ZstdLevel as ZstdLevelParquet,
5};
6use polars_utils::pl_str::PlSmallStr;
7#[cfg(feature = "serde")]
8use serde::{Deserialize, Serialize};
9
10use super::KeyValueMetadata;
11
/// Options describing how a Parquet file should be written.
///
/// `Default` is derived, so each field takes its own type's default: the
/// `Option` fields start as `None`, `field_overwrites` starts empty, and
/// `compression` uses `ParquetCompression::default()`.
#[derive(Clone, Debug, PartialEq, Eq, Default, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ParquetWriteOptions {
    /// Compression codec (and optional level) to use; see [`ParquetCompression`].
    pub compression: ParquetCompression,
    /// Statistics configuration, expressed with the [`StatisticsOptions`] type
    /// from `polars_parquet::write`.
    pub statistics: StatisticsOptions,
    /// Optional row group size.
    ///
    /// NOTE(review): the unit (presumably rows) and what the writer does for
    /// `None` are not visible in this file — confirm against the writer.
    pub row_group_size: Option<usize>,
    /// Optional data page size.
    ///
    /// NOTE(review): the unit (presumably bytes) and the fallback used for
    /// `None` are not visible in this file — confirm against the writer.
    pub data_page_size: Option<usize>,
    /// Optional key/value metadata, using the `KeyValueMetadata` type from the
    /// parent module.
    pub key_value_metadata: Option<KeyValueMetadata>,

    /// Per-field overwrites; see [`ParquetFieldOverwrites`].
    pub field_overwrites: Vec<ParquetFieldOverwrites>,
}
29
/// Overwrites for the children of a field, with one variant per nesting shape.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum ChildFieldOverwrites {
    /// No overwrites for child fields.
    None,
    /// A single, boxed child overwrite.
    ///
    /// NOTE(review): presumably used for list-like types that have exactly one
    /// inner field — confirm against the code that consumes this enum.
    ListLike(Box<ParquetFieldOverwrites>),
    /// A sequence of child overwrites.
    ///
    /// NOTE(review): presumably one entry per struct field; how entries are
    /// matched to fields (by position or by name) is not visible here.
    Struct(Vec<ParquetFieldOverwrites>),
}
39
/// A single metadata entry: a key with an optional value.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct MetadataKeyValue {
    /// The metadata key.
    pub key: PlSmallStr,
    /// The value stored under `key`; `None` means the entry carries no value.
    pub value: Option<PlSmallStr>,
}
46
/// Overwrites for one field, optionally recursing into its children.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ParquetFieldOverwrites {
    /// Optional field name.
    ///
    /// NOTE(review): presumably selects which field the overwrite applies to;
    /// the meaning of `None` is decided by the consumer — confirm.
    pub name: Option<PlSmallStr>,
    /// Overwrites for nested child fields; see [`ChildFieldOverwrites`].
    pub children: ChildFieldOverwrites,
    /// Optional field id to assign.
    pub field_id: Option<i32>,
    /// Optional list of key/value metadata entries for this field.
    pub metadata: Option<Vec<MetadataKeyValue>>,
}
55
/// Compression codec selection for Parquet writing.
///
/// Each variant converts into the `CompressionOptions` variant of the same
/// name via the `From<ParquetCompression>` impl in this file. The `Default`
/// impl in this file returns `Zstd(None)`.
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum ParquetCompression {
    /// Maps to `CompressionOptions::Uncompressed`.
    Uncompressed,
    /// Maps to `CompressionOptions::Snappy`.
    Snappy,
    /// Gzip with an optional explicit level; `None` leaves the level unspecified.
    Gzip(Option<GzipLevel>),
    /// Maps to `CompressionOptions::Lzo`.
    Lzo,
    /// Brotli with an optional explicit level; `None` leaves the level unspecified.
    Brotli(Option<BrotliLevel>),
    /// Zstd with an optional explicit level; `None` leaves the level unspecified.
    Zstd(Option<ZstdLevel>),
    /// Maps to `CompressionOptions::Lz4Raw`.
    Lz4Raw,
}
68
69impl Default for ParquetCompression {
70 fn default() -> Self {
71 Self::Zstd(None)
72 }
73}
74
/// A gzip compression level stored as a raw `u8`.
///
/// The inner value is private; `GzipLevel::try_new` is the constructor defined
/// in this file.
///
/// NOTE(review): with the `serde` feature, the derived `Deserialize` presumably
/// builds the value without going through `try_new` — confirm whether an
/// unchecked level can reach the conversion into `CompressionOptions`.
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct GzipLevel(u8);
79
80impl GzipLevel {
81 pub fn try_new(level: u8) -> PolarsResult<Self> {
82 GzipLevelParquet::try_new(level)?;
83 Ok(GzipLevel(level))
84 }
85}
86
/// A brotli compression level stored as a raw `u32`.
///
/// The inner value is private; `BrotliLevel::try_new` is the constructor
/// defined in this file.
///
/// NOTE(review): with the `serde` feature, the derived `Deserialize` presumably
/// builds the value without going through `try_new` — confirm whether an
/// unchecked level can reach the conversion into `CompressionOptions`.
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct BrotliLevel(u32);
91
92impl BrotliLevel {
93 pub fn try_new(level: u32) -> PolarsResult<Self> {
94 BrotliLevelParquet::try_new(level)?;
95 Ok(BrotliLevel(level))
96 }
97}
98
/// A zstd compression level stored as a raw `i32`.
///
/// The inner value is private; `ZstdLevel::try_new` is the constructor defined
/// in this file.
///
/// NOTE(review): with the `serde` feature, the derived `Deserialize` presumably
/// builds the value without going through `try_new` — confirm whether an
/// unchecked level can reach the conversion into `CompressionOptions`.
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ZstdLevel(i32);
103
104impl ZstdLevel {
105 pub fn try_new(level: i32) -> PolarsResult<Self> {
106 ZstdLevelParquet::try_new(level)?;
107 Ok(ZstdLevel(level))
108 }
109}
110
111impl From<ParquetCompression> for CompressionOptions {
112 fn from(value: ParquetCompression) -> Self {
113 use ParquetCompression::*;
114 match value {
115 Uncompressed => CompressionOptions::Uncompressed,
116 Snappy => CompressionOptions::Snappy,
117 Gzip(level) => {
118 CompressionOptions::Gzip(level.map(|v| GzipLevelParquet::try_new(v.0).unwrap()))
119 },
120 Lzo => CompressionOptions::Lzo,
121 Brotli(level) => {
122 CompressionOptions::Brotli(level.map(|v| BrotliLevelParquet::try_new(v.0).unwrap()))
123 },
124 Lz4Raw => CompressionOptions::Lz4Raw,
125 Zstd(level) => {
126 CompressionOptions::Zstd(level.map(|v| ZstdLevelParquet::try_new(v.0).unwrap()))
127 },
128 }
129 }
130}