polars_io/
options.rs

1use polars_core::schema::SchemaRef;
2use polars_error::{PolarsError, PolarsResult};
3use polars_utils::IdxSize;
4use polars_utils::pl_str::PlSmallStr;
5#[cfg(feature = "serde")]
6use serde::{Deserialize, Serialize};
7
8#[derive(Clone, Debug, Eq, PartialEq, Hash)]
9#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
10#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
11pub struct RowIndex {
12    pub name: PlSmallStr,
13    pub offset: IdxSize,
14}
15
16/// Options for Hive partitioning.
17#[derive(Clone, Debug, Eq, PartialEq, Hash)]
18#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
19#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
20pub struct HiveOptions {
21    /// This can be `None` to automatically enable for single directory scans
22    /// and disable otherwise. However it should be initialized if it is inside
23    /// a DSL / IR plan.
24    pub enabled: Option<bool>,
25    pub hive_start_idx: usize,
26    pub schema: Option<SchemaRef>,
27    pub try_parse_dates: bool,
28}
29
30impl HiveOptions {
31    pub fn new_enabled() -> Self {
32        Self {
33            enabled: Some(true),
34            hive_start_idx: 0,
35            schema: None,
36            try_parse_dates: true,
37        }
38    }
39
40    pub fn new_disabled() -> Self {
41        Self {
42            enabled: Some(false),
43            hive_start_idx: 0,
44            schema: None,
45            try_parse_dates: false,
46        }
47    }
48}
49
50impl Default for HiveOptions {
51    fn default() -> Self {
52        Self::new_enabled()
53    }
54}
55
/// Compression options for file formats whose compression is applied
/// externally, such as CSV and NDJSON.
///
/// "Externally" does not mean "by an external tool"; it means the compression
/// is not part of the format itself, unlike Parquet and IPC where compression
/// happens internally.
#[derive(Copy, Clone, Debug, Default, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum ExternalCompression {
    /// No compression. This is the default variant.
    #[default]
    Uncompressed,
    /// Gzip compression.
    Gzip {
        /// Optional compression level (NOTE(review): `None` presumably defers
        /// to the encoder's default; confirm against the writer).
        level: Option<u32>,
    },
    /// Zstandard compression.
    Zstd {
        /// Optional compression level (NOTE(review): `None` presumably defers
        /// to the encoder's default; confirm against the writer).
        level: Option<u32>,
    },
}
75
76impl ExternalCompression {
77    /// Returns the expected file suffix associated with the compression format.
78    pub fn file_suffix(self) -> Option<&'static str> {
79        match self {
80            Self::Uncompressed => None,
81            Self::Gzip { .. } => Some(".gz"),
82            Self::Zstd { .. } => Some(".zst"),
83        }
84    }
85
86    pub fn try_from(value: &str, level: Option<u32>) -> PolarsResult<Self> {
87        match value {
88            "uncompressed" => Ok(Self::Uncompressed),
89            "gzip" => Ok(Self::Gzip { level }),
90            "zstd" => Ok(Self::Zstd { level }),
91            _ => Err(PolarsError::InvalidOperation(
92                format!("Invalid compression format: ({value})").into(),
93            )),
94        }
95    }
96}