polars_io/parquet/read/
options.rs

use polars_core::schema::SchemaRef;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ParquetOptions {
    pub schema: Option<SchemaRef>,
    pub parallel: ParallelStrategy,
    pub low_memory: bool,
    pub use_statistics: bool,
}

#[derive(Copy, Clone, Debug, Eq, PartialEq, Default, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum ParallelStrategy {
    /// Don't parallelize
    None,
    /// Parallelize over the columns
    Columns,
    /// Parallelize over the row groups
    RowGroups,
    /// First evaluates the pushed-down predicates in parallel and determines a mask of which rows
    /// to read. Then, it parallelizes over both the columns and the row groups while filtering out
    /// rows that do not need to be read. This can provide significant speedups for large files
    /// (i.e. many row groups) with a predicate that filters clustered rows or filters heavily. In
    /// other cases, this may slow down the scan compared to other strategies.
    ///
    /// If no predicate is given, this falls back to [`ParallelStrategy::Auto`].
    Prefiltered,
    /// Automatically determine over which unit to parallelize.
    /// This will choose whichever unit occurs most (columns or row groups).
    #[default]
    Auto,
}
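
// A minimal sketch, not part of the upstream options.rs, illustrating how these
// options might be constructed. All fields of `ParquetOptions` are public, so a
// plain struct literal works; the test module and its name are assumptions added
// here for illustration only.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn build_parquet_options() {
        // `ParallelStrategy` derives `Default`, with `Auto` marked as the default variant.
        assert_eq!(ParallelStrategy::default(), ParallelStrategy::Auto);

        // Construct options directly; no schema is supplied and prefiltering is requested.
        let opts = ParquetOptions {
            schema: None,
            parallel: ParallelStrategy::Prefiltered,
            low_memory: false,
            use_statistics: true,
        };
        assert_eq!(opts.parallel, ParallelStrategy::Prefiltered);
    }
}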