polars_io/csv/write/
options.rs

1use std::num::NonZeroUsize;
2use std::sync::Arc;
3
4use polars_utils::pl_str::PlSmallStr;
5#[cfg(feature = "serde")]
6use serde::{Deserialize, Serialize};
7
8use crate::ExternalCompression;
9
10/// Options for writing CSV files.
11#[derive(Clone, Debug, Eq, Hash, PartialEq)]
12#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
13#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
14pub struct CsvWriterOptions {
15    pub include_bom: bool,
16    #[cfg_attr(feature = "serde", serde(default))]
17    pub compression: ExternalCompression,
18    pub check_extension: bool,
19    pub include_header: bool,
20    pub batch_size: NonZeroUsize,
21    pub serialize_options: Arc<SerializeOptions>,
22}
23
24impl Default for CsvWriterOptions {
25    fn default() -> Self {
26        Self {
27            include_bom: false,
28            compression: ExternalCompression::default(),
29            check_extension: true,
30            include_header: true,
31            batch_size: NonZeroUsize::new(1024).unwrap(),
32            serialize_options: SerializeOptions::default().into(),
33        }
34    }
35}
36
37/// Options to serialize logical types to CSV.
38///
39/// The default is to format times and dates as `chrono` crate formats them.
40#[derive(Clone, Debug, Eq, Hash, PartialEq)]
41#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
42#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
43pub struct SerializeOptions {
44    /// Used for [`DataType::Date`](polars_core::datatypes::DataType::Date).
45    pub date_format: Option<PlSmallStr>,
46    /// Used for [`DataType::Time`](polars_core::datatypes::DataType::Time).
47    pub time_format: Option<PlSmallStr>,
48    /// Used for [`DataType::Datetime`](polars_core::datatypes::DataType::Datetime).
49    pub datetime_format: Option<PlSmallStr>,
50    /// Used for [`DataType::Float64`](polars_core::datatypes::DataType::Float64)
51    /// and [`DataType::Float32`](polars_core::datatypes::DataType::Float32).
52    pub float_scientific: Option<bool>,
53    pub float_precision: Option<usize>,
54    /// Use comma as the decimal separator.
55    pub decimal_comma: bool,
56    /// Used as separator.
57    pub separator: u8,
58    /// Quoting character.
59    pub quote_char: u8,
60    /// Null value representation.
61    pub null: PlSmallStr,
62    /// String appended after every row.
63    pub line_terminator: PlSmallStr,
64    /// When to insert quotes.
65    pub quote_style: QuoteStyle,
66}
67
68impl Default for SerializeOptions {
69    fn default() -> Self {
70        Self {
71            date_format: None,
72            time_format: None,
73            datetime_format: None,
74            float_scientific: None,
75            float_precision: None,
76            decimal_comma: false,
77            separator: b',',
78            quote_char: b'"',
79            null: PlSmallStr::EMPTY,
80            line_terminator: "\n".into(),
81            quote_style: Default::default(),
82        }
83    }
84}
85
/// Quote style indicating when to insert quotes around a field.
#[derive(Copy, Clone, Debug, Default, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum QuoteStyle {
    /// Quote fields only when necessary.
    ///
    /// Quotes are necessary when fields contain a quote, separator or record terminator.
    /// Quotes are also necessary when writing an empty record (which is indistinguishable
    /// from a record with one empty field).
    /// This is the default.
    #[default]
    Necessary,
    /// Quote every field. Always.
    Always,
    /// Quote non-numeric fields.
    ///
    /// When writing a field that does not parse as a valid float or integer,
    /// quotes will be used even if they aren't strictly necessary.
    NonNumeric,
    /// Never quote any fields, even if it would produce invalid CSV data.
    Never,
}