Skip to main content

polars_io/csv/write/write_impl/
serializer.rs

1//! This file is complicated because we have complicated escape handling. We want to avoid having
2//! to write down each combination of type & escaping, but we also want the compiler to optimize them
3//! to efficient machine code - so no dynamic dispatch. That means a lot of generics and macros.
4//!
5//! We need to differentiate between several kinds of types, and several kinds of escaping we support:
6//!
7//!  - The simplest escaping mechanisms are [`QuoteStyle::Always`] and [`QuoteStyle::Never`].
8//!    For `Never` we just never quote. For `Always` we pass any serializer that never quotes
9//!    to [`quote_serializer()`] then it becomes quoted properly.
10//!  - [`QuoteStyle::Necessary`] (the default) is only relevant for strings and floats with decimal_comma,
11//!    as these are the only types that can have newlines (row separators), commas (default column separators)
12//!    or quotes. String escaping is complicated anyway, and it is all inside [`string_serializer()`].
13//!  - The real complication is [`QuoteStyle::NonNumeric`], that doesn't quote numbers (unless necessary)
14//!    and nulls, and quotes any other thing. The problem is that nulls can be within any type, so we
15//!    need to handle two possibilities of quoting everywhere.
16//!
17//! So in case the chosen style is anything but `NonNumeric`, we statically know for each column except strings
18//! whether it should be quoted (and for strings too when not `Necessary`). There we use
19//! `quote_serializer()` or nothing.
20//!
21//! But to help with `NonNumeric`, each serializer carries the potential to distinguish between nulls and non-nulls,
22//! and quote the latter and not the former. But in order to not have the branch when we statically know the answer,
23//! we have an option to statically disable it with a const generic flag `QUOTE_NON_NULL`. Numbers (that should never
24//! be quoted with `NonNumeric`) just always disable this flag.
25//!
26//! So we have three possibilities:
27//!
28//!  1. A serializer that never quotes. This is a bare serializer with `QUOTE_NON_NULL = false`.
29//!  2. A serializer that always quotes. This is a serializer wrapped with `quote_serializer()`,
30//!     but also with `QUOTE_NON_NULL = false`.
31//!  3. A serializer that quotes only non-nulls. This is a bare serializer with `QUOTE_NON_NULL = true`.
32
33use std::fmt::LowerExp;
34use std::io::Write;
35
36use arrow::array::{Array, BooleanArray, Float16Array, NullArray, PrimitiveArray, Utf8ViewArray};
37use arrow::legacy::time_zone::Tz;
38use arrow::types::NativeType;
39#[cfg(feature = "timezones")]
40use chrono::TimeZone;
41use memchr::{memchr_iter, memchr3};
42use num_traits::NumCast;
43use polars_core::prelude::*;
44use polars_utils::float16::pf16;
45
46use crate::csv::write::{QuoteStyle, SerializeOptions};
47
/// Panic message used when a serializer is asked for more items than its iterator can yield.
const TOO_MANY_MSG: &str = "too many items requested from CSV serializer";
/// Message for an unexpected array type.
// NOTE(review): not referenced in the visible part of this file — confirm where it is used.
const ARRAY_MISMATCH_MSG: &str = "wrong array type";
50
/// A [`std::fmt::Write`] sink that throws away everything written to it.
///
/// Useful for dry-running a formatter: formatting errors still surface, but no output is kept.
#[allow(dead_code)] // In the visible code it is only used by feature-gated date/time formatting.
struct IgnoreFmt;

impl std::fmt::Write for IgnoreFmt {
    /// Accepts any string and reports success without storing it.
    fn write_str(&mut self, _: &str) -> std::fmt::Result {
        Ok(())
    }
}
58
59pub(super) trait Serializer<'a> {
60    fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions);
61}
62
63fn make_serializer<'a, T, I: Iterator<Item = Option<T>>, const QUOTE_NON_NULL: bool>(
64    f: impl FnMut(T, &mut Vec<u8>, &SerializeOptions),
65    iter: I,
66) -> impl Serializer<'a> {
67    struct SerializerImpl<F, I, const QUOTE_NON_NULL: bool> {
68        f: F,
69        iter: I,
70    }
71
72    impl<'a, T, F, I, const QUOTE_NON_NULL: bool> Serializer<'a>
73        for SerializerImpl<F, I, QUOTE_NON_NULL>
74    where
75        F: FnMut(T, &mut Vec<u8>, &SerializeOptions),
76        I: Iterator<Item = Option<T>>,
77    {
78        fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
79            let item = self.iter.next().expect(TOO_MANY_MSG);
80            match item {
81                Some(item) => {
82                    if QUOTE_NON_NULL {
83                        buf.push(options.quote_char);
84                    }
85                    (self.f)(item, buf, options);
86                    if QUOTE_NON_NULL {
87                        buf.push(options.quote_char);
88                    }
89                },
90                None => buf.extend_from_slice(options.null.as_bytes()),
91            }
92        }
93    }
94
95    SerializerImpl::<_, _, QUOTE_NON_NULL> { f, iter }
96}
97
98fn integer_serializer<I: NativeType + itoa::Integer>(
99    array: &PrimitiveArray<I>,
100) -> impl Serializer<'_> {
101    let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
102        let mut buffer = itoa::Buffer::new();
103        let value = buffer.format(item);
104        buf.extend_from_slice(value.as_bytes());
105    };
106
107    make_serializer::<_, _, false>(f, array.iter())
108}
109
110fn float_serializer_no_precision_autoformat_f16(array: &Float16Array) -> impl Serializer<'_> {
111    let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
112        let mut buffer = zmij::Buffer::new();
113        let cast: f32 = NumCast::from(item).unwrap();
114        let value = buffer.format(cast);
115        buf.extend_from_slice(value.as_bytes());
116    };
117    float_serializer_no_precision_autoformat_(array, f)
118}
119
120fn float_serializer_no_precision_autoformat<I: NativeType + zmij::Float>(
121    array: &PrimitiveArray<I>,
122) -> impl Serializer<'_> {
123    let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
124        let mut buffer = zmij::Buffer::new();
125        let value = buffer.format(item);
126        buf.extend_from_slice(value.as_bytes());
127    };
128    float_serializer_no_precision_autoformat_(array, f)
129}
130
131fn float_serializer_no_precision_autoformat_<
132    'a,
133    I: NativeType,
134    F: Fn(&'a I, &mut Vec<u8>, &SerializeOptions),
135>(
136    array: &'a PrimitiveArray<I>,
137    f: F,
138) -> impl Serializer<'a> {
139    make_serializer::<_, _, false>(f, array.iter())
140}
141
142fn float_serializer_no_precision_autoformat_decimal_comma_f16(
143    array: &Float16Array,
144) -> impl Serializer<'_> {
145    let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
146        let mut buffer = zmij::Buffer::new();
147        let cast: f32 = NumCast::from(item).unwrap();
148        let value = buffer.format(cast);
149
150        for ch in value.as_bytes() {
151            buf.push(if *ch == b'.' { b',' } else { *ch });
152        }
153    };
154    float_serializer_no_precision_autoformat_decimal_comma_(array, f)
155}
156
157fn float_serializer_no_precision_autoformat_decimal_comma<I: NativeType + zmij::Float>(
158    array: &PrimitiveArray<I>,
159) -> impl Serializer<'_> {
160    let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
161        let mut buffer = zmij::Buffer::new();
162        let value = buffer.format(item).as_bytes();
163
164        for ch in value {
165            buf.push(if *ch == b'.' { b',' } else { *ch });
166        }
167    };
168    float_serializer_no_precision_autoformat_decimal_comma_(array, f)
169}
170
171fn float_serializer_no_precision_autoformat_decimal_comma_<
172    'a,
173    I: NativeType,
174    F: Fn(&'a I, &mut Vec<u8>, &SerializeOptions),
175>(
176    array: &'a PrimitiveArray<I>,
177    f: F,
178) -> impl Serializer<'a> {
179    make_serializer::<_, _, false>(f, array.iter())
180}
181
182fn float_serializer_no_precision_scientific<I: NativeType + LowerExp>(
183    array: &PrimitiveArray<I>,
184) -> impl Serializer<'_> {
185    let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
186        // Float writing into a buffer of `Vec<u8>` cannot fail.
187        let _ = write!(buf, "{item:.e}");
188    };
189
190    make_serializer::<_, _, false>(f, array.iter())
191}
192
193fn float_serializer_no_precision_scientific_decimal_comma<I: NativeType + LowerExp>(
194    array: &PrimitiveArray<I>,
195) -> impl Serializer<'_> {
196    let mut scratch = Vec::new();
197
198    let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
199        // Float writing into a buffer of `Vec<u8>` cannot fail.
200        let _ = write!(&mut scratch, "{item:.e}");
201        for c in &mut scratch {
202            if *c == b'.' {
203                *c = b',';
204                break;
205            }
206        }
207        buf.extend_from_slice(&scratch);
208    };
209
210    make_serializer::<_, _, false>(f, array.iter())
211}
212
213fn float_serializer_no_precision_positional<I: NativeType + NumCast>(
214    array: &PrimitiveArray<I>,
215) -> impl Serializer<'_> {
216    let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
217        let v: f64 = NumCast::from(item).unwrap();
218        let _ = write!(buf, "{v}");
219    };
220
221    make_serializer::<_, _, false>(f, array.iter())
222}
223
224fn float_serializer_no_precision_positional_decimal_comma<I: NativeType + NumCast>(
225    array: &PrimitiveArray<I>,
226) -> impl Serializer<'_> {
227    let mut scratch = Vec::new();
228
229    let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
230        scratch.clear();
231        let v: f64 = NumCast::from(item).unwrap();
232        let _ = write!(&mut scratch, "{v}");
233        for c in &mut scratch {
234            if *c == b'.' {
235                *c = b',';
236                break;
237            }
238        }
239        buf.extend_from_slice(&scratch);
240    };
241
242    make_serializer::<_, _, false>(f, array.iter())
243}
244
245fn float_serializer_with_precision_scientific<I: NativeType + LowerExp>(
246    array: &PrimitiveArray<I>,
247    precision: usize,
248) -> impl Serializer<'_> {
249    let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
250        // Float writing into a buffer of `Vec<u8>` cannot fail.
251        let _ = write!(buf, "{item:.precision$e}");
252    };
253
254    make_serializer::<_, _, false>(f, array.iter())
255}
256
257fn float_serializer_with_precision_scientific_decimal_comma<I: NativeType + LowerExp>(
258    array: &PrimitiveArray<I>,
259    precision: usize,
260) -> impl Serializer<'_> {
261    let mut scratch = Vec::new();
262
263    let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
264        scratch.clear();
265        // Float writing into a buffer of `Vec<u8>` cannot fail.
266        let _ = write!(&mut scratch, "{item:.precision$e}");
267        for c in &mut scratch {
268            if *c == b'.' {
269                *c = b',';
270                break;
271            }
272        }
273        buf.extend_from_slice(&scratch);
274    };
275
276    make_serializer::<_, _, false>(f, array.iter())
277}
278
279fn float_serializer_with_precision_positional<I: NativeType>(
280    array: &PrimitiveArray<I>,
281    precision: usize,
282) -> impl Serializer<'_> {
283    let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
284        // Float writing into a buffer of `Vec<u8>` cannot fail.
285        let _ = write!(buf, "{item:.precision$}");
286    };
287
288    make_serializer::<_, _, false>(f, array.iter())
289}
290
291fn float_serializer_with_precision_positional_decimal_comma<I: NativeType>(
292    array: &PrimitiveArray<I>,
293    precision: usize,
294) -> impl Serializer<'_> {
295    let mut scratch = Vec::new();
296
297    let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
298        scratch.clear();
299        let _ = write!(&mut scratch, "{item:.precision$}");
300        for c in &mut scratch {
301            if *c == b'.' {
302                *c = b',';
303                break;
304            }
305        }
306        buf.extend_from_slice(&scratch);
307    };
308
309    make_serializer::<_, _, false>(f, array.iter())
310}
311
312fn null_serializer(_array: &NullArray) -> impl Serializer<'_> {
313    struct NullSerializer;
314    impl<'a> Serializer<'a> for NullSerializer {
315        fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
316            buf.extend_from_slice(options.null.as_bytes());
317        }
318    }
319    NullSerializer
320}
321
322fn bool_serializer<const QUOTE_NON_NULL: bool>(array: &BooleanArray) -> impl Serializer<'_> {
323    let f = move |item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
324        let s = if item { "true" } else { "false" };
325        buf.extend_from_slice(s.as_bytes());
326    };
327
328    make_serializer::<_, _, QUOTE_NON_NULL>(f, array.iter())
329}
330
/// Serializes a 128-bit decimal array with the given `scale`.
///
/// The zero-trimming setting is read once when the serializer is built; the decimal separator
/// is taken from `options.decimal_comma` on every call.
#[cfg(feature = "dtype-decimal")]
fn decimal_serializer(array: &PrimitiveArray<i128>, scale: usize) -> impl Serializer<'_> {
    let trim_zeros = arrow::compute::decimal::get_trim_decimal_zeros();
    // Reused formatting buffer, owned by the closure below.
    let mut formatter = polars_compute::decimal::DecimalFmtBuffer::new();

    make_serializer::<_, _, false>(
        move |&value, buf: &mut Vec<u8>, options: &SerializeOptions| {
            let text = formatter.format_dec128(value, scale, trim_zeros, options.decimal_comma);
            buf.extend_from_slice(text.as_bytes());
        },
        array.iter(),
    )
}
346
/// Adapts a `callback` that writes one raw value into a [`Serializer`] over `array`.
///
/// The serialize options are not passed on to the callback. `QUOTE_NON_NULL` is forwarded to
/// [`make_serializer`].
#[cfg(any(
    feature = "dtype-date",
    feature = "dtype-time",
    feature = "dtype-datetime"
))]
fn callback_serializer<'a, T: NativeType, const QUOTE_NON_NULL: bool>(
    array: &'a PrimitiveArray<T>,
    mut callback: impl FnMut(T, &mut Vec<u8>) + 'a,
) -> impl Serializer<'a> {
    make_serializer::<_, _, QUOTE_NON_NULL>(
        move |&item, buf: &mut Vec<u8>, _: &SerializeOptions| callback(item, buf),
        array.iter(),
    )
}
362
/// Borrowing iterator over a slice of pre-parsed chrono format items, in the form passed to
/// `format_with_items`.
#[cfg(any(feature = "dtype-date", feature = "dtype-time"))]
type ChronoFormatIter<'a, 'b> = std::slice::Iter<'a, chrono::format::Item<'b>>;
365
/// Builds a serializer for a date or time column.
///
/// - `format_str`: optional strftime-style format. Without it the converted value's
///   `Display` output is written.
/// - `description`: name of the value kind, used only in error messages.
/// - `array`: the column, downcast to the primitive array type matching `convert`'s input.
/// - `sample_value`: a value used to dry-run the format once, up front.
/// - `convert`: turns the raw stored value into the chrono value `T`.
/// - `format_fn`: formats a `T` with the pre-parsed format items.
/// - `options`: used to select the quoting behaviour.
///
/// # Errors
///
/// Returns a `ComputeError` if `format_str` cannot be parsed or cannot be applied to
/// `sample_value`.
///
/// # Panics
///
/// Panics if `array` is not of the expected primitive array type.
#[cfg(any(feature = "dtype-date", feature = "dtype-time"))]
fn date_and_time_serializer<'a, Underlying: NativeType, T: std::fmt::Display>(
    format_str: Option<&'a str>,
    description: &str,
    array: &'a dyn Array,
    sample_value: T,
    mut convert: impl FnMut(Underlying) -> T + Send + 'a,
    mut format_fn: impl for<'b> FnMut(
        &T,
        ChronoFormatIter<'b, 'a>,
    ) -> chrono::format::DelayedFormat<ChronoFormatIter<'b, 'a>>
    + Send
    + 'a,
    options: &SerializeOptions,
) -> PolarsResult<Box<dyn Serializer<'a> + Send + 'a>> {
    let array = array.as_any().downcast_ref().unwrap();

    // No explicit format: rely on the value's `Display` implementation.
    let Some(format_str) = format_str else {
        let callback = move |item, buf: &mut Vec<u8>| {
            let item = convert(item);
            // Formatting dates into `Vec<u8>` cannot fail.
            let _ = write!(buf, "{item}");
        };
        return Ok(date_and_time_final_serializer(array, callback, options));
    };

    let invalid_format =
        || polars_err!(ComputeError: "cannot format {description} with format '{format_str}'");

    let format = chrono::format::StrftimeItems::new(format_str)
        .parse()
        .map_err(|_| invalid_format())?;

    // Fail fast on an invalid format by formatting the sample value into a discarding sink.
    // This reports the error to the user early and lets `serialize()` stay infallible.
    std::fmt::Write::write_fmt(
        &mut IgnoreFmt,
        format_args!("{}", format_fn(&sample_value, format.iter())),
    )
    .map_err(|_| invalid_format())?;

    let callback = move |item, buf: &mut Vec<u8>| {
        let item = convert(item);
        // The format was validated above, so the result can be ignored.
        let _ = write!(buf, "{}", format_fn(&item, format.iter()));
    };
    Ok(date_and_time_final_serializer(array, callback, options))
}
411
/// Wraps a per-value `callback` into a boxed serializer with the quoting that
/// `options.quote_style` asks for:
///
/// - `Always`: every field, nulls included, is quoted.
/// - `NonNumeric`: only non-null values are quoted.
/// - `Necessary` / `Never`: nothing is quoted.
#[cfg(any(
    feature = "dtype-date",
    feature = "dtype-time",
    feature = "dtype-datetime"
))]
fn date_and_time_final_serializer<'a, T: NativeType>(
    array: &'a PrimitiveArray<T>,
    callback: impl FnMut(T, &mut Vec<u8>) + Send + 'a,
    options: &SerializeOptions,
) -> Box<dyn Serializer<'a> + Send + 'a> {
    match options.quote_style {
        QuoteStyle::Always => {
            let unquoted = callback_serializer::<T, false>(array, callback);
            Box::new(quote_serializer(unquoted)) as Box<dyn Serializer + Send>
        },
        QuoteStyle::NonNumeric => Box::new(callback_serializer::<T, true>(array, callback)),
        QuoteStyle::Necessary | QuoteStyle::Never => {
            Box::new(callback_serializer::<T, false>(array, callback))
        },
    }
}
430
431pub(super) fn string_serializer<'a, Iter: Send + 'a>(
432    mut f: impl FnMut(&mut Iter) -> Option<&str> + Send + 'a,
433    options: &SerializeOptions,
434    mut update: impl FnMut(&'a dyn Array) -> Iter + Send + 'a,
435    array: &'a dyn Array,
436) -> Box<dyn Serializer<'a> + 'a + Send> {
437    const LF: u8 = b'\n';
438    const CR: u8 = b'\r';
439
440    struct StringSerializer<F, Iter> {
441        serialize: F,
442        iter: Iter,
443    }
444
445    impl<'a, F, Iter> Serializer<'a> for StringSerializer<F, Iter>
446    where
447        F: FnMut(&mut Iter, &mut Vec<u8>, &SerializeOptions),
448    {
449        fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
450            (self.serialize)(&mut self.iter, buf, options);
451        }
452    }
453
454    fn serialize_str_escaped(buf: &mut Vec<u8>, s: &[u8], quote_char: u8, quoted: bool) {
455        let mut iter = memchr_iter(quote_char, s);
456        let first_quote = iter.next();
457        match first_quote {
458            None => buf.extend_from_slice(s),
459            Some(mut quote_pos) => {
460                if !quoted {
461                    buf.push(quote_char);
462                }
463                let mut start_pos = 0;
464                loop {
465                    buf.extend_from_slice(&s[start_pos..quote_pos]);
466                    buf.extend_from_slice(&[quote_char, quote_char]);
467                    match iter.next() {
468                        Some(quote) => {
469                            start_pos = quote_pos + 1;
470                            quote_pos = quote;
471                        },
472                        None => {
473                            buf.extend_from_slice(&s[quote_pos + 1..]);
474                            break;
475                        },
476                    }
477                }
478                if !quoted {
479                    buf.push(quote_char);
480                }
481            },
482        }
483    }
484
485    let iter = update(array);
486    match options.quote_style {
487        QuoteStyle::Always => {
488            let serialize =
489                move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
490                    let quote_char = options.quote_char;
491                    buf.push(quote_char);
492                    let Some(s) = f(iter) else {
493                        buf.extend_from_slice(options.null.as_bytes());
494                        buf.push(quote_char);
495                        return;
496                    };
497                    serialize_str_escaped(buf, s.as_bytes(), quote_char, true);
498                    buf.push(quote_char);
499                };
500            Box::new(StringSerializer { serialize, iter })
501        },
502        QuoteStyle::NonNumeric => {
503            let serialize =
504                move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
505                    let Some(s) = f(iter) else {
506                        buf.extend_from_slice(options.null.as_bytes());
507                        return;
508                    };
509                    let quote_char = options.quote_char;
510                    buf.push(quote_char);
511                    serialize_str_escaped(buf, s.as_bytes(), quote_char, true);
512                    buf.push(quote_char);
513                };
514            Box::new(StringSerializer { serialize, iter })
515        },
516        QuoteStyle::Necessary => {
517            let serialize =
518                move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
519                    let Some(s) = f(iter) else {
520                        buf.extend_from_slice(options.null.as_bytes());
521                        return;
522                    };
523                    let quote_char = options.quote_char;
524                    // An empty string conflicts with null, so it is necessary to quote.
525                    if s.is_empty() {
526                        buf.extend_from_slice(&[quote_char, quote_char]);
527                        return;
528                    }
529                    let needs_quote = memchr3(options.separator, LF, CR, s.as_bytes()).is_some();
530                    if needs_quote {
531                        buf.push(quote_char);
532                    }
533                    serialize_str_escaped(buf, s.as_bytes(), quote_char, needs_quote);
534                    if needs_quote {
535                        buf.push(quote_char);
536                    }
537                };
538            Box::new(StringSerializer { serialize, iter })
539        },
540        QuoteStyle::Never => {
541            let serialize =
542                move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
543                    let Some(s) = f(iter) else {
544                        buf.extend_from_slice(options.null.as_bytes());
545                        return;
546                    };
547                    buf.extend_from_slice(s.as_bytes());
548                };
549            Box::new(StringSerializer { serialize, iter })
550        },
551    }
552}
553
554fn quote_serializer<'a>(serializer: impl Serializer<'a>) -> impl Serializer<'a> {
555    struct QuoteSerializer<S>(S);
556    impl<'a, S: Serializer<'a>> Serializer<'a> for QuoteSerializer<S> {
557        fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
558            buf.push(options.quote_char);
559            self.0.serialize(buf, options);
560            buf.push(options.quote_char);
561        }
562    }
563    QuoteSerializer(serializer)
564}
565
566pub(super) fn serializer_for<'a>(
567    array: &'a dyn Array,
568    options: &'a SerializeOptions,
569    dtype: &'a DataType,
570    _datetime_format: &'a str,
571    _time_zone: Option<Tz>,
572) -> PolarsResult<Box<dyn Serializer<'a> + Send + 'a>> {
573    // The needs_quotes flag captures the quote logic for the quote_wrapper! macro
574    // It is targeted at numerical types primarily; other types may require additional logic
575    let needs_quotes = match dtype {
576        DataType::Float16 | DataType::Float32 | DataType::Float64 => {
577            // When comma is used as both the field separator and decimal separator, quoting
578            // may be required. Specifically, when:
579            // - quote_style is Always, or
580            // - quote_style is Necessary or Non-Numeric, the field separator is also a comma,
581            //   and the float string field contains a comma character (no precision or precision > 0)
582            //
583            // In some rare cases, a field may get quoted when it is not strictly necessary
584            // (e.g., in scientific notation when only the first digit is non-zero such as '1e12',
585            // or null values in 'non_numeric' quote_style).
586
587            let mut should_quote = options.decimal_comma && options.separator == b',';
588            if let Some(precision) = options.float_precision {
589                should_quote &= precision > 0;
590            }
591
592            match options.quote_style {
593                QuoteStyle::Always => true,
594                QuoteStyle::Necessary | QuoteStyle::NonNumeric => should_quote,
595                QuoteStyle::Never => false,
596            }
597        },
598        #[cfg(feature = "dtype-decimal")]
599        DataType::Decimal(_, scale) => {
600            // Similar to logic for float data-types, but need to consider scale rather than precision
601            let should_quote = options.decimal_comma && options.separator == b',' && *scale > 0;
602
603            match options.quote_style {
604                QuoteStyle::Always => true,
605                QuoteStyle::Necessary | QuoteStyle::NonNumeric => should_quote,
606                QuoteStyle::Never => false,
607            }
608        },
609        _ => options.quote_style == QuoteStyle::Always,
610    };
611
612    macro_rules! quote_wrapper {
613        ($make_serializer:path, $($arg:tt)*) => {{
614            let serializer = $make_serializer(array.as_any().downcast_ref().unwrap(), $($arg)*);
615            if needs_quotes {
616                Box::new(quote_serializer(serializer)) as Box<dyn Serializer + Send>
617            } else {
618                Box::new(serializer)
619            }
620        }};
621        ($make_serializer:path) => { quote_wrapper!($make_serializer,) };
622    }
623
624    let serializer = match dtype {
625        DataType::Int8 => quote_wrapper!(integer_serializer::<i8>),
626        DataType::UInt8 => quote_wrapper!(integer_serializer::<u8>),
627        DataType::Int16 => quote_wrapper!(integer_serializer::<i16>),
628        DataType::UInt16 => quote_wrapper!(integer_serializer::<u16>),
629        DataType::Int32 => quote_wrapper!(integer_serializer::<i32>),
630        DataType::UInt32 => quote_wrapper!(integer_serializer::<u32>),
631        DataType::Int64 => quote_wrapper!(integer_serializer::<i64>),
632        DataType::UInt64 => quote_wrapper!(integer_serializer::<u64>),
633        DataType::Int128 => quote_wrapper!(integer_serializer::<i128>),
634        DataType::UInt128 => quote_wrapper!(integer_serializer::<u128>),
635        DataType::Float16 => {
636            match (
637                options.decimal_comma,
638                options.float_precision,
639                options.float_scientific,
640            ) {
641                // standard decimal separator (period)
642                (false, Some(precision), Some(true)) => {
643                    quote_wrapper!(
644                        float_serializer_with_precision_scientific::<pf16>,
645                        precision
646                    )
647                },
648                (false, Some(precision), _) => {
649                    quote_wrapper!(
650                        float_serializer_with_precision_positional::<pf16>,
651                        precision
652                    )
653                },
654                (false, None, Some(true)) => {
655                    quote_wrapper!(float_serializer_no_precision_scientific::<pf16>)
656                },
657                (false, None, Some(false)) => {
658                    quote_wrapper!(float_serializer_no_precision_positional::<pf16>)
659                },
660                (false, None, None) => {
661                    quote_wrapper!(float_serializer_no_precision_autoformat_f16)
662                },
663
664                // comma as the decimal separator
665                (true, Some(precision), Some(true)) => quote_wrapper!(
666                    float_serializer_with_precision_scientific_decimal_comma::<pf16>,
667                    precision
668                ),
669                (true, Some(precision), _) => quote_wrapper!(
670                    float_serializer_with_precision_positional_decimal_comma::<pf16>,
671                    precision
672                ),
673                (true, None, Some(true)) => {
674                    quote_wrapper!(float_serializer_no_precision_scientific_decimal_comma::<pf16>)
675                },
676                (true, None, Some(false)) => {
677                    quote_wrapper!(float_serializer_no_precision_positional_decimal_comma::<pf16>)
678                },
679                (true, None, None) => {
680                    quote_wrapper!(float_serializer_no_precision_autoformat_decimal_comma_f16)
681                },
682            }
683        },
684        DataType::Float32 => {
685            match (
686                options.decimal_comma,
687                options.float_precision,
688                options.float_scientific,
689            ) {
690                // standard decimal separator (period)
691                (false, Some(precision), Some(true)) => {
692                    quote_wrapper!(float_serializer_with_precision_scientific::<f32>, precision)
693                },
694                (false, Some(precision), _) => {
695                    quote_wrapper!(float_serializer_with_precision_positional::<f32>, precision)
696                },
697                (false, None, Some(true)) => {
698                    quote_wrapper!(float_serializer_no_precision_scientific::<f32>)
699                },
700                (false, None, Some(false)) => {
701                    quote_wrapper!(float_serializer_no_precision_positional::<f32>)
702                },
703                (false, None, None) => {
704                    quote_wrapper!(float_serializer_no_precision_autoformat::<f32>)
705                },
706
707                // comma as the decimal separator
708                (true, Some(precision), Some(true)) => quote_wrapper!(
709                    float_serializer_with_precision_scientific_decimal_comma::<f32>,
710                    precision
711                ),
712                (true, Some(precision), _) => quote_wrapper!(
713                    float_serializer_with_precision_positional_decimal_comma::<f32>,
714                    precision
715                ),
716                (true, None, Some(true)) => {
717                    quote_wrapper!(float_serializer_no_precision_scientific_decimal_comma::<f32>)
718                },
719                (true, None, Some(false)) => {
720                    quote_wrapper!(float_serializer_no_precision_positional_decimal_comma::<f32>)
721                },
722                (true, None, None) => {
723                    quote_wrapper!(float_serializer_no_precision_autoformat_decimal_comma::<f32>)
724                },
725            }
726        },
727        DataType::Float64 => {
728            match (
729                options.decimal_comma,
730                options.float_precision,
731                options.float_scientific,
732            ) {
733                // standard decimal separator (period)
734                (false, Some(precision), Some(true)) => {
735                    quote_wrapper!(float_serializer_with_precision_scientific::<f64>, precision)
736                },
737                (false, Some(precision), _) => {
738                    quote_wrapper!(float_serializer_with_precision_positional::<f64>, precision)
739                },
740                (false, None, Some(true)) => {
741                    quote_wrapper!(float_serializer_no_precision_scientific::<f64>)
742                },
743                (false, None, Some(false)) => {
744                    quote_wrapper!(float_serializer_no_precision_positional::<f64>)
745                },
746                (false, None, None) => {
747                    quote_wrapper!(float_serializer_no_precision_autoformat::<f64>)
748                },
749
750                // comma as the decimal separator
751                (true, Some(precision), Some(true)) => quote_wrapper!(
752                    float_serializer_with_precision_scientific_decimal_comma::<f64>,
753                    precision
754                ),
755                (true, Some(precision), _) => quote_wrapper!(
756                    float_serializer_with_precision_positional_decimal_comma::<f64>,
757                    precision
758                ),
759                (true, None, Some(true)) => {
760                    quote_wrapper!(float_serializer_no_precision_scientific_decimal_comma::<f64>)
761                },
762                (true, None, Some(false)) => {
763                    quote_wrapper!(float_serializer_no_precision_positional_decimal_comma::<f64>)
764                },
765                (true, None, None) => {
766                    quote_wrapper!(float_serializer_no_precision_autoformat_decimal_comma::<f64>)
767                },
768            }
769        },
770        DataType::Null => quote_wrapper!(null_serializer),
771        DataType::Boolean => {
772            let array = array.as_any().downcast_ref().unwrap();
773            match options.quote_style {
774                QuoteStyle::Always => Box::new(quote_serializer(bool_serializer::<false>(array)))
775                    as Box<dyn Serializer + Send>,
776                QuoteStyle::NonNumeric => Box::new(bool_serializer::<true>(array)),
777                _ => Box::new(bool_serializer::<false>(array)),
778            }
779        },
780        #[cfg(feature = "dtype-date")]
781        DataType::Date => date_and_time_serializer(
782            options.date_format.as_deref(),
783            "NaiveDate",
784            array,
785            chrono::NaiveDate::MAX,
786            arrow::temporal_conversions::date32_to_date,
787            |date, items| date.format_with_items(items),
788            options,
789        )?,
790        #[cfg(feature = "dtype-time")]
791        DataType::Time => date_and_time_serializer(
792            Some(options.time_format.as_deref().unwrap_or("%T%.9f")),
793            "NaiveTime",
794            array,
795            chrono::NaiveTime::MIN,
796            arrow::temporal_conversions::time64ns_to_time,
797            |time, items| time.format_with_items(items),
798            options,
799        )?,
800        #[cfg(feature = "dtype-datetime")]
801        DataType::Datetime(time_unit, _) => {
802            let format = chrono::format::StrftimeItems::new(_datetime_format)
803                .parse()
804                .map_err(|_| {
805                    polars_err!(
806                        ComputeError: "cannot format {} with format '{_datetime_format}'",
807                        if _time_zone.is_some() { "DateTime" } else { "NaiveDateTime" },
808                    )
809                })?;
810            use std::fmt::Write;
811            let sample_datetime = match _time_zone {
812                #[cfg(feature = "timezones")]
813                Some(time_zone) => time_zone
814                    .from_utc_datetime(&chrono::NaiveDateTime::MAX)
815                    .format_with_items(format.iter()),
816                #[cfg(not(feature = "timezones"))]
817                Some(_) => panic!("activate 'timezones' feature"),
818                None => chrono::NaiveDateTime::MAX.format_with_items(format.iter()),
819            };
820            // Fail fast on an invalid format. This returns the error to the user sooner, and allows us to not
821            // return `Result` from `serialize()`.
822            write!(IgnoreFmt, "{sample_datetime}").map_err(|_| {
823                polars_err!(
824                    ComputeError: "cannot format {} with format '{_datetime_format}'",
825                    if _time_zone.is_some() { "DateTime" } else { "NaiveDateTime" },
826                )
827            })?;
828
829            let array = array.as_any().downcast_ref().unwrap();
830
831            macro_rules! time_unit_serializer {
832                ($convert:ident) => {
833                    match _time_zone {
834                        #[cfg(feature = "timezones")]
835                        Some(time_zone) => {
836                            let callback = move |item, buf: &mut Vec<u8>| {
837                                let item = arrow::temporal_conversions::$convert(item);
838                                let item = time_zone.from_utc_datetime(&item);
839                                // We checked the format is valid above.
840                                let _ = write!(buf, "{}", item.format_with_items(format.iter()));
841                            };
842                            date_and_time_final_serializer(array, callback, options)
843                        },
844                        #[cfg(not(feature = "timezones"))]
845                        Some(_) => panic!("activate 'timezones' feature"),
846                        None => {
847                            let callback = move |item, buf: &mut Vec<u8>| {
848                                let item = arrow::temporal_conversions::$convert(item);
849                                // We checked the format is valid above.
850                                let _ = write!(buf, "{}", item.format_with_items(format.iter()));
851                            };
852                            date_and_time_final_serializer(array, callback, options)
853                        },
854                    }
855                };
856            }
857
858            match time_unit {
859                TimeUnit::Nanoseconds => time_unit_serializer!(timestamp_ns_to_datetime),
860                TimeUnit::Microseconds => time_unit_serializer!(timestamp_us_to_datetime),
861                TimeUnit::Milliseconds => time_unit_serializer!(timestamp_ms_to_datetime),
862            }
863        },
864        DataType::String => string_serializer(
865            |iter| Iterator::next(iter).expect(TOO_MANY_MSG),
866            options,
867            |arr| {
868                arr.as_any()
869                    .downcast_ref::<Utf8ViewArray>()
870                    .expect(ARRAY_MISMATCH_MSG)
871                    .iter()
872            },
873            array,
874        ),
875        #[cfg(feature = "dtype-categorical")]
876        DataType::Categorical(_, mapping) | DataType::Enum(_, mapping) => {
877            polars_core::with_match_categorical_physical_type!(dtype.cat_physical().unwrap(), |$C| {
878                string_serializer(
879                    |iter| {
880                        let &idx: &<$C as PolarsCategoricalType>::Native = Iterator::next(iter).expect(TOO_MANY_MSG)?;
881                        Some(unsafe { mapping.cat_to_str_unchecked(idx.as_cat()) })
882                    },
883                    options,
884                    |arr| {
885                        arr.as_any()
886                            .downcast_ref::<PrimitiveArray<<$C as PolarsCategoricalType>::Native>>()
887                            .expect(ARRAY_MISMATCH_MSG)
888                            .iter()
889                    },
890                    array,
891                )
892            })
893        },
894        #[cfg(feature = "dtype-decimal")]
895        DataType::Decimal(_, scale) => {
896            quote_wrapper!(decimal_serializer, *scale)
897        },
898        _ => {
899            polars_bail!(ComputeError: "datatype {dtype} cannot be written to CSV\n\nConsider using JSON or a binary format.")
900        },
901    };
902    Ok(serializer)
903}
904
#[cfg(test)]
mod test {
    use arrow::array::NullArray;
    use polars_core::prelude::ArrowDataType;

    use super::string_serializer;
    use crate::csv::write::options::{QuoteStyle, SerializeOptions};

    /// Returns the default [`SerializeOptions`] with only `quote_style` overridden.
    fn options_with(quote_style: QuoteStyle) -> SerializeOptions {
        SerializeOptions {
            quote_style,
            ..SerializeOptions::default()
        }
    }

    /// Serializes a single optional string with `options` and panics (at the caller's
    /// location) when the produced bytes differ from `expected`.
    #[track_caller]
    fn assert_serialized(options: &SerializeOptions, input: Option<&str>, expected: &str) {
        // The value comes from the closures below, never from the array, so an empty
        // placeholder array is enough to satisfy the signature.
        let placeholder = NullArray::new(ArrowDataType::Null, 0);
        let mut serializer = string_serializer(|value| *value, options, |_| input, &placeholder);

        let mut out = Vec::new();
        serializer.serialize(&mut out, options);
        let actual = std::str::from_utf8(&out).unwrap();

        // Deliberately not `assert_eq!()`: it prints the debug format, which is hard to
        // read with all the escapes.
        assert!(
            actual == expected,
            "CSV string {input:?} wasn't serialized correctly: expected: `{expected}`, got: `{actual}`"
        );
    }

    // The string serializer is the most complex one and has the most edge cases, so it
    // gets a comprehensive test covering every quote style.
    #[test]
    fn test_string_serializer() {
        // `Always`: every value, including null, is expected to come out quoted.
        let always = options_with(QuoteStyle::Always);
        assert_serialized(&always, None, r#""""#);
        assert_serialized(&always, Some(""), r#""""#);
        assert_serialized(&always, Some("a"), r#""a""#);
        assert_serialized(&always, Some("\""), r#""""""#);
        assert_serialized(&always, Some("a\"\"b"), r#""a""""b""#);

        // `Necessary`: plain values stay bare; the empty string and values containing a
        // quote, a comma or a line break are expected to be quoted.
        let necessary = options_with(QuoteStyle::Necessary);
        assert_serialized(&necessary, None, r#""#);
        assert_serialized(&necessary, Some(""), r#""""#);
        assert_serialized(&necessary, Some("a"), r#"a"#);
        assert_serialized(&necessary, Some("\""), r#""""""#);
        assert_serialized(&necessary, Some("a\"\"b"), r#""a""""b""#);
        assert_serialized(&necessary, Some("a b"), r#"a b"#);
        assert_serialized(&necessary, Some("a,b"), r#""a,b""#);
        assert_serialized(&necessary, Some("a\nb"), "\"a\nb\"");
        assert_serialized(&necessary, Some("a\rb"), "\"a\rb\"");

        // `Never`: values are expected to be written verbatim, with no quoting or escaping.
        let never = options_with(QuoteStyle::Never);
        assert_serialized(&never, None, "");
        assert_serialized(&never, Some(""), "");
        assert_serialized(&never, Some("a"), "a");
        assert_serialized(&never, Some("\""), "\"");
        assert_serialized(&never, Some("a\"\"b"), "a\"\"b");
        assert_serialized(&never, Some("a b"), "a b");
        assert_serialized(&never, Some("a,b"), "a,b");
        assert_serialized(&never, Some("a\nb"), "a\nb");
        assert_serialized(&never, Some("a\rb"), "a\rb");

        // `NonNumeric`: nulls stay bare, every non-null string is expected to be quoted.
        let non_numeric = options_with(QuoteStyle::NonNumeric);
        assert_serialized(&non_numeric, None, "");
        assert_serialized(&non_numeric, Some(""), r#""""#);
        assert_serialized(&non_numeric, Some("a"), r#""a""#);
        assert_serialized(&non_numeric, Some("\""), r#""""""#);
        assert_serialized(&non_numeric, Some("a\"\"b"), r#""a""""b""#);
        assert_serialized(&non_numeric, Some("a b"), r#""a b""#);
        assert_serialized(&non_numeric, Some("a,b"), r#""a,b""#);
        assert_serialized(&non_numeric, Some("a\nb"), "\"a\nb\"");
        assert_serialized(&non_numeric, Some("a\rb"), "\"a\rb\"");
    }
}