polars_io/csv/write/write_impl/
serializer.rs

1//! This file is complicated because we have complicated escape handling. We want to avoid having
2//! to write down each combination of type & escaping, but we also want the compiler to optimize them
3//! to efficient machine code - so no dynamic dispatch. That means a lot of generics and macros.
4//!
5//! We need to differentiate between several kinds of types, and several kinds of escaping we support:
6//!
//!  - The simplest escaping mechanisms are [`QuoteStyle::Always`] and [`QuoteStyle::Never`].
//!    For `Never` we just never quote. For `Always` we pass any serializer that never quotes
//!    to [`quote_serializer()`], and then it becomes quoted properly.
10//!  - [`QuoteStyle::Necessary`] (the default) is only relevant for strings, as it is the only type that
11//!    can have newlines (row separators), commas (column separators) or quotes. String
12//!    escaping is complicated anyway, and it is all inside [`string_serializer()`].
13//!  - The real complication is [`QuoteStyle::NonNumeric`], that doesn't quote numbers and nulls,
14//!    and quotes any other thing. The problem is that nulls can be within any type, so we need to handle
15//!    two possibilities of quoting everywhere.
16//!
17//! So in case the chosen style is anything but `NonNumeric`, we statically know for each column except strings
18//! whether it should be quoted (and for strings too when not `Necessary`). There we use `quote_serializer()`
19//! or nothing.
20//!
//! But to help with `NonNumeric`, each serializer carries the potential to distinguish between nulls and non-nulls,
//! and quote the latter and not the former. But in order to not have the branch when we statically know the answer,
//! we have an option to statically disable it with a const generic flag `QUOTE_NON_NULL`. Numbers (that should never
//! be quoted with `NonNumeric`) just always disable this flag.
25//!
26//! So we have three possibilities:
27//!
28//!  1. A serializer that never quotes. This is a bare serializer with `QUOTE_NON_NULL = false`.
29//!  2. A serializer that always quotes. This is a serializer wrapped with `quote_serializer()`,
30//!     but also with `QUOTE_NON_NULL = false`.
31//!  3. A serializer that quotes only non-nulls. This is a bare serializer with `QUOTE_NON_NULL = true`.
32
33use std::fmt::LowerExp;
34use std::io::Write;
35
36use arrow::array::{Array, BooleanArray, NullArray, PrimitiveArray, Utf8ViewArray};
37use arrow::legacy::time_zone::Tz;
38use arrow::types::NativeType;
39#[cfg(feature = "timezones")]
40use chrono::TimeZone;
41use memchr::{memchr_iter, memchr3};
42use num_traits::NumCast;
43use polars_core::prelude::*;
44
45use crate::csv::write::{QuoteStyle, SerializeOptions};
46
47const TOO_MANY_MSG: &str = "too many items requested from CSV serializer";
48const ARRAY_MISMATCH_MSG: &str = "wrong array type";
49
/// A [`std::fmt::Write`] sink that throws away everything written to it.
///
/// It is used to dry-run a formatting operation purely to find out whether it
/// fails, without building an output string nobody reads.
// `dead_code` is allowed because the only users in this file are the temporal
// serializers, which are compiled behind the `dtype-*` features.
#[allow(dead_code)]
struct IgnoreFmt;

impl std::fmt::Write for IgnoreFmt {
    /// Accepts any input and always reports success.
    fn write_str(&mut self, _: &str) -> std::fmt::Result {
        Ok(())
    }
}
57
/// A stateful, per-column CSV value serializer.
///
/// The lifetime `'a` is the lifetime of the arrays the serializer reads from.
pub(super) trait Serializer<'a> {
    /// Appends the serialized form of one value to `buf`, formatted according to `options`.
    ///
    /// The iterator-backed implementations in this file advance to the next value on every
    /// call and panic when asked for more values than the current array holds.
    fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions);
    /// Updates the array without changing the configuration.
    fn update_array(&mut self, array: &'a dyn Array);
}
63
64fn make_serializer<'a, T, I: Iterator<Item = Option<T>>, const QUOTE_NON_NULL: bool>(
65    f: impl FnMut(T, &mut Vec<u8>, &SerializeOptions),
66    iter: I,
67    update_array: impl FnMut(&'a dyn Array) -> I,
68) -> impl Serializer<'a> {
69    struct SerializerImpl<F, I, Update, const QUOTE_NON_NULL: bool> {
70        f: F,
71        iter: I,
72        update_array: Update,
73    }
74
75    impl<'a, T, F, I, Update, const QUOTE_NON_NULL: bool> Serializer<'a>
76        for SerializerImpl<F, I, Update, QUOTE_NON_NULL>
77    where
78        F: FnMut(T, &mut Vec<u8>, &SerializeOptions),
79        I: Iterator<Item = Option<T>>,
80        Update: FnMut(&'a dyn Array) -> I,
81    {
82        fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
83            let item = self.iter.next().expect(TOO_MANY_MSG);
84            match item {
85                Some(item) => {
86                    if QUOTE_NON_NULL {
87                        buf.push(options.quote_char);
88                    }
89                    (self.f)(item, buf, options);
90                    if QUOTE_NON_NULL {
91                        buf.push(options.quote_char);
92                    }
93                },
94                None => buf.extend_from_slice(options.null.as_bytes()),
95            }
96        }
97
98        fn update_array(&mut self, array: &'a dyn Array) {
99            self.iter = (self.update_array)(array);
100        }
101    }
102
103    SerializerImpl::<_, _, _, QUOTE_NON_NULL> {
104        f,
105        iter,
106        update_array,
107    }
108}
109
110fn integer_serializer<I: NativeType + itoa::Integer>(array: &PrimitiveArray<I>) -> impl Serializer {
111    let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
112        let mut buffer = itoa::Buffer::new();
113        let value = buffer.format(item);
114        buf.extend_from_slice(value.as_bytes());
115    };
116
117    make_serializer::<_, _, false>(f, array.iter(), |array| {
118        array
119            .as_any()
120            .downcast_ref::<PrimitiveArray<I>>()
121            .expect(ARRAY_MISMATCH_MSG)
122            .iter()
123    })
124}
125
126fn float_serializer_no_precision_autoformat<I: NativeType + ryu::Float>(
127    array: &PrimitiveArray<I>,
128) -> impl Serializer {
129    let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
130        let mut buffer = ryu::Buffer::new();
131        let value = buffer.format(item);
132        buf.extend_from_slice(value.as_bytes());
133    };
134
135    make_serializer::<_, _, false>(f, array.iter(), |array| {
136        array
137            .as_any()
138            .downcast_ref::<PrimitiveArray<I>>()
139            .expect(ARRAY_MISMATCH_MSG)
140            .iter()
141    })
142}
143
144fn float_serializer_no_precision_scientific<I: NativeType + LowerExp>(
145    array: &PrimitiveArray<I>,
146) -> impl Serializer {
147    let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
148        // Float writing into a buffer of `Vec<u8>` cannot fail.
149        let _ = write!(buf, "{item:.e}");
150    };
151
152    make_serializer::<_, _, false>(f, array.iter(), |array| {
153        array
154            .as_any()
155            .downcast_ref::<PrimitiveArray<I>>()
156            .expect(ARRAY_MISMATCH_MSG)
157            .iter()
158    })
159}
160
161fn float_serializer_no_precision_positional<I: NativeType + NumCast>(
162    array: &PrimitiveArray<I>,
163) -> impl Serializer {
164    let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
165        let v: f64 = NumCast::from(item).unwrap();
166        let value = v.to_string();
167        buf.extend_from_slice(value.as_bytes());
168    };
169
170    make_serializer::<_, _, false>(f, array.iter(), |array| {
171        array
172            .as_any()
173            .downcast_ref::<PrimitiveArray<I>>()
174            .expect(ARRAY_MISMATCH_MSG)
175            .iter()
176    })
177}
178
179fn float_serializer_with_precision_scientific<I: NativeType + LowerExp>(
180    array: &PrimitiveArray<I>,
181    precision: usize,
182) -> impl Serializer {
183    let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
184        // Float writing into a buffer of `Vec<u8>` cannot fail.
185        let _ = write!(buf, "{item:.precision$e}");
186    };
187
188    make_serializer::<_, _, false>(f, array.iter(), |array| {
189        array
190            .as_any()
191            .downcast_ref::<PrimitiveArray<I>>()
192            .expect(ARRAY_MISMATCH_MSG)
193            .iter()
194    })
195}
196
197fn float_serializer_with_precision_positional<I: NativeType>(
198    array: &PrimitiveArray<I>,
199    precision: usize,
200) -> impl Serializer {
201    let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
202        // Float writing into a buffer of `Vec<u8>` cannot fail.
203        let _ = write!(buf, "{item:.precision$}");
204    };
205
206    make_serializer::<_, _, false>(f, array.iter(), |array| {
207        array
208            .as_any()
209            .downcast_ref::<PrimitiveArray<I>>()
210            .expect(ARRAY_MISMATCH_MSG)
211            .iter()
212    })
213}
214
215fn null_serializer(_array: &NullArray) -> impl Serializer {
216    struct NullSerializer;
217    impl<'a> Serializer<'a> for NullSerializer {
218        fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
219            buf.extend_from_slice(options.null.as_bytes());
220        }
221        fn update_array(&mut self, _array: &'a dyn Array) {}
222    }
223    NullSerializer
224}
225
226fn bool_serializer<const QUOTE_NON_NULL: bool>(array: &BooleanArray) -> impl Serializer {
227    let f = move |item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
228        let s = if item { "true" } else { "false" };
229        buf.extend_from_slice(s.as_bytes());
230    };
231
232    make_serializer::<_, _, QUOTE_NON_NULL>(f, array.iter(), |array| {
233        array
234            .as_any()
235            .downcast_ref::<BooleanArray>()
236            .expect(ARRAY_MISMATCH_MSG)
237            .iter()
238    })
239}
240
/// Creates an unquoted serializer for decimal columns stored as `i128` with the given `scale`.
///
/// The zero-trimming setting is read once, when the serializer is created, and a single
/// formatting buffer is reused for every value.
///
/// # Panics
///
/// `update_array()` on the returned serializer panics if handed an array that is not a
/// `PrimitiveArray<i128>`.
#[cfg(feature = "dtype-decimal")]
fn decimal_serializer(array: &PrimitiveArray<i128>, scale: usize) -> impl Serializer {
    let trim_zeros = arrow::compute::decimal::get_trim_decimal_zeros();
    let mut scratch = arrow::compute::decimal::DecimalFmtBuffer::new();

    let write_decimal = move |value: &i128, out: &mut Vec<u8>, _: &SerializeOptions| {
        let text = scratch.format(*value, scale, trim_zeros);
        out.extend_from_slice(text.as_bytes());
    };

    make_serializer::<_, _, false>(write_decimal, array.iter(), |next| {
        let typed: &PrimitiveArray<i128> =
            next.as_any().downcast_ref().expect(ARRAY_MISMATCH_MSG);
        typed.iter()
    })
}
258
/// Creates a serializer for a primitive column whose non-null values are written by `callback`.
///
/// `callback` receives the raw value and the output buffer. `QUOTE_NON_NULL` is forwarded to
/// [`make_serializer()`] and controls whether non-null values are wrapped in the quote character.
///
/// # Panics
///
/// `update_array()` on the returned serializer panics if handed an array that is not a
/// `PrimitiveArray<T>`.
#[cfg(any(
    feature = "dtype-date",
    feature = "dtype-time",
    feature = "dtype-datetime"
))]
fn callback_serializer<'a, T: NativeType, const QUOTE_NON_NULL: bool>(
    array: &'a PrimitiveArray<T>,
    mut callback: impl FnMut(T, &mut Vec<u8>) + 'a,
) -> impl Serializer<'a> {
    // Adapt the two-argument callback to the three-argument shape `make_serializer()` expects.
    let forward = move |value: &T, out: &mut Vec<u8>, _: &SerializeOptions| {
        callback(*value, out);
    };

    make_serializer::<_, _, QUOTE_NON_NULL>(forward, array.iter(), |next| {
        let typed: &PrimitiveArray<T> = next.as_any().downcast_ref().expect(ARRAY_MISMATCH_MSG);
        typed.iter()
    })
}
280
/// Iterator over pre-parsed chrono format items: `'a` is the borrow of the item slice and
/// `'b` is the lifetime carried by the items themselves.
#[cfg(any(feature = "dtype-date", feature = "dtype-time"))]
type ChronoFormatIter<'a, 'b> = std::slice::Iter<'a, chrono::format::Item<'b>>;
283
/// Creates the serializer for a date-like or time-like column.
///
/// * `format_str` - optional strftime-style format; without it the `Display` output of `T` is used.
/// * `description` - type name used in error messages.
/// * `array` - the column; it is downcast to the primitive array type implied by `convert`.
/// * `sample_value` - a value formatted once up front to validate `format_str`.
/// * `convert` - turns the stored primitive into the chrono value `T`.
/// * `format_fn` - formats a `T` with the pre-parsed format items.
/// * `options` - selects the quoting behaviour.
///
/// # Errors
///
/// Returns a `ComputeError` when `format_str` cannot be parsed, or when formatting
/// `sample_value` with it fails.
///
/// # Panics
///
/// Panics if `array` cannot be downcast to the expected primitive array type.
#[cfg(any(feature = "dtype-date", feature = "dtype-time"))]
fn date_and_time_serializer<'a, Underlying: NativeType, T: std::fmt::Display>(
    format_str: &'a Option<String>,
    description: &str,
    array: &'a dyn Array,
    sample_value: T,
    mut convert: impl FnMut(Underlying) -> T + Send + 'a,
    mut format_fn: impl for<'b> FnMut(
        &T,
        ChronoFormatIter<'b, 'a>,
    ) -> chrono::format::DelayedFormat<ChronoFormatIter<'b, 'a>>
    + Send
    + 'a,
    options: &SerializeOptions,
) -> PolarsResult<Box<dyn Serializer<'a> + Send + 'a>> {
    use std::fmt::Write;

    let array = array.as_any().downcast_ref().unwrap();

    let Some(format_str) = format_str else {
        // No explicit format was requested: fall back to the `Display` output of `T`.
        let callback = move |item, buf: &mut Vec<u8>| {
            let item = convert(item);
            // Formatting dates into `Vec<u8>` cannot fail.
            let _ = write!(buf, "{item}");
        };
        return Ok(date_and_time_final_serializer(array, callback, options));
    };

    // Both failure modes below report the same error.
    let invalid_format =
        || polars_err!(ComputeError: "cannot format {description} with format '{format_str}'");

    let format = chrono::format::StrftimeItems::new(format_str)
        .parse()
        .map_err(|_| invalid_format())?;

    // Fail fast on an invalid format. This returns the error to the user sooner, and allows
    // `serialize()` to not return a `Result`.
    write!(IgnoreFmt, "{}", format_fn(&sample_value, format.iter()))
        .map_err(|_| invalid_format())?;

    let callback = move |item, buf: &mut Vec<u8>| {
        let item = convert(item);
        // We checked the format is valid above.
        let _ = write!(buf, "{}", format_fn(&item, format.iter()));
    };
    Ok(date_and_time_final_serializer(array, callback, options))
}
329
/// Boxes a [`callback_serializer()`] for a temporal column, applying the configured quote style.
///
/// * `Always` - every field, nulls included, is wrapped by [`quote_serializer()`].
/// * `NonNumeric` - only non-null values are quoted.
/// * `Necessary` / `Never` - nothing is quoted.
#[cfg(any(
    feature = "dtype-date",
    feature = "dtype-time",
    feature = "dtype-datetime"
))]
fn date_and_time_final_serializer<'a, T: NativeType>(
    array: &'a PrimitiveArray<T>,
    callback: impl FnMut(T, &mut Vec<u8>) + Send + 'a,
    options: &SerializeOptions,
) -> Box<dyn Serializer<'a> + Send + 'a> {
    match options.quote_style {
        QuoteStyle::NonNumeric => Box::new(callback_serializer::<T, true>(array, callback)),
        QuoteStyle::Always => {
            let bare = callback_serializer::<T, false>(array, callback);
            Box::new(quote_serializer(bare))
        },
        QuoteStyle::Necessary | QuoteStyle::Never => {
            Box::new(callback_serializer::<T, false>(array, callback))
        },
    }
}
348
349pub(super) fn string_serializer<'a, Iter: Send + 'a>(
350    mut f: impl FnMut(&mut Iter) -> Option<&str> + Send + 'a,
351    options: &SerializeOptions,
352    mut update: impl FnMut(&'a dyn Array) -> Iter + Send + 'a,
353    array: &'a dyn Array,
354) -> Box<dyn Serializer<'a> + 'a + Send> {
355    const LF: u8 = b'\n';
356    const CR: u8 = b'\r';
357
358    struct StringSerializer<F, Iter, Update> {
359        serialize: F,
360        update: Update,
361        iter: Iter,
362    }
363
364    impl<'a, F, Iter, Update> Serializer<'a> for StringSerializer<F, Iter, Update>
365    where
366        F: FnMut(&mut Iter, &mut Vec<u8>, &SerializeOptions),
367        Update: FnMut(&'a dyn Array) -> Iter,
368    {
369        fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
370            (self.serialize)(&mut self.iter, buf, options);
371        }
372
373        fn update_array(&mut self, array: &'a dyn Array) {
374            self.iter = (self.update)(array);
375        }
376    }
377
378    fn serialize_str_escaped(buf: &mut Vec<u8>, s: &[u8], quote_char: u8, quoted: bool) {
379        let mut iter = memchr_iter(quote_char, s);
380        let first_quote = iter.next();
381        match first_quote {
382            None => buf.extend_from_slice(s),
383            Some(mut quote_pos) => {
384                if !quoted {
385                    buf.push(quote_char);
386                }
387                let mut start_pos = 0;
388                loop {
389                    buf.extend_from_slice(&s[start_pos..quote_pos]);
390                    buf.extend_from_slice(&[quote_char, quote_char]);
391                    match iter.next() {
392                        Some(quote) => {
393                            start_pos = quote_pos + 1;
394                            quote_pos = quote;
395                        },
396                        None => {
397                            buf.extend_from_slice(&s[quote_pos + 1..]);
398                            break;
399                        },
400                    }
401                }
402                if !quoted {
403                    buf.push(quote_char);
404                }
405            },
406        }
407    }
408
409    let iter = update(array);
410    match options.quote_style {
411        QuoteStyle::Always => {
412            let serialize =
413                move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
414                    let quote_char = options.quote_char;
415                    buf.push(quote_char);
416                    let Some(s) = f(iter) else {
417                        buf.extend_from_slice(options.null.as_bytes());
418                        buf.push(quote_char);
419                        return;
420                    };
421                    serialize_str_escaped(buf, s.as_bytes(), quote_char, true);
422                    buf.push(quote_char);
423                };
424            Box::new(StringSerializer {
425                serialize,
426                update,
427                iter,
428            })
429        },
430        QuoteStyle::NonNumeric => {
431            let serialize =
432                move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
433                    let Some(s) = f(iter) else {
434                        buf.extend_from_slice(options.null.as_bytes());
435                        return;
436                    };
437                    let quote_char = options.quote_char;
438                    buf.push(quote_char);
439                    serialize_str_escaped(buf, s.as_bytes(), quote_char, true);
440                    buf.push(quote_char);
441                };
442            Box::new(StringSerializer {
443                serialize,
444                update,
445                iter,
446            })
447        },
448        QuoteStyle::Necessary => {
449            let serialize =
450                move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
451                    let Some(s) = f(iter) else {
452                        buf.extend_from_slice(options.null.as_bytes());
453                        return;
454                    };
455                    let quote_char = options.quote_char;
456                    // An empty string conflicts with null, so it is necessary to quote.
457                    if s.is_empty() {
458                        buf.extend_from_slice(&[quote_char, quote_char]);
459                        return;
460                    }
461                    let needs_quote = memchr3(options.separator, LF, CR, s.as_bytes()).is_some();
462                    if needs_quote {
463                        buf.push(quote_char);
464                    }
465                    serialize_str_escaped(buf, s.as_bytes(), quote_char, needs_quote);
466                    if needs_quote {
467                        buf.push(quote_char);
468                    }
469                };
470            Box::new(StringSerializer {
471                serialize,
472                update,
473                iter,
474            })
475        },
476        QuoteStyle::Never => {
477            let serialize =
478                move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
479                    let Some(s) = f(iter) else {
480                        buf.extend_from_slice(options.null.as_bytes());
481                        return;
482                    };
483                    buf.extend_from_slice(s.as_bytes());
484                };
485            Box::new(StringSerializer {
486                serialize,
487                update,
488                iter,
489            })
490        },
491    }
492}
493
494fn quote_serializer<'a>(serializer: impl Serializer<'a>) -> impl Serializer<'a> {
495    struct QuoteSerializer<S>(S);
496    impl<'a, S: Serializer<'a>> Serializer<'a> for QuoteSerializer<S> {
497        fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
498            buf.push(options.quote_char);
499            self.0.serialize(buf, options);
500            buf.push(options.quote_char);
501        }
502
503        fn update_array(&mut self, array: &'a dyn Array) {
504            self.0.update_array(array);
505        }
506    }
507    QuoteSerializer(serializer)
508}
509
/// Builds the boxed [`Serializer`] matching `dtype` for the given `array` and `options`.
///
/// `_datetime_format` and `_time_zone` are only consulted for `Datetime` columns, which are
/// compiled in behind the `dtype-datetime` feature (hence the leading underscores).
///
/// # Errors
///
/// Returns a `ComputeError` when `dtype` has no CSV representation here, or when a date, time
/// or datetime format string is rejected.
///
/// # Panics
///
/// Panics if `array` cannot be downcast to the array type expected for `dtype`, or if a time
/// zone is supplied for a `Datetime` column while the `timezones` feature is disabled.
pub(super) fn serializer_for<'a>(
    array: &'a dyn Array,
    options: &'a SerializeOptions,
    dtype: &'a DataType,
    _datetime_format: &'a str,
    _time_zone: Option<Tz>,
) -> PolarsResult<Box<dyn Serializer<'a> + Send + 'a>> {
    // Builds a serializer with `$make_serializer(downcast array, args...)` and wraps it in
    // `quote_serializer()` only for `QuoteStyle::Always`. Used for the types that are never
    // quoted under any other style (numbers and nulls).
    macro_rules! quote_if_always {
        ($make_serializer:path, $($arg:tt)*) => {{
            let serializer = $make_serializer(array.as_any().downcast_ref().unwrap(), $($arg)*);
            if let QuoteStyle::Always = options.quote_style {
                Box::new(quote_serializer(serializer)) as Box<dyn Serializer + Send>
            } else {
                Box::new(serializer)
            }
        }};
        ($make_serializer:path) => { quote_if_always!($make_serializer,) };
    }

    let serializer = match dtype {
        DataType::Int8 => quote_if_always!(integer_serializer::<i8>),
        DataType::UInt8 => quote_if_always!(integer_serializer::<u8>),
        DataType::Int16 => quote_if_always!(integer_serializer::<i16>),
        DataType::UInt16 => quote_if_always!(integer_serializer::<u16>),
        DataType::Int32 => quote_if_always!(integer_serializer::<i32>),
        DataType::UInt32 => quote_if_always!(integer_serializer::<u32>),
        DataType::Int64 => quote_if_always!(integer_serializer::<i64>),
        DataType::UInt64 => quote_if_always!(integer_serializer::<u64>),
        DataType::Int128 => quote_if_always!(integer_serializer::<i128>),
        // Floats: the serializer is chosen from the (precision, scientific) option pair.
        // With a precision set, anything but `Some(true)` means positional notation.
        DataType::Float32 => match options.float_precision {
            Some(precision) => match options.float_scientific {
                Some(true) => {
                    quote_if_always!(float_serializer_with_precision_scientific::<f32>, precision)
                },
                _ => quote_if_always!(float_serializer_with_precision_positional::<f32>, precision),
            },
            None => match options.float_scientific {
                Some(true) => quote_if_always!(float_serializer_no_precision_scientific::<f32>),
                Some(false) => quote_if_always!(float_serializer_no_precision_positional::<f32>),
                None => quote_if_always!(float_serializer_no_precision_autoformat::<f32>),
            },
        },
        DataType::Float64 => match options.float_precision {
            Some(precision) => match options.float_scientific {
                Some(true) => {
                    quote_if_always!(float_serializer_with_precision_scientific::<f64>, precision)
                },
                _ => quote_if_always!(float_serializer_with_precision_positional::<f64>, precision),
            },
            None => match options.float_scientific {
                Some(true) => quote_if_always!(float_serializer_no_precision_scientific::<f64>),
                Some(false) => quote_if_always!(float_serializer_no_precision_positional::<f64>),
                None => quote_if_always!(float_serializer_no_precision_autoformat::<f64>),
            },
        },
        DataType::Null => quote_if_always!(null_serializer),
        // Booleans are non-numeric, so `NonNumeric` quotes their non-null values.
        DataType::Boolean => {
            let array = array.as_any().downcast_ref().unwrap();
            match options.quote_style {
                QuoteStyle::Always => Box::new(quote_serializer(bool_serializer::<false>(array)))
                    as Box<dyn Serializer + Send>,
                QuoteStyle::NonNumeric => Box::new(bool_serializer::<true>(array)),
                _ => Box::new(bool_serializer::<false>(array)),
            }
        },
        #[cfg(feature = "dtype-date")]
        DataType::Date => date_and_time_serializer(
            &options.date_format,
            "NaiveDate",
            array,
            chrono::NaiveDate::MAX,
            arrow::temporal_conversions::date32_to_date,
            |date, items| date.format_with_items(items),
            options,
        )?,
        #[cfg(feature = "dtype-time")]
        DataType::Time => date_and_time_serializer(
            &options.time_format,
            "NaiveTime",
            array,
            chrono::NaiveTime::MIN,
            arrow::temporal_conversions::time64ns_to_time,
            |time, items| time.format_with_items(items),
            options,
        )?,
        #[cfg(feature = "dtype-datetime")]
        DataType::Datetime(time_unit, _) => {
            // Parse the format once; the parsed items are reused for every value.
            let format = chrono::format::StrftimeItems::new(_datetime_format)
                .parse()
                .map_err(|_| {
                    polars_err!(
                        ComputeError: "cannot format {} with format '{_datetime_format}'",
                        if _time_zone.is_some() { "DateTime" } else { "NaiveDateTime" },
                    )
                })?;
            use std::fmt::Write;
            let sample_datetime = match _time_zone {
                #[cfg(feature = "timezones")]
                Some(time_zone) => time_zone
                    .from_utc_datetime(&chrono::NaiveDateTime::MAX)
                    .format_with_items(format.iter()),
                #[cfg(not(feature = "timezones"))]
                Some(_) => panic!("activate 'timezones' feature"),
                None => chrono::NaiveDateTime::MAX.format_with_items(format.iter()),
            };
            // Fail fast on an invalid format. This returns the error to the user sooner, and
            // allows `serialize()` to not return a `Result`.
            write!(IgnoreFmt, "{sample_datetime}").map_err(|_| {
                polars_err!(
                    ComputeError: "cannot format {} with format '{_datetime_format}'",
                    if _time_zone.is_some() { "DateTime" } else { "NaiveDateTime" },
                )
            })?;

            let array = array.as_any().downcast_ref().unwrap();

            // Expands to the serializer for one time unit; `$convert` names the
            // `arrow::temporal_conversions` function that turns the raw timestamp into a
            // naive datetime.
            macro_rules! time_unit_serializer {
                ($convert:ident) => {
                    match _time_zone {
                        #[cfg(feature = "timezones")]
                        Some(time_zone) => {
                            let callback = move |item, buf: &mut Vec<u8>| {
                                let item = arrow::temporal_conversions::$convert(item);
                                let item = time_zone.from_utc_datetime(&item);
                                // We checked the format is valid above.
                                let _ = write!(buf, "{}", item.format_with_items(format.iter()));
                            };
                            date_and_time_final_serializer(array, callback, options)
                        },
                        #[cfg(not(feature = "timezones"))]
                        Some(_) => panic!("activate 'timezones' feature"),
                        None => {
                            let callback = move |item, buf: &mut Vec<u8>| {
                                let item = arrow::temporal_conversions::$convert(item);
                                // We checked the format is valid above.
                                let _ = write!(buf, "{}", item.format_with_items(format.iter()));
                            };
                            date_and_time_final_serializer(array, callback, options)
                        },
                    }
                };
            }

            match time_unit {
                TimeUnit::Nanoseconds => time_unit_serializer!(timestamp_ns_to_datetime),
                TimeUnit::Microseconds => time_unit_serializer!(timestamp_us_to_datetime),
                TimeUnit::Milliseconds => time_unit_serializer!(timestamp_ms_to_datetime),
            }
        },
        DataType::String => string_serializer(
            |iter| Iterator::next(iter).expect(TOO_MANY_MSG),
            options,
            |arr| {
                arr.as_any()
                    .downcast_ref::<Utf8ViewArray>()
                    .expect(ARRAY_MISMATCH_MSG)
                    .iter()
            },
            array,
        ),
        // Categoricals and enums are stored as `u32` indices; each index is looked up in the
        // reverse mapping and the resulting string goes through the string serializer.
        #[cfg(feature = "dtype-categorical")]
        DataType::Categorical(rev_map, _) | DataType::Enum(rev_map, _) => {
            let rev_map = rev_map.as_deref().unwrap();
            string_serializer(
                |iter| {
                    let &idx: &u32 = Iterator::next(iter).expect(TOO_MANY_MSG)?;
                    Some(rev_map.get(idx))
                },
                options,
                |arr| {
                    arr.as_any()
                        .downcast_ref::<PrimitiveArray<u32>>()
                        .expect(ARRAY_MISMATCH_MSG)
                        .iter()
                },
                array,
            )
        },
        #[cfg(feature = "dtype-decimal")]
        DataType::Decimal(_, scale) => {
            // A missing scale is treated as 0.
            quote_if_always!(decimal_serializer, scale.unwrap_or(0))
        },
        _ => {
            polars_bail!(ComputeError: "datatype {dtype} cannot be written to CSV\n\nConsider using JSON or a binary format.")
        },
    };
    Ok(serializer)
}
698
#[cfg(test)]
mod test {
    use arrow::array::NullArray;
    use polars_core::prelude::ArrowDataType;

    use super::string_serializer;
    use crate::csv::write::options::{QuoteStyle, SerializeOptions};

    // It is the most complex serializer with most edge cases, it definitely needs a comprehensive test.
    #[test]
    fn test_string_serializer() {
        // Serializes the single optional string `s` under `options` and compares the output
        // with `expected`. `#[track_caller]` makes a failure point at the calling line.
        #[track_caller]
        fn check_string_serialization(options: &SerializeOptions, s: Option<&str>, expected: &str) {
            // The array is a placeholder: the `update` closure ignores it and hands back `s`
            // directly as the "iterator" state.
            let fake_array = NullArray::new(ArrowDataType::Null, 0);
            let mut serializer = string_serializer(|s| *s, options, |_| s, &fake_array);
            let mut buf = Vec::new();
            serializer.serialize(&mut buf, options);
            let serialized = std::str::from_utf8(&buf).unwrap();
            // Don't use `assert_eq!()` because it prints debug format and it's hard to read with all the escapes.
            if serialized != expected {
                panic!(
                    "CSV string {s:?} wasn't serialized correctly: expected: `{expected}`, got: `{serialized}`"
                );
            }
        }

        // `Always`: every field is quoted, including nulls; embedded quotes are doubled.
        let always_quote = SerializeOptions {
            quote_style: QuoteStyle::Always,
            ..SerializeOptions::default()
        };
        check_string_serialization(&always_quote, None, r#""""#);
        check_string_serialization(&always_quote, Some(""), r#""""#);
        check_string_serialization(&always_quote, Some("a"), r#""a""#);
        check_string_serialization(&always_quote, Some("\""), r#""""""#);
        check_string_serialization(&always_quote, Some("a\"\"b"), r#""a""""b""#);

        // `Necessary`: quote only empty strings and values containing a quote, the separator
        // or a line break.
        let necessary_quote = SerializeOptions {
            quote_style: QuoteStyle::Necessary,
            ..SerializeOptions::default()
        };
        check_string_serialization(&necessary_quote, None, r#""#);
        check_string_serialization(&necessary_quote, Some(""), r#""""#);
        check_string_serialization(&necessary_quote, Some("a"), r#"a"#);
        check_string_serialization(&necessary_quote, Some("\""), r#""""""#);
        check_string_serialization(&necessary_quote, Some("a\"\"b"), r#""a""""b""#);
        check_string_serialization(&necessary_quote, Some("a b"), r#"a b"#);
        check_string_serialization(&necessary_quote, Some("a,b"), r#""a,b""#);
        check_string_serialization(&necessary_quote, Some("a\nb"), "\"a\nb\"");
        check_string_serialization(&necessary_quote, Some("a\rb"), "\"a\rb\"");

        // `Never`: values pass through verbatim, with no quoting or escaping at all.
        let never_quote = SerializeOptions {
            quote_style: QuoteStyle::Never,
            ..SerializeOptions::default()
        };
        check_string_serialization(&never_quote, None, "");
        check_string_serialization(&never_quote, Some(""), "");
        check_string_serialization(&never_quote, Some("a"), "a");
        check_string_serialization(&never_quote, Some("\""), "\"");
        check_string_serialization(&never_quote, Some("a\"\"b"), "a\"\"b");
        check_string_serialization(&never_quote, Some("a b"), "a b");
        check_string_serialization(&never_quote, Some("a,b"), "a,b");
        check_string_serialization(&never_quote, Some("a\nb"), "a\nb");
        check_string_serialization(&never_quote, Some("a\rb"), "a\rb");

        // `NonNumeric`: every non-null string is quoted; nulls are written bare.
        let non_numeric_quote = SerializeOptions {
            quote_style: QuoteStyle::NonNumeric,
            ..SerializeOptions::default()
        };
        check_string_serialization(&non_numeric_quote, None, "");
        check_string_serialization(&non_numeric_quote, Some(""), r#""""#);
        check_string_serialization(&non_numeric_quote, Some("a"), r#""a""#);
        check_string_serialization(&non_numeric_quote, Some("\""), r#""""""#);
        check_string_serialization(&non_numeric_quote, Some("a\"\"b"), r#""a""""b""#);
        check_string_serialization(&non_numeric_quote, Some("a b"), r#""a b""#);
        check_string_serialization(&non_numeric_quote, Some("a,b"), r#""a,b""#);
        check_string_serialization(&non_numeric_quote, Some("a\nb"), "\"a\nb\"");
        check_string_serialization(&non_numeric_quote, Some("a\rb"), "\"a\rb\"");
    }
}