1use std::fmt::LowerExp;
34use std::io::Write;
35
36use arrow::array::{Array, BooleanArray, Float16Array, NullArray, PrimitiveArray, Utf8ViewArray};
37use arrow::legacy::time_zone::Tz;
38use arrow::types::NativeType;
39#[cfg(feature = "timezones")]
40use chrono::TimeZone;
41use memchr::{memchr_iter, memchr3};
42use num_traits::NumCast;
43use polars_core::prelude::*;
44use polars_utils::float16::pf16;
45
46use crate::csv::write::{QuoteStyle, SerializeOptions};
47
/// Panic message used when a serializer is asked for more values than its iterator yields.
const TOO_MANY_MSG: &str = "too many items requested from CSV serializer";
/// Panic message used when an array cannot be downcast to the expected concrete array type.
const ARRAY_MISMATCH_MSG: &str = "wrong array type";
50
/// A `fmt::Write` sink that accepts any text and throws it away.
///
/// It is used to dry-run a format string against a sample value so that formatting errors
/// surface before any real output is produced.
// NOTE(review): only the feature-gated temporal serializers reference this type, which is
// presumably why dead code is allowed here — confirm.
#[allow(dead_code)]
struct IgnoreFmt;

impl std::fmt::Write for IgnoreFmt {
    /// Discards the given text and always reports success.
    fn write_str(&mut self, _discarded: &str) -> std::fmt::Result {
        std::fmt::Result::Ok(())
    }
}
58
/// Writes the CSV text of successive values into a byte buffer.
///
/// The implementations in this file emit one field per call to [`Serializer::serialize`].
pub(super) trait Serializer<'a> {
    /// Appends the next field to `buf`, formatted according to `options`.
    fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions);
}
62
63fn make_serializer<'a, T, I: Iterator<Item = Option<T>>, const QUOTE_NON_NULL: bool>(
64 f: impl FnMut(T, &mut Vec<u8>, &SerializeOptions),
65 iter: I,
66) -> impl Serializer<'a> {
67 struct SerializerImpl<F, I, const QUOTE_NON_NULL: bool> {
68 f: F,
69 iter: I,
70 }
71
72 impl<'a, T, F, I, const QUOTE_NON_NULL: bool> Serializer<'a>
73 for SerializerImpl<F, I, QUOTE_NON_NULL>
74 where
75 F: FnMut(T, &mut Vec<u8>, &SerializeOptions),
76 I: Iterator<Item = Option<T>>,
77 {
78 fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
79 let item = self.iter.next().expect(TOO_MANY_MSG);
80 match item {
81 Some(item) => {
82 if QUOTE_NON_NULL {
83 buf.push(options.quote_char);
84 }
85 (self.f)(item, buf, options);
86 if QUOTE_NON_NULL {
87 buf.push(options.quote_char);
88 }
89 },
90 None => buf.extend_from_slice(options.null.as_bytes()),
91 }
92 }
93 }
94
95 SerializerImpl::<_, _, QUOTE_NON_NULL> { f, iter }
96}
97
98fn integer_serializer<I: NativeType + itoa::Integer>(
99 array: &PrimitiveArray<I>,
100) -> impl Serializer<'_> {
101 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
102 let mut buffer = itoa::Buffer::new();
103 let value = buffer.format(item);
104 buf.extend_from_slice(value.as_bytes());
105 };
106
107 make_serializer::<_, _, false>(f, array.iter())
108}
109
110fn float_serializer_no_precision_autoformat_f16(array: &Float16Array) -> impl Serializer<'_> {
111 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
112 let mut buffer = zmij::Buffer::new();
113 let cast: f32 = NumCast::from(item).unwrap();
114 let value = buffer.format(cast);
115 buf.extend_from_slice(value.as_bytes());
116 };
117 float_serializer_no_precision_autoformat_(array, f)
118}
119
120fn float_serializer_no_precision_autoformat<I: NativeType + zmij::Float>(
121 array: &PrimitiveArray<I>,
122) -> impl Serializer<'_> {
123 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
124 let mut buffer = zmij::Buffer::new();
125 let value = buffer.format(item);
126 buf.extend_from_slice(value.as_bytes());
127 };
128 float_serializer_no_precision_autoformat_(array, f)
129}
130
131fn float_serializer_no_precision_autoformat_<
132 'a,
133 I: NativeType,
134 F: Fn(&'a I, &mut Vec<u8>, &SerializeOptions),
135>(
136 array: &'a PrimitiveArray<I>,
137 f: F,
138) -> impl Serializer<'a> {
139 make_serializer::<_, _, false>(f, array.iter())
140}
141
142fn float_serializer_no_precision_autoformat_decimal_comma_f16(
143 array: &Float16Array,
144) -> impl Serializer<'_> {
145 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
146 let mut buffer = zmij::Buffer::new();
147 let cast: f32 = NumCast::from(item).unwrap();
148 let value = buffer.format(cast);
149
150 for ch in value.as_bytes() {
151 buf.push(if *ch == b'.' { b',' } else { *ch });
152 }
153 };
154 float_serializer_no_precision_autoformat_decimal_comma_(array, f)
155}
156
157fn float_serializer_no_precision_autoformat_decimal_comma<I: NativeType + zmij::Float>(
158 array: &PrimitiveArray<I>,
159) -> impl Serializer<'_> {
160 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
161 let mut buffer = zmij::Buffer::new();
162 let value = buffer.format(item).as_bytes();
163
164 for ch in value {
165 buf.push(if *ch == b'.' { b',' } else { *ch });
166 }
167 };
168 float_serializer_no_precision_autoformat_decimal_comma_(array, f)
169}
170
171fn float_serializer_no_precision_autoformat_decimal_comma_<
172 'a,
173 I: NativeType,
174 F: Fn(&'a I, &mut Vec<u8>, &SerializeOptions),
175>(
176 array: &'a PrimitiveArray<I>,
177 f: F,
178) -> impl Serializer<'a> {
179 make_serializer::<_, _, false>(f, array.iter())
180}
181
182fn float_serializer_no_precision_scientific<I: NativeType + LowerExp>(
183 array: &PrimitiveArray<I>,
184) -> impl Serializer<'_> {
185 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
186 let _ = write!(buf, "{item:.e}");
188 };
189
190 make_serializer::<_, _, false>(f, array.iter())
191}
192
193fn float_serializer_no_precision_scientific_decimal_comma<I: NativeType + LowerExp>(
194 array: &PrimitiveArray<I>,
195) -> impl Serializer<'_> {
196 let mut scratch = Vec::new();
197
198 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
199 let _ = write!(&mut scratch, "{item:.e}");
201 for c in &mut scratch {
202 if *c == b'.' {
203 *c = b',';
204 break;
205 }
206 }
207 buf.extend_from_slice(&scratch);
208 };
209
210 make_serializer::<_, _, false>(f, array.iter())
211}
212
213fn float_serializer_no_precision_positional<I: NativeType + NumCast>(
214 array: &PrimitiveArray<I>,
215) -> impl Serializer<'_> {
216 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
217 let v: f64 = NumCast::from(item).unwrap();
218 let _ = write!(buf, "{v}");
219 };
220
221 make_serializer::<_, _, false>(f, array.iter())
222}
223
224fn float_serializer_no_precision_positional_decimal_comma<I: NativeType + NumCast>(
225 array: &PrimitiveArray<I>,
226) -> impl Serializer<'_> {
227 let mut scratch = Vec::new();
228
229 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
230 scratch.clear();
231 let v: f64 = NumCast::from(item).unwrap();
232 let _ = write!(&mut scratch, "{v}");
233 for c in &mut scratch {
234 if *c == b'.' {
235 *c = b',';
236 break;
237 }
238 }
239 buf.extend_from_slice(&scratch);
240 };
241
242 make_serializer::<_, _, false>(f, array.iter())
243}
244
245fn float_serializer_with_precision_scientific<I: NativeType + LowerExp>(
246 array: &PrimitiveArray<I>,
247 precision: usize,
248) -> impl Serializer<'_> {
249 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
250 let _ = write!(buf, "{item:.precision$e}");
252 };
253
254 make_serializer::<_, _, false>(f, array.iter())
255}
256
257fn float_serializer_with_precision_scientific_decimal_comma<I: NativeType + LowerExp>(
258 array: &PrimitiveArray<I>,
259 precision: usize,
260) -> impl Serializer<'_> {
261 let mut scratch = Vec::new();
262
263 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
264 scratch.clear();
265 let _ = write!(&mut scratch, "{item:.precision$e}");
267 for c in &mut scratch {
268 if *c == b'.' {
269 *c = b',';
270 break;
271 }
272 }
273 buf.extend_from_slice(&scratch);
274 };
275
276 make_serializer::<_, _, false>(f, array.iter())
277}
278
279fn float_serializer_with_precision_positional<I: NativeType>(
280 array: &PrimitiveArray<I>,
281 precision: usize,
282) -> impl Serializer<'_> {
283 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
284 let _ = write!(buf, "{item:.precision$}");
286 };
287
288 make_serializer::<_, _, false>(f, array.iter())
289}
290
291fn float_serializer_with_precision_positional_decimal_comma<I: NativeType>(
292 array: &PrimitiveArray<I>,
293 precision: usize,
294) -> impl Serializer<'_> {
295 let mut scratch = Vec::new();
296
297 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
298 scratch.clear();
299 let _ = write!(&mut scratch, "{item:.precision$}");
300 for c in &mut scratch {
301 if *c == b'.' {
302 *c = b',';
303 break;
304 }
305 }
306 buf.extend_from_slice(&scratch);
307 };
308
309 make_serializer::<_, _, false>(f, array.iter())
310}
311
312fn null_serializer(_array: &NullArray) -> impl Serializer<'_> {
313 struct NullSerializer;
314 impl<'a> Serializer<'a> for NullSerializer {
315 fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
316 buf.extend_from_slice(options.null.as_bytes());
317 }
318 }
319 NullSerializer
320}
321
322fn bool_serializer<const QUOTE_NON_NULL: bool>(array: &BooleanArray) -> impl Serializer<'_> {
323 let f = move |item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
324 let s = if item { "true" } else { "false" };
325 buf.extend_from_slice(s.as_bytes());
326 };
327
328 make_serializer::<_, _, QUOTE_NON_NULL>(f, array.iter())
329}
330
/// Serializes a 128-bit decimal array with the given `scale`.
///
/// Formatting is delegated to a reusable `DecimalFmtBuffer`; the trim-zeros setting is read
/// once when the serializer is built, while `options.decimal_comma` is read on every call.
#[cfg(feature = "dtype-decimal")]
fn decimal_serializer(array: &PrimitiveArray<i128>, scale: usize) -> impl Serializer<'_> {
    let trim_zeros = arrow::compute::decimal::get_trim_decimal_zeros();
    let mut formatter = polars_compute::decimal::DecimalFmtBuffer::new();

    let write_decimal = move |&item, out: &mut Vec<u8>, options: &SerializeOptions| {
        let text = formatter.format_dec128(item, scale, trim_zeros, options.decimal_comma);
        out.extend_from_slice(text.as_bytes());
    };

    make_serializer::<_, _, false>(write_decimal, array.iter())
}
346
/// Serializes a primitive array by handing every non-null value to `callback`, which writes
/// the value's text into the output buffer.
///
/// When `QUOTE_NON_NULL` is `true`, non-null values are wrapped in the quote character by
/// [`make_serializer`].
#[cfg(any(
    feature = "dtype-date",
    feature = "dtype-time",
    feature = "dtype-datetime"
))]
fn callback_serializer<'a, T: NativeType, const QUOTE_NON_NULL: bool>(
    array: &'a PrimitiveArray<T>,
    mut callback: impl FnMut(T, &mut Vec<u8>) + 'a,
) -> impl Serializer<'a> {
    // Adapts the two-argument callback to the three-argument shape `make_serializer` expects.
    let write_value =
        move |&item, out: &mut Vec<u8>, _options: &SerializeOptions| callback(item, out);

    let values = array.iter();
    make_serializer::<_, _, QUOTE_NON_NULL>(write_value, values)
}
362
/// Slice iterator over chrono format items, in the shape the `format_with_items` calls in
/// this file receive (`format.iter()` on a parsed item list).
#[cfg(any(feature = "dtype-date", feature = "dtype-time"))]
type ChronoFormatIter<'a, 'b> = std::slice::Iter<'a, chrono::format::Item<'b>>;
365
/// Builds a serializer for a date or time column.
///
/// * `format_str` - optional strftime-style format; when `None`, values are written with
///   their `Display` implementation.
/// * `description` - type name used in error messages.
/// * `array` - the column; must be a `PrimitiveArray<Underlying>`.
/// * `sample_value` - value used to dry-run `format_str` before any real value is written.
/// * `convert` - maps the physical value to the chrono value `T`.
/// * `format_fn` - formats a `T` with the parsed format items.
/// * `options` - decides how values are quoted (see [`date_and_time_final_serializer`]).
///
/// # Errors
///
/// Returns a `ComputeError` when `format_str` cannot be parsed, or when formatting
/// `sample_value` with it fails.
///
/// # Panics
///
/// Panics with [`ARRAY_MISMATCH_MSG`] when `array` is not a `PrimitiveArray<Underlying>`.
#[cfg(any(feature = "dtype-date", feature = "dtype-time"))]
fn date_and_time_serializer<'a, Underlying: NativeType, T: std::fmt::Display>(
    format_str: Option<&'a str>,
    description: &str,
    array: &'a dyn Array,
    sample_value: T,
    mut convert: impl FnMut(Underlying) -> T + Send + 'a,
    mut format_fn: impl for<'b> FnMut(
        &T,
        ChronoFormatIter<'b, 'a>,
    ) -> chrono::format::DelayedFormat<ChronoFormatIter<'b, 'a>>
    + Send
    + 'a,
    options: &SerializeOptions,
) -> PolarsResult<Box<dyn Serializer<'a> + Send + 'a>> {
    let array = array
        .as_any()
        .downcast_ref::<PrimitiveArray<Underlying>>()
        .expect(ARRAY_MISMATCH_MSG);

    // Without a format string, fall back to the value's `Display` output.
    let Some(format_str) = format_str else {
        let callback = move |item, buf: &mut Vec<u8>| {
            let item = convert(item);
            let _ = write!(buf, "{item}");
        };
        return Ok(date_and_time_final_serializer(array, callback, options));
    };

    let format_error =
        || polars_err!(ComputeError: "cannot format {description} with format '{format_str}'");

    let format = chrono::format::StrftimeItems::new(format_str)
        .parse()
        .map_err(|_| format_error())?;

    // Dry-run the format on a sample value so that a format the value cannot be rendered with
    // is reported as an error here; per-value formatting results below are ignored.
    {
        use std::fmt::Write;
        write!(IgnoreFmt, "{}", format_fn(&sample_value, format.iter()))
            .map_err(|_| format_error())?;
    }

    let callback = move |item, buf: &mut Vec<u8>| {
        let item = convert(item);
        let _ = write!(buf, "{}", format_fn(&item, format.iter()));
    };
    Ok(date_and_time_final_serializer(array, callback, options))
}
411
/// Boxes a temporal serializer built from `callback`, applying the configured quote style.
///
/// * `QuoteStyle::Always` - every field, nulls included, is wrapped in quotes.
/// * `QuoteStyle::NonNumeric` - only non-null values are wrapped in quotes.
/// * any other style - nothing is quoted.
#[cfg(any(
    feature = "dtype-date",
    feature = "dtype-time",
    feature = "dtype-datetime"
))]
fn date_and_time_final_serializer<'a, T: NativeType>(
    array: &'a PrimitiveArray<T>,
    callback: impl FnMut(T, &mut Vec<u8>) + Send + 'a,
    options: &SerializeOptions,
) -> Box<dyn Serializer<'a> + Send + 'a> {
    match options.quote_style {
        QuoteStyle::Always => {
            let unquoted = callback_serializer::<T, false>(array, callback);
            Box::new(quote_serializer(unquoted)) as Box<dyn Serializer + Send>
        },
        QuoteStyle::NonNumeric => {
            let quoted_values = callback_serializer::<T, true>(array, callback);
            Box::new(quoted_values)
        },
        _ => {
            let unquoted = callback_serializer::<T, false>(array, callback);
            Box::new(unquoted)
        },
    }
}
430
431pub(super) fn string_serializer<'a, Iter: Send + 'a>(
432 mut f: impl FnMut(&mut Iter) -> Option<&str> + Send + 'a,
433 options: &SerializeOptions,
434 mut update: impl FnMut(&'a dyn Array) -> Iter + Send + 'a,
435 array: &'a dyn Array,
436) -> Box<dyn Serializer<'a> + 'a + Send> {
437 const LF: u8 = b'\n';
438 const CR: u8 = b'\r';
439
440 struct StringSerializer<F, Iter> {
441 serialize: F,
442 iter: Iter,
443 }
444
445 impl<'a, F, Iter> Serializer<'a> for StringSerializer<F, Iter>
446 where
447 F: FnMut(&mut Iter, &mut Vec<u8>, &SerializeOptions),
448 {
449 fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
450 (self.serialize)(&mut self.iter, buf, options);
451 }
452 }
453
454 fn serialize_str_escaped(buf: &mut Vec<u8>, s: &[u8], quote_char: u8, quoted: bool) {
455 let mut iter = memchr_iter(quote_char, s);
456 let first_quote = iter.next();
457 match first_quote {
458 None => buf.extend_from_slice(s),
459 Some(mut quote_pos) => {
460 if !quoted {
461 buf.push(quote_char);
462 }
463 let mut start_pos = 0;
464 loop {
465 buf.extend_from_slice(&s[start_pos..quote_pos]);
466 buf.extend_from_slice(&[quote_char, quote_char]);
467 match iter.next() {
468 Some(quote) => {
469 start_pos = quote_pos + 1;
470 quote_pos = quote;
471 },
472 None => {
473 buf.extend_from_slice(&s[quote_pos + 1..]);
474 break;
475 },
476 }
477 }
478 if !quoted {
479 buf.push(quote_char);
480 }
481 },
482 }
483 }
484
485 let iter = update(array);
486 match options.quote_style {
487 QuoteStyle::Always => {
488 let serialize =
489 move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
490 let quote_char = options.quote_char;
491 buf.push(quote_char);
492 let Some(s) = f(iter) else {
493 buf.extend_from_slice(options.null.as_bytes());
494 buf.push(quote_char);
495 return;
496 };
497 serialize_str_escaped(buf, s.as_bytes(), quote_char, true);
498 buf.push(quote_char);
499 };
500 Box::new(StringSerializer { serialize, iter })
501 },
502 QuoteStyle::NonNumeric => {
503 let serialize =
504 move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
505 let Some(s) = f(iter) else {
506 buf.extend_from_slice(options.null.as_bytes());
507 return;
508 };
509 let quote_char = options.quote_char;
510 buf.push(quote_char);
511 serialize_str_escaped(buf, s.as_bytes(), quote_char, true);
512 buf.push(quote_char);
513 };
514 Box::new(StringSerializer { serialize, iter })
515 },
516 QuoteStyle::Necessary => {
517 let serialize =
518 move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
519 let Some(s) = f(iter) else {
520 buf.extend_from_slice(options.null.as_bytes());
521 return;
522 };
523 let quote_char = options.quote_char;
524 if s.is_empty() {
526 buf.extend_from_slice(&[quote_char, quote_char]);
527 return;
528 }
529 let needs_quote = memchr3(options.separator, LF, CR, s.as_bytes()).is_some();
530 if needs_quote {
531 buf.push(quote_char);
532 }
533 serialize_str_escaped(buf, s.as_bytes(), quote_char, needs_quote);
534 if needs_quote {
535 buf.push(quote_char);
536 }
537 };
538 Box::new(StringSerializer { serialize, iter })
539 },
540 QuoteStyle::Never => {
541 let serialize =
542 move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
543 let Some(s) = f(iter) else {
544 buf.extend_from_slice(options.null.as_bytes());
545 return;
546 };
547 buf.extend_from_slice(s.as_bytes());
548 };
549 Box::new(StringSerializer { serialize, iter })
550 },
551 }
552}
553
554fn quote_serializer<'a>(serializer: impl Serializer<'a>) -> impl Serializer<'a> {
555 struct QuoteSerializer<S>(S);
556 impl<'a, S: Serializer<'a>> Serializer<'a> for QuoteSerializer<S> {
557 fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
558 buf.push(options.quote_char);
559 self.0.serialize(buf, options);
560 buf.push(options.quote_char);
561 }
562 }
563 QuoteSerializer(serializer)
564}
565
566pub(super) fn serializer_for<'a>(
567 array: &'a dyn Array,
568 options: &'a SerializeOptions,
569 dtype: &'a DataType,
570 _datetime_format: &'a str,
571 _time_zone: Option<Tz>,
572) -> PolarsResult<Box<dyn Serializer<'a> + Send + 'a>> {
573 let needs_quotes = match dtype {
576 DataType::Float16 | DataType::Float32 | DataType::Float64 => {
577 let mut should_quote = options.decimal_comma && options.separator == b',';
588 if let Some(precision) = options.float_precision {
589 should_quote &= precision > 0;
590 }
591
592 match options.quote_style {
593 QuoteStyle::Always => true,
594 QuoteStyle::Necessary | QuoteStyle::NonNumeric => should_quote,
595 QuoteStyle::Never => false,
596 }
597 },
598 #[cfg(feature = "dtype-decimal")]
599 DataType::Decimal(_, scale) => {
600 let should_quote = options.decimal_comma && options.separator == b',' && *scale > 0;
602
603 match options.quote_style {
604 QuoteStyle::Always => true,
605 QuoteStyle::Necessary | QuoteStyle::NonNumeric => should_quote,
606 QuoteStyle::Never => false,
607 }
608 },
609 _ => options.quote_style == QuoteStyle::Always,
610 };
611
612 macro_rules! quote_wrapper {
613 ($make_serializer:path, $($arg:tt)*) => {{
614 let serializer = $make_serializer(array.as_any().downcast_ref().unwrap(), $($arg)*);
615 if needs_quotes {
616 Box::new(quote_serializer(serializer)) as Box<dyn Serializer + Send>
617 } else {
618 Box::new(serializer)
619 }
620 }};
621 ($make_serializer:path) => { quote_wrapper!($make_serializer,) };
622 }
623
624 let serializer = match dtype {
625 DataType::Int8 => quote_wrapper!(integer_serializer::<i8>),
626 DataType::UInt8 => quote_wrapper!(integer_serializer::<u8>),
627 DataType::Int16 => quote_wrapper!(integer_serializer::<i16>),
628 DataType::UInt16 => quote_wrapper!(integer_serializer::<u16>),
629 DataType::Int32 => quote_wrapper!(integer_serializer::<i32>),
630 DataType::UInt32 => quote_wrapper!(integer_serializer::<u32>),
631 DataType::Int64 => quote_wrapper!(integer_serializer::<i64>),
632 DataType::UInt64 => quote_wrapper!(integer_serializer::<u64>),
633 DataType::Int128 => quote_wrapper!(integer_serializer::<i128>),
634 DataType::UInt128 => quote_wrapper!(integer_serializer::<u128>),
635 DataType::Float16 => {
636 match (
637 options.decimal_comma,
638 options.float_precision,
639 options.float_scientific,
640 ) {
641 (false, Some(precision), Some(true)) => {
643 quote_wrapper!(
644 float_serializer_with_precision_scientific::<pf16>,
645 precision
646 )
647 },
648 (false, Some(precision), _) => {
649 quote_wrapper!(
650 float_serializer_with_precision_positional::<pf16>,
651 precision
652 )
653 },
654 (false, None, Some(true)) => {
655 quote_wrapper!(float_serializer_no_precision_scientific::<pf16>)
656 },
657 (false, None, Some(false)) => {
658 quote_wrapper!(float_serializer_no_precision_positional::<pf16>)
659 },
660 (false, None, None) => {
661 quote_wrapper!(float_serializer_no_precision_autoformat_f16)
662 },
663
664 (true, Some(precision), Some(true)) => quote_wrapper!(
666 float_serializer_with_precision_scientific_decimal_comma::<pf16>,
667 precision
668 ),
669 (true, Some(precision), _) => quote_wrapper!(
670 float_serializer_with_precision_positional_decimal_comma::<pf16>,
671 precision
672 ),
673 (true, None, Some(true)) => {
674 quote_wrapper!(float_serializer_no_precision_scientific_decimal_comma::<pf16>)
675 },
676 (true, None, Some(false)) => {
677 quote_wrapper!(float_serializer_no_precision_positional_decimal_comma::<pf16>)
678 },
679 (true, None, None) => {
680 quote_wrapper!(float_serializer_no_precision_autoformat_decimal_comma_f16)
681 },
682 }
683 },
684 DataType::Float32 => {
685 match (
686 options.decimal_comma,
687 options.float_precision,
688 options.float_scientific,
689 ) {
690 (false, Some(precision), Some(true)) => {
692 quote_wrapper!(float_serializer_with_precision_scientific::<f32>, precision)
693 },
694 (false, Some(precision), _) => {
695 quote_wrapper!(float_serializer_with_precision_positional::<f32>, precision)
696 },
697 (false, None, Some(true)) => {
698 quote_wrapper!(float_serializer_no_precision_scientific::<f32>)
699 },
700 (false, None, Some(false)) => {
701 quote_wrapper!(float_serializer_no_precision_positional::<f32>)
702 },
703 (false, None, None) => {
704 quote_wrapper!(float_serializer_no_precision_autoformat::<f32>)
705 },
706
707 (true, Some(precision), Some(true)) => quote_wrapper!(
709 float_serializer_with_precision_scientific_decimal_comma::<f32>,
710 precision
711 ),
712 (true, Some(precision), _) => quote_wrapper!(
713 float_serializer_with_precision_positional_decimal_comma::<f32>,
714 precision
715 ),
716 (true, None, Some(true)) => {
717 quote_wrapper!(float_serializer_no_precision_scientific_decimal_comma::<f32>)
718 },
719 (true, None, Some(false)) => {
720 quote_wrapper!(float_serializer_no_precision_positional_decimal_comma::<f32>)
721 },
722 (true, None, None) => {
723 quote_wrapper!(float_serializer_no_precision_autoformat_decimal_comma::<f32>)
724 },
725 }
726 },
727 DataType::Float64 => {
728 match (
729 options.decimal_comma,
730 options.float_precision,
731 options.float_scientific,
732 ) {
733 (false, Some(precision), Some(true)) => {
735 quote_wrapper!(float_serializer_with_precision_scientific::<f64>, precision)
736 },
737 (false, Some(precision), _) => {
738 quote_wrapper!(float_serializer_with_precision_positional::<f64>, precision)
739 },
740 (false, None, Some(true)) => {
741 quote_wrapper!(float_serializer_no_precision_scientific::<f64>)
742 },
743 (false, None, Some(false)) => {
744 quote_wrapper!(float_serializer_no_precision_positional::<f64>)
745 },
746 (false, None, None) => {
747 quote_wrapper!(float_serializer_no_precision_autoformat::<f64>)
748 },
749
750 (true, Some(precision), Some(true)) => quote_wrapper!(
752 float_serializer_with_precision_scientific_decimal_comma::<f64>,
753 precision
754 ),
755 (true, Some(precision), _) => quote_wrapper!(
756 float_serializer_with_precision_positional_decimal_comma::<f64>,
757 precision
758 ),
759 (true, None, Some(true)) => {
760 quote_wrapper!(float_serializer_no_precision_scientific_decimal_comma::<f64>)
761 },
762 (true, None, Some(false)) => {
763 quote_wrapper!(float_serializer_no_precision_positional_decimal_comma::<f64>)
764 },
765 (true, None, None) => {
766 quote_wrapper!(float_serializer_no_precision_autoformat_decimal_comma::<f64>)
767 },
768 }
769 },
770 DataType::Null => quote_wrapper!(null_serializer),
771 DataType::Boolean => {
772 let array = array.as_any().downcast_ref().unwrap();
773 match options.quote_style {
774 QuoteStyle::Always => Box::new(quote_serializer(bool_serializer::<false>(array)))
775 as Box<dyn Serializer + Send>,
776 QuoteStyle::NonNumeric => Box::new(bool_serializer::<true>(array)),
777 _ => Box::new(bool_serializer::<false>(array)),
778 }
779 },
780 #[cfg(feature = "dtype-date")]
781 DataType::Date => date_and_time_serializer(
782 options.date_format.as_deref(),
783 "NaiveDate",
784 array,
785 chrono::NaiveDate::MAX,
786 arrow::temporal_conversions::date32_to_date,
787 |date, items| date.format_with_items(items),
788 options,
789 )?,
790 #[cfg(feature = "dtype-time")]
791 DataType::Time => date_and_time_serializer(
792 Some(options.time_format.as_deref().unwrap_or("%T%.9f")),
793 "NaiveTime",
794 array,
795 chrono::NaiveTime::MIN,
796 arrow::temporal_conversions::time64ns_to_time,
797 |time, items| time.format_with_items(items),
798 options,
799 )?,
800 #[cfg(feature = "dtype-datetime")]
801 DataType::Datetime(time_unit, _) => {
802 let format = chrono::format::StrftimeItems::new(_datetime_format)
803 .parse()
804 .map_err(|_| {
805 polars_err!(
806 ComputeError: "cannot format {} with format '{_datetime_format}'",
807 if _time_zone.is_some() { "DateTime" } else { "NaiveDateTime" },
808 )
809 })?;
810 use std::fmt::Write;
811 let sample_datetime = match _time_zone {
812 #[cfg(feature = "timezones")]
813 Some(time_zone) => time_zone
814 .from_utc_datetime(&chrono::NaiveDateTime::MAX)
815 .format_with_items(format.iter()),
816 #[cfg(not(feature = "timezones"))]
817 Some(_) => panic!("activate 'timezones' feature"),
818 None => chrono::NaiveDateTime::MAX.format_with_items(format.iter()),
819 };
820 write!(IgnoreFmt, "{sample_datetime}").map_err(|_| {
823 polars_err!(
824 ComputeError: "cannot format {} with format '{_datetime_format}'",
825 if _time_zone.is_some() { "DateTime" } else { "NaiveDateTime" },
826 )
827 })?;
828
829 let array = array.as_any().downcast_ref().unwrap();
830
831 macro_rules! time_unit_serializer {
832 ($convert:ident) => {
833 match _time_zone {
834 #[cfg(feature = "timezones")]
835 Some(time_zone) => {
836 let callback = move |item, buf: &mut Vec<u8>| {
837 let item = arrow::temporal_conversions::$convert(item);
838 let item = time_zone.from_utc_datetime(&item);
839 let _ = write!(buf, "{}", item.format_with_items(format.iter()));
841 };
842 date_and_time_final_serializer(array, callback, options)
843 },
844 #[cfg(not(feature = "timezones"))]
845 Some(_) => panic!("activate 'timezones' feature"),
846 None => {
847 let callback = move |item, buf: &mut Vec<u8>| {
848 let item = arrow::temporal_conversions::$convert(item);
849 let _ = write!(buf, "{}", item.format_with_items(format.iter()));
851 };
852 date_and_time_final_serializer(array, callback, options)
853 },
854 }
855 };
856 }
857
858 match time_unit {
859 TimeUnit::Nanoseconds => time_unit_serializer!(timestamp_ns_to_datetime),
860 TimeUnit::Microseconds => time_unit_serializer!(timestamp_us_to_datetime),
861 TimeUnit::Milliseconds => time_unit_serializer!(timestamp_ms_to_datetime),
862 }
863 },
864 DataType::String => string_serializer(
865 |iter| Iterator::next(iter).expect(TOO_MANY_MSG),
866 options,
867 |arr| {
868 arr.as_any()
869 .downcast_ref::<Utf8ViewArray>()
870 .expect(ARRAY_MISMATCH_MSG)
871 .iter()
872 },
873 array,
874 ),
875 #[cfg(feature = "dtype-categorical")]
876 DataType::Categorical(_, mapping) | DataType::Enum(_, mapping) => {
877 polars_core::with_match_categorical_physical_type!(dtype.cat_physical().unwrap(), |$C| {
878 string_serializer(
879 |iter| {
880 let &idx: &<$C as PolarsCategoricalType>::Native = Iterator::next(iter).expect(TOO_MANY_MSG)?;
881 Some(unsafe { mapping.cat_to_str_unchecked(idx.as_cat()) })
882 },
883 options,
884 |arr| {
885 arr.as_any()
886 .downcast_ref::<PrimitiveArray<<$C as PolarsCategoricalType>::Native>>()
887 .expect(ARRAY_MISMATCH_MSG)
888 .iter()
889 },
890 array,
891 )
892 })
893 },
894 #[cfg(feature = "dtype-decimal")]
895 DataType::Decimal(_, scale) => {
896 quote_wrapper!(decimal_serializer, *scale)
897 },
898 _ => {
899 polars_bail!(ComputeError: "datatype {dtype} cannot be written to CSV\n\nConsider using JSON or a binary format.")
900 },
901 };
902 Ok(serializer)
903}
904
#[cfg(test)]
mod test {
    use arrow::array::NullArray;
    use polars_core::prelude::ArrowDataType;

    use super::string_serializer;
    use crate::csv::write::options::{QuoteStyle, SerializeOptions};

    /// Exercises `string_serializer` under every quote style, covering nulls, empty strings,
    /// embedded quotes, separators and line breaks.
    #[test]
    fn test_string_serializer() {
        /// Serializes the single optional string `s` and compares the output with `expected`.
        #[track_caller]
        fn check_string_serialization(options: &SerializeOptions, s: Option<&str>, expected: &str) {
            // The array is never inspected: the iterator state is just the string itself.
            let fake_array = NullArray::new(ArrowDataType::Null, 0);
            let mut serializer = string_serializer(|s| *s, options, |_| s, &fake_array);

            let mut output = Vec::new();
            serializer.serialize(&mut output, options);

            let serialized = std::str::from_utf8(&output).unwrap();
            assert!(
                serialized == expected,
                "CSV string {s:?} wasn't serialized correctly: expected: `{expected}`, got: `{serialized}`"
            );
        }

        // Default options with only the quote style overridden.
        let with_style = |quote_style: QuoteStyle| SerializeOptions {
            quote_style,
            ..SerializeOptions::default()
        };

        let always_quote = with_style(QuoteStyle::Always);
        check_string_serialization(&always_quote, None, r#""""#);
        check_string_serialization(&always_quote, Some(""), r#""""#);
        check_string_serialization(&always_quote, Some("a"), r#""a""#);
        check_string_serialization(&always_quote, Some("\""), r#""""""#);
        check_string_serialization(&always_quote, Some("a\"\"b"), r#""a""""b""#);

        let necessary_quote = with_style(QuoteStyle::Necessary);
        check_string_serialization(&necessary_quote, None, r#""#);
        check_string_serialization(&necessary_quote, Some(""), r#""""#);
        check_string_serialization(&necessary_quote, Some("a"), r#"a"#);
        check_string_serialization(&necessary_quote, Some("\""), r#""""""#);
        check_string_serialization(&necessary_quote, Some("a\"\"b"), r#""a""""b""#);
        check_string_serialization(&necessary_quote, Some("a b"), r#"a b"#);
        check_string_serialization(&necessary_quote, Some("a,b"), r#""a,b""#);
        check_string_serialization(&necessary_quote, Some("a\nb"), "\"a\nb\"");
        check_string_serialization(&necessary_quote, Some("a\rb"), "\"a\rb\"");

        let never_quote = with_style(QuoteStyle::Never);
        check_string_serialization(&never_quote, None, "");
        check_string_serialization(&never_quote, Some(""), "");
        check_string_serialization(&never_quote, Some("a"), "a");
        check_string_serialization(&never_quote, Some("\""), "\"");
        check_string_serialization(&never_quote, Some("a\"\"b"), "a\"\"b");
        check_string_serialization(&never_quote, Some("a b"), "a b");
        check_string_serialization(&never_quote, Some("a,b"), "a,b");
        check_string_serialization(&never_quote, Some("a\nb"), "a\nb");
        check_string_serialization(&never_quote, Some("a\rb"), "a\rb");

        let non_numeric_quote = with_style(QuoteStyle::NonNumeric);
        check_string_serialization(&non_numeric_quote, None, "");
        check_string_serialization(&non_numeric_quote, Some(""), r#""""#);
        check_string_serialization(&non_numeric_quote, Some("a"), r#""a""#);
        check_string_serialization(&non_numeric_quote, Some("\""), r#""""""#);
        check_string_serialization(&non_numeric_quote, Some("a\"\"b"), r#""a""""b""#);
        check_string_serialization(&non_numeric_quote, Some("a b"), r#""a b""#);
        check_string_serialization(&non_numeric_quote, Some("a,b"), r#""a,b""#);
        check_string_serialization(&non_numeric_quote, Some("a\nb"), "\"a\nb\"");
        check_string_serialization(&non_numeric_quote, Some("a\rb"), "\"a\rb\"");
    }
}