1use std::fmt::LowerExp;
34use std::io::Write;
35
36use arrow::array::{Array, BooleanArray, Float16Array, NullArray, PrimitiveArray, Utf8ViewArray};
37use arrow::legacy::time_zone::Tz;
38use arrow::types::NativeType;
39#[cfg(feature = "timezones")]
40use chrono::TimeZone;
41use memchr::{memchr_iter, memchr3};
42use num_traits::NumCast;
43use polars_core::prelude::*;
44use polars_utils::float16::pf16;
45
46use crate::csv::write::{QuoteStyle, SerializeOptions};
47
48const TOO_MANY_MSG: &str = "too many items requested from CSV serializer";
49const ARRAY_MISMATCH_MSG: &str = "wrong array type";
50
/// A formatting sink that discards everything written to it.
///
/// Writing a `Display` value into this sink runs its formatting logic (so formatting errors
/// surface) without storing any output.
#[allow(dead_code)] // Only referenced from the feature-gated date/time/datetime code paths.
struct IgnoreFmt;

impl std::fmt::Write for IgnoreFmt {
    /// Accepts any string fragment and reports success without keeping it.
    fn write_str(&mut self, _discarded: &str) -> std::fmt::Result {
        Ok(())
    }
}
58
/// Incrementally writes the values of an array as CSV text, one value per call.
///
/// Most implementations in this file hold an iterator over the current array and consume one
/// item from it on every [`Serializer::serialize`] call.
pub(super) trait Serializer<'a> {
    /// Appends the textual form of the next value to `buf`, using the settings in `options`
    /// (null representation, quote character, ...).
    fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions);
    /// Points the serializer at a new array; later `serialize` calls read from `array`.
    fn update_array(&mut self, array: &'a dyn Array);
}
64
65fn make_serializer<'a, T, I: Iterator<Item = Option<T>>, const QUOTE_NON_NULL: bool>(
66 f: impl FnMut(T, &mut Vec<u8>, &SerializeOptions),
67 iter: I,
68 update_array: impl FnMut(&'a dyn Array) -> I,
69) -> impl Serializer<'a> {
70 struct SerializerImpl<F, I, Update, const QUOTE_NON_NULL: bool> {
71 f: F,
72 iter: I,
73 update_array: Update,
74 }
75
76 impl<'a, T, F, I, Update, const QUOTE_NON_NULL: bool> Serializer<'a>
77 for SerializerImpl<F, I, Update, QUOTE_NON_NULL>
78 where
79 F: FnMut(T, &mut Vec<u8>, &SerializeOptions),
80 I: Iterator<Item = Option<T>>,
81 Update: FnMut(&'a dyn Array) -> I,
82 {
83 fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
84 let item = self.iter.next().expect(TOO_MANY_MSG);
85 match item {
86 Some(item) => {
87 if QUOTE_NON_NULL {
88 buf.push(options.quote_char);
89 }
90 (self.f)(item, buf, options);
91 if QUOTE_NON_NULL {
92 buf.push(options.quote_char);
93 }
94 },
95 None => buf.extend_from_slice(options.null.as_bytes()),
96 }
97 }
98
99 fn update_array(&mut self, array: &'a dyn Array) {
100 self.iter = (self.update_array)(array);
101 }
102 }
103
104 SerializerImpl::<_, _, _, QUOTE_NON_NULL> {
105 f,
106 iter,
107 update_array,
108 }
109}
110
111fn integer_serializer<I: NativeType + itoa::Integer>(
112 array: &PrimitiveArray<I>,
113) -> impl Serializer<'_> {
114 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
115 let mut buffer = itoa::Buffer::new();
116 let value = buffer.format(item);
117 buf.extend_from_slice(value.as_bytes());
118 };
119
120 make_serializer::<_, _, false>(f, array.iter(), |array| {
121 array
122 .as_any()
123 .downcast_ref::<PrimitiveArray<I>>()
124 .expect(ARRAY_MISMATCH_MSG)
125 .iter()
126 })
127}
128
129fn float_serializer_no_precision_autoformat_f16(array: &Float16Array) -> impl Serializer<'_> {
130 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
131 let mut buffer = ryu::Buffer::new();
132 let cast: f32 = NumCast::from(item).unwrap();
133 let value = buffer.format(cast);
134 buf.extend_from_slice(value.as_bytes());
135 };
136 float_serializer_no_precision_autoformat_(array, f)
137}
138
139fn float_serializer_no_precision_autoformat<I: NativeType + ryu::Float>(
140 array: &PrimitiveArray<I>,
141) -> impl Serializer<'_> {
142 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
143 let mut buffer = ryu::Buffer::new();
144 let value = buffer.format(item);
145 buf.extend_from_slice(value.as_bytes());
146 };
147 float_serializer_no_precision_autoformat_(array, f)
148}
149
150fn float_serializer_no_precision_autoformat_<
151 'a,
152 I: NativeType,
153 F: Fn(&'a I, &mut Vec<u8>, &SerializeOptions),
154>(
155 array: &'a PrimitiveArray<I>,
156 f: F,
157) -> impl Serializer<'a> {
158 make_serializer::<_, _, false>(f, array.iter(), |array| {
159 array
160 .as_any()
161 .downcast_ref::<PrimitiveArray<I>>()
162 .expect(ARRAY_MISMATCH_MSG)
163 .iter()
164 })
165}
166
167fn float_serializer_no_precision_autoformat_decimal_comma_f16(
168 array: &Float16Array,
169) -> impl Serializer<'_> {
170 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
171 let mut buffer = ryu::Buffer::new();
172 let cast: f32 = NumCast::from(item).unwrap();
173 let value = buffer.format(cast);
174
175 for ch in value.as_bytes() {
176 buf.push(if *ch == b'.' { b',' } else { *ch });
177 }
178 };
179 float_serializer_no_precision_autoformat_decimal_comma_(array, f)
180}
181
182fn float_serializer_no_precision_autoformat_decimal_comma<I: NativeType + ryu::Float>(
183 array: &PrimitiveArray<I>,
184) -> impl Serializer<'_> {
185 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
186 let mut buffer = ryu::Buffer::new();
187 let value = buffer.format(item).as_bytes();
188
189 for ch in value {
190 buf.push(if *ch == b'.' { b',' } else { *ch });
191 }
192 };
193 float_serializer_no_precision_autoformat_decimal_comma_(array, f)
194}
195
196fn float_serializer_no_precision_autoformat_decimal_comma_<
197 'a,
198 I: NativeType,
199 F: Fn(&'a I, &mut Vec<u8>, &SerializeOptions),
200>(
201 array: &'a PrimitiveArray<I>,
202 f: F,
203) -> impl Serializer<'a> {
204 make_serializer::<_, _, false>(f, array.iter(), |array| {
205 array
206 .as_any()
207 .downcast_ref::<PrimitiveArray<I>>()
208 .expect(ARRAY_MISMATCH_MSG)
209 .iter()
210 })
211}
212
213fn float_serializer_no_precision_scientific<I: NativeType + LowerExp>(
214 array: &PrimitiveArray<I>,
215) -> impl Serializer<'_> {
216 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
217 let _ = write!(buf, "{item:.e}");
219 };
220
221 make_serializer::<_, _, false>(f, array.iter(), |array| {
222 array
223 .as_any()
224 .downcast_ref::<PrimitiveArray<I>>()
225 .expect(ARRAY_MISMATCH_MSG)
226 .iter()
227 })
228}
229
230fn float_serializer_no_precision_scientific_decimal_comma<I: NativeType + LowerExp>(
231 array: &PrimitiveArray<I>,
232) -> impl Serializer<'_> {
233 let mut scratch = Vec::new();
234
235 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
236 let _ = write!(&mut scratch, "{item:.e}");
238 for c in &mut scratch {
239 if *c == b'.' {
240 *c = b',';
241 break;
242 }
243 }
244 buf.extend_from_slice(&scratch);
245 };
246
247 make_serializer::<_, _, false>(f, array.iter(), |array| {
248 array
249 .as_any()
250 .downcast_ref::<PrimitiveArray<I>>()
251 .expect(ARRAY_MISMATCH_MSG)
252 .iter()
253 })
254}
255
256fn float_serializer_no_precision_positional<I: NativeType + NumCast>(
257 array: &PrimitiveArray<I>,
258) -> impl Serializer<'_> {
259 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
260 let v: f64 = NumCast::from(item).unwrap();
261 let _ = write!(buf, "{v}");
262 };
263
264 make_serializer::<_, _, false>(f, array.iter(), |array| {
265 array
266 .as_any()
267 .downcast_ref::<PrimitiveArray<I>>()
268 .expect(ARRAY_MISMATCH_MSG)
269 .iter()
270 })
271}
272
273fn float_serializer_no_precision_positional_decimal_comma<I: NativeType + NumCast>(
274 array: &PrimitiveArray<I>,
275) -> impl Serializer<'_> {
276 let mut scratch = Vec::new();
277
278 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
279 scratch.clear();
280 let v: f64 = NumCast::from(item).unwrap();
281 let _ = write!(&mut scratch, "{v}");
282 for c in &mut scratch {
283 if *c == b'.' {
284 *c = b',';
285 break;
286 }
287 }
288 buf.extend_from_slice(&scratch);
289 };
290
291 make_serializer::<_, _, false>(f, array.iter(), |array| {
292 array
293 .as_any()
294 .downcast_ref::<PrimitiveArray<I>>()
295 .expect(ARRAY_MISMATCH_MSG)
296 .iter()
297 })
298}
299
300fn float_serializer_with_precision_scientific<I: NativeType + LowerExp>(
301 array: &PrimitiveArray<I>,
302 precision: usize,
303) -> impl Serializer<'_> {
304 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
305 let _ = write!(buf, "{item:.precision$e}");
307 };
308
309 make_serializer::<_, _, false>(f, array.iter(), |array| {
310 array
311 .as_any()
312 .downcast_ref::<PrimitiveArray<I>>()
313 .expect(ARRAY_MISMATCH_MSG)
314 .iter()
315 })
316}
317
318fn float_serializer_with_precision_scientific_decimal_comma<I: NativeType + LowerExp>(
319 array: &PrimitiveArray<I>,
320 precision: usize,
321) -> impl Serializer<'_> {
322 let mut scratch = Vec::new();
323
324 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
325 scratch.clear();
326 let _ = write!(&mut scratch, "{item:.precision$e}");
328 for c in &mut scratch {
329 if *c == b'.' {
330 *c = b',';
331 break;
332 }
333 }
334 buf.extend_from_slice(&scratch);
335 };
336
337 make_serializer::<_, _, false>(f, array.iter(), |array| {
338 array
339 .as_any()
340 .downcast_ref::<PrimitiveArray<I>>()
341 .expect(ARRAY_MISMATCH_MSG)
342 .iter()
343 })
344}
345
346fn float_serializer_with_precision_positional<I: NativeType>(
347 array: &PrimitiveArray<I>,
348 precision: usize,
349) -> impl Serializer<'_> {
350 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
351 let _ = write!(buf, "{item:.precision$}");
353 };
354
355 make_serializer::<_, _, false>(f, array.iter(), |array| {
356 array
357 .as_any()
358 .downcast_ref::<PrimitiveArray<I>>()
359 .expect(ARRAY_MISMATCH_MSG)
360 .iter()
361 })
362}
363
364fn float_serializer_with_precision_positional_decimal_comma<I: NativeType>(
365 array: &PrimitiveArray<I>,
366 precision: usize,
367) -> impl Serializer<'_> {
368 let mut scratch = Vec::new();
369
370 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
371 scratch.clear();
372 let _ = write!(&mut scratch, "{item:.precision$}");
373 for c in &mut scratch {
374 if *c == b'.' {
375 *c = b',';
376 break;
377 }
378 }
379 buf.extend_from_slice(&scratch);
380 };
381
382 make_serializer::<_, _, false>(f, array.iter(), |array| {
383 array
384 .as_any()
385 .downcast_ref::<PrimitiveArray<I>>()
386 .expect(ARRAY_MISMATCH_MSG)
387 .iter()
388 })
389}
390
391fn null_serializer(_array: &NullArray) -> impl Serializer<'_> {
392 struct NullSerializer;
393 impl<'a> Serializer<'a> for NullSerializer {
394 fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
395 buf.extend_from_slice(options.null.as_bytes());
396 }
397 fn update_array(&mut self, _array: &'a dyn Array) {}
398 }
399 NullSerializer
400}
401
402fn bool_serializer<const QUOTE_NON_NULL: bool>(array: &BooleanArray) -> impl Serializer<'_> {
403 let f = move |item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
404 let s = if item { "true" } else { "false" };
405 buf.extend_from_slice(s.as_bytes());
406 };
407
408 make_serializer::<_, _, QUOTE_NON_NULL>(f, array.iter(), |array| {
409 array
410 .as_any()
411 .downcast_ref::<BooleanArray>()
412 .expect(ARRAY_MISMATCH_MSG)
413 .iter()
414 })
415}
416
/// Creates a serializer for 128-bit decimals with the given `scale`.
///
/// The trailing-zero trimming setting is read once when the serializer is built; the decimal
/// mark follows `options.decimal_comma` on every call.
#[cfg(feature = "dtype-decimal")]
fn decimal_serializer(array: &PrimitiveArray<i128>, scale: usize) -> impl Serializer<'_> {
    let trim_zeros = arrow::compute::decimal::get_trim_decimal_zeros();
    let mut formatter = polars_compute::decimal::DecimalFmtBuffer::new();

    let write_decimal = move |&value, out: &mut Vec<u8>, options: &SerializeOptions| {
        let text = formatter.format_dec128(value, scale, trim_zeros, options.decimal_comma);
        out.extend_from_slice(text.as_bytes());
    };

    make_serializer::<_, _, false>(write_decimal, array.iter(), |next| {
        let typed: &PrimitiveArray<i128> =
            next.as_any().downcast_ref().expect(ARRAY_MISMATCH_MSG);
        typed.iter()
    })
}
438
/// Creates a serializer that hands every non-null value of `array` to `callback`, which
/// writes it into the output buffer.
///
/// When `QUOTE_NON_NULL` is `true`, non-null values are wrapped in the quote character.
#[cfg(any(
    feature = "dtype-date",
    feature = "dtype-time",
    feature = "dtype-datetime"
))]
fn callback_serializer<'a, T: NativeType, const QUOTE_NON_NULL: bool>(
    array: &'a PrimitiveArray<T>,
    mut callback: impl FnMut(T, &mut Vec<u8>) + 'a,
) -> impl Serializer<'a> {
    // Adapt the two-argument callback to the three-argument shape `make_serializer` expects.
    let forward = move |&value, out: &mut Vec<u8>, _: &SerializeOptions| callback(value, out);

    make_serializer::<_, _, QUOTE_NON_NULL>(forward, array.iter(), |next| {
        let typed: &PrimitiveArray<T> = next.as_any().downcast_ref().expect(ARRAY_MISMATCH_MSG);
        typed.iter()
    })
}
460
/// Iterator over a slice of parsed chrono format items: `'a` is the lifetime of the slice
/// borrow, `'b` the lifetime parameter of the items themselves.
#[cfg(any(feature = "dtype-date", feature = "dtype-time"))]
type ChronoFormatIter<'a, 'b> = std::slice::Iter<'a, chrono::format::Item<'b>>;
463
/// Builds the serializer for a date or time column.
///
/// * `format_str` - optional strftime-style format; when `None`, values are written with
///   their `Display` implementation.
/// * `description` - type name used in error messages.
/// * `array` - the array to serialize; it is downcast to the concrete array type expected by
///   `date_and_time_final_serializer`.
/// * `sample_value` - a value used to dry-run the format before any real data is written.
/// * `convert` - turns the stored physical value into the chrono value `T`.
/// * `format_fn` - applies the parsed format items to a `T`.
/// * `options` - forwarded to `date_and_time_final_serializer`, which selects the quoting.
///
/// # Errors
/// Returns a `ComputeError` when `format_str` cannot be parsed, or when formatting
/// `sample_value` with it fails.
///
/// # Panics
/// Panics if `array` cannot be downcast to the expected concrete array type.
#[cfg(any(feature = "dtype-date", feature = "dtype-time"))]
fn date_and_time_serializer<'a, Underlying: NativeType, T: std::fmt::Display>(
    format_str: &'a Option<String>,
    description: &str,
    array: &'a dyn Array,
    sample_value: T,
    mut convert: impl FnMut(Underlying) -> T + Send + 'a,
    mut format_fn: impl for<'b> FnMut(
        &T,
        ChronoFormatIter<'b, 'a>,
    ) -> chrono::format::DelayedFormat<ChronoFormatIter<'b, 'a>>
    + Send
    + 'a,
    options: &SerializeOptions,
) -> PolarsResult<Box<dyn Serializer<'a> + Send + 'a>> {
    let array = array.as_any().downcast_ref().unwrap();
    let serializer = match format_str {
        Some(format_str) => {
            // Parse the format once up front; the parsed items are reused for every value.
            let format = chrono::format::StrftimeItems::new(format_str).parse().map_err(
                |_| polars_err!(ComputeError: "cannot format {description} with format '{format_str}'"),
            )?;
            use std::fmt::Write;
            // Dry-run: format the sample into a sink that discards its input, so a format that
            // cannot be applied to this type is reported as an error here.
            write!(IgnoreFmt, "{}", format_fn(&sample_value, format.iter())).map_err(
                |_| polars_err!(ComputeError: "cannot format {description} with format '{format_str}'"),
            )?;
            let callback = move |item, buf: &mut Vec<u8>| {
                let item = convert(item);
                // The write result is intentionally ignored; the format was validated above.
                let _ = write!(buf, "{}", format_fn(&item, format.iter()));
            };
            date_and_time_final_serializer(array, callback, options)
        },
        None => {
            // No custom format: fall back to the `Display` output of the converted value.
            let callback = move |item, buf: &mut Vec<u8>| {
                let item = convert(item);
                let _ = write!(buf, "{item}");
            };
            date_and_time_final_serializer(array, callback, options)
        },
    };
    Ok(serializer)
}
509
/// Wraps `callback` into a boxed serializer whose quoting follows `options.quote_style`.
///
/// * `Always` - every emitted field, nulls included, is wrapped in quotes.
/// * `NonNumeric` - only non-null values are quoted.
/// * any other style - nothing is quoted.
#[cfg(any(
    feature = "dtype-date",
    feature = "dtype-time",
    feature = "dtype-datetime"
))]
fn date_and_time_final_serializer<'a, T: NativeType>(
    array: &'a PrimitiveArray<T>,
    callback: impl FnMut(T, &mut Vec<u8>) + Send + 'a,
    options: &SerializeOptions,
) -> Box<dyn Serializer<'a> + Send + 'a> {
    match options.quote_style {
        QuoteStyle::NonNumeric => Box::new(callback_serializer::<T, true>(array, callback)),
        QuoteStyle::Always => {
            let unquoted = callback_serializer::<T, false>(array, callback);
            Box::new(quote_serializer(unquoted)) as Box<dyn Serializer + Send>
        },
        _ => Box::new(callback_serializer::<T, false>(array, callback)),
    }
}
528
/// Builds a boxed serializer for string-like values.
///
/// * `f` - pulls the next value out of the iterator state; `None` means null.
/// * `options` - only `quote_style` is read here, to pick the serialization strategy; the
///   remaining settings are taken from the options passed to each `serialize` call.
/// * `update` - creates the iterator state for an array; called once for `array` here and
///   again on every `update_array`.
/// * `array` - the initial array.
///
/// Quoting behaviour per style:
/// * `Always` - every field is quoted, including nulls.
/// * `NonNumeric` - non-null values are quoted; nulls are written bare.
/// * `Necessary` - values are quoted only if they are empty or contain the separator, a line
///   feed, a carriage return or the quote character.
/// * `Never` - values are written verbatim, without quoting or escaping.
pub(super) fn string_serializer<'a, Iter: Send + 'a>(
    mut f: impl FnMut(&mut Iter) -> Option<&str> + Send + 'a,
    options: &SerializeOptions,
    mut update: impl FnMut(&'a dyn Array) -> Iter + Send + 'a,
    array: &'a dyn Array,
) -> Box<dyn Serializer<'a> + 'a + Send> {
    const LF: u8 = b'\n';
    const CR: u8 = b'\r';

    /// Serializer backed by a `serialize` closure and the iterator state it reads from.
    struct StringSerializer<F, Iter, Update> {
        serialize: F,
        update: Update,
        iter: Iter,
    }

    impl<'a, F, Iter, Update> Serializer<'a> for StringSerializer<F, Iter, Update>
    where
        F: FnMut(&mut Iter, &mut Vec<u8>, &SerializeOptions),
        Update: FnMut(&'a dyn Array) -> Iter,
    {
        fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
            (self.serialize)(&mut self.iter, buf, options);
        }

        fn update_array(&mut self, array: &'a dyn Array) {
            self.iter = (self.update)(array);
        }
    }

    /// Appends `s` to `buf`, doubling every occurrence of `quote_char`.
    ///
    /// If `s` contains a quote character and `quoted` is `false`, the output is additionally
    /// wrapped in quote characters. When `quoted` is `true`, the caller writes the surrounding
    /// quotes itself.
    fn serialize_str_escaped(buf: &mut Vec<u8>, s: &[u8], quote_char: u8, quoted: bool) {
        let mut iter = memchr_iter(quote_char, s);
        let first_quote = iter.next();
        match first_quote {
            // Fast path: nothing to escape.
            None => buf.extend_from_slice(s),
            Some(mut quote_pos) => {
                if !quoted {
                    buf.push(quote_char);
                }
                let mut start_pos = 0;
                loop {
                    // Copy the text before the quote, then emit the quote twice.
                    buf.extend_from_slice(&s[start_pos..quote_pos]);
                    buf.extend_from_slice(&[quote_char, quote_char]);
                    match iter.next() {
                        Some(quote) => {
                            start_pos = quote_pos + 1;
                            quote_pos = quote;
                        },
                        None => {
                            // No more quotes: copy the remainder after the last one.
                            buf.extend_from_slice(&s[quote_pos + 1..]);
                            break;
                        },
                    }
                }
                if !quoted {
                    buf.push(quote_char);
                }
            },
        }
    }

    let iter = update(array);
    match options.quote_style {
        QuoteStyle::Always => {
            let serialize =
                move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
                    let quote_char = options.quote_char;
                    buf.push(quote_char);
                    // Nulls are quoted as well in this mode.
                    let Some(s) = f(iter) else {
                        buf.extend_from_slice(options.null.as_bytes());
                        buf.push(quote_char);
                        return;
                    };
                    serialize_str_escaped(buf, s.as_bytes(), quote_char, true);
                    buf.push(quote_char);
                };
            Box::new(StringSerializer {
                serialize,
                update,
                iter,
            })
        },
        QuoteStyle::NonNumeric => {
            let serialize =
                move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
                    let Some(s) = f(iter) else {
                        buf.extend_from_slice(options.null.as_bytes());
                        return;
                    };
                    let quote_char = options.quote_char;
                    buf.push(quote_char);
                    serialize_str_escaped(buf, s.as_bytes(), quote_char, true);
                    buf.push(quote_char);
                };
            Box::new(StringSerializer {
                serialize,
                update,
                iter,
            })
        },
        QuoteStyle::Necessary => {
            let serialize =
                move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
                    let Some(s) = f(iter) else {
                        buf.extend_from_slice(options.null.as_bytes());
                        return;
                    };
                    let quote_char = options.quote_char;
                    if s.is_empty() {
                        // An empty string is written as a pair of quotes
                        // (presumably to keep it distinguishable from a null; confirm).
                        buf.extend_from_slice(&[quote_char, quote_char]);
                        return;
                    }
                    let needs_quote = memchr3(options.separator, LF, CR, s.as_bytes()).is_some();
                    if needs_quote {
                        buf.push(quote_char);
                    }
                    // If quotes were not written above, this call still adds them when the value
                    // contains the quote character.
                    serialize_str_escaped(buf, s.as_bytes(), quote_char, needs_quote);
                    if needs_quote {
                        buf.push(quote_char);
                    }
                };
            Box::new(StringSerializer {
                serialize,
                update,
                iter,
            })
        },
        QuoteStyle::Never => {
            let serialize =
                move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
                    let Some(s) = f(iter) else {
                        buf.extend_from_slice(options.null.as_bytes());
                        return;
                    };
                    // Written verbatim: no quoting and no escaping.
                    buf.extend_from_slice(s.as_bytes());
                };
            Box::new(StringSerializer {
                serialize,
                update,
                iter,
            })
        },
    }
}
673
674fn quote_serializer<'a>(serializer: impl Serializer<'a>) -> impl Serializer<'a> {
675 struct QuoteSerializer<S>(S);
676 impl<'a, S: Serializer<'a>> Serializer<'a> for QuoteSerializer<S> {
677 fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
678 buf.push(options.quote_char);
679 self.0.serialize(buf, options);
680 buf.push(options.quote_char);
681 }
682
683 fn update_array(&mut self, array: &'a dyn Array) {
684 self.0.update_array(array);
685 }
686 }
687 QuoteSerializer(serializer)
688}
689
/// Picks and builds the serializer matching `dtype` for `array`.
///
/// * `array` - the initial array; it is downcast to the concrete array type the chosen
///   serializer expects.
/// * `options` - formatting options (quote style, decimal comma, float precision, ...).
/// * `dtype` - the logical data type that selects the serializer.
/// * `_datetime_format` / `_time_zone` - only used by the `Datetime` arm, which is compiled
///   with the `dtype-datetime` feature.
///
/// # Errors
/// Returns a `ComputeError` when `dtype` has no CSV serializer, or when a date/time/datetime
/// format cannot be parsed or applied.
///
/// # Panics
/// Panics if `array` cannot be downcast to the expected concrete type, or if a time zone is
/// supplied while the `timezones` feature is disabled.
pub(super) fn serializer_for<'a>(
    array: &'a dyn Array,
    options: &'a SerializeOptions,
    dtype: &'a DataType,
    _datetime_format: &'a str,
    _time_zone: Option<Tz>,
) -> PolarsResult<Box<dyn Serializer<'a> + Send + 'a>> {
    // Whether `quote_wrapper!` below must wrap every field of this column in quotes.
    let needs_quotes = match dtype {
        DataType::Float16 | DataType::Float32 | DataType::Float64 => {
            // A decimal comma inside a value collides with a `,` separator.
            let mut should_quote = options.decimal_comma && options.separator == b',';
            // With an explicit precision of 0 no fractional part is requested.
            if let Some(precision) = options.float_precision {
                should_quote &= precision > 0;
            }

            match options.quote_style {
                QuoteStyle::Always => true,
                QuoteStyle::Necessary | QuoteStyle::NonNumeric => should_quote,
                QuoteStyle::Never => false,
            }
        },
        #[cfg(feature = "dtype-decimal")]
        DataType::Decimal(_, scale) => {
            // Same collision as for floats; a scale of 0 is excluded from quoting.
            let should_quote = options.decimal_comma && options.separator == b',' && *scale > 0;

            match options.quote_style {
                QuoteStyle::Always => true,
                QuoteStyle::Necessary | QuoteStyle::NonNumeric => should_quote,
                QuoteStyle::Never => false,
            }
        },
        _ => options.quote_style == QuoteStyle::Always,
    };

    // Builds a serializer with the given constructor (downcasting `array` for it) and wraps it
    // in `quote_serializer` when `needs_quotes` is set.
    macro_rules! quote_wrapper {
        ($make_serializer:path, $($arg:tt)*) => {{
            let serializer = $make_serializer(array.as_any().downcast_ref().unwrap(), $($arg)*);
            if needs_quotes {
                Box::new(quote_serializer(serializer)) as Box<dyn Serializer + Send>
            } else {
                Box::new(serializer)
            }
        }};
        ($make_serializer:path) => { quote_wrapper!($make_serializer,) };
    }

    let serializer = match dtype {
        DataType::Int8 => quote_wrapper!(integer_serializer::<i8>),
        DataType::UInt8 => quote_wrapper!(integer_serializer::<u8>),
        DataType::Int16 => quote_wrapper!(integer_serializer::<i16>),
        DataType::UInt16 => quote_wrapper!(integer_serializer::<u16>),
        DataType::Int32 => quote_wrapper!(integer_serializer::<i32>),
        DataType::UInt32 => quote_wrapper!(integer_serializer::<u32>),
        DataType::Int64 => quote_wrapper!(integer_serializer::<i64>),
        DataType::UInt64 => quote_wrapper!(integer_serializer::<u64>),
        DataType::Int128 => quote_wrapper!(integer_serializer::<i128>),
        DataType::UInt128 => quote_wrapper!(integer_serializer::<u128>),
        // The float arms dispatch on (decimal_comma, float_precision, float_scientific).
        // With a precision set, scientific notation is used only for `Some(true)`; without
        // one, `None` for `float_scientific` selects the `ryu`-based auto-format.
        DataType::Float16 => {
            match (
                options.decimal_comma,
                options.float_precision,
                options.float_scientific,
            ) {
                (false, Some(precision), Some(true)) => {
                    quote_wrapper!(
                        float_serializer_with_precision_scientific::<pf16>,
                        precision
                    )
                },
                (false, Some(precision), _) => {
                    quote_wrapper!(
                        float_serializer_with_precision_positional::<pf16>,
                        precision
                    )
                },
                (false, None, Some(true)) => {
                    quote_wrapper!(float_serializer_no_precision_scientific::<pf16>)
                },
                (false, None, Some(false)) => {
                    quote_wrapper!(float_serializer_no_precision_positional::<pf16>)
                },
                (false, None, None) => {
                    quote_wrapper!(float_serializer_no_precision_autoformat_f16)
                },

                (true, Some(precision), Some(true)) => quote_wrapper!(
                    float_serializer_with_precision_scientific_decimal_comma::<pf16>,
                    precision
                ),
                (true, Some(precision), _) => quote_wrapper!(
                    float_serializer_with_precision_positional_decimal_comma::<pf16>,
                    precision
                ),
                (true, None, Some(true)) => {
                    quote_wrapper!(float_serializer_no_precision_scientific_decimal_comma::<pf16>)
                },
                (true, None, Some(false)) => {
                    quote_wrapper!(float_serializer_no_precision_positional_decimal_comma::<pf16>)
                },
                (true, None, None) => {
                    quote_wrapper!(float_serializer_no_precision_autoformat_decimal_comma_f16)
                },
            }
        },
        DataType::Float32 => {
            match (
                options.decimal_comma,
                options.float_precision,
                options.float_scientific,
            ) {
                (false, Some(precision), Some(true)) => {
                    quote_wrapper!(float_serializer_with_precision_scientific::<f32>, precision)
                },
                (false, Some(precision), _) => {
                    quote_wrapper!(float_serializer_with_precision_positional::<f32>, precision)
                },
                (false, None, Some(true)) => {
                    quote_wrapper!(float_serializer_no_precision_scientific::<f32>)
                },
                (false, None, Some(false)) => {
                    quote_wrapper!(float_serializer_no_precision_positional::<f32>)
                },
                (false, None, None) => {
                    quote_wrapper!(float_serializer_no_precision_autoformat::<f32>)
                },

                (true, Some(precision), Some(true)) => quote_wrapper!(
                    float_serializer_with_precision_scientific_decimal_comma::<f32>,
                    precision
                ),
                (true, Some(precision), _) => quote_wrapper!(
                    float_serializer_with_precision_positional_decimal_comma::<f32>,
                    precision
                ),
                (true, None, Some(true)) => {
                    quote_wrapper!(float_serializer_no_precision_scientific_decimal_comma::<f32>)
                },
                (true, None, Some(false)) => {
                    quote_wrapper!(float_serializer_no_precision_positional_decimal_comma::<f32>)
                },
                (true, None, None) => {
                    quote_wrapper!(float_serializer_no_precision_autoformat_decimal_comma::<f32>)
                },
            }
        },
        DataType::Float64 => {
            match (
                options.decimal_comma,
                options.float_precision,
                options.float_scientific,
            ) {
                (false, Some(precision), Some(true)) => {
                    quote_wrapper!(float_serializer_with_precision_scientific::<f64>, precision)
                },
                (false, Some(precision), _) => {
                    quote_wrapper!(float_serializer_with_precision_positional::<f64>, precision)
                },
                (false, None, Some(true)) => {
                    quote_wrapper!(float_serializer_no_precision_scientific::<f64>)
                },
                (false, None, Some(false)) => {
                    quote_wrapper!(float_serializer_no_precision_positional::<f64>)
                },
                (false, None, None) => {
                    quote_wrapper!(float_serializer_no_precision_autoformat::<f64>)
                },

                (true, Some(precision), Some(true)) => quote_wrapper!(
                    float_serializer_with_precision_scientific_decimal_comma::<f64>,
                    precision
                ),
                (true, Some(precision), _) => quote_wrapper!(
                    float_serializer_with_precision_positional_decimal_comma::<f64>,
                    precision
                ),
                (true, None, Some(true)) => {
                    quote_wrapper!(float_serializer_no_precision_scientific_decimal_comma::<f64>)
                },
                (true, None, Some(false)) => {
                    quote_wrapper!(float_serializer_no_precision_positional_decimal_comma::<f64>)
                },
                (true, None, None) => {
                    quote_wrapper!(float_serializer_no_precision_autoformat_decimal_comma::<f64>)
                },
            }
        },
        DataType::Null => quote_wrapper!(null_serializer),
        DataType::Boolean => {
            let array = array.as_any().downcast_ref().unwrap();
            // Booleans count as non-numeric: `NonNumeric` quotes non-null values only, while
            // `Always` quotes every field.
            match options.quote_style {
                QuoteStyle::Always => Box::new(quote_serializer(bool_serializer::<false>(array)))
                    as Box<dyn Serializer + Send>,
                QuoteStyle::NonNumeric => Box::new(bool_serializer::<true>(array)),
                _ => Box::new(bool_serializer::<false>(array)),
            }
        },
        #[cfg(feature = "dtype-date")]
        DataType::Date => date_and_time_serializer(
            &options.date_format,
            "NaiveDate",
            array,
            chrono::NaiveDate::MAX,
            arrow::temporal_conversions::date32_to_date,
            |date, items| date.format_with_items(items),
            options,
        )?,
        #[cfg(feature = "dtype-time")]
        DataType::Time => date_and_time_serializer(
            &options.time_format,
            "NaiveTime",
            array,
            chrono::NaiveTime::MIN,
            arrow::temporal_conversions::time64ns_to_time,
            |time, items| time.format_with_items(items),
            options,
        )?,
        #[cfg(feature = "dtype-datetime")]
        DataType::Datetime(time_unit, _) => {
            // Parse the format once; the parsed items are reused for every value.
            let format = chrono::format::StrftimeItems::new(_datetime_format)
                .parse()
                .map_err(|_| {
                    polars_err!(
                        ComputeError: "cannot format {} with format '{_datetime_format}'",
                        if _time_zone.is_some() { "DateTime" } else { "NaiveDateTime" },
                    )
                })?;
            use std::fmt::Write;
            let sample_datetime = match _time_zone {
                #[cfg(feature = "timezones")]
                Some(time_zone) => time_zone
                    .from_utc_datetime(&chrono::NaiveDateTime::MAX)
                    .format_with_items(format.iter()),
                #[cfg(not(feature = "timezones"))]
                Some(_) => panic!("activate 'timezones' feature"),
                None => chrono::NaiveDateTime::MAX.format_with_items(format.iter()),
            };
            // Dry-run: format a sample value into a sink that discards its input, so a format
            // that cannot be applied is reported as an error here.
            write!(IgnoreFmt, "{sample_datetime}").map_err(|_| {
                polars_err!(
                    ComputeError: "cannot format {} with format '{_datetime_format}'",
                    if _time_zone.is_some() { "DateTime" } else { "NaiveDateTime" },
                )
            })?;

            let array = array.as_any().downcast_ref().unwrap();

            // Builds the serializer for one time unit; `$convert` names the
            // `arrow::temporal_conversions` function that turns the stored integer into a
            // naive datetime.
            macro_rules! time_unit_serializer {
                ($convert:ident) => {
                    match _time_zone {
                        #[cfg(feature = "timezones")]
                        Some(time_zone) => {
                            let callback = move |item, buf: &mut Vec<u8>| {
                                let item = arrow::temporal_conversions::$convert(item);
                                // The naive value is interpreted as UTC and converted to the
                                // requested time zone.
                                let item = time_zone.from_utc_datetime(&item);
                                let _ = write!(buf, "{}", item.format_with_items(format.iter()));
                            };
                            date_and_time_final_serializer(array, callback, options)
                        },
                        #[cfg(not(feature = "timezones"))]
                        Some(_) => panic!("activate 'timezones' feature"),
                        None => {
                            let callback = move |item, buf: &mut Vec<u8>| {
                                let item = arrow::temporal_conversions::$convert(item);
                                let _ = write!(buf, "{}", item.format_with_items(format.iter()));
                            };
                            date_and_time_final_serializer(array, callback, options)
                        },
                    }
                };
            }

            match time_unit {
                TimeUnit::Nanoseconds => time_unit_serializer!(timestamp_ns_to_datetime),
                TimeUnit::Microseconds => time_unit_serializer!(timestamp_us_to_datetime),
                TimeUnit::Milliseconds => time_unit_serializer!(timestamp_ms_to_datetime),
            }
        },
        DataType::String => string_serializer(
            |iter| Iterator::next(iter).expect(TOO_MANY_MSG),
            options,
            |arr| {
                arr.as_any()
                    .downcast_ref::<Utf8ViewArray>()
                    .expect(ARRAY_MISMATCH_MSG)
                    .iter()
            },
            array,
        ),
        #[cfg(feature = "dtype-categorical")]
        DataType::Categorical(_, mapping) | DataType::Enum(_, mapping) => {
            // Categories are stored as physical integers; each one is looked up in `mapping`
            // and serialized like a string.
            polars_core::with_match_categorical_physical_type!(dtype.cat_physical().unwrap(), |$C| {
                string_serializer(
                    |iter| {
                        let &idx: &<$C as PolarsCategoricalType>::Native = Iterator::next(iter).expect(TOO_MANY_MSG)?;
                        // NOTE(review): the unchecked lookup presumably relies on every stored
                        // index being valid for `mapping`; confirm against the mapping's contract.
                        Some(unsafe { mapping.cat_to_str_unchecked(idx.as_cat()) })
                    },
                    options,
                    |arr| {
                        arr.as_any()
                            .downcast_ref::<PrimitiveArray<<$C as PolarsCategoricalType>::Native>>()
                            .expect(ARRAY_MISMATCH_MSG)
                            .iter()
                    },
                    array,
                )
            })
        },
        #[cfg(feature = "dtype-decimal")]
        DataType::Decimal(_, scale) => {
            quote_wrapper!(decimal_serializer, *scale)
        },
        _ => {
            polars_bail!(ComputeError: "datatype {dtype} cannot be written to CSV\n\nConsider using JSON or a binary format.")
        },
    };
    Ok(serializer)
}
1028
#[cfg(test)]
mod test {
    use arrow::array::NullArray;
    use polars_core::prelude::ArrowDataType;

    use super::string_serializer;
    use crate::csv::write::options::{QuoteStyle, SerializeOptions};

    /// Checks the output of `string_serializer` for every quote style against nulls, empty
    /// strings, and strings containing quotes, separators and line breaks.
    #[test]
    fn test_string_serializer() {
        /// Serializes the single value `s` with `options` and panics if the output differs
        /// from `expected`.
        #[track_caller]
        fn check_string_serialization(options: &SerializeOptions, s: Option<&str>, expected: &str) {
            // The array is a placeholder: the `update` closure ignores it and returns `s` as
            // the iterator state, which the value closure then reads back.
            let fake_array = NullArray::new(ArrowDataType::Null, 0);
            let mut serializer = string_serializer(|s| *s, options, |_| s, &fake_array);
            let mut buf = Vec::new();
            serializer.serialize(&mut buf, options);
            let serialized = std::str::from_utf8(&buf).unwrap();
            if serialized != expected {
                panic!(
                    "CSV string {s:?} wasn't serialized correctly: expected: `{expected}`, got: `{serialized}`"
                );
            }
        }

        // `Always`: every field is quoted, including nulls.
        let always_quote = SerializeOptions {
            quote_style: QuoteStyle::Always,
            ..SerializeOptions::default()
        };
        check_string_serialization(&always_quote, None, r#""""#);
        check_string_serialization(&always_quote, Some(""), r#""""#);
        check_string_serialization(&always_quote, Some("a"), r#""a""#);
        check_string_serialization(&always_quote, Some("\""), r#""""""#);
        check_string_serialization(&always_quote, Some("a\"\"b"), r#""a""""b""#);

        // `Necessary`: quotes only for empty strings and for values containing a quote, the
        // separator or a line break.
        let necessary_quote = SerializeOptions {
            quote_style: QuoteStyle::Necessary,
            ..SerializeOptions::default()
        };
        check_string_serialization(&necessary_quote, None, r#""#);
        check_string_serialization(&necessary_quote, Some(""), r#""""#);
        check_string_serialization(&necessary_quote, Some("a"), r#"a"#);
        check_string_serialization(&necessary_quote, Some("\""), r#""""""#);
        check_string_serialization(&necessary_quote, Some("a\"\"b"), r#""a""""b""#);
        check_string_serialization(&necessary_quote, Some("a b"), r#"a b"#);
        check_string_serialization(&necessary_quote, Some("a,b"), r#""a,b""#);
        check_string_serialization(&necessary_quote, Some("a\nb"), "\"a\nb\"");
        check_string_serialization(&necessary_quote, Some("a\rb"), "\"a\rb\"");

        // `Never`: values are written verbatim.
        let never_quote = SerializeOptions {
            quote_style: QuoteStyle::Never,
            ..SerializeOptions::default()
        };
        check_string_serialization(&never_quote, None, "");
        check_string_serialization(&never_quote, Some(""), "");
        check_string_serialization(&never_quote, Some("a"), "a");
        check_string_serialization(&never_quote, Some("\""), "\"");
        check_string_serialization(&never_quote, Some("a\"\"b"), "a\"\"b");
        check_string_serialization(&never_quote, Some("a b"), "a b");
        check_string_serialization(&never_quote, Some("a,b"), "a,b");
        check_string_serialization(&never_quote, Some("a\nb"), "a\nb");
        check_string_serialization(&never_quote, Some("a\rb"), "a\rb");

        // `NonNumeric`: every non-null string is quoted; nulls are written bare.
        let non_numeric_quote = SerializeOptions {
            quote_style: QuoteStyle::NonNumeric,
            ..SerializeOptions::default()
        };
        check_string_serialization(&non_numeric_quote, None, "");
        check_string_serialization(&non_numeric_quote, Some(""), r#""""#);
        check_string_serialization(&non_numeric_quote, Some("a"), r#""a""#);
        check_string_serialization(&non_numeric_quote, Some("\""), r#""""""#);
        check_string_serialization(&non_numeric_quote, Some("a\"\"b"), r#""a""""b""#);
        check_string_serialization(&non_numeric_quote, Some("a b"), r#""a b""#);
        check_string_serialization(&non_numeric_quote, Some("a,b"), r#""a,b""#);
        check_string_serialization(&non_numeric_quote, Some("a\nb"), "\"a\nb\"");
        check_string_serialization(&non_numeric_quote, Some("a\rb"), "\"a\rb\"");
    }
}