1use std::fmt::LowerExp;
34use std::io::Write;
35
36use arrow::array::{Array, BooleanArray, NullArray, PrimitiveArray, Utf8ViewArray};
37use arrow::legacy::time_zone::Tz;
38use arrow::types::NativeType;
39#[cfg(feature = "timezones")]
40use chrono::TimeZone;
41use memchr::{memchr_iter, memchr3};
42use num_traits::NumCast;
43use polars_core::prelude::*;
44
45use crate::csv::write::{QuoteStyle, SerializeOptions};
46
/// Panic message used when a serializer is asked for a value after its iterator is exhausted.
const TOO_MANY_MSG: &'static str = "too many items requested from CSV serializer";
/// Panic message used when an array cannot be downcast to the concrete type a serializer expects.
const ARRAY_MISMATCH_MSG: &'static str = "wrong array type";
49
/// A `std::fmt::Write` sink that accepts any text and throws it away.
///
/// Writing a formatted value into it exercises the formatter (so formatting errors
/// surface) without producing any output.
#[allow(dead_code)] // Only referenced from the feature-gated date/time/datetime code paths.
struct IgnoreFmt;

impl std::fmt::Write for IgnoreFmt {
    fn write_str(&mut self, _discarded: &str) -> std::fmt::Result {
        Ok(())
    }
}
57
58pub(super) trait Serializer<'a> {
59 fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions);
60 fn update_array(&mut self, array: &'a dyn Array);
62}
63
64fn make_serializer<'a, T, I: Iterator<Item = Option<T>>, const QUOTE_NON_NULL: bool>(
65 f: impl FnMut(T, &mut Vec<u8>, &SerializeOptions),
66 iter: I,
67 update_array: impl FnMut(&'a dyn Array) -> I,
68) -> impl Serializer<'a> {
69 struct SerializerImpl<F, I, Update, const QUOTE_NON_NULL: bool> {
70 f: F,
71 iter: I,
72 update_array: Update,
73 }
74
75 impl<'a, T, F, I, Update, const QUOTE_NON_NULL: bool> Serializer<'a>
76 for SerializerImpl<F, I, Update, QUOTE_NON_NULL>
77 where
78 F: FnMut(T, &mut Vec<u8>, &SerializeOptions),
79 I: Iterator<Item = Option<T>>,
80 Update: FnMut(&'a dyn Array) -> I,
81 {
82 fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
83 let item = self.iter.next().expect(TOO_MANY_MSG);
84 match item {
85 Some(item) => {
86 if QUOTE_NON_NULL {
87 buf.push(options.quote_char);
88 }
89 (self.f)(item, buf, options);
90 if QUOTE_NON_NULL {
91 buf.push(options.quote_char);
92 }
93 },
94 None => buf.extend_from_slice(options.null.as_bytes()),
95 }
96 }
97
98 fn update_array(&mut self, array: &'a dyn Array) {
99 self.iter = (self.update_array)(array);
100 }
101 }
102
103 SerializerImpl::<_, _, _, QUOTE_NON_NULL> {
104 f,
105 iter,
106 update_array,
107 }
108}
109
110fn integer_serializer<I: NativeType + itoa::Integer>(array: &PrimitiveArray<I>) -> impl Serializer {
111 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
112 let mut buffer = itoa::Buffer::new();
113 let value = buffer.format(item);
114 buf.extend_from_slice(value.as_bytes());
115 };
116
117 make_serializer::<_, _, false>(f, array.iter(), |array| {
118 array
119 .as_any()
120 .downcast_ref::<PrimitiveArray<I>>()
121 .expect(ARRAY_MISMATCH_MSG)
122 .iter()
123 })
124}
125
126fn float_serializer_no_precision_autoformat<I: NativeType + ryu::Float>(
127 array: &PrimitiveArray<I>,
128) -> impl Serializer {
129 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
130 let mut buffer = ryu::Buffer::new();
131 let value = buffer.format(item);
132 buf.extend_from_slice(value.as_bytes());
133 };
134
135 make_serializer::<_, _, false>(f, array.iter(), |array| {
136 array
137 .as_any()
138 .downcast_ref::<PrimitiveArray<I>>()
139 .expect(ARRAY_MISMATCH_MSG)
140 .iter()
141 })
142}
143
144fn float_serializer_no_precision_scientific<I: NativeType + LowerExp>(
145 array: &PrimitiveArray<I>,
146) -> impl Serializer {
147 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
148 let _ = write!(buf, "{item:.e}");
150 };
151
152 make_serializer::<_, _, false>(f, array.iter(), |array| {
153 array
154 .as_any()
155 .downcast_ref::<PrimitiveArray<I>>()
156 .expect(ARRAY_MISMATCH_MSG)
157 .iter()
158 })
159}
160
161fn float_serializer_no_precision_positional<I: NativeType + NumCast>(
162 array: &PrimitiveArray<I>,
163) -> impl Serializer {
164 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
165 let v: f64 = NumCast::from(item).unwrap();
166 let value = v.to_string();
167 buf.extend_from_slice(value.as_bytes());
168 };
169
170 make_serializer::<_, _, false>(f, array.iter(), |array| {
171 array
172 .as_any()
173 .downcast_ref::<PrimitiveArray<I>>()
174 .expect(ARRAY_MISMATCH_MSG)
175 .iter()
176 })
177}
178
179fn float_serializer_with_precision_scientific<I: NativeType + LowerExp>(
180 array: &PrimitiveArray<I>,
181 precision: usize,
182) -> impl Serializer {
183 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
184 let _ = write!(buf, "{item:.precision$e}");
186 };
187
188 make_serializer::<_, _, false>(f, array.iter(), |array| {
189 array
190 .as_any()
191 .downcast_ref::<PrimitiveArray<I>>()
192 .expect(ARRAY_MISMATCH_MSG)
193 .iter()
194 })
195}
196
197fn float_serializer_with_precision_positional<I: NativeType>(
198 array: &PrimitiveArray<I>,
199 precision: usize,
200) -> impl Serializer {
201 let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
202 let _ = write!(buf, "{item:.precision$}");
204 };
205
206 make_serializer::<_, _, false>(f, array.iter(), |array| {
207 array
208 .as_any()
209 .downcast_ref::<PrimitiveArray<I>>()
210 .expect(ARRAY_MISMATCH_MSG)
211 .iter()
212 })
213}
214
215fn null_serializer(_array: &NullArray) -> impl Serializer {
216 struct NullSerializer;
217 impl<'a> Serializer<'a> for NullSerializer {
218 fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
219 buf.extend_from_slice(options.null.as_bytes());
220 }
221 fn update_array(&mut self, _array: &'a dyn Array) {}
222 }
223 NullSerializer
224}
225
226fn bool_serializer<const QUOTE_NON_NULL: bool>(array: &BooleanArray) -> impl Serializer {
227 let f = move |item, buf: &mut Vec<u8>, _options: &SerializeOptions| {
228 let s = if item { "true" } else { "false" };
229 buf.extend_from_slice(s.as_bytes());
230 };
231
232 make_serializer::<_, _, QUOTE_NON_NULL>(f, array.iter(), |array| {
233 array
234 .as_any()
235 .downcast_ref::<BooleanArray>()
236 .expect(ARRAY_MISMATCH_MSG)
237 .iter()
238 })
239}
240
/// Serializer for decimal columns stored as `i128` with the given `scale`.
///
/// The zero-trimming setting is read once, when the serializer is built, and a single
/// format buffer is reused for every value.
///
/// # Panics
///
/// `update_array` panics if the new array is not a `PrimitiveArray<i128>`.
#[cfg(feature = "dtype-decimal")]
fn decimal_serializer(array: &PrimitiveArray<i128>, scale: usize) -> impl Serializer {
    let trim_zeros = arrow::compute::decimal::get_trim_decimal_zeros();
    let mut scratch = arrow::compute::decimal::DecimalFmtBuffer::new();

    let write_decimal = move |&value, out: &mut Vec<u8>, _: &SerializeOptions| {
        let text = scratch.format(value, scale, trim_zeros);
        out.extend_from_slice(text.as_bytes());
    };

    make_serializer::<_, _, false>(write_decimal, array.iter(), |next| {
        let typed = next.as_any().downcast_ref::<PrimitiveArray<i128>>();
        typed.expect(ARRAY_MISMATCH_MSG).iter()
    })
}
258
/// Serializer that delegates the rendering of each non-null value to `callback`.
///
/// When `QUOTE_NON_NULL` is `true`, non-null values are wrapped in the quote character.
///
/// # Panics
///
/// `update_array` panics if the new array is not a `PrimitiveArray<T>`.
#[cfg(any(
    feature = "dtype-date",
    feature = "dtype-time",
    feature = "dtype-datetime"
))]
fn callback_serializer<'a, T: NativeType, const QUOTE_NON_NULL: bool>(
    array: &'a PrimitiveArray<T>,
    mut callback: impl FnMut(T, &mut Vec<u8>) + 'a,
) -> impl Serializer<'a> {
    // Adapt the two-argument callback to the three-argument writer `make_serializer` expects.
    let forward = move |&value, out: &mut Vec<u8>, _: &SerializeOptions| callback(value, out);

    make_serializer::<_, _, QUOTE_NON_NULL>(forward, array.iter(), |next| {
        let typed = next.as_any().downcast_ref::<PrimitiveArray<T>>();
        typed.expect(ARRAY_MISMATCH_MSG).iter()
    })
}
280
/// Slice iterator over parsed chrono format items: `'iter` is the borrow of the item
/// slice, `'fmt` the lifetime of the format string the items were parsed from.
#[cfg(any(feature = "dtype-date", feature = "dtype-time"))]
type ChronoFormatIter<'iter, 'fmt> = std::slice::Iter<'iter, chrono::format::Item<'fmt>>;
283
/// Builds the serializer for a date or time column.
///
/// * `format_str` - optional strftime-style format; when `None`, values are written with
///   `T`'s `Display` implementation.
/// * `description` - type name used in error messages.
/// * `array` - the column data; downcast to `PrimitiveArray<Underlying>`.
/// * `sample_value` - a value formatted once up front to validate `format_str`.
/// * `convert` - maps the stored primitive to the chrono value `T`.
/// * `format_fn` - formats a `T` with the parsed format items.
/// * `options` - supplies the quote style applied to the result.
///
/// # Errors
///
/// Returns a `ComputeError` if `format_str` cannot be parsed or cannot format
/// `sample_value`. Validating here means `serialize` itself never has to report an error.
///
/// # Panics
///
/// Panics if `array` is not a `PrimitiveArray<Underlying>`.
#[cfg(any(feature = "dtype-date", feature = "dtype-time"))]
fn date_and_time_serializer<'a, Underlying: NativeType, T: std::fmt::Display>(
    format_str: &'a Option<String>,
    description: &str,
    array: &'a dyn Array,
    sample_value: T,
    mut convert: impl FnMut(Underlying) -> T + Send + 'a,
    mut format_fn: impl for<'b> FnMut(
        &T,
        ChronoFormatIter<'b, 'a>,
    ) -> chrono::format::DelayedFormat<ChronoFormatIter<'b, 'a>>
    + Send
    + 'a,
    options: &SerializeOptions,
) -> PolarsResult<Box<dyn Serializer<'a> + Send + 'a>> {
    let array = array.as_any().downcast_ref().unwrap();

    // No custom format: fall back to the value's own `Display` output.
    let Some(format_str) = format_str else {
        let callback = move |item, buf: &mut Vec<u8>| {
            let item = convert(item);
            let _ = write!(buf, "{item}");
        };
        return Ok(date_and_time_final_serializer(array, callback, options));
    };

    let invalid_format =
        || polars_err!(ComputeError: "cannot format {description} with format '{format_str}'");

    let format = chrono::format::StrftimeItems::new(format_str)
        .parse()
        .map_err(|_| invalid_format())?;

    // Dry-run the format against the sample so an unusable format fails here.
    {
        use std::fmt::Write;
        write!(IgnoreFmt, "{}", format_fn(&sample_value, format.iter()))
            .map_err(|_| invalid_format())?;
    }

    let callback = move |item, buf: &mut Vec<u8>| {
        let item = convert(item);
        // The format was validated above, so the result is discarded.
        let _ = write!(buf, "{}", format_fn(&item, format.iter()));
    };
    Ok(date_and_time_final_serializer(array, callback, options))
}
329
/// Wraps a temporal `callback` into a boxed serializer, applying the configured quote style.
///
/// * `QuoteStyle::Always` - the whole field is quoted, nulls included.
/// * `QuoteStyle::NonNumeric` - only non-null values are quoted.
/// * any other style - nothing is quoted.
#[cfg(any(
    feature = "dtype-date",
    feature = "dtype-time",
    feature = "dtype-datetime"
))]
fn date_and_time_final_serializer<'a, T: NativeType>(
    array: &'a PrimitiveArray<T>,
    callback: impl FnMut(T, &mut Vec<u8>) + Send + 'a,
    options: &SerializeOptions,
) -> Box<dyn Serializer<'a> + Send + 'a> {
    match options.quote_style {
        QuoteStyle::NonNumeric => Box::new(callback_serializer::<T, true>(array, callback)),
        QuoteStyle::Always => {
            let unquoted = callback_serializer::<T, false>(array, callback);
            Box::new(quote_serializer(unquoted))
        },
        _ => Box::new(callback_serializer::<T, false>(array, callback)),
    }
}
348
349pub(super) fn string_serializer<'a, Iter: Send + 'a>(
350 mut f: impl FnMut(&mut Iter) -> Option<&str> + Send + 'a,
351 options: &SerializeOptions,
352 mut update: impl FnMut(&'a dyn Array) -> Iter + Send + 'a,
353 array: &'a dyn Array,
354) -> Box<dyn Serializer<'a> + 'a + Send> {
355 const LF: u8 = b'\n';
356 const CR: u8 = b'\r';
357
358 struct StringSerializer<F, Iter, Update> {
359 serialize: F,
360 update: Update,
361 iter: Iter,
362 }
363
364 impl<'a, F, Iter, Update> Serializer<'a> for StringSerializer<F, Iter, Update>
365 where
366 F: FnMut(&mut Iter, &mut Vec<u8>, &SerializeOptions),
367 Update: FnMut(&'a dyn Array) -> Iter,
368 {
369 fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
370 (self.serialize)(&mut self.iter, buf, options);
371 }
372
373 fn update_array(&mut self, array: &'a dyn Array) {
374 self.iter = (self.update)(array);
375 }
376 }
377
378 fn serialize_str_escaped(buf: &mut Vec<u8>, s: &[u8], quote_char: u8, quoted: bool) {
379 let mut iter = memchr_iter(quote_char, s);
380 let first_quote = iter.next();
381 match first_quote {
382 None => buf.extend_from_slice(s),
383 Some(mut quote_pos) => {
384 if !quoted {
385 buf.push(quote_char);
386 }
387 let mut start_pos = 0;
388 loop {
389 buf.extend_from_slice(&s[start_pos..quote_pos]);
390 buf.extend_from_slice(&[quote_char, quote_char]);
391 match iter.next() {
392 Some(quote) => {
393 start_pos = quote_pos + 1;
394 quote_pos = quote;
395 },
396 None => {
397 buf.extend_from_slice(&s[quote_pos + 1..]);
398 break;
399 },
400 }
401 }
402 if !quoted {
403 buf.push(quote_char);
404 }
405 },
406 }
407 }
408
409 let iter = update(array);
410 match options.quote_style {
411 QuoteStyle::Always => {
412 let serialize =
413 move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
414 let quote_char = options.quote_char;
415 buf.push(quote_char);
416 let Some(s) = f(iter) else {
417 buf.extend_from_slice(options.null.as_bytes());
418 buf.push(quote_char);
419 return;
420 };
421 serialize_str_escaped(buf, s.as_bytes(), quote_char, true);
422 buf.push(quote_char);
423 };
424 Box::new(StringSerializer {
425 serialize,
426 update,
427 iter,
428 })
429 },
430 QuoteStyle::NonNumeric => {
431 let serialize =
432 move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
433 let Some(s) = f(iter) else {
434 buf.extend_from_slice(options.null.as_bytes());
435 return;
436 };
437 let quote_char = options.quote_char;
438 buf.push(quote_char);
439 serialize_str_escaped(buf, s.as_bytes(), quote_char, true);
440 buf.push(quote_char);
441 };
442 Box::new(StringSerializer {
443 serialize,
444 update,
445 iter,
446 })
447 },
448 QuoteStyle::Necessary => {
449 let serialize =
450 move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
451 let Some(s) = f(iter) else {
452 buf.extend_from_slice(options.null.as_bytes());
453 return;
454 };
455 let quote_char = options.quote_char;
456 if s.is_empty() {
458 buf.extend_from_slice(&[quote_char, quote_char]);
459 return;
460 }
461 let needs_quote = memchr3(options.separator, LF, CR, s.as_bytes()).is_some();
462 if needs_quote {
463 buf.push(quote_char);
464 }
465 serialize_str_escaped(buf, s.as_bytes(), quote_char, needs_quote);
466 if needs_quote {
467 buf.push(quote_char);
468 }
469 };
470 Box::new(StringSerializer {
471 serialize,
472 update,
473 iter,
474 })
475 },
476 QuoteStyle::Never => {
477 let serialize =
478 move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {
479 let Some(s) = f(iter) else {
480 buf.extend_from_slice(options.null.as_bytes());
481 return;
482 };
483 buf.extend_from_slice(s.as_bytes());
484 };
485 Box::new(StringSerializer {
486 serialize,
487 update,
488 iter,
489 })
490 },
491 }
492}
493
494fn quote_serializer<'a>(serializer: impl Serializer<'a>) -> impl Serializer<'a> {
495 struct QuoteSerializer<S>(S);
496 impl<'a, S: Serializer<'a>> Serializer<'a> for QuoteSerializer<S> {
497 fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {
498 buf.push(options.quote_char);
499 self.0.serialize(buf, options);
500 buf.push(options.quote_char);
501 }
502
503 fn update_array(&mut self, array: &'a dyn Array) {
504 self.0.update_array(array);
505 }
506 }
507 QuoteSerializer(serializer)
508}
509
510pub(super) fn serializer_for<'a>(
511 array: &'a dyn Array,
512 options: &'a SerializeOptions,
513 dtype: &'a DataType,
514 _datetime_format: &'a str,
515 _time_zone: Option<Tz>,
516) -> PolarsResult<Box<dyn Serializer<'a> + Send + 'a>> {
517 macro_rules! quote_if_always {
518 ($make_serializer:path, $($arg:tt)*) => {{
519 let serializer = $make_serializer(array.as_any().downcast_ref().unwrap(), $($arg)*);
520 if let QuoteStyle::Always = options.quote_style {
521 Box::new(quote_serializer(serializer)) as Box<dyn Serializer + Send>
522 } else {
523 Box::new(serializer)
524 }
525 }};
526 ($make_serializer:path) => { quote_if_always!($make_serializer,) };
527 }
528
529 let serializer = match dtype {
530 DataType::Int8 => quote_if_always!(integer_serializer::<i8>),
531 DataType::UInt8 => quote_if_always!(integer_serializer::<u8>),
532 DataType::Int16 => quote_if_always!(integer_serializer::<i16>),
533 DataType::UInt16 => quote_if_always!(integer_serializer::<u16>),
534 DataType::Int32 => quote_if_always!(integer_serializer::<i32>),
535 DataType::UInt32 => quote_if_always!(integer_serializer::<u32>),
536 DataType::Int64 => quote_if_always!(integer_serializer::<i64>),
537 DataType::UInt64 => quote_if_always!(integer_serializer::<u64>),
538 DataType::Int128 => quote_if_always!(integer_serializer::<i128>),
539 DataType::Float32 => match options.float_precision {
540 Some(precision) => match options.float_scientific {
541 Some(true) => {
542 quote_if_always!(float_serializer_with_precision_scientific::<f32>, precision)
543 },
544 _ => quote_if_always!(float_serializer_with_precision_positional::<f32>, precision),
545 },
546 None => match options.float_scientific {
547 Some(true) => quote_if_always!(float_serializer_no_precision_scientific::<f32>),
548 Some(false) => quote_if_always!(float_serializer_no_precision_positional::<f32>),
549 None => quote_if_always!(float_serializer_no_precision_autoformat::<f32>),
550 },
551 },
552 DataType::Float64 => match options.float_precision {
553 Some(precision) => match options.float_scientific {
554 Some(true) => {
555 quote_if_always!(float_serializer_with_precision_scientific::<f64>, precision)
556 },
557 _ => quote_if_always!(float_serializer_with_precision_positional::<f64>, precision),
558 },
559 None => match options.float_scientific {
560 Some(true) => quote_if_always!(float_serializer_no_precision_scientific::<f64>),
561 Some(false) => quote_if_always!(float_serializer_no_precision_positional::<f64>),
562 None => quote_if_always!(float_serializer_no_precision_autoformat::<f64>),
563 },
564 },
565 DataType::Null => quote_if_always!(null_serializer),
566 DataType::Boolean => {
567 let array = array.as_any().downcast_ref().unwrap();
568 match options.quote_style {
569 QuoteStyle::Always => Box::new(quote_serializer(bool_serializer::<false>(array)))
570 as Box<dyn Serializer + Send>,
571 QuoteStyle::NonNumeric => Box::new(bool_serializer::<true>(array)),
572 _ => Box::new(bool_serializer::<false>(array)),
573 }
574 },
575 #[cfg(feature = "dtype-date")]
576 DataType::Date => date_and_time_serializer(
577 &options.date_format,
578 "NaiveDate",
579 array,
580 chrono::NaiveDate::MAX,
581 arrow::temporal_conversions::date32_to_date,
582 |date, items| date.format_with_items(items),
583 options,
584 )?,
585 #[cfg(feature = "dtype-time")]
586 DataType::Time => date_and_time_serializer(
587 &options.time_format,
588 "NaiveTime",
589 array,
590 chrono::NaiveTime::MIN,
591 arrow::temporal_conversions::time64ns_to_time,
592 |time, items| time.format_with_items(items),
593 options,
594 )?,
595 #[cfg(feature = "dtype-datetime")]
596 DataType::Datetime(time_unit, _) => {
597 let format = chrono::format::StrftimeItems::new(_datetime_format)
598 .parse()
599 .map_err(|_| {
600 polars_err!(
601 ComputeError: "cannot format {} with format '{_datetime_format}'",
602 if _time_zone.is_some() { "DateTime" } else { "NaiveDateTime" },
603 )
604 })?;
605 use std::fmt::Write;
606 let sample_datetime = match _time_zone {
607 #[cfg(feature = "timezones")]
608 Some(time_zone) => time_zone
609 .from_utc_datetime(&chrono::NaiveDateTime::MAX)
610 .format_with_items(format.iter()),
611 #[cfg(not(feature = "timezones"))]
612 Some(_) => panic!("activate 'timezones' feature"),
613 None => chrono::NaiveDateTime::MAX.format_with_items(format.iter()),
614 };
615 write!(IgnoreFmt, "{sample_datetime}").map_err(|_| {
618 polars_err!(
619 ComputeError: "cannot format {} with format '{_datetime_format}'",
620 if _time_zone.is_some() { "DateTime" } else { "NaiveDateTime" },
621 )
622 })?;
623
624 let array = array.as_any().downcast_ref().unwrap();
625
626 macro_rules! time_unit_serializer {
627 ($convert:ident) => {
628 match _time_zone {
629 #[cfg(feature = "timezones")]
630 Some(time_zone) => {
631 let callback = move |item, buf: &mut Vec<u8>| {
632 let item = arrow::temporal_conversions::$convert(item);
633 let item = time_zone.from_utc_datetime(&item);
634 let _ = write!(buf, "{}", item.format_with_items(format.iter()));
636 };
637 date_and_time_final_serializer(array, callback, options)
638 },
639 #[cfg(not(feature = "timezones"))]
640 Some(_) => panic!("activate 'timezones' feature"),
641 None => {
642 let callback = move |item, buf: &mut Vec<u8>| {
643 let item = arrow::temporal_conversions::$convert(item);
644 let _ = write!(buf, "{}", item.format_with_items(format.iter()));
646 };
647 date_and_time_final_serializer(array, callback, options)
648 },
649 }
650 };
651 }
652
653 match time_unit {
654 TimeUnit::Nanoseconds => time_unit_serializer!(timestamp_ns_to_datetime),
655 TimeUnit::Microseconds => time_unit_serializer!(timestamp_us_to_datetime),
656 TimeUnit::Milliseconds => time_unit_serializer!(timestamp_ms_to_datetime),
657 }
658 },
659 DataType::String => string_serializer(
660 |iter| Iterator::next(iter).expect(TOO_MANY_MSG),
661 options,
662 |arr| {
663 arr.as_any()
664 .downcast_ref::<Utf8ViewArray>()
665 .expect(ARRAY_MISMATCH_MSG)
666 .iter()
667 },
668 array,
669 ),
670 #[cfg(feature = "dtype-categorical")]
671 DataType::Categorical(rev_map, _) | DataType::Enum(rev_map, _) => {
672 let rev_map = rev_map.as_deref().unwrap();
673 string_serializer(
674 |iter| {
675 let &idx: &u32 = Iterator::next(iter).expect(TOO_MANY_MSG)?;
676 Some(rev_map.get(idx))
677 },
678 options,
679 |arr| {
680 arr.as_any()
681 .downcast_ref::<PrimitiveArray<u32>>()
682 .expect(ARRAY_MISMATCH_MSG)
683 .iter()
684 },
685 array,
686 )
687 },
688 #[cfg(feature = "dtype-decimal")]
689 DataType::Decimal(_, scale) => {
690 quote_if_always!(decimal_serializer, scale.unwrap_or(0))
691 },
692 _ => {
693 polars_bail!(ComputeError: "datatype {dtype} cannot be written to CSV\n\nConsider using JSON or a binary format.")
694 },
695 };
696 Ok(serializer)
697}
698
#[cfg(test)]
mod test {
    use arrow::array::NullArray;
    use polars_core::prelude::ArrowDataType;

    use super::string_serializer;
    use crate::csv::write::options::{QuoteStyle, SerializeOptions};

    /// Exercises `string_serializer` under every quote style with nulls, empty strings,
    /// embedded quotes, separators and line breaks.
    #[test]
    fn test_string_serializer() {
        /// Serializes the single optional string `s` and compares the output with `expected`.
        #[track_caller]
        fn check_string_serialization(options: &SerializeOptions, s: Option<&str>, expected: &str) {
            // The array is never read: the closures serve `s` directly.
            let fake_array = NullArray::new(ArrowDataType::Null, 0);
            let mut serializer = string_serializer(|s| *s, options, |_| s, &fake_array);
            let mut buf = Vec::new();
            serializer.serialize(&mut buf, options);
            let serialized = std::str::from_utf8(&buf).unwrap();
            // A plain `assert!` with a custom message prints both strings unescaped.
            assert!(
                serialized == expected,
                "CSV string {s:?} wasn't serialized correctly: expected: `{expected}`, got: `{serialized}`"
            );
        }

        /// Default options with only the quote style overridden.
        fn options_with(quote_style: QuoteStyle) -> SerializeOptions {
            SerializeOptions {
                quote_style,
                ..SerializeOptions::default()
            }
        }

        let always_quote = options_with(QuoteStyle::Always);
        check_string_serialization(&always_quote, None, r#""""#);
        check_string_serialization(&always_quote, Some(""), r#""""#);
        check_string_serialization(&always_quote, Some("a"), r#""a""#);
        check_string_serialization(&always_quote, Some("\""), r#""""""#);
        check_string_serialization(&always_quote, Some("a\"\"b"), r#""a""""b""#);

        let necessary_quote = options_with(QuoteStyle::Necessary);
        check_string_serialization(&necessary_quote, None, r#""#);
        check_string_serialization(&necessary_quote, Some(""), r#""""#);
        check_string_serialization(&necessary_quote, Some("a"), r#"a"#);
        check_string_serialization(&necessary_quote, Some("\""), r#""""""#);
        check_string_serialization(&necessary_quote, Some("a\"\"b"), r#""a""""b""#);
        check_string_serialization(&necessary_quote, Some("a b"), r#"a b"#);
        check_string_serialization(&necessary_quote, Some("a,b"), r#""a,b""#);
        check_string_serialization(&necessary_quote, Some("a\nb"), "\"a\nb\"");
        check_string_serialization(&necessary_quote, Some("a\rb"), "\"a\rb\"");

        let never_quote = options_with(QuoteStyle::Never);
        check_string_serialization(&never_quote, None, "");
        check_string_serialization(&never_quote, Some(""), "");
        check_string_serialization(&never_quote, Some("a"), "a");
        check_string_serialization(&never_quote, Some("\""), "\"");
        check_string_serialization(&never_quote, Some("a\"\"b"), "a\"\"b");
        check_string_serialization(&never_quote, Some("a b"), "a b");
        check_string_serialization(&never_quote, Some("a,b"), "a,b");
        check_string_serialization(&never_quote, Some("a\nb"), "a\nb");
        check_string_serialization(&never_quote, Some("a\rb"), "a\rb");

        let non_numeric_quote = options_with(QuoteStyle::NonNumeric);
        check_string_serialization(&non_numeric_quote, None, "");
        check_string_serialization(&non_numeric_quote, Some(""), r#""""#);
        check_string_serialization(&non_numeric_quote, Some("a"), r#""a""#);
        check_string_serialization(&non_numeric_quote, Some("\""), r#""""""#);
        check_string_serialization(&non_numeric_quote, Some("a\"\"b"), r#""a""""b""#);
        check_string_serialization(&non_numeric_quote, Some("a b"), r#""a b""#);
        check_string_serialization(&non_numeric_quote, Some("a,b"), r#""a,b""#);
        check_string_serialization(&non_numeric_quote, Some("a\nb"), "\"a\nb\"");
        check_string_serialization(&non_numeric_quote, Some("a\rb"), "\"a\rb\"");
    }
}