polars_io/csv/read/
builder.rs

1use arrow::array::MutableBinaryViewArray;
2#[cfg(feature = "dtype-decimal")]
3use polars_compute::decimal::str_to_dec128;
4#[cfg(feature = "dtype-categorical")]
5use polars_core::chunked_array::builder::CategoricalChunkedBuilder;
6use polars_core::prelude::*;
7use polars_error::to_compute_err;
8#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
9use polars_time::chunkedarray::string::Pattern;
10#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
11use polars_time::prelude::string::infer::{
12    DatetimeInfer, StrpTimeParser, TryFromWithUnit, infer_pattern_single,
13};
14#[cfg(feature = "dtype-f16")]
15use polars_utils::float16::pf16;
16use polars_utils::vec::PushUnchecked;
17
18use super::options::CsvEncoding;
19use super::parser::{could_be_whitespace_fast, skip_whitespace};
20use super::utils::escape_field;
21
/// Parses the raw bytes of a single CSV field into the native value of a numeric type.
pub(crate) trait PrimitiveParser: PolarsNumericType {
    /// Attempts to parse `bytes` as `Self::Native`; returns `None` when parsing fails.
    fn parse(bytes: &[u8]) -> Option<Self::Native>;
}
25
#[cfg(feature = "dtype-f16")]
impl PrimitiveParser for Float16Type {
    /// Parses the field as an `f32` first and then converts that value to `pf16`.
    #[inline]
    fn parse(bytes: &[u8]) -> Option<pf16> {
        use num_traits::FromPrimitive;

        // Any parse failure short-circuits to `None`.
        let wide: f32 = fast_float2::parse(bytes).ok()?;
        pf16::from_f32(wide)
    }
}
35
36impl PrimitiveParser for Float32Type {
37    #[inline]
38    fn parse(bytes: &[u8]) -> Option<f32> {
39        fast_float2::parse(bytes).ok()
40    }
41}
42impl PrimitiveParser for Float64Type {
43    #[inline]
44    fn parse(bytes: &[u8]) -> Option<f64> {
45        fast_float2::parse(bytes).ok()
46    }
47}
48
49#[cfg(feature = "dtype-u8")]
50impl PrimitiveParser for UInt8Type {
51    #[inline]
52    fn parse(bytes: &[u8]) -> Option<u8> {
53        atoi_simd::parse_skipped(bytes).ok()
54    }
55}
56#[cfg(feature = "dtype-u16")]
57impl PrimitiveParser for UInt16Type {
58    #[inline]
59    fn parse(bytes: &[u8]) -> Option<u16> {
60        atoi_simd::parse_skipped(bytes).ok()
61    }
62}
63impl PrimitiveParser for UInt32Type {
64    #[inline]
65    fn parse(bytes: &[u8]) -> Option<u32> {
66        atoi_simd::parse_skipped(bytes).ok()
67    }
68}
69impl PrimitiveParser for UInt64Type {
70    #[inline]
71    fn parse(bytes: &[u8]) -> Option<u64> {
72        atoi_simd::parse_skipped(bytes).ok()
73    }
74}
75#[cfg(feature = "dtype-u128")]
76impl PrimitiveParser for UInt128Type {
77    #[inline]
78    fn parse(bytes: &[u8]) -> Option<u128> {
79        atoi_simd::parse_skipped(bytes).ok()
80    }
81}
82#[cfg(feature = "dtype-i8")]
83impl PrimitiveParser for Int8Type {
84    #[inline]
85    fn parse(bytes: &[u8]) -> Option<i8> {
86        atoi_simd::parse_skipped(bytes).ok()
87    }
88}
89#[cfg(feature = "dtype-i16")]
90impl PrimitiveParser for Int16Type {
91    #[inline]
92    fn parse(bytes: &[u8]) -> Option<i16> {
93        atoi_simd::parse_skipped(bytes).ok()
94    }
95}
96impl PrimitiveParser for Int32Type {
97    #[inline]
98    fn parse(bytes: &[u8]) -> Option<i32> {
99        atoi_simd::parse_skipped(bytes).ok()
100    }
101}
102impl PrimitiveParser for Int64Type {
103    #[inline]
104    fn parse(bytes: &[u8]) -> Option<i64> {
105        atoi_simd::parse_skipped(bytes).ok()
106    }
107}
108#[cfg(feature = "dtype-i128")]
109impl PrimitiveParser for Int128Type {
110    #[inline]
111    fn parse(bytes: &[u8]) -> Option<i128> {
112        atoi_simd::parse_skipped(bytes).ok()
113    }
114}
115
/// A column builder that can consume the raw bytes of one CSV field.
trait ParsedBuilder {
    /// Parses `bytes` and appends the resulting value (or a null) to the builder.
    ///
    /// * `ignore_errors` - implementations in this file append a null instead of
    ///   returning an error when the field cannot be parsed.
    /// * `_needs_escaping` - the field is wrapped in quote characters, which the
    ///   implementation strips or unescapes before parsing.
    /// * `_missing_is_null` - only read by the string builder in this file: an
    ///   empty field becomes a null when set and an empty string otherwise.
    /// * `_time_unit` - only read by the date/datetime builder, which forwards it
    ///   to the temporal parser.
    fn parse_bytes(
        &mut self,
        bytes: &[u8],
        ignore_errors: bool,
        _needs_escaping: bool,
        _missing_is_null: bool,
        _time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()>;
}
126
127impl<T> ParsedBuilder for PrimitiveChunkedBuilder<T>
128where
129    T: PolarsNumericType + PrimitiveParser,
130{
131    #[inline]
132    fn parse_bytes(
133        &mut self,
134        mut bytes: &[u8],
135        ignore_errors: bool,
136        needs_escaping: bool,
137        _missing_is_null: bool,
138        _time_unit: Option<TimeUnit>,
139    ) -> PolarsResult<()> {
140        if !bytes.is_empty() && needs_escaping {
141            bytes = &bytes[1..bytes.len() - 1];
142        }
143
144        if !bytes.is_empty() && could_be_whitespace_fast(bytes[0]) {
145            bytes = skip_whitespace(bytes);
146        }
147
148        if bytes.is_empty() {
149            self.append_null();
150            return Ok(());
151        }
152
153        match T::parse(bytes) {
154            Some(value) => self.append_value(value),
155            None => {
156                if ignore_errors {
157                    self.append_null()
158                } else {
159                    polars_bail!(ComputeError: "invalid primitive value found during CSV parsing")
160                }
161            },
162        }
163        Ok(())
164    }
165}
166
/// Builder state for a string column read from CSV.
pub struct Utf8Field {
    // Name given to the resulting string column.
    name: PlSmallStr,
    // Accumulated field values, stored as raw bytes.
    mutable: MutableBinaryViewArray<[u8]>,
    // Reusable buffer that receives the unescaped bytes of a quoted field.
    scratch: Vec<u8>,
    // Quote character used when unescaping quoted fields.
    quote_char: u8,
    // Controls how invalid UTF-8 is treated while parsing fields.
    encoding: CsvEncoding,
}
174
175impl Utf8Field {
176    fn new(
177        name: PlSmallStr,
178        capacity: usize,
179        quote_char: Option<u8>,
180        encoding: CsvEncoding,
181    ) -> Self {
182        Self {
183            name,
184            mutable: MutableBinaryViewArray::with_capacity(capacity),
185            scratch: vec![],
186            quote_char: quote_char.unwrap_or(b'"'),
187            encoding,
188        }
189    }
190}
191
192#[inline]
193pub fn validate_utf8(bytes: &[u8]) -> bool {
194    simdutf8::basic::from_utf8(bytes).is_ok()
195}
196
197impl ParsedBuilder for Utf8Field {
198    #[inline]
199    fn parse_bytes(
200        &mut self,
201        bytes: &[u8],
202        ignore_errors: bool,
203        needs_escaping: bool,
204        missing_is_null: bool,
205        _time_unit: Option<TimeUnit>,
206    ) -> PolarsResult<()> {
207        if bytes.is_empty() {
208            if missing_is_null {
209                self.mutable.push_null()
210            } else {
211                self.mutable.push(Some([]))
212            }
213            return Ok(());
214        }
215
216        // note that one branch writes without updating the length, so we must do that later.
217        let escaped_bytes = if needs_escaping {
218            self.scratch.clear();
219            self.scratch.reserve(bytes.len());
220            polars_ensure!(bytes.len() > 1 && bytes.last() == Some(&self.quote_char), ComputeError: "invalid csv file\n\nField `{}` is not properly escaped.", std::str::from_utf8(bytes).map_err(to_compute_err)?);
221
222            // SAFETY:
223            // we just allocated enough capacity and data_len is correct.
224            unsafe {
225                let n_written =
226                    escape_field(bytes, self.quote_char, self.scratch.spare_capacity_mut());
227                self.scratch.set_len(n_written);
228            }
229
230            self.scratch.as_slice()
231        } else {
232            bytes
233        };
234
235        if matches!(self.encoding, CsvEncoding::LossyUtf8) | ignore_errors {
236            // It is important that this happens after escaping, as invalid escaped string can produce
237            // invalid utf8.
238            let parse_result = validate_utf8(escaped_bytes);
239
240            match parse_result {
241                true => {
242                    let value = escaped_bytes;
243                    self.mutable.push_value(value)
244                },
245                false => {
246                    if matches!(self.encoding, CsvEncoding::LossyUtf8) {
247                        // TODO! do this without allocating
248                        let s = String::from_utf8_lossy(escaped_bytes);
249                        self.mutable.push_value(s.as_ref().as_bytes())
250                    } else if ignore_errors {
251                        self.mutable.push_null()
252                    } else {
253                        // If field before escaping is valid utf8, the escaping is incorrect.
254                        if needs_escaping && validate_utf8(bytes) {
255                            polars_bail!(ComputeError: "string field is not properly escaped");
256                        } else {
257                            polars_bail!(ComputeError: "invalid utf-8 sequence");
258                        }
259                    }
260                },
261            }
262        } else {
263            self.mutable.push_value(escaped_bytes)
264        }
265
266        Ok(())
267    }
268}
269
/// Builder state for a categorical/enum column read from CSV.
#[cfg(feature = "dtype-categorical")]
pub struct CategoricalField<T: PolarsCategoricalType> {
    // Reusable buffer that receives the unescaped bytes of a quoted field.
    escape_scratch: Vec<u8>,
    // Quote character used when unescaping quoted fields.
    quote_char: u8,
    // Underlying builder that receives the parsed string values.
    builder: CategoricalChunkedBuilder<T>,
}
276
#[cfg(feature = "dtype-categorical")]
impl<T: PolarsCategoricalType> CategoricalField<T> {
    /// Creates a categorical builder for `dtype` with room reserved for
    /// `capacity` values. When `quote_char` is `None` the double quote (`"`)
    /// is used.
    fn new(name: PlSmallStr, capacity: usize, quote_char: Option<u8>, dtype: DataType) -> Self {
        let mut builder = CategoricalChunkedBuilder::new(name, dtype);
        builder.reserve(capacity);

        Self {
            escape_scratch: vec![],
            quote_char: quote_char.unwrap_or(b'"'),
            builder,
        }
    }

    /// Appends one CSV field to the categorical column.
    ///
    /// An empty field is appended as null. A quoted field (`needs_escaping`)
    /// is unescaped first; the bytes that are actually appended are then
    /// checked for valid UTF-8.
    ///
    /// # Errors
    /// * `ComputeError` when a quoted field is shorter than two bytes or does
    ///   not end with the quote character.
    /// * `ComputeError` when the value is not valid UTF-8 and `ignore_errors`
    ///   is `false` (with `ignore_errors` a null is appended instead).
    /// * Any error returned by the underlying builder's `append_str`.
    #[inline]
    fn parse_bytes(
        &mut self,
        bytes: &[u8],
        ignore_errors: bool,
        needs_escaping: bool,
        _missing_is_null: bool,
        _time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()> {
        if bytes.is_empty() {
            self.builder.append_null();
            return Ok(());
        }

        let key_bytes: &[u8] = if needs_escaping {
            // Same guard as the string builder uses before calling `escape_field`.
            polars_ensure!(bytes.len() > 1 && bytes.last() == Some(&self.quote_char), ComputeError: "invalid csv file\n\nField `{}` is not properly escaped.", std::str::from_utf8(bytes).map_err(to_compute_err)?);
            self.escape_scratch.clear();
            self.escape_scratch.reserve(bytes.len());
            // SAFETY:
            // `escape_scratch` is empty and has capacity for at least
            // `bytes.len()` bytes, and the field was checked above to be longer
            // than one byte and to end with the quote character.
            // `escape_field` reports how many bytes it initialized.
            // NOTE(review): assumes it never writes more than `bytes.len()`
            // bytes - confirm against its contract.
            unsafe {
                let n_written = escape_field(
                    bytes,
                    self.quote_char,
                    self.escape_scratch.spare_capacity_mut(),
                );
                self.escape_scratch.set_len(n_written);
            }
            self.escape_scratch.as_slice()
        } else {
            bytes
        };

        // Validate after escaping: unescaping can turn valid input into invalid
        // UTF-8, so the check must cover the exact bytes converted below.
        if validate_utf8(key_bytes) {
            // SAFETY:
            // `key_bytes` was validated as UTF-8 on the line above.
            let key = unsafe { std::str::from_utf8_unchecked(key_bytes) };
            self.builder.append_str(key)?;
        } else if ignore_errors {
            self.builder.append_null()
        } else {
            polars_bail!(ComputeError: "invalid utf-8 sequence");
        }
        Ok(())
    }
}
337
338impl ParsedBuilder for BooleanChunkedBuilder {
339    #[inline]
340    fn parse_bytes(
341        &mut self,
342        bytes: &[u8],
343        ignore_errors: bool,
344        needs_escaping: bool,
345        _missing_is_null: bool,
346        _time_unit: Option<TimeUnit>,
347    ) -> PolarsResult<()> {
348        let bytes = if needs_escaping {
349            &bytes[1..bytes.len() - 1]
350        } else {
351            bytes
352        };
353        if bytes.eq_ignore_ascii_case(b"false") {
354            self.append_value(false);
355        } else if bytes.eq_ignore_ascii_case(b"true") {
356            self.append_value(true);
357        } else if ignore_errors || bytes.is_empty() {
358            self.append_null();
359        } else {
360            polars_bail!(
361                ComputeError: "error while parsing value {} as boolean",
362                String::from_utf8_lossy(bytes),
363            );
364        }
365        Ok(())
366    }
367}
368
/// Builder state for a decimal column read from CSV.
#[cfg(feature = "dtype-decimal")]
pub struct DecimalField {
    // Parsed decimals are accumulated as `i128` values.
    builder: PrimitiveChunkedBuilder<Int128Type>,
    // Precision passed to the decimal parser and used for the resulting dtype.
    precision: usize,
    // Scale passed to the decimal parser and used for the resulting dtype.
    scale: usize,
    // Forwarded to the decimal parser.
    // NOTE(review): presumably selects `,` as the decimal separator - confirm.
    decimal_comma: bool,
}
376
#[cfg(feature = "dtype-decimal")]
impl DecimalField {
    /// Creates an empty decimal builder named `name` with room for `capacity`
    /// values, remembering the parsing parameters for later use.
    fn new(
        name: PlSmallStr,
        capacity: usize,
        precision: usize,
        scale: usize,
        decimal_comma: bool,
    ) -> Self {
        Self {
            builder: PrimitiveChunkedBuilder::<Int128Type>::new(name, capacity),
            precision,
            scale,
            decimal_comma,
        }
    }
}
395
#[cfg(feature = "dtype-decimal")]
impl ParsedBuilder for DecimalField {
    /// Parses one decimal CSV field and appends it to the builder.
    ///
    /// Surrounding quotes are dropped when `needs_escaping` is set and leading
    /// whitespace is skipped. A field that is empty afterwards is appended as
    /// null.
    ///
    /// # Errors
    /// Returns a `ComputeError` when the field cannot be parsed with the
    /// configured precision/scale and `ignore_errors` is `false`; with
    /// `ignore_errors` a null is appended instead.
    #[inline]
    fn parse_bytes(
        &mut self,
        mut bytes: &[u8],
        ignore_errors: bool,
        needs_escaping: bool,
        _missing_is_null: bool,
        _time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()> {
        // Both the opening and the closing quote must be present. Slicing a
        // one-byte field as `1..0` would panic, so such a field is left as-is
        // and goes through the regular parse-failure handling below.
        if needs_escaping && bytes.len() >= 2 {
            bytes = &bytes[1..bytes.len() - 1];
        }

        if !bytes.is_empty() && could_be_whitespace_fast(bytes[0]) {
            bytes = skip_whitespace(bytes);
        }

        if bytes.is_empty() {
            self.builder.append_null();
            return Ok(());
        }

        match str_to_dec128(bytes, self.precision, self.scale, self.decimal_comma) {
            Some(value) => self.builder.append_value(value),
            None => {
                if ignore_errors {
                    self.builder.append_null()
                } else {
                    polars_bail!(ComputeError: "invalid decimal value found during CSV parsing")
                }
            },
        }

        Ok(())
    }
}
434
/// Builder state for a date/datetime column read from CSV.
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
pub struct DatetimeField<T: PolarsNumericType> {
    // Parser for the pattern of the most recent successfully parsed value;
    // `None` until a first value has been parsed.
    compiled: Option<DatetimeInfer<T>>,
    // Accumulates the parsed physical values.
    builder: PrimitiveChunkedBuilder<T>,
}
440
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
impl<T: PolarsNumericType> DatetimeField<T> {
    /// Creates an empty builder named `name` with room for `capacity` values
    /// and no compiled pattern yet.
    fn new(name: PlSmallStr, capacity: usize) -> Self {
        Self {
            compiled: None,
            builder: PrimitiveChunkedBuilder::<T>::new(name, capacity),
        }
    }
}
451
/// Slow path for temporal parsing: decodes `bytes` as text, picks a pattern
/// (the already compiled one if present, otherwise one inferred from the
/// value), builds a fresh parser for it and parses the value.
///
/// On success the value is appended and the parser is stored in
/// `buf.compiled`. On any failure a null is appended when `ignore_errors` is
/// set; otherwise the error is returned.
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
fn slow_datetime_parser<T>(
    buf: &mut DatetimeField<T>,
    bytes: &[u8],
    time_unit: Option<TimeUnit>,
    ignore_errors: bool,
) -> PolarsResult<()>
where
    T: PolarsNumericType,
    DatetimeInfer<T>: TryFromWithUnit<Pattern>,
{
    let text = if bytes.is_ascii() {
        // SAFETY:
        // ASCII-only input is always valid UTF-8.
        unsafe { std::str::from_utf8_unchecked(bytes) }
    } else {
        let Ok(decoded) = std::str::from_utf8(bytes) else {
            if ignore_errors {
                buf.builder.append_null();
                return Ok(());
            }
            polars_bail!(ComputeError: "invalid utf-8 sequence");
        };
        decoded
    };

    // Prefer the pattern of the compiled parser; infer one only when there is none.
    let known_pattern = buf.compiled.as_ref().map(|compiled| compiled.pattern);
    let Some(pattern) = known_pattern.or_else(|| infer_pattern_single(text)) else {
        if ignore_errors {
            buf.builder.append_null();
            return Ok(());
        }
        polars_bail!(ComputeError: "could not find a 'date/datetime' pattern for '{}'", text)
    };

    let mut infer = match DatetimeInfer::<T>::try_from_with_unit(pattern, time_unit) {
        Ok(infer) => infer,
        Err(err) => {
            if ignore_errors {
                buf.builder.append_null();
                return Ok(());
            }
            return Err(err);
        },
    };

    let Some(value) = infer.parse(text) else {
        if ignore_errors {
            buf.builder.append_null();
            return Ok(());
        }
        polars_bail!(ComputeError: "could not parse '{}' with pattern '{:?}'", text, pattern)
    };

    buf.compiled = Some(infer);
    buf.builder.append_value(value);
    Ok(())
}
521
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
impl<T> ParsedBuilder for DatetimeField<T>
where
    T: PolarsNumericType,
    DatetimeInfer<T>: TryFromWithUnit<Pattern> + StrpTimeParser<T::Native>,
{
    /// Parses one date/datetime CSV field and appends it to the builder.
    ///
    /// The compiled parser is tried first when one exists. If there is none,
    /// or it cannot parse the field, the work is handed to
    /// `slow_datetime_parser`. An empty field is appended as null.
    #[inline]
    fn parse_bytes(
        &mut self,
        bytes: &[u8],
        ignore_errors: bool,
        needs_escaping: bool,
        _missing_is_null: bool,
        time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()> {
        // Strip the surrounding quotes only when both can be present.
        let field = if needs_escaping && bytes.len() >= 2 {
            &bytes[1..bytes.len() - 1]
        } else {
            bytes
        };

        if field.is_empty() {
            // for types other than string `_missing_is_null` is irrelevant; we always append null
            self.builder.append_null();
            return Ok(());
        }

        let fast_result = self
            .compiled
            .as_mut()
            .and_then(|compiled| compiled.parse_bytes(field, time_unit));

        match fast_result {
            Some(parsed) => {
                self.builder.append_value(parsed);
                Ok(())
            },
            // No compiled parser yet, or it failed: fall back on the slower
            // parser, which also needs to do utf8 checking.
            None => slow_datetime_parser(self, field, time_unit, ignore_errors),
        }
    }
}
564
565pub fn init_builders(
566    projection: &[usize],
567    capacity: usize,
568    schema: &Schema,
569    quote_char: Option<u8>,
570    encoding: CsvEncoding,
571    decimal_comma: bool,
572) -> PolarsResult<Vec<Builder>> {
573    projection
574        .iter()
575        .map(|&i| {
576            let (name, dtype) = schema.get_at_index(i).unwrap();
577            let name = name.clone();
578            let builder = match dtype {
579                &DataType::Boolean => Builder::Boolean(BooleanChunkedBuilder::new(name, capacity)),
580                #[cfg(feature = "dtype-i8")]
581                &DataType::Int8 => Builder::Int8(PrimitiveChunkedBuilder::new(name, capacity)),
582                #[cfg(feature = "dtype-i16")]
583                &DataType::Int16 => Builder::Int16(PrimitiveChunkedBuilder::new(name, capacity)),
584                &DataType::Int32 => Builder::Int32(PrimitiveChunkedBuilder::new(name, capacity)),
585                &DataType::Int64 => Builder::Int64(PrimitiveChunkedBuilder::new(name, capacity)),
586                #[cfg(feature = "dtype-i128")]
587                &DataType::Int128 => Builder::Int128(PrimitiveChunkedBuilder::new(name, capacity)),
588                #[cfg(feature = "dtype-u8")]
589                &DataType::UInt8 => Builder::UInt8(PrimitiveChunkedBuilder::new(name, capacity)),
590                #[cfg(feature = "dtype-u16")]
591                &DataType::UInt16 => Builder::UInt16(PrimitiveChunkedBuilder::new(name, capacity)),
592                &DataType::UInt32 => Builder::UInt32(PrimitiveChunkedBuilder::new(name, capacity)),
593                &DataType::UInt64 => Builder::UInt64(PrimitiveChunkedBuilder::new(name, capacity)),
594                #[cfg(feature = "dtype-u128")]
595                &DataType::UInt128 => {
596                    Builder::UInt128(PrimitiveChunkedBuilder::new(name, capacity))
597                },
598                #[cfg(feature = "dtype-f16")]
599                &DataType::Float16 => {
600                    if decimal_comma {
601                        Builder::DecimalFloat16(
602                            PrimitiveChunkedBuilder::new(name, capacity),
603                            Default::default(),
604                        )
605                    } else {
606                        Builder::Float16(PrimitiveChunkedBuilder::new(name, capacity))
607                    }
608                },
609                &DataType::Float32 => {
610                    if decimal_comma {
611                        Builder::DecimalFloat32(
612                            PrimitiveChunkedBuilder::new(name, capacity),
613                            Default::default(),
614                        )
615                    } else {
616                        Builder::Float32(PrimitiveChunkedBuilder::new(name, capacity))
617                    }
618                },
619                &DataType::Float64 => {
620                    if decimal_comma {
621                        Builder::DecimalFloat64(
622                            PrimitiveChunkedBuilder::new(name, capacity),
623                            Default::default(),
624                        )
625                    } else {
626                        Builder::Float64(PrimitiveChunkedBuilder::new(name, capacity))
627                    }
628                },
629                #[cfg(feature = "dtype-decimal")]
630                &DataType::Decimal(precision, scale) => Builder::Decimal(DecimalField::new(
631                    name,
632                    capacity,
633                    precision,
634                    scale,
635                    decimal_comma,
636                )),
637                &DataType::String => {
638                    Builder::Utf8(Utf8Field::new(name, capacity, quote_char, encoding))
639                },
640                #[cfg(feature = "dtype-datetime")]
641                DataType::Datetime(time_unit, time_zone) => Builder::Datetime {
642                    buf: DatetimeField::new(name, capacity),
643                    time_unit: *time_unit,
644                    time_zone: time_zone.clone(),
645                },
646                #[cfg(feature = "dtype-date")]
647                &DataType::Date => Builder::Date(DatetimeField::new(name, capacity)),
648                #[cfg(feature = "dtype-categorical")]
649                DataType::Categorical(_, _) | DataType::Enum(_, _) => {
650                    match dtype.cat_physical().unwrap() {
651                        CategoricalPhysical::U8 => {
652                            Builder::Categorical8(CategoricalField::<Categorical8Type>::new(
653                                name,
654                                capacity,
655                                quote_char,
656                                dtype.clone(),
657                            ))
658                        },
659                        CategoricalPhysical::U16 => {
660                            Builder::Categorical16(CategoricalField::<Categorical16Type>::new(
661                                name,
662                                capacity,
663                                quote_char,
664                                dtype.clone(),
665                            ))
666                        },
667                        CategoricalPhysical::U32 => {
668                            Builder::Categorical32(CategoricalField::<Categorical32Type>::new(
669                                name,
670                                capacity,
671                                quote_char,
672                                dtype.clone(),
673                            ))
674                        },
675                    }
676                },
677                dt => polars_bail!(
678                    ComputeError: "unsupported data type when reading CSV: {} when reading CSV", dt,
679                ),
680            };
681            Ok(builder)
682        })
683        .collect()
684}
685
/// A per-column CSV value builder, with one variant per supported dtype.
#[allow(clippy::large_enum_variant)]
pub enum Builder {
    Boolean(BooleanChunkedBuilder),
    #[cfg(feature = "dtype-i8")]
    Int8(PrimitiveChunkedBuilder<Int8Type>),
    #[cfg(feature = "dtype-i16")]
    Int16(PrimitiveChunkedBuilder<Int16Type>),
    Int32(PrimitiveChunkedBuilder<Int32Type>),
    Int64(PrimitiveChunkedBuilder<Int64Type>),
    #[cfg(feature = "dtype-i128")]
    Int128(PrimitiveChunkedBuilder<Int128Type>),
    #[cfg(feature = "dtype-u8")]
    UInt8(PrimitiveChunkedBuilder<UInt8Type>),
    #[cfg(feature = "dtype-u16")]
    UInt16(PrimitiveChunkedBuilder<UInt16Type>),
    UInt32(PrimitiveChunkedBuilder<UInt32Type>),
    UInt64(PrimitiveChunkedBuilder<UInt64Type>),
    #[cfg(feature = "dtype-u128")]
    UInt128(PrimitiveChunkedBuilder<UInt128Type>),
    #[cfg(feature = "dtype-f16")]
    Float16(PrimitiveChunkedBuilder<Float16Type>),
    Float32(PrimitiveChunkedBuilder<Float32Type>),
    Float64(PrimitiveChunkedBuilder<Float64Type>),
    /// Decimal column; values are accumulated as `i128`.
    #[cfg(feature = "dtype-decimal")]
    Decimal(DecimalField),
    /// String column; values are accumulated as raw bytes in a [`Utf8Field`].
    Utf8(Utf8Field),
    /// Datetime column; the time unit and time zone are applied when the
    /// builder is converted into a series.
    #[cfg(feature = "dtype-datetime")]
    Datetime {
        buf: DatetimeField<Int64Type>,
        time_unit: TimeUnit,
        time_zone: Option<TimeZone>,
    },
    #[cfg(feature = "dtype-date")]
    Date(DatetimeField<Int32Type>),
    #[cfg(feature = "dtype-categorical")]
    Categorical8(CategoricalField<Categorical8Type>),
    #[cfg(feature = "dtype-categorical")]
    Categorical16(CategoricalField<Categorical16Type>),
    #[cfg(feature = "dtype-categorical")]
    Categorical32(CategoricalField<Categorical32Type>),
    // The `DecimalFloat*` variants are selected for float columns when
    // `decimal_comma` is enabled. The `Vec<u8>` starts out empty.
    // NOTE(review): presumably a scratch buffer for rewriting the decimal
    // separator before parsing - confirm at the use site.
    #[cfg(feature = "dtype-f16")]
    DecimalFloat16(PrimitiveChunkedBuilder<Float16Type>, Vec<u8>),
    DecimalFloat32(PrimitiveChunkedBuilder<Float32Type>, Vec<u8>),
    DecimalFloat64(PrimitiveChunkedBuilder<Float64Type>, Vec<u8>),
}
732
733impl Builder {
734    pub fn into_series(self) -> PolarsResult<Series> {
735        let s = match self {
736            Builder::Boolean(v) => v.finish().into_series(),
737            #[cfg(feature = "dtype-i8")]
738            Builder::Int8(v) => v.finish().into_series(),
739            #[cfg(feature = "dtype-i16")]
740            Builder::Int16(v) => v.finish().into_series(),
741            Builder::Int32(v) => v.finish().into_series(),
742            Builder::Int64(v) => v.finish().into_series(),
743            #[cfg(feature = "dtype-i128")]
744            Builder::Int128(v) => v.finish().into_series(),
745            #[cfg(feature = "dtype-u8")]
746            Builder::UInt8(v) => v.finish().into_series(),
747            #[cfg(feature = "dtype-u16")]
748            Builder::UInt16(v) => v.finish().into_series(),
749            Builder::UInt32(v) => v.finish().into_series(),
750            Builder::UInt64(v) => v.finish().into_series(),
751            #[cfg(feature = "dtype-u128")]
752            Builder::UInt128(v) => v.finish().into_series(),
753            #[cfg(feature = "dtype-f16")]
754            Builder::Float16(v) => v.finish().into_series(),
755            Builder::Float32(v) => v.finish().into_series(),
756            Builder::Float64(v) => v.finish().into_series(),
757            #[cfg(feature = "dtype-f16")]
758            Builder::DecimalFloat16(v, _) => v.finish().into_series(),
759            Builder::DecimalFloat32(v, _) => v.finish().into_series(),
760            Builder::DecimalFloat64(v, _) => v.finish().into_series(),
761            #[cfg(feature = "dtype-decimal")]
762            Builder::Decimal(DecimalField {
763                builder,
764                precision,
765                scale,
766                ..
767            }) => unsafe {
768                builder
769                    .finish()
770                    .into_series()
771                    .from_physical_unchecked(&DataType::Decimal(precision, scale))
772                    .unwrap()
773            },
774            #[cfg(feature = "dtype-datetime")]
775            Builder::Datetime {
776                buf,
777                time_unit,
778                time_zone,
779            } => buf
780                .builder
781                .finish()
782                .into_series()
783                .cast(&DataType::Datetime(time_unit, time_zone))
784                .unwrap(),
785            #[cfg(feature = "dtype-date")]
786            Builder::Date(v) => v
787                .builder
788                .finish()
789                .into_series()
790                .cast(&DataType::Date)
791                .unwrap(),
792
793            Builder::Utf8(v) => {
794                let arr = v.mutable.freeze();
795                StringChunked::with_chunk(v.name, unsafe { arr.to_utf8view_unchecked() })
796                    .into_series()
797            },
798            #[cfg(feature = "dtype-categorical")]
799            Builder::Categorical8(buf) => buf.builder.finish().into_series(),
800            #[cfg(feature = "dtype-categorical")]
801            Builder::Categorical16(buf) => buf.builder.finish().into_series(),
802            #[cfg(feature = "dtype-categorical")]
803            Builder::Categorical32(buf) => buf.builder.finish().into_series(),
804        };
805        Ok(s)
806    }
807
808    pub fn add_null(&mut self, valid: bool) {
809        match self {
810            Builder::Boolean(v) => v.append_null(),
811            #[cfg(feature = "dtype-i8")]
812            Builder::Int8(v) => v.append_null(),
813            #[cfg(feature = "dtype-i16")]
814            Builder::Int16(v) => v.append_null(),
815            Builder::Int32(v) => v.append_null(),
816            Builder::Int64(v) => v.append_null(),
817            #[cfg(feature = "dtype-i128")]
818            Builder::Int128(v) => v.append_null(),
819            #[cfg(feature = "dtype-u8")]
820            Builder::UInt8(v) => v.append_null(),
821            #[cfg(feature = "dtype-u16")]
822            Builder::UInt16(v) => v.append_null(),
823            Builder::UInt32(v) => v.append_null(),
824            Builder::UInt64(v) => v.append_null(),
825            #[cfg(feature = "dtype-u128")]
826            Builder::UInt128(v) => v.append_null(),
827            #[cfg(feature = "dtype-f16")]
828            Builder::Float16(v) => v.append_null(),
829            Builder::Float32(v) => v.append_null(),
830            Builder::Float64(v) => v.append_null(),
831            #[cfg(feature = "dtype-decimal")]
832            Builder::Decimal(buf) => buf.builder.append_null(),
833            #[cfg(feature = "dtype-f16")]
834            Builder::DecimalFloat16(v, _) => v.append_null(),
835            Builder::DecimalFloat32(v, _) => v.append_null(),
836            Builder::DecimalFloat64(v, _) => v.append_null(),
837            Builder::Utf8(v) => {
838                if valid {
839                    v.mutable.push_value("")
840                } else {
841                    v.mutable.push_null()
842                }
843            },
844            #[cfg(feature = "dtype-datetime")]
845            Builder::Datetime { buf, .. } => buf.builder.append_null(),
846            #[cfg(feature = "dtype-date")]
847            Builder::Date(v) => v.builder.append_null(),
848            #[cfg(feature = "dtype-categorical")]
849            Builder::Categorical8(buf) => buf.builder.append_null(),
850            #[cfg(feature = "dtype-categorical")]
851            Builder::Categorical16(buf) => buf.builder.append_null(),
852            #[cfg(feature = "dtype-categorical")]
853            Builder::Categorical32(buf) => buf.builder.append_null(),
854        };
855    }
856
857    pub fn dtype(&self) -> DataType {
858        match self {
859            Builder::Boolean(_) => DataType::Boolean,
860            #[cfg(feature = "dtype-i8")]
861            Builder::Int8(_) => DataType::Int8,
862            #[cfg(feature = "dtype-i16")]
863            Builder::Int16(_) => DataType::Int16,
864            Builder::Int32(_) => DataType::Int32,
865            Builder::Int64(_) => DataType::Int64,
866            #[cfg(feature = "dtype-i128")]
867            Builder::Int128(_) => DataType::Int128,
868            #[cfg(feature = "dtype-u8")]
869            Builder::UInt8(_) => DataType::UInt8,
870            #[cfg(feature = "dtype-u16")]
871            Builder::UInt16(_) => DataType::UInt16,
872            Builder::UInt32(_) => DataType::UInt32,
873            Builder::UInt64(_) => DataType::UInt64,
874            #[cfg(feature = "dtype-u128")]
875            Builder::UInt128(_) => DataType::UInt128,
876            #[cfg(feature = "dtype-f16")]
877            Builder::Float16(_) | Builder::DecimalFloat16(_, _) => DataType::Float16,
878            Builder::Float32(_) | Builder::DecimalFloat32(_, _) => DataType::Float32,
879            Builder::Float64(_) | Builder::DecimalFloat64(_, _) => DataType::Float64,
880            #[cfg(feature = "dtype-decimal")]
881            Builder::Decimal(DecimalField {
882                precision, scale, ..
883            }) => DataType::Decimal(*precision, *scale),
884            Builder::Utf8(_) => DataType::String,
885            #[cfg(feature = "dtype-datetime")]
886            Builder::Datetime { time_unit, .. } => DataType::Datetime(*time_unit, None),
887            #[cfg(feature = "dtype-date")]
888            Builder::Date(_) => DataType::Date,
889            #[cfg(feature = "dtype-categorical")]
890            Builder::Categorical8(buf) => buf.builder.dtype().clone(),
891            #[cfg(feature = "dtype-categorical")]
892            Builder::Categorical16(buf) => buf.builder.dtype().clone(),
893            #[cfg(feature = "dtype-categorical")]
894            Builder::Categorical32(buf) => buf.builder.dtype().clone(),
895        }
896    }
897
898    #[inline]
899    pub fn add(
900        &mut self,
901        bytes: &[u8],
902        ignore_errors: bool,
903        needs_escaping: bool,
904        missing_is_null: bool,
905    ) -> PolarsResult<()> {
906        use Builder::*;
907        match self {
908            Boolean(buf) => <BooleanChunkedBuilder as ParsedBuilder>::parse_bytes(
909                buf,
910                bytes,
911                ignore_errors,
912                needs_escaping,
913                missing_is_null,
914                None,
915            ),
916            #[cfg(feature = "dtype-i8")]
917            Int8(buf) => <PrimitiveChunkedBuilder<Int8Type> as ParsedBuilder>::parse_bytes(
918                buf,
919                bytes,
920                ignore_errors,
921                needs_escaping,
922                missing_is_null,
923                None,
924            ),
925            #[cfg(feature = "dtype-i16")]
926            Int16(buf) => <PrimitiveChunkedBuilder<Int16Type> as ParsedBuilder>::parse_bytes(
927                buf,
928                bytes,
929                ignore_errors,
930                needs_escaping,
931                missing_is_null,
932                None,
933            ),
934            Int32(buf) => <PrimitiveChunkedBuilder<Int32Type> as ParsedBuilder>::parse_bytes(
935                buf,
936                bytes,
937                ignore_errors,
938                needs_escaping,
939                missing_is_null,
940                None,
941            ),
942            Int64(buf) => <PrimitiveChunkedBuilder<Int64Type> as ParsedBuilder>::parse_bytes(
943                buf,
944                bytes,
945                ignore_errors,
946                needs_escaping,
947                missing_is_null,
948                None,
949            ),
950            #[cfg(feature = "dtype-i128")]
951            Int128(buf) => <PrimitiveChunkedBuilder<Int128Type> as ParsedBuilder>::parse_bytes(
952                buf,
953                bytes,
954                ignore_errors,
955                needs_escaping,
956                missing_is_null,
957                None,
958            ),
959            #[cfg(feature = "dtype-u8")]
960            UInt8(buf) => <PrimitiveChunkedBuilder<UInt8Type> as ParsedBuilder>::parse_bytes(
961                buf,
962                bytes,
963                ignore_errors,
964                needs_escaping,
965                missing_is_null,
966                None,
967            ),
968            #[cfg(feature = "dtype-u16")]
969            UInt16(buf) => <PrimitiveChunkedBuilder<UInt16Type> as ParsedBuilder>::parse_bytes(
970                buf,
971                bytes,
972                ignore_errors,
973                needs_escaping,
974                missing_is_null,
975                None,
976            ),
977            UInt32(buf) => <PrimitiveChunkedBuilder<UInt32Type> as ParsedBuilder>::parse_bytes(
978                buf,
979                bytes,
980                ignore_errors,
981                needs_escaping,
982                missing_is_null,
983                None,
984            ),
985            UInt64(buf) => <PrimitiveChunkedBuilder<UInt64Type> as ParsedBuilder>::parse_bytes(
986                buf,
987                bytes,
988                ignore_errors,
989                needs_escaping,
990                missing_is_null,
991                None,
992            ),
993            #[cfg(feature = "dtype-u128")]
994            UInt128(buf) => <PrimitiveChunkedBuilder<UInt128Type> as ParsedBuilder>::parse_bytes(
995                buf,
996                bytes,
997                ignore_errors,
998                needs_escaping,
999                missing_is_null,
1000                None,
1001            ),
1002            #[cfg(feature = "dtype-f16")]
1003            Float16(buf) => <PrimitiveChunkedBuilder<Float16Type> as ParsedBuilder>::parse_bytes(
1004                buf,
1005                bytes,
1006                ignore_errors,
1007                needs_escaping,
1008                missing_is_null,
1009                None,
1010            ),
1011            Float32(buf) => <PrimitiveChunkedBuilder<Float32Type> as ParsedBuilder>::parse_bytes(
1012                buf,
1013                bytes,
1014                ignore_errors,
1015                needs_escaping,
1016                missing_is_null,
1017                None,
1018            ),
1019            Float64(buf) => <PrimitiveChunkedBuilder<Float64Type> as ParsedBuilder>::parse_bytes(
1020                buf,
1021                bytes,
1022                ignore_errors,
1023                needs_escaping,
1024                missing_is_null,
1025                None,
1026            ),
1027            #[cfg(feature = "dtype-f16")]
1028            DecimalFloat16(buf, scratch) => {
1029                prepare_decimal_comma(bytes, scratch);
1030                <PrimitiveChunkedBuilder<Float16Type> as ParsedBuilder>::parse_bytes(
1031                    buf,
1032                    scratch,
1033                    ignore_errors,
1034                    needs_escaping,
1035                    missing_is_null,
1036                    None,
1037                )
1038            },
1039            DecimalFloat32(buf, scratch) => {
1040                prepare_decimal_comma(bytes, scratch);
1041                <PrimitiveChunkedBuilder<Float32Type> as ParsedBuilder>::parse_bytes(
1042                    buf,
1043                    scratch,
1044                    ignore_errors,
1045                    needs_escaping,
1046                    missing_is_null,
1047                    None,
1048                )
1049            },
1050            DecimalFloat64(buf, scratch) => {
1051                prepare_decimal_comma(bytes, scratch);
1052                <PrimitiveChunkedBuilder<Float64Type> as ParsedBuilder>::parse_bytes(
1053                    buf,
1054                    scratch,
1055                    ignore_errors,
1056                    needs_escaping,
1057                    missing_is_null,
1058                    None,
1059                )
1060            },
1061            #[cfg(feature = "dtype-decimal")]
1062            Decimal(buf) => <DecimalField as ParsedBuilder>::parse_bytes(
1063                buf,
1064                bytes,
1065                ignore_errors,
1066                needs_escaping,
1067                missing_is_null,
1068                None,
1069            ),
1070            Utf8(buf) => <Utf8Field as ParsedBuilder>::parse_bytes(
1071                buf,
1072                bytes,
1073                ignore_errors,
1074                needs_escaping,
1075                missing_is_null,
1076                None,
1077            ),
1078            #[cfg(feature = "dtype-datetime")]
1079            Datetime { buf, time_unit, .. } => {
1080                <DatetimeField<Int64Type> as ParsedBuilder>::parse_bytes(
1081                    buf,
1082                    bytes,
1083                    ignore_errors,
1084                    needs_escaping,
1085                    missing_is_null,
1086                    Some(*time_unit),
1087                )
1088            },
1089            #[cfg(feature = "dtype-date")]
1090            Date(buf) => <DatetimeField<Int32Type> as ParsedBuilder>::parse_bytes(
1091                buf,
1092                bytes,
1093                ignore_errors,
1094                needs_escaping,
1095                missing_is_null,
1096                None,
1097            ),
1098            #[cfg(feature = "dtype-categorical")]
1099            Categorical8(buf) => {
1100                buf.parse_bytes(bytes, ignore_errors, needs_escaping, missing_is_null, None)
1101            },
1102            #[cfg(feature = "dtype-categorical")]
1103            Categorical16(buf) => {
1104                buf.parse_bytes(bytes, ignore_errors, needs_escaping, missing_is_null, None)
1105            },
1106            #[cfg(feature = "dtype-categorical")]
1107            Categorical32(buf) => {
1108                buf.parse_bytes(bytes, ignore_errors, needs_escaping, missing_is_null, None)
1109            },
1110        }
1111    }
1112}
1113
1114#[inline]
1115fn prepare_decimal_comma(bytes: &[u8], scratch: &mut Vec<u8>) {
1116    scratch.clear();
1117    scratch.reserve(bytes.len());
1118
1119    // SAFETY: we pre-allocated.
1120    for &byte in bytes {
1121        if byte == b',' {
1122            unsafe { scratch.push_unchecked(b'.') }
1123        } else {
1124            unsafe { scratch.push_unchecked(byte) }
1125        }
1126    }
1127}