polars_io/csv/read/
buffer.rs

1use arrow::array::MutableBinaryViewArray;
2#[cfg(feature = "dtype-decimal")]
3use polars_compute::decimal::str_to_dec128;
4#[cfg(feature = "dtype-categorical")]
5use polars_core::chunked_array::builder::CategoricalChunkedBuilder;
6use polars_core::prelude::*;
7use polars_error::to_compute_err;
8#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
9use polars_time::chunkedarray::string::Pattern;
10#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
11use polars_time::prelude::string::infer::{
12    DatetimeInfer, StrpTimeParser, TryFromWithUnit, infer_pattern_single,
13};
14use polars_utils::vec::PushUnchecked;
15
16use super::options::CsvEncoding;
17use super::parser::{could_be_whitespace_fast, skip_whitespace};
18use super::utils::escape_field;
19
/// Parses the raw bytes of a single CSV field into the native value of a
/// numeric Polars type.
pub(crate) trait PrimitiveParser: PolarsNumericType {
    /// Attempts to parse `bytes` as `Self::Native`.
    ///
    /// Returns `None` when the bytes cannot be parsed; the implementations in
    /// this file map every parser error to `None`.
    fn parse(bytes: &[u8]) -> Option<Self::Native>;
}
23
24impl PrimitiveParser for Float32Type {
25    #[inline]
26    fn parse(bytes: &[u8]) -> Option<f32> {
27        fast_float2::parse(bytes).ok()
28    }
29}
30impl PrimitiveParser for Float64Type {
31    #[inline]
32    fn parse(bytes: &[u8]) -> Option<f64> {
33        fast_float2::parse(bytes).ok()
34    }
35}
36
37#[cfg(feature = "dtype-u8")]
38impl PrimitiveParser for UInt8Type {
39    #[inline]
40    fn parse(bytes: &[u8]) -> Option<u8> {
41        atoi_simd::parse_skipped(bytes).ok()
42    }
43}
44#[cfg(feature = "dtype-u16")]
45impl PrimitiveParser for UInt16Type {
46    #[inline]
47    fn parse(bytes: &[u8]) -> Option<u16> {
48        atoi_simd::parse_skipped(bytes).ok()
49    }
50}
51impl PrimitiveParser for UInt32Type {
52    #[inline]
53    fn parse(bytes: &[u8]) -> Option<u32> {
54        atoi_simd::parse_skipped(bytes).ok()
55    }
56}
57impl PrimitiveParser for UInt64Type {
58    #[inline]
59    fn parse(bytes: &[u8]) -> Option<u64> {
60        atoi_simd::parse_skipped(bytes).ok()
61    }
62}
63#[cfg(feature = "dtype-u128")]
64impl PrimitiveParser for UInt128Type {
65    #[inline]
66    fn parse(bytes: &[u8]) -> Option<u128> {
67        atoi_simd::parse_skipped(bytes).ok()
68    }
69}
70#[cfg(feature = "dtype-i8")]
71impl PrimitiveParser for Int8Type {
72    #[inline]
73    fn parse(bytes: &[u8]) -> Option<i8> {
74        atoi_simd::parse_skipped(bytes).ok()
75    }
76}
77#[cfg(feature = "dtype-i16")]
78impl PrimitiveParser for Int16Type {
79    #[inline]
80    fn parse(bytes: &[u8]) -> Option<i16> {
81        atoi_simd::parse_skipped(bytes).ok()
82    }
83}
84impl PrimitiveParser for Int32Type {
85    #[inline]
86    fn parse(bytes: &[u8]) -> Option<i32> {
87        atoi_simd::parse_skipped(bytes).ok()
88    }
89}
90impl PrimitiveParser for Int64Type {
91    #[inline]
92    fn parse(bytes: &[u8]) -> Option<i64> {
93        atoi_simd::parse_skipped(bytes).ok()
94    }
95}
96#[cfg(feature = "dtype-i128")]
97impl PrimitiveParser for Int128Type {
98    #[inline]
99    fn parse(bytes: &[u8]) -> Option<i128> {
100        atoi_simd::parse_skipped(bytes).ok()
101    }
102}
103
104trait ParsedBuffer {
105    fn parse_bytes(
106        &mut self,
107        bytes: &[u8],
108        ignore_errors: bool,
109        _needs_escaping: bool,
110        _missing_is_null: bool,
111        _time_unit: Option<TimeUnit>,
112    ) -> PolarsResult<()>;
113}
114
115impl<T> ParsedBuffer for PrimitiveChunkedBuilder<T>
116where
117    T: PolarsNumericType + PrimitiveParser,
118{
119    #[inline]
120    fn parse_bytes(
121        &mut self,
122        mut bytes: &[u8],
123        ignore_errors: bool,
124        needs_escaping: bool,
125        _missing_is_null: bool,
126        _time_unit: Option<TimeUnit>,
127    ) -> PolarsResult<()> {
128        if !bytes.is_empty() && needs_escaping {
129            bytes = &bytes[1..bytes.len() - 1];
130        }
131
132        if !bytes.is_empty() && could_be_whitespace_fast(bytes[0]) {
133            bytes = skip_whitespace(bytes);
134        }
135
136        if bytes.is_empty() {
137            self.append_null();
138            return Ok(());
139        }
140
141        match T::parse(bytes) {
142            Some(value) => self.append_value(value),
143            None => {
144                if ignore_errors {
145                    self.append_null()
146                } else {
147                    polars_bail!(ComputeError: "invalid primitive value found during CSV parsing")
148                }
149            },
150        }
151        Ok(())
152    }
153}
154
155pub struct Utf8Field {
156    name: PlSmallStr,
157    mutable: MutableBinaryViewArray<[u8]>,
158    scratch: Vec<u8>,
159    quote_char: u8,
160    encoding: CsvEncoding,
161}
162
163impl Utf8Field {
164    fn new(
165        name: PlSmallStr,
166        capacity: usize,
167        quote_char: Option<u8>,
168        encoding: CsvEncoding,
169    ) -> Self {
170        Self {
171            name,
172            mutable: MutableBinaryViewArray::with_capacity(capacity),
173            scratch: vec![],
174            quote_char: quote_char.unwrap_or(b'"'),
175            encoding,
176        }
177    }
178}
179
180#[inline]
181pub fn validate_utf8(bytes: &[u8]) -> bool {
182    simdutf8::basic::from_utf8(bytes).is_ok()
183}
184
185impl ParsedBuffer for Utf8Field {
186    #[inline]
187    fn parse_bytes(
188        &mut self,
189        bytes: &[u8],
190        ignore_errors: bool,
191        needs_escaping: bool,
192        missing_is_null: bool,
193        _time_unit: Option<TimeUnit>,
194    ) -> PolarsResult<()> {
195        if bytes.is_empty() {
196            if missing_is_null {
197                self.mutable.push_null()
198            } else {
199                self.mutable.push(Some([]))
200            }
201            return Ok(());
202        }
203
204        // note that one branch writes without updating the length, so we must do that later.
205        let escaped_bytes = if needs_escaping {
206            self.scratch.clear();
207            self.scratch.reserve(bytes.len());
208            polars_ensure!(bytes.len() > 1 && bytes.last() == Some(&self.quote_char), ComputeError: "invalid csv file\n\nField `{}` is not properly escaped.", std::str::from_utf8(bytes).map_err(to_compute_err)?);
209
210            // SAFETY:
211            // we just allocated enough capacity and data_len is correct.
212            unsafe {
213                let n_written =
214                    escape_field(bytes, self.quote_char, self.scratch.spare_capacity_mut());
215                self.scratch.set_len(n_written);
216            }
217
218            self.scratch.as_slice()
219        } else {
220            bytes
221        };
222
223        if matches!(self.encoding, CsvEncoding::LossyUtf8) | ignore_errors {
224            // It is important that this happens after escaping, as invalid escaped string can produce
225            // invalid utf8.
226            let parse_result = validate_utf8(escaped_bytes);
227
228            match parse_result {
229                true => {
230                    let value = escaped_bytes;
231                    self.mutable.push_value(value)
232                },
233                false => {
234                    if matches!(self.encoding, CsvEncoding::LossyUtf8) {
235                        // TODO! do this without allocating
236                        let s = String::from_utf8_lossy(escaped_bytes);
237                        self.mutable.push_value(s.as_ref().as_bytes())
238                    } else if ignore_errors {
239                        self.mutable.push_null()
240                    } else {
241                        // If field before escaping is valid utf8, the escaping is incorrect.
242                        if needs_escaping && validate_utf8(bytes) {
243                            polars_bail!(ComputeError: "string field is not properly escaped");
244                        } else {
245                            polars_bail!(ComputeError: "invalid utf-8 sequence");
246                        }
247                    }
248                },
249            }
250        } else {
251            self.mutable.push_value(escaped_bytes)
252        }
253
254        Ok(())
255    }
256}
257
/// Buffer for a categorical or enum column.
///
/// `escape_scratch` is reused as the destination when a quoted field has to
/// be unescaped before it is handed to the builder.
#[cfg(feature = "dtype-categorical")]
pub struct CategoricalField<T: PolarsCategoricalType> {
    escape_scratch: Vec<u8>,
    quote_char: u8,
    builder: CategoricalChunkedBuilder<T>,
}

#[cfg(feature = "dtype-categorical")]
impl<T: PolarsCategoricalType> CategoricalField<T> {
    /// Creates a field whose builder has `capacity` slots reserved.
    ///
    /// A missing `quote_char` falls back to `"`.
    fn new(name: PlSmallStr, capacity: usize, quote_char: Option<u8>, dtype: DataType) -> Self {
        let quote_char = quote_char.unwrap_or(b'"');
        let mut builder = CategoricalChunkedBuilder::new(name, dtype);
        builder.reserve(capacity);

        Self {
            escape_scratch: Vec::new(),
            quote_char,
            builder,
        }
    }

    /// Appends one field to the categorical builder.
    ///
    /// Empty fields become null. Fields that are not valid UTF-8 become null
    /// when `ignore_errors` is set and are an error otherwise. Quoted fields
    /// (`needs_escaping`) are unescaped before being appended.
    #[inline]
    fn parse_bytes(
        &mut self,
        bytes: &[u8],
        ignore_errors: bool,
        needs_escaping: bool,
        _missing_is_null: bool,
        _time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()> {
        if bytes.is_empty() {
            self.builder.append_null();
            return Ok(());
        }

        if !validate_utf8(bytes) {
            if ignore_errors {
                self.builder.append_null();
                return Ok(());
            }
            polars_bail!(ComputeError: "invalid utf-8 sequence");
        }

        let raw = if needs_escaping {
            polars_ensure!(bytes.len() > 1, ComputeError: "invalid csv file\n\nField `{}` is not properly escaped.", std::str::from_utf8(bytes).map_err(to_compute_err)?);
            self.escape_scratch.clear();
            self.escape_scratch.reserve(bytes.len());
            // SAFETY:
            // we just allocated enough capacity and `set_len` receives the
            // number of bytes reported as written by `escape_field`.
            unsafe {
                let n_written = escape_field(
                    bytes,
                    self.quote_char,
                    self.escape_scratch.spare_capacity_mut(),
                );
                self.escape_scratch.set_len(n_written);
            }
            self.escape_scratch.as_slice()
        } else {
            bytes
        };

        // SAFETY:
        // `bytes` passed the utf8 check above; as in the original code, the
        // unescaped copy is trusted on the basis of that same check.
        let key = unsafe { std::str::from_utf8_unchecked(raw) };
        self.builder.append_str(key)?;
        Ok(())
    }
}
325
326impl ParsedBuffer for BooleanChunkedBuilder {
327    #[inline]
328    fn parse_bytes(
329        &mut self,
330        bytes: &[u8],
331        ignore_errors: bool,
332        needs_escaping: bool,
333        _missing_is_null: bool,
334        _time_unit: Option<TimeUnit>,
335    ) -> PolarsResult<()> {
336        let bytes = if needs_escaping {
337            &bytes[1..bytes.len() - 1]
338        } else {
339            bytes
340        };
341        if bytes.eq_ignore_ascii_case(b"false") {
342            self.append_value(false);
343        } else if bytes.eq_ignore_ascii_case(b"true") {
344            self.append_value(true);
345        } else if ignore_errors || bytes.is_empty() {
346            self.append_null();
347        } else {
348            polars_bail!(
349                ComputeError: "error while parsing value {} as boolean",
350                String::from_utf8_lossy(bytes),
351            );
352        }
353        Ok(())
354    }
355}
356
/// Buffer for a decimal column.
///
/// Values are stored as `i128` in `builder`; `precision`, `scale` and
/// `decimal_comma` are passed to the decimal parser for every field.
#[cfg(feature = "dtype-decimal")]
pub struct DecimalField {
    builder: PrimitiveChunkedBuilder<Int128Type>,
    precision: usize,
    scale: usize,
    decimal_comma: bool,
}

#[cfg(feature = "dtype-decimal")]
impl DecimalField {
    /// Creates an empty decimal field whose builder is created with `capacity`.
    fn new(
        name: PlSmallStr,
        capacity: usize,
        precision: usize,
        scale: usize,
        decimal_comma: bool,
    ) -> Self {
        Self {
            builder: PrimitiveChunkedBuilder::new(name, capacity),
            precision,
            scale,
            decimal_comma,
        }
    }
}
383
#[cfg(feature = "dtype-decimal")]
impl ParsedBuffer for DecimalField {
    /// Parses a decimal field and appends it to the builder.
    ///
    /// Surrounding quotes are removed when `needs_escaping` is set, leading
    /// whitespace is skipped, and a field that is empty afterwards is appended
    /// as null. A field that fails to parse is appended as null when
    /// `ignore_errors` is set and reported as a `ComputeError` otherwise.
    #[inline]
    fn parse_bytes(
        &mut self,
        mut bytes: &[u8],
        ignore_errors: bool,
        needs_escaping: bool,
        _missing_is_null: bool,
        _time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()> {
        // Only strip when an opening and a closing quote can both be present;
        // a one-byte field would otherwise produce the panicking range `1..0`.
        // The same guard is used by the date/datetime buffer in this file.
        if needs_escaping && bytes.len() >= 2 {
            bytes = &bytes[1..bytes.len() - 1];
        }

        if !bytes.is_empty() && could_be_whitespace_fast(bytes[0]) {
            bytes = skip_whitespace(bytes);
        }

        if bytes.is_empty() {
            self.builder.append_null();
            return Ok(());
        }

        match str_to_dec128(bytes, self.precision, self.scale, self.decimal_comma) {
            Some(value) => self.builder.append_value(value),
            None if ignore_errors => self.builder.append_null(),
            None => {
                polars_bail!(ComputeError: "invalid decimal value found during CSV parsing")
            },
        }

        Ok(())
    }
}
422
423#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
424pub struct DatetimeField<T: PolarsNumericType> {
425    compiled: Option<DatetimeInfer<T>>,
426    builder: PrimitiveChunkedBuilder<T>,
427}
428
429#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
430impl<T: PolarsNumericType> DatetimeField<T> {
431    fn new(name: PlSmallStr, capacity: usize) -> Self {
432        let builder = PrimitiveChunkedBuilder::<T>::new(name, capacity);
433        Self {
434            compiled: None,
435            builder,
436        }
437    }
438}
439
440#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
441fn slow_datetime_parser<T>(
442    buf: &mut DatetimeField<T>,
443    bytes: &[u8],
444    time_unit: Option<TimeUnit>,
445    ignore_errors: bool,
446) -> PolarsResult<()>
447where
448    T: PolarsNumericType,
449    DatetimeInfer<T>: TryFromWithUnit<Pattern>,
450{
451    let val = if bytes.is_ascii() {
452        // SAFETY:
453        // we just checked it is ascii
454        unsafe { std::str::from_utf8_unchecked(bytes) }
455    } else {
456        match std::str::from_utf8(bytes) {
457            Ok(val) => val,
458            Err(_) => {
459                if ignore_errors {
460                    buf.builder.append_null();
461                    return Ok(());
462                } else {
463                    polars_bail!(ComputeError: "invalid utf-8 sequence");
464                }
465            },
466        }
467    };
468
469    let pattern = match &buf.compiled {
470        Some(compiled) => compiled.pattern,
471        None => match infer_pattern_single(val) {
472            Some(pattern) => pattern,
473            None => {
474                if ignore_errors {
475                    buf.builder.append_null();
476                    return Ok(());
477                } else {
478                    polars_bail!(ComputeError: "could not find a 'date/datetime' pattern for '{}'", val)
479                }
480            },
481        },
482    };
483    match DatetimeInfer::try_from_with_unit(pattern, time_unit) {
484        Ok(mut infer) => {
485            let parsed = infer.parse(val);
486            let Some(parsed) = parsed else {
487                if ignore_errors {
488                    buf.builder.append_null();
489                    return Ok(());
490                } else {
491                    polars_bail!(ComputeError: "could not parse '{}' with pattern '{:?}'", val, pattern)
492                }
493            };
494
495            buf.compiled = Some(infer);
496            buf.builder.append_value(parsed);
497            Ok(())
498        },
499        Err(err) => {
500            if ignore_errors {
501                buf.builder.append_null();
502                Ok(())
503            } else {
504                Err(err)
505            }
506        },
507    }
508}
509
510#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
511impl<T> ParsedBuffer for DatetimeField<T>
512where
513    T: PolarsNumericType,
514    DatetimeInfer<T>: TryFromWithUnit<Pattern> + StrpTimeParser<T::Native>,
515{
516    #[inline]
517    fn parse_bytes(
518        &mut self,
519        mut bytes: &[u8],
520        ignore_errors: bool,
521        needs_escaping: bool,
522        _missing_is_null: bool,
523        time_unit: Option<TimeUnit>,
524    ) -> PolarsResult<()> {
525        if needs_escaping && bytes.len() >= 2 {
526            bytes = &bytes[1..bytes.len() - 1]
527        }
528
529        if bytes.is_empty() {
530            // for types other than string `_missing_is_null` is irrelevant; we always append null
531            self.builder.append_null();
532            return Ok(());
533        }
534
535        match &mut self.compiled {
536            None => slow_datetime_parser(self, bytes, time_unit, ignore_errors),
537            Some(compiled) => {
538                match compiled.parse_bytes(bytes, time_unit) {
539                    Some(parsed) => {
540                        self.builder.append_value(parsed);
541                        Ok(())
542                    },
543                    // fall back on chrono parser
544                    // this is a lot slower, we need to do utf8 checking and use
545                    // the slower parser
546                    None => slow_datetime_parser(self, bytes, time_unit, ignore_errors),
547                }
548            },
549        }
550    }
551}
552
553pub fn init_buffers(
554    projection: &[usize],
555    capacity: usize,
556    schema: &Schema,
557    quote_char: Option<u8>,
558    encoding: CsvEncoding,
559    decimal_comma: bool,
560) -> PolarsResult<Vec<Buffer>> {
561    projection
562        .iter()
563        .map(|&i| {
564            let (name, dtype) = schema.get_at_index(i).unwrap();
565            let name = name.clone();
566            let builder = match dtype {
567                &DataType::Boolean => Buffer::Boolean(BooleanChunkedBuilder::new(name, capacity)),
568                #[cfg(feature = "dtype-i8")]
569                &DataType::Int8 => Buffer::Int8(PrimitiveChunkedBuilder::new(name, capacity)),
570                #[cfg(feature = "dtype-i16")]
571                &DataType::Int16 => Buffer::Int16(PrimitiveChunkedBuilder::new(name, capacity)),
572                &DataType::Int32 => Buffer::Int32(PrimitiveChunkedBuilder::new(name, capacity)),
573                &DataType::Int64 => Buffer::Int64(PrimitiveChunkedBuilder::new(name, capacity)),
574                #[cfg(feature = "dtype-i128")]
575                &DataType::Int128 => Buffer::Int128(PrimitiveChunkedBuilder::new(name, capacity)),
576                #[cfg(feature = "dtype-u8")]
577                &DataType::UInt8 => Buffer::UInt8(PrimitiveChunkedBuilder::new(name, capacity)),
578                #[cfg(feature = "dtype-u16")]
579                &DataType::UInt16 => Buffer::UInt16(PrimitiveChunkedBuilder::new(name, capacity)),
580                &DataType::UInt32 => Buffer::UInt32(PrimitiveChunkedBuilder::new(name, capacity)),
581                &DataType::UInt64 => Buffer::UInt64(PrimitiveChunkedBuilder::new(name, capacity)),
582                #[cfg(feature = "dtype-u128")]
583                &DataType::UInt128 => Buffer::UInt128(PrimitiveChunkedBuilder::new(name, capacity)),
584                &DataType::Float32 => {
585                    if decimal_comma {
586                        Buffer::DecimalFloat32(
587                            PrimitiveChunkedBuilder::new(name, capacity),
588                            Default::default(),
589                        )
590                    } else {
591                        Buffer::Float32(PrimitiveChunkedBuilder::new(name, capacity))
592                    }
593                },
594                &DataType::Float64 => {
595                    if decimal_comma {
596                        Buffer::DecimalFloat64(
597                            PrimitiveChunkedBuilder::new(name, capacity),
598                            Default::default(),
599                        )
600                    } else {
601                        Buffer::Float64(PrimitiveChunkedBuilder::new(name, capacity))
602                    }
603                },
604                #[cfg(feature = "dtype-decimal")]
605                &DataType::Decimal(precision, scale) => Buffer::Decimal(DecimalField::new(
606                    name,
607                    capacity,
608                    precision,
609                    scale,
610                    decimal_comma,
611                )),
612                &DataType::String => {
613                    Buffer::Utf8(Utf8Field::new(name, capacity, quote_char, encoding))
614                },
615                #[cfg(feature = "dtype-datetime")]
616                DataType::Datetime(time_unit, time_zone) => Buffer::Datetime {
617                    buf: DatetimeField::new(name, capacity),
618                    time_unit: *time_unit,
619                    time_zone: time_zone.clone(),
620                },
621                #[cfg(feature = "dtype-date")]
622                &DataType::Date => Buffer::Date(DatetimeField::new(name, capacity)),
623                #[cfg(feature = "dtype-categorical")]
624                DataType::Categorical(_, _) | DataType::Enum(_, _) => {
625                    match dtype.cat_physical().unwrap() {
626                        CategoricalPhysical::U8 => {
627                            Buffer::Categorical8(CategoricalField::<Categorical8Type>::new(
628                                name,
629                                capacity,
630                                quote_char,
631                                dtype.clone(),
632                            ))
633                        },
634                        CategoricalPhysical::U16 => {
635                            Buffer::Categorical16(CategoricalField::<Categorical16Type>::new(
636                                name,
637                                capacity,
638                                quote_char,
639                                dtype.clone(),
640                            ))
641                        },
642                        CategoricalPhysical::U32 => {
643                            Buffer::Categorical32(CategoricalField::<Categorical32Type>::new(
644                                name,
645                                capacity,
646                                quote_char,
647                                dtype.clone(),
648                            ))
649                        },
650                    }
651                },
652                dt => polars_bail!(
653                    ComputeError: "unsupported data type when reading CSV: {} when reading CSV", dt,
654                ),
655            };
656            Ok(builder)
657        })
658        .collect()
659}
660
/// A typed column buffer that the CSV reader fills field by field.
///
/// There is one variant per supported dtype; variants for optional dtypes are
/// gated behind the matching crate feature.
#[allow(clippy::large_enum_variant)]
pub enum Buffer {
    Boolean(BooleanChunkedBuilder),
    #[cfg(feature = "dtype-i8")]
    Int8(PrimitiveChunkedBuilder<Int8Type>),
    #[cfg(feature = "dtype-i16")]
    Int16(PrimitiveChunkedBuilder<Int16Type>),
    Int32(PrimitiveChunkedBuilder<Int32Type>),
    Int64(PrimitiveChunkedBuilder<Int64Type>),
    #[cfg(feature = "dtype-i128")]
    Int128(PrimitiveChunkedBuilder<Int128Type>),
    #[cfg(feature = "dtype-u8")]
    UInt8(PrimitiveChunkedBuilder<UInt8Type>),
    #[cfg(feature = "dtype-u16")]
    UInt16(PrimitiveChunkedBuilder<UInt16Type>),
    UInt32(PrimitiveChunkedBuilder<UInt32Type>),
    UInt64(PrimitiveChunkedBuilder<UInt64Type>),
    #[cfg(feature = "dtype-u128")]
    UInt128(PrimitiveChunkedBuilder<UInt128Type>),
    Float32(PrimitiveChunkedBuilder<Float32Type>),
    Float64(PrimitiveChunkedBuilder<Float64Type>),
    /// Fixed precision/scale decimal column.
    #[cfg(feature = "dtype-decimal")]
    Decimal(DecimalField),
    /// String column; the values are collected as bytes by [`Utf8Field`].
    Utf8(Utf8Field),
    /// Datetime column; `time_unit` and `time_zone` are applied when the
    /// buffer is turned into a series.
    #[cfg(feature = "dtype-datetime")]
    Datetime {
        buf: DatetimeField<Int64Type>,
        time_unit: TimeUnit,
        time_zone: Option<TimeZone>,
    },
    #[cfg(feature = "dtype-date")]
    Date(DatetimeField<Int32Type>),
    #[cfg(feature = "dtype-categorical")]
    Categorical8(CategoricalField<Categorical8Type>),
    #[cfg(feature = "dtype-categorical")]
    Categorical16(CategoricalField<Categorical16Type>),
    #[cfg(feature = "dtype-categorical")]
    Categorical32(CategoricalField<Categorical32Type>),
    /// `Float32` column created by `init_buffers` when `decimal_comma` is set.
    /// NOTE(review): the `Vec<u8>` is presumably scratch space for rewriting
    /// the decimal separator — confirm in `Buffer::add`.
    DecimalFloat32(PrimitiveChunkedBuilder<Float32Type>, Vec<u8>),
    /// `Float64` counterpart of [`Buffer::DecimalFloat32`].
    DecimalFloat64(PrimitiveChunkedBuilder<Float64Type>, Vec<u8>),
}
703
704impl Buffer {
705    pub fn into_series(self) -> PolarsResult<Series> {
706        let s = match self {
707            Buffer::Boolean(v) => v.finish().into_series(),
708            #[cfg(feature = "dtype-i8")]
709            Buffer::Int8(v) => v.finish().into_series(),
710            #[cfg(feature = "dtype-i16")]
711            Buffer::Int16(v) => v.finish().into_series(),
712            Buffer::Int32(v) => v.finish().into_series(),
713            Buffer::Int64(v) => v.finish().into_series(),
714            #[cfg(feature = "dtype-i128")]
715            Buffer::Int128(v) => v.finish().into_series(),
716            #[cfg(feature = "dtype-u8")]
717            Buffer::UInt8(v) => v.finish().into_series(),
718            #[cfg(feature = "dtype-u16")]
719            Buffer::UInt16(v) => v.finish().into_series(),
720            Buffer::UInt32(v) => v.finish().into_series(),
721            Buffer::UInt64(v) => v.finish().into_series(),
722            #[cfg(feature = "dtype-u128")]
723            Buffer::UInt128(v) => v.finish().into_series(),
724            Buffer::Float32(v) => v.finish().into_series(),
725            Buffer::Float64(v) => v.finish().into_series(),
726            Buffer::DecimalFloat32(v, _) => v.finish().into_series(),
727            Buffer::DecimalFloat64(v, _) => v.finish().into_series(),
728            #[cfg(feature = "dtype-decimal")]
729            Buffer::Decimal(DecimalField {
730                builder,
731                precision,
732                scale,
733                ..
734            }) => unsafe {
735                builder
736                    .finish()
737                    .into_series()
738                    .from_physical_unchecked(&DataType::Decimal(precision, scale))
739                    .unwrap()
740            },
741            #[cfg(feature = "dtype-datetime")]
742            Buffer::Datetime {
743                buf,
744                time_unit,
745                time_zone,
746            } => buf
747                .builder
748                .finish()
749                .into_series()
750                .cast(&DataType::Datetime(time_unit, time_zone))
751                .unwrap(),
752            #[cfg(feature = "dtype-date")]
753            Buffer::Date(v) => v
754                .builder
755                .finish()
756                .into_series()
757                .cast(&DataType::Date)
758                .unwrap(),
759
760            Buffer::Utf8(v) => {
761                let arr = v.mutable.freeze();
762                StringChunked::with_chunk(v.name, unsafe { arr.to_utf8view_unchecked() })
763                    .into_series()
764            },
765            #[cfg(feature = "dtype-categorical")]
766            Buffer::Categorical8(buf) => buf.builder.finish().into_series(),
767            #[cfg(feature = "dtype-categorical")]
768            Buffer::Categorical16(buf) => buf.builder.finish().into_series(),
769            #[cfg(feature = "dtype-categorical")]
770            Buffer::Categorical32(buf) => buf.builder.finish().into_series(),
771        };
772        Ok(s)
773    }
774
775    pub fn add_null(&mut self, valid: bool) {
776        match self {
777            Buffer::Boolean(v) => v.append_null(),
778            #[cfg(feature = "dtype-i8")]
779            Buffer::Int8(v) => v.append_null(),
780            #[cfg(feature = "dtype-i16")]
781            Buffer::Int16(v) => v.append_null(),
782            Buffer::Int32(v) => v.append_null(),
783            Buffer::Int64(v) => v.append_null(),
784            #[cfg(feature = "dtype-i128")]
785            Buffer::Int128(v) => v.append_null(),
786            #[cfg(feature = "dtype-u8")]
787            Buffer::UInt8(v) => v.append_null(),
788            #[cfg(feature = "dtype-u16")]
789            Buffer::UInt16(v) => v.append_null(),
790            Buffer::UInt32(v) => v.append_null(),
791            Buffer::UInt64(v) => v.append_null(),
792            #[cfg(feature = "dtype-u128")]
793            Buffer::UInt128(v) => v.append_null(),
794            Buffer::Float32(v) => v.append_null(),
795            Buffer::Float64(v) => v.append_null(),
796            #[cfg(feature = "dtype-decimal")]
797            Buffer::Decimal(buf) => buf.builder.append_null(),
798            Buffer::DecimalFloat32(v, _) => v.append_null(),
799            Buffer::DecimalFloat64(v, _) => v.append_null(),
800            Buffer::Utf8(v) => {
801                if valid {
802                    v.mutable.push_value("")
803                } else {
804                    v.mutable.push_null()
805                }
806            },
807            #[cfg(feature = "dtype-datetime")]
808            Buffer::Datetime { buf, .. } => buf.builder.append_null(),
809            #[cfg(feature = "dtype-date")]
810            Buffer::Date(v) => v.builder.append_null(),
811            #[cfg(feature = "dtype-categorical")]
812            Buffer::Categorical8(buf) => buf.builder.append_null(),
813            #[cfg(feature = "dtype-categorical")]
814            Buffer::Categorical16(buf) => buf.builder.append_null(),
815            #[cfg(feature = "dtype-categorical")]
816            Buffer::Categorical32(buf) => buf.builder.append_null(),
817        };
818    }
819
820    pub fn dtype(&self) -> DataType {
821        match self {
822            Buffer::Boolean(_) => DataType::Boolean,
823            #[cfg(feature = "dtype-i8")]
824            Buffer::Int8(_) => DataType::Int8,
825            #[cfg(feature = "dtype-i16")]
826            Buffer::Int16(_) => DataType::Int16,
827            Buffer::Int32(_) => DataType::Int32,
828            Buffer::Int64(_) => DataType::Int64,
829            #[cfg(feature = "dtype-i128")]
830            Buffer::Int128(_) => DataType::Int128,
831            #[cfg(feature = "dtype-u8")]
832            Buffer::UInt8(_) => DataType::UInt8,
833            #[cfg(feature = "dtype-u16")]
834            Buffer::UInt16(_) => DataType::UInt16,
835            Buffer::UInt32(_) => DataType::UInt32,
836            Buffer::UInt64(_) => DataType::UInt64,
837            #[cfg(feature = "dtype-u128")]
838            Buffer::UInt128(_) => DataType::UInt128,
839            Buffer::Float32(_) | Buffer::DecimalFloat32(_, _) => DataType::Float32,
840            Buffer::Float64(_) | Buffer::DecimalFloat64(_, _) => DataType::Float64,
841            #[cfg(feature = "dtype-decimal")]
842            Buffer::Decimal(DecimalField {
843                precision, scale, ..
844            }) => DataType::Decimal(*precision, *scale),
845            Buffer::Utf8(_) => DataType::String,
846            #[cfg(feature = "dtype-datetime")]
847            Buffer::Datetime { time_unit, .. } => DataType::Datetime(*time_unit, None),
848            #[cfg(feature = "dtype-date")]
849            Buffer::Date(_) => DataType::Date,
850            #[cfg(feature = "dtype-categorical")]
851            Buffer::Categorical8(buf) => buf.builder.dtype().clone(),
852            #[cfg(feature = "dtype-categorical")]
853            Buffer::Categorical16(buf) => buf.builder.dtype().clone(),
854            #[cfg(feature = "dtype-categorical")]
855            Buffer::Categorical32(buf) => buf.builder.dtype().clone(),
856        }
857    }
858
859    #[inline]
860    pub fn add(
861        &mut self,
862        bytes: &[u8],
863        ignore_errors: bool,
864        needs_escaping: bool,
865        missing_is_null: bool,
866    ) -> PolarsResult<()> {
867        use Buffer::*;
868        match self {
869            Boolean(buf) => <BooleanChunkedBuilder as ParsedBuffer>::parse_bytes(
870                buf,
871                bytes,
872                ignore_errors,
873                needs_escaping,
874                missing_is_null,
875                None,
876            ),
877            #[cfg(feature = "dtype-i8")]
878            Int8(buf) => <PrimitiveChunkedBuilder<Int8Type> as ParsedBuffer>::parse_bytes(
879                buf,
880                bytes,
881                ignore_errors,
882                needs_escaping,
883                missing_is_null,
884                None,
885            ),
886            #[cfg(feature = "dtype-i16")]
887            Int16(buf) => <PrimitiveChunkedBuilder<Int16Type> as ParsedBuffer>::parse_bytes(
888                buf,
889                bytes,
890                ignore_errors,
891                needs_escaping,
892                missing_is_null,
893                None,
894            ),
895            Int32(buf) => <PrimitiveChunkedBuilder<Int32Type> as ParsedBuffer>::parse_bytes(
896                buf,
897                bytes,
898                ignore_errors,
899                needs_escaping,
900                missing_is_null,
901                None,
902            ),
903            Int64(buf) => <PrimitiveChunkedBuilder<Int64Type> as ParsedBuffer>::parse_bytes(
904                buf,
905                bytes,
906                ignore_errors,
907                needs_escaping,
908                missing_is_null,
909                None,
910            ),
911            #[cfg(feature = "dtype-i128")]
912            Int128(buf) => <PrimitiveChunkedBuilder<Int128Type> as ParsedBuffer>::parse_bytes(
913                buf,
914                bytes,
915                ignore_errors,
916                needs_escaping,
917                missing_is_null,
918                None,
919            ),
920            #[cfg(feature = "dtype-u8")]
921            UInt8(buf) => <PrimitiveChunkedBuilder<UInt8Type> as ParsedBuffer>::parse_bytes(
922                buf,
923                bytes,
924                ignore_errors,
925                needs_escaping,
926                missing_is_null,
927                None,
928            ),
929            #[cfg(feature = "dtype-u16")]
930            UInt16(buf) => <PrimitiveChunkedBuilder<UInt16Type> as ParsedBuffer>::parse_bytes(
931                buf,
932                bytes,
933                ignore_errors,
934                needs_escaping,
935                missing_is_null,
936                None,
937            ),
938            UInt32(buf) => <PrimitiveChunkedBuilder<UInt32Type> as ParsedBuffer>::parse_bytes(
939                buf,
940                bytes,
941                ignore_errors,
942                needs_escaping,
943                missing_is_null,
944                None,
945            ),
946            UInt64(buf) => <PrimitiveChunkedBuilder<UInt64Type> as ParsedBuffer>::parse_bytes(
947                buf,
948                bytes,
949                ignore_errors,
950                needs_escaping,
951                missing_is_null,
952                None,
953            ),
954            #[cfg(feature = "dtype-u128")]
955            UInt128(buf) => <PrimitiveChunkedBuilder<UInt128Type> as ParsedBuffer>::parse_bytes(
956                buf,
957                bytes,
958                ignore_errors,
959                needs_escaping,
960                missing_is_null,
961                None,
962            ),
963            Float32(buf) => <PrimitiveChunkedBuilder<Float32Type> as ParsedBuffer>::parse_bytes(
964                buf,
965                bytes,
966                ignore_errors,
967                needs_escaping,
968                missing_is_null,
969                None,
970            ),
971            Float64(buf) => <PrimitiveChunkedBuilder<Float64Type> as ParsedBuffer>::parse_bytes(
972                buf,
973                bytes,
974                ignore_errors,
975                needs_escaping,
976                missing_is_null,
977                None,
978            ),
979            DecimalFloat32(buf, scratch) => {
980                prepare_decimal_comma(bytes, scratch);
981                <PrimitiveChunkedBuilder<Float32Type> as ParsedBuffer>::parse_bytes(
982                    buf,
983                    scratch,
984                    ignore_errors,
985                    needs_escaping,
986                    missing_is_null,
987                    None,
988                )
989            },
990            DecimalFloat64(buf, scratch) => {
991                prepare_decimal_comma(bytes, scratch);
992                <PrimitiveChunkedBuilder<Float64Type> as ParsedBuffer>::parse_bytes(
993                    buf,
994                    scratch,
995                    ignore_errors,
996                    needs_escaping,
997                    missing_is_null,
998                    None,
999                )
1000            },
1001            #[cfg(feature = "dtype-decimal")]
1002            Decimal(buf) => <DecimalField as ParsedBuffer>::parse_bytes(
1003                buf,
1004                bytes,
1005                ignore_errors,
1006                needs_escaping,
1007                missing_is_null,
1008                None,
1009            ),
1010            Utf8(buf) => <Utf8Field as ParsedBuffer>::parse_bytes(
1011                buf,
1012                bytes,
1013                ignore_errors,
1014                needs_escaping,
1015                missing_is_null,
1016                None,
1017            ),
1018            #[cfg(feature = "dtype-datetime")]
1019            Datetime { buf, time_unit, .. } => {
1020                <DatetimeField<Int64Type> as ParsedBuffer>::parse_bytes(
1021                    buf,
1022                    bytes,
1023                    ignore_errors,
1024                    needs_escaping,
1025                    missing_is_null,
1026                    Some(*time_unit),
1027                )
1028            },
1029            #[cfg(feature = "dtype-date")]
1030            Date(buf) => <DatetimeField<Int32Type> as ParsedBuffer>::parse_bytes(
1031                buf,
1032                bytes,
1033                ignore_errors,
1034                needs_escaping,
1035                missing_is_null,
1036                None,
1037            ),
1038            #[cfg(feature = "dtype-categorical")]
1039            Categorical8(buf) => {
1040                buf.parse_bytes(bytes, ignore_errors, needs_escaping, missing_is_null, None)
1041            },
1042            #[cfg(feature = "dtype-categorical")]
1043            Categorical16(buf) => {
1044                buf.parse_bytes(bytes, ignore_errors, needs_escaping, missing_is_null, None)
1045            },
1046            #[cfg(feature = "dtype-categorical")]
1047            Categorical32(buf) => {
1048                buf.parse_bytes(bytes, ignore_errors, needs_escaping, missing_is_null, None)
1049            },
1050        }
1051    }
1052}
1053
/// Copies `bytes` into `scratch`, replacing every `,` with `.`.
///
/// `scratch` is cleared first, so on return it holds exactly `bytes.len()`
/// bytes. All bytes other than `,` are copied unchanged.
#[inline]
fn prepare_decimal_comma(bytes: &[u8], scratch: &mut Vec<u8>) {
    scratch.clear();
    // `extend` sizes the vector from the iterator's exact length, so this is
    // still a single reserve followed by one pass, without any `unsafe`.
    scratch.extend(
        bytes
            .iter()
            .map(|&byte| if byte == b',' { b'.' } else { byte }),
    );
}