1use arrow::array::MutableBinaryViewArray;
2use polars_core::prelude::*;
3use polars_error::to_compute_err;
4#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
5use polars_time::chunkedarray::string::Pattern;
6#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
7use polars_time::prelude::string::infer::{
8 DatetimeInfer, StrpTimeParser, TryFromWithUnit, infer_pattern_single,
9};
10use polars_utils::vec::PushUnchecked;
11
12use super::options::CsvEncoding;
13use super::parser::{is_whitespace, skip_whitespace};
14use super::utils::escape_field;
15
/// Parses the raw bytes of a single field into the native value of a numeric
/// Polars type.
pub(crate) trait PrimitiveParser: PolarsNumericType {
    /// Returns the parsed value, or `None` when `bytes` could not be parsed
    /// as `Self::Native`.
    fn parse(bytes: &[u8]) -> Option<Self::Native>;
}
19
20impl PrimitiveParser for Float32Type {
21 #[inline]
22 fn parse(bytes: &[u8]) -> Option<f32> {
23 fast_float2::parse(bytes).ok()
24 }
25}
26impl PrimitiveParser for Float64Type {
27 #[inline]
28 fn parse(bytes: &[u8]) -> Option<f64> {
29 fast_float2::parse(bytes).ok()
30 }
31}
32
/// `u8` fields are parsed with `atoi_simd::parse_skipped`.
#[cfg(feature = "dtype-u8")]
impl PrimitiveParser for UInt8Type {
    #[inline]
    fn parse(bytes: &[u8]) -> Option<u8> {
        // Any parse failure is reported to the caller as `None`.
        let parsed = atoi_simd::parse_skipped(bytes);
        parsed.ok()
    }
}
/// `u16` fields are parsed with `atoi_simd::parse_skipped`.
#[cfg(feature = "dtype-u16")]
impl PrimitiveParser for UInt16Type {
    #[inline]
    fn parse(bytes: &[u8]) -> Option<u16> {
        // Any parse failure is reported to the caller as `None`.
        let parsed = atoi_simd::parse_skipped(bytes);
        parsed.ok()
    }
}
47impl PrimitiveParser for UInt32Type {
48 #[inline]
49 fn parse(bytes: &[u8]) -> Option<u32> {
50 atoi_simd::parse_skipped(bytes).ok()
51 }
52}
53impl PrimitiveParser for UInt64Type {
54 #[inline]
55 fn parse(bytes: &[u8]) -> Option<u64> {
56 atoi_simd::parse_skipped(bytes).ok()
57 }
58}
/// `i8` fields are parsed with `atoi_simd::parse_skipped`.
#[cfg(feature = "dtype-i8")]
impl PrimitiveParser for Int8Type {
    #[inline]
    fn parse(bytes: &[u8]) -> Option<i8> {
        // Any parse failure is reported to the caller as `None`.
        let parsed = atoi_simd::parse_skipped(bytes);
        parsed.ok()
    }
}
/// `i16` fields are parsed with `atoi_simd::parse_skipped`.
#[cfg(feature = "dtype-i16")]
impl PrimitiveParser for Int16Type {
    #[inline]
    fn parse(bytes: &[u8]) -> Option<i16> {
        // Any parse failure is reported to the caller as `None`.
        let parsed = atoi_simd::parse_skipped(bytes);
        parsed.ok()
    }
}
73impl PrimitiveParser for Int32Type {
74 #[inline]
75 fn parse(bytes: &[u8]) -> Option<i32> {
76 atoi_simd::parse_skipped(bytes).ok()
77 }
78}
79impl PrimitiveParser for Int64Type {
80 #[inline]
81 fn parse(bytes: &[u8]) -> Option<i64> {
82 atoi_simd::parse_skipped(bytes).ok()
83 }
84}
/// `i128` fields are parsed with `atoi_simd::parse_skipped`.
#[cfg(feature = "dtype-i128")]
impl PrimitiveParser for Int128Type {
    #[inline]
    fn parse(bytes: &[u8]) -> Option<i128> {
        // Any parse failure is reported to the caller as `None`.
        let parsed = atoi_simd::parse_skipped(bytes);
        parsed.ok()
    }
}
92
/// A column buffer that can parse the raw bytes of one field and append the
/// result to itself.
trait ParsedBuffer {
    /// Parses `bytes` and appends either the parsed value or a null.
    ///
    /// * `ignore_errors` - implementors in this file append a null instead of
    ///   returning an error when the field cannot be parsed.
    /// * `_needs_escaping` - the field is quoted; implementors strip or
    ///   unescape the quotes before parsing.
    /// * `_missing_is_null` - only consulted by the string buffer, where it
    ///   decides whether an empty field becomes null or an empty string.
    /// * `_time_unit` - only consulted by the date/datetime buffer.
    fn parse_bytes(
        &mut self,
        bytes: &[u8],
        ignore_errors: bool,
        _needs_escaping: bool,
        _missing_is_null: bool,
        _time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()>;
}
103
104impl<T> ParsedBuffer for PrimitiveChunkedBuilder<T>
105where
106 T: PolarsNumericType + PrimitiveParser,
107{
108 #[inline]
109 fn parse_bytes(
110 &mut self,
111 bytes: &[u8],
112 ignore_errors: bool,
113 needs_escaping: bool,
114 _missing_is_null: bool,
115 _time_unit: Option<TimeUnit>,
116 ) -> PolarsResult<()> {
117 if bytes.is_empty() {
118 self.append_null()
119 } else {
120 let bytes = if needs_escaping {
121 &bytes[1..bytes.len() - 1]
122 } else {
123 bytes
124 };
125
126 match T::parse(bytes) {
131 Some(value) => self.append_value(value),
132 None => {
133 if !bytes.is_empty() && is_whitespace(bytes[0]) {
135 let bytes = skip_whitespace(bytes);
136 return self.parse_bytes(
137 bytes,
138 ignore_errors,
139 false, _missing_is_null,
141 None,
142 );
143 }
144 polars_ensure!(
145 bytes.is_empty() || ignore_errors,
146 ComputeError: "remaining bytes non-empty",
147 );
148 self.append_null()
149 },
150 };
151 }
152 Ok(())
153 }
154}
155
/// Column buffer for string fields; values are collected as raw bytes in a
/// binary-view array.
pub struct Utf8Field {
    // Column name, used when the buffer is converted into a series.
    name: PlSmallStr,
    // Values collected so far.
    mutable: MutableBinaryViewArray<[u8]>,
    // Reusable buffer that receives the output of `escape_field` for quoted fields.
    scratch: Vec<u8>,
    // Quote character passed to `escape_field`.
    quote_char: u8,
    // Decides how invalid UTF-8 is handled while parsing.
    encoding: CsvEncoding,
}
163
164impl Utf8Field {
165 fn new(
166 name: PlSmallStr,
167 capacity: usize,
168 quote_char: Option<u8>,
169 encoding: CsvEncoding,
170 ) -> Self {
171 Self {
172 name,
173 mutable: MutableBinaryViewArray::with_capacity(capacity),
174 scratch: vec![],
175 quote_char: quote_char.unwrap_or(b'"'),
176 encoding,
177 }
178 }
179}
180
181#[inline]
182pub fn validate_utf8(bytes: &[u8]) -> bool {
183 simdutf8::basic::from_utf8(bytes).is_ok()
184}
185
/// Parses string fields and appends them to the underlying binary-view array.
impl ParsedBuffer for Utf8Field {
    /// Appends one string field.
    ///
    /// * An empty field becomes a null when `missing_is_null` is set and an
    ///   empty string otherwise.
    /// * When `needs_escaping` is set, the field is unescaped into `scratch`
    ///   with `escape_field` first.
    /// * UTF-8 is only validated here when the encoding is
    ///   `CsvEncoding::LossyUtf8` or `ignore_errors` is set; otherwise the
    ///   bytes are pushed unchecked.
    ///
    /// # Errors
    ///
    /// Returns a `ComputeError` when `needs_escaping` is set and the field is
    /// shorter than two bytes or does not end with the quote character.
    #[inline]
    fn parse_bytes(
        &mut self,
        bytes: &[u8],
        ignore_errors: bool,
        needs_escaping: bool,
        missing_is_null: bool,
        _time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()> {
        if bytes.is_empty() {
            if missing_is_null {
                self.mutable.push_null()
            } else {
                self.mutable.push(Some([]))
            }
            return Ok(());
        }

        let escaped_bytes = if needs_escaping {
            self.scratch.clear();
            self.scratch.reserve(bytes.len());
            polars_ensure!(bytes.len() > 1 && bytes.last() == Some(&self.quote_char), ComputeError: "invalid csv file\n\nField `{}` is not properly escaped.", std::str::from_utf8(bytes).map_err(to_compute_err)?);

            // SAFETY: `scratch` was cleared and has spare capacity for at least
            // `bytes.len()` bytes.
            // NOTE(review): assumes `escape_field` writes at most `bytes.len()`
            // bytes and returns the number of bytes it initialized - confirm.
            unsafe {
                let n_written =
                    escape_field(bytes, self.quote_char, self.scratch.spare_capacity_mut());
                self.scratch.set_len(n_written);
            }

            self.scratch.as_slice()
        } else {
            bytes
        };

        if matches!(self.encoding, CsvEncoding::LossyUtf8) | ignore_errors {
            // Validation runs on the escaped bytes, not on the raw field.
            let parse_result = validate_utf8(escaped_bytes);

            match parse_result {
                true => {
                    let value = escaped_bytes;
                    self.mutable.push_value(value)
                },
                false => {
                    if matches!(self.encoding, CsvEncoding::LossyUtf8) {
                        // Invalid sequences are replaced; this may allocate.
                        let s = String::from_utf8_lossy(escaped_bytes);
                        self.mutable.push_value(s.as_ref().as_bytes())
                    } else if ignore_errors {
                        self.mutable.push_null()
                    } else {
                        // NOTE(review): unreachable as written - the enclosing
                        // `if` already requires `LossyUtf8` or `ignore_errors`,
                        // and both are handled by the branches above.
                        if needs_escaping && validate_utf8(bytes) {
                            polars_bail!(ComputeError: "string field is not properly escaped");
                        } else {
                            polars_bail!(ComputeError: "invalid utf-8 sequence");
                        }
                    }
                },
            }
        } else {
            // No validation on this path.
            // NOTE(review): presumably validated elsewhere before use - confirm.
            self.mutable.push_value(escaped_bytes)
        }

        Ok(())
    }
}
258
/// Placeholder used when the `dtype-categorical` feature is disabled, so that
/// `Buffer::Categorical` still has a payload type.
#[cfg(not(feature = "dtype-categorical"))]
pub struct CategoricalField {
    phantom: std::marker::PhantomData<u8>,
}
263
/// Column buffer for categorical and enum fields.
#[cfg(feature = "dtype-categorical")]
pub struct CategoricalField {
    // Reusable buffer that receives the output of `escape_field` for quoted fields.
    escape_scratch: Vec<u8>,
    // Quote character passed to `escape_field`.
    quote_char: u8,
    // Builder that collects the category values.
    builder: CategoricalChunkedBuilder,
    // `true` when built through `new_enum`; values are then appended with
    // `try_append_value` and the result is converted to an `Enum` series.
    is_enum: bool,
}
271
#[cfg(feature = "dtype-categorical")]
impl CategoricalField {
    /// Creates a buffer for a categorical column.
    ///
    /// `quote_char` falls back to `"` when it is `None`.
    fn new(
        name: PlSmallStr,
        capacity: usize,
        quote_char: Option<u8>,
        ordering: CategoricalOrdering,
    ) -> Self {
        Self {
            escape_scratch: Vec::new(),
            quote_char: quote_char.unwrap_or(b'"'),
            builder: CategoricalChunkedBuilder::new(name, capacity, ordering),
            is_enum: false,
        }
    }

    /// Creates a buffer for an enum column around an already prepared `builder`.
    ///
    /// `quote_char` falls back to `"` when it is `None`.
    fn new_enum(quote_char: Option<u8>, builder: CategoricalChunkedBuilder) -> Self {
        Self {
            escape_scratch: Vec::new(),
            quote_char: quote_char.unwrap_or(b'"'),
            builder,
            is_enum: true,
        }
    }

    /// Appends one field as a category value.
    ///
    /// Empty fields become nulls. Invalid UTF-8 becomes a null when
    /// `ignore_errors` is set and an error otherwise. Quoted fields
    /// (`needs_escaping`) are unescaped into the scratch buffer first.
    ///
    /// # Errors
    ///
    /// Returns a `ComputeError` for invalid UTF-8 (unless `ignore_errors`),
    /// for a quoted field shorter than two bytes, and whatever
    /// `try_append_value` returns for enum buffers.
    #[inline]
    fn parse_bytes(
        &mut self,
        bytes: &[u8],
        ignore_errors: bool,
        needs_escaping: bool,
        _missing_is_null: bool,
        _time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()> {
        if bytes.is_empty() {
            self.builder.append_null();
            return Ok(());
        }

        if !validate_utf8(bytes) {
            if ignore_errors {
                self.builder.append_null();
                return Ok(());
            }
            polars_bail!(ComputeError: "invalid utf-8 sequence");
        }

        let key = if needs_escaping {
            polars_ensure!(bytes.len() > 1, ComputeError: "invalid csv file\n\nField `{}` is not properly escaped.", std::str::from_utf8(bytes).map_err(to_compute_err)?);
            self.escape_scratch.clear();
            self.escape_scratch.reserve(bytes.len());
            // SAFETY: the scratch buffer was cleared and has spare capacity for
            // at least `bytes.len()` bytes.
            // NOTE(review): assumes `escape_field` writes at most `bytes.len()`
            // bytes and returns the number of bytes it initialized - confirm.
            unsafe {
                let written = escape_field(
                    bytes,
                    self.quote_char,
                    self.escape_scratch.spare_capacity_mut(),
                );
                self.escape_scratch.set_len(written);
            }

            // SAFETY: `bytes` passed the UTF-8 check above.
            // NOTE(review): presumes unescaping keeps the bytes valid UTF-8 - confirm.
            unsafe { std::str::from_utf8_unchecked(&self.escape_scratch) }
        } else {
            // SAFETY: `bytes` passed the UTF-8 check above.
            unsafe { std::str::from_utf8_unchecked(bytes) }
        };

        if self.is_enum {
            self.builder.try_append_value(key)?;
        } else {
            self.builder.append_value(key);
        }
        Ok(())
    }
}
354
355impl ParsedBuffer for BooleanChunkedBuilder {
356 #[inline]
357 fn parse_bytes(
358 &mut self,
359 bytes: &[u8],
360 ignore_errors: bool,
361 needs_escaping: bool,
362 _missing_is_null: bool,
363 _time_unit: Option<TimeUnit>,
364 ) -> PolarsResult<()> {
365 let bytes = if needs_escaping {
366 &bytes[1..bytes.len() - 1]
367 } else {
368 bytes
369 };
370 if bytes.eq_ignore_ascii_case(b"false") {
371 self.append_value(false);
372 } else if bytes.eq_ignore_ascii_case(b"true") {
373 self.append_value(true);
374 } else if ignore_errors || bytes.is_empty() {
375 self.append_null();
376 } else {
377 polars_bail!(
378 ComputeError: "error while parsing value {} as boolean",
379 String::from_utf8_lossy(bytes),
380 );
381 }
382 Ok(())
383 }
384}
385
/// Column buffer for date and datetime fields.
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
pub struct DatetimeField<T: PolarsNumericType> {
    // Parser for the pattern that last parsed a value successfully; `None`
    // until the first successful parse.
    compiled: Option<DatetimeInfer<T>>,
    // Builder that collects the parsed physical values.
    builder: PrimitiveChunkedBuilder<T>,
}
391
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
impl<T: PolarsNumericType> DatetimeField<T> {
    /// Creates an empty buffer with room for `capacity` values and no
    /// compiled pattern yet.
    fn new(name: PlSmallStr, capacity: usize) -> Self {
        Self {
            compiled: None,
            builder: PrimitiveChunkedBuilder::<T>::new(name, capacity),
        }
    }
}
402
/// Parses `bytes` as a date/datetime, inferring a pattern when none is cached.
///
/// The pattern comes from `buf.compiled` when present and from
/// `infer_pattern_single` otherwise. On success the parser built for that
/// pattern is stored in `buf.compiled` and the value is appended.
///
/// # Errors
///
/// Invalid UTF-8, an unknown pattern, a failed parse, or a failure to build
/// the parser each return an error. When `ignore_errors` is set, a null is
/// appended instead and `Ok(())` is returned.
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
fn slow_datetime_parser<T>(
    buf: &mut DatetimeField<T>,
    bytes: &[u8],
    time_unit: Option<TimeUnit>,
    ignore_errors: bool,
) -> PolarsResult<()>
where
    T: PolarsNumericType,
    DatetimeInfer<T>: TryFromWithUnit<Pattern>,
{
    let val = if bytes.is_ascii() {
        // SAFETY: ASCII-only bytes are always valid UTF-8.
        unsafe { std::str::from_utf8_unchecked(bytes) }
    } else {
        match std::str::from_utf8(bytes) {
            Ok(text) => text,
            Err(_) if ignore_errors => {
                buf.builder.append_null();
                return Ok(());
            },
            Err(_) => polars_bail!(ComputeError: "invalid utf-8 sequence"),
        }
    };

    // Prefer the cached pattern; otherwise infer one from this value.
    let pattern = if let Some(compiled) = &buf.compiled {
        compiled.pattern
    } else if let Some(inferred) = infer_pattern_single(val) {
        inferred
    } else if ignore_errors {
        buf.builder.append_null();
        return Ok(());
    } else {
        polars_bail!(ComputeError: "could not find a 'date/datetime' pattern for '{}'", val)
    };

    let mut infer = match DatetimeInfer::try_from_with_unit(pattern, time_unit) {
        Ok(infer) => infer,
        Err(_) if ignore_errors => {
            buf.builder.append_null();
            return Ok(());
        },
        Err(err) => return Err(err),
    };

    match infer.parse(val) {
        Some(parsed) => {
            // Cache the parser only after it has parsed a value.
            buf.compiled = Some(infer);
            buf.builder.append_value(parsed);
            Ok(())
        },
        None if ignore_errors => {
            buf.builder.append_null();
            Ok(())
        },
        None => {
            polars_bail!(ComputeError: "could not parse '{}' with pattern '{:?}'", val, pattern)
        },
    }
}
472
/// Parses date/datetime fields and appends them to the underlying builder.
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
impl<T> ParsedBuffer for DatetimeField<T>
where
    T: PolarsNumericType,
    DatetimeInfer<T>: TryFromWithUnit<Pattern> + StrpTimeParser<T::Native>,
{
    /// Appends one date/datetime field.
    ///
    /// Quotes are stripped when `needs_escaping` is set and the field has at
    /// least two bytes. An empty field becomes a null. The cached parser is
    /// tried first; when there is none, or it fails, the work is handed to
    /// `slow_datetime_parser`.
    #[inline]
    fn parse_bytes(
        &mut self,
        bytes: &[u8],
        ignore_errors: bool,
        needs_escaping: bool,
        _missing_is_null: bool,
        time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()> {
        let bytes = if needs_escaping && bytes.len() >= 2 {
            &bytes[1..bytes.len() - 1]
        } else {
            bytes
        };

        if bytes.is_empty() {
            self.builder.append_null();
            return Ok(());
        }

        // Fast path: reuse the parser that already worked for this column.
        let fast_path = self
            .compiled
            .as_mut()
            .and_then(|compiled| compiled.parse_bytes(bytes, time_unit));

        match fast_path {
            Some(parsed) => {
                self.builder.append_value(parsed);
                Ok(())
            },
            None => slow_datetime_parser(self, bytes, time_unit, ignore_errors),
        }
    }
}
515
516pub fn init_buffers(
517 projection: &[usize],
518 capacity: usize,
519 schema: &Schema,
520 quote_char: Option<u8>,
521 encoding: CsvEncoding,
522 decimal_comma: bool,
523) -> PolarsResult<Vec<Buffer>> {
524 projection
525 .iter()
526 .map(|&i| {
527 let (name, dtype) = schema.get_at_index(i).unwrap();
528 let name = name.clone();
529 let builder = match dtype {
530 &DataType::Boolean => Buffer::Boolean(BooleanChunkedBuilder::new(name, capacity)),
531 #[cfg(feature = "dtype-i8")]
532 &DataType::Int8 => Buffer::Int8(PrimitiveChunkedBuilder::new(name, capacity)),
533 #[cfg(feature = "dtype-i16")]
534 &DataType::Int16 => Buffer::Int16(PrimitiveChunkedBuilder::new(name, capacity)),
535 &DataType::Int32 => Buffer::Int32(PrimitiveChunkedBuilder::new(name, capacity)),
536 &DataType::Int64 => Buffer::Int64(PrimitiveChunkedBuilder::new(name, capacity)),
537 #[cfg(feature = "dtype-i128")]
538 &DataType::Int128 => Buffer::Int128(PrimitiveChunkedBuilder::new(name, capacity)),
539 #[cfg(feature = "dtype-u8")]
540 &DataType::UInt8 => Buffer::UInt8(PrimitiveChunkedBuilder::new(name, capacity)),
541 #[cfg(feature = "dtype-u16")]
542 &DataType::UInt16 => Buffer::UInt16(PrimitiveChunkedBuilder::new(name, capacity)),
543 &DataType::UInt32 => Buffer::UInt32(PrimitiveChunkedBuilder::new(name, capacity)),
544 &DataType::UInt64 => Buffer::UInt64(PrimitiveChunkedBuilder::new(name, capacity)),
545 &DataType::Float32 => {
546 if decimal_comma {
547 Buffer::DecimalFloat32(
548 PrimitiveChunkedBuilder::new(name, capacity),
549 Default::default(),
550 )
551 } else {
552 Buffer::Float32(PrimitiveChunkedBuilder::new(name, capacity))
553 }
554 },
555 &DataType::Float64 => {
556 if decimal_comma {
557 Buffer::DecimalFloat64(
558 PrimitiveChunkedBuilder::new(name, capacity),
559 Default::default(),
560 )
561 } else {
562 Buffer::Float64(PrimitiveChunkedBuilder::new(name, capacity))
563 }
564 },
565 &DataType::String => {
566 Buffer::Utf8(Utf8Field::new(name, capacity, quote_char, encoding))
567 },
568 #[cfg(feature = "dtype-datetime")]
569 DataType::Datetime(time_unit, time_zone) => Buffer::Datetime {
570 buf: DatetimeField::new(name, capacity),
571 time_unit: *time_unit,
572 time_zone: time_zone.clone(),
573 },
574 #[cfg(feature = "dtype-date")]
575 &DataType::Date => Buffer::Date(DatetimeField::new(name, capacity)),
576 #[cfg(feature = "dtype-categorical")]
577 DataType::Categorical(_, ordering) => Buffer::Categorical(CategoricalField::new(
578 name, capacity, quote_char, *ordering,
579 )),
580 #[cfg(feature = "dtype-categorical")]
581 DataType::Enum(rev_map, _) => {
582 let Some(rev_map) = rev_map else {
583 polars_bail!(ComputeError: "enum categories must be set")
584 };
585 let cats = rev_map.get_categories();
586 let mut builder =
587 CategoricalChunkedBuilder::new(name, capacity, Default::default());
588 for cat in cats.values_iter() {
589 builder.register_value(cat);
590 }
591 Buffer::Categorical(CategoricalField::new_enum(quote_char, builder))
592 },
593 dt => polars_bail!(
594 ComputeError: "unsupported data type when reading CSV: {} when reading CSV", dt,
595 ),
596 };
597 Ok(builder)
598 })
599 .collect()
600}
601
/// A typed column buffer that fields are parsed into, one variant per
/// supported data type.
#[allow(clippy::large_enum_variant)]
pub enum Buffer {
    Boolean(BooleanChunkedBuilder),
    #[cfg(feature = "dtype-i8")]
    Int8(PrimitiveChunkedBuilder<Int8Type>),
    #[cfg(feature = "dtype-i16")]
    Int16(PrimitiveChunkedBuilder<Int16Type>),
    Int32(PrimitiveChunkedBuilder<Int32Type>),
    Int64(PrimitiveChunkedBuilder<Int64Type>),
    #[cfg(feature = "dtype-i128")]
    Int128(PrimitiveChunkedBuilder<Int128Type>),
    #[cfg(feature = "dtype-u8")]
    UInt8(PrimitiveChunkedBuilder<UInt8Type>),
    #[cfg(feature = "dtype-u16")]
    UInt16(PrimitiveChunkedBuilder<UInt16Type>),
    UInt32(PrimitiveChunkedBuilder<UInt32Type>),
    UInt64(PrimitiveChunkedBuilder<UInt64Type>),
    Float32(PrimitiveChunkedBuilder<Float32Type>),
    Float64(PrimitiveChunkedBuilder<Float64Type>),
    /// String column.
    Utf8(Utf8Field),
    /// Datetime column; values are collected as `Int64` and cast to
    /// `Datetime(time_unit, time_zone)` when the buffer is finished.
    #[cfg(feature = "dtype-datetime")]
    Datetime {
        buf: DatetimeField<Int64Type>,
        time_unit: TimeUnit,
        time_zone: Option<TimeZone>,
    },
    /// Date column; values are collected as `Int32` and cast to `Date` when
    /// the buffer is finished.
    #[cfg(feature = "dtype-date")]
    Date(DatetimeField<Int32Type>),
    /// Categorical or enum column. Without the `dtype-categorical` feature the
    /// payload is a placeholder and using this variant panics.
    #[allow(dead_code)]
    Categorical(CategoricalField),
    /// `f32` column whose fields use `,` as the decimal separator; the
    /// `Vec<u8>` is scratch space for the field with `,` replaced by `.`.
    DecimalFloat32(PrimitiveChunkedBuilder<Float32Type>, Vec<u8>),
    /// `f64` column whose fields use `,` as the decimal separator; the
    /// `Vec<u8>` is scratch space for the field with `,` replaced by `.`.
    DecimalFloat64(PrimitiveChunkedBuilder<Float64Type>, Vec<u8>),
}
636
637impl Buffer {
638 pub fn into_series(self) -> PolarsResult<Series> {
639 let s = match self {
640 Buffer::Boolean(v) => v.finish().into_series(),
641 #[cfg(feature = "dtype-i8")]
642 Buffer::Int8(v) => v.finish().into_series(),
643 #[cfg(feature = "dtype-i16")]
644 Buffer::Int16(v) => v.finish().into_series(),
645 Buffer::Int32(v) => v.finish().into_series(),
646 Buffer::Int64(v) => v.finish().into_series(),
647 #[cfg(feature = "dtype-i128")]
648 Buffer::Int128(v) => v.finish().into_series(),
649 #[cfg(feature = "dtype-u8")]
650 Buffer::UInt8(v) => v.finish().into_series(),
651 #[cfg(feature = "dtype-u16")]
652 Buffer::UInt16(v) => v.finish().into_series(),
653 Buffer::UInt32(v) => v.finish().into_series(),
654 Buffer::UInt64(v) => v.finish().into_series(),
655 Buffer::Float32(v) => v.finish().into_series(),
656 Buffer::Float64(v) => v.finish().into_series(),
657 Buffer::DecimalFloat32(v, _) => v.finish().into_series(),
658 Buffer::DecimalFloat64(v, _) => v.finish().into_series(),
659 #[cfg(feature = "dtype-datetime")]
660 Buffer::Datetime {
661 buf,
662 time_unit,
663 time_zone,
664 } => buf
665 .builder
666 .finish()
667 .into_series()
668 .cast(&DataType::Datetime(time_unit, time_zone))
669 .unwrap(),
670 #[cfg(feature = "dtype-date")]
671 Buffer::Date(v) => v
672 .builder
673 .finish()
674 .into_series()
675 .cast(&DataType::Date)
676 .unwrap(),
677
678 Buffer::Utf8(v) => {
679 let arr = v.mutable.freeze();
680 StringChunked::with_chunk(v.name.clone(), unsafe { arr.to_utf8view_unchecked() })
681 .into_series()
682 },
683 #[allow(unused_variables)]
684 Buffer::Categorical(buf) => {
685 #[cfg(feature = "dtype-categorical")]
686 {
687 let ca = buf.builder.finish();
688
689 if buf.is_enum {
690 let DataType::Categorical(Some(rev_map), _) = ca.dtype() else {
691 unreachable!()
692 };
693 let idx = ca.physical().clone();
694 let dtype = DataType::Enum(Some(rev_map.clone()), Default::default());
695
696 unsafe {
697 CategoricalChunked::from_cats_and_dtype_unchecked(idx, dtype)
698 .into_series()
699 }
700 } else {
701 ca.into_series()
702 }
703 }
704 #[cfg(not(feature = "dtype-categorical"))]
705 {
706 panic!("activate 'dtype-categorical' feature")
707 }
708 },
709 };
710 Ok(s)
711 }
712
713 pub fn add_null(&mut self, valid: bool) {
714 match self {
715 Buffer::Boolean(v) => v.append_null(),
716 #[cfg(feature = "dtype-i8")]
717 Buffer::Int8(v) => v.append_null(),
718 #[cfg(feature = "dtype-i16")]
719 Buffer::Int16(v) => v.append_null(),
720 Buffer::Int32(v) => v.append_null(),
721 Buffer::Int64(v) => v.append_null(),
722 #[cfg(feature = "dtype-i128")]
723 Buffer::Int128(v) => v.append_null(),
724 #[cfg(feature = "dtype-u8")]
725 Buffer::UInt8(v) => v.append_null(),
726 #[cfg(feature = "dtype-u16")]
727 Buffer::UInt16(v) => v.append_null(),
728 Buffer::UInt32(v) => v.append_null(),
729 Buffer::UInt64(v) => v.append_null(),
730 Buffer::Float32(v) => v.append_null(),
731 Buffer::Float64(v) => v.append_null(),
732 Buffer::DecimalFloat32(v, _) => v.append_null(),
733 Buffer::DecimalFloat64(v, _) => v.append_null(),
734 Buffer::Utf8(v) => {
735 if valid {
736 v.mutable.push_value("")
737 } else {
738 v.mutable.push_null()
739 }
740 },
741 #[cfg(feature = "dtype-datetime")]
742 Buffer::Datetime { buf, .. } => buf.builder.append_null(),
743 #[cfg(feature = "dtype-date")]
744 Buffer::Date(v) => v.builder.append_null(),
745 #[allow(unused_variables)]
746 Buffer::Categorical(cat_builder) => {
747 #[cfg(feature = "dtype-categorical")]
748 {
749 cat_builder.builder.append_null()
750 }
751 #[cfg(not(feature = "dtype-categorical"))]
752 {
753 panic!("activate 'dtype-categorical' feature")
754 }
755 },
756 };
757 }
758
759 pub fn dtype(&self) -> DataType {
760 match self {
761 Buffer::Boolean(_) => DataType::Boolean,
762 #[cfg(feature = "dtype-i8")]
763 Buffer::Int8(_) => DataType::Int8,
764 #[cfg(feature = "dtype-i16")]
765 Buffer::Int16(_) => DataType::Int16,
766 Buffer::Int32(_) => DataType::Int32,
767 Buffer::Int64(_) => DataType::Int64,
768 #[cfg(feature = "dtype-i128")]
769 Buffer::Int128(_) => DataType::Int128,
770 #[cfg(feature = "dtype-u8")]
771 Buffer::UInt8(_) => DataType::UInt8,
772 #[cfg(feature = "dtype-u16")]
773 Buffer::UInt16(_) => DataType::UInt16,
774 Buffer::UInt32(_) => DataType::UInt32,
775 Buffer::UInt64(_) => DataType::UInt64,
776 Buffer::Float32(_) | Buffer::DecimalFloat32(_, _) => DataType::Float32,
777 Buffer::Float64(_) | Buffer::DecimalFloat64(_, _) => DataType::Float64,
778 Buffer::Utf8(_) => DataType::String,
779 #[cfg(feature = "dtype-datetime")]
780 Buffer::Datetime { time_unit, .. } => DataType::Datetime(*time_unit, None),
781 #[cfg(feature = "dtype-date")]
782 Buffer::Date(_) => DataType::Date,
783 Buffer::Categorical(_) => {
784 #[cfg(feature = "dtype-categorical")]
785 {
786 DataType::Categorical(None, Default::default())
787 }
788
789 #[cfg(not(feature = "dtype-categorical"))]
790 {
791 panic!("activate 'dtype-categorical' feature")
792 }
793 },
794 }
795 }
796
797 #[inline]
798 pub fn add(
799 &mut self,
800 bytes: &[u8],
801 ignore_errors: bool,
802 needs_escaping: bool,
803 missing_is_null: bool,
804 ) -> PolarsResult<()> {
805 use Buffer::*;
806 match self {
807 Boolean(buf) => <BooleanChunkedBuilder as ParsedBuffer>::parse_bytes(
808 buf,
809 bytes,
810 ignore_errors,
811 needs_escaping,
812 missing_is_null,
813 None,
814 ),
815 #[cfg(feature = "dtype-i8")]
816 Int8(buf) => <PrimitiveChunkedBuilder<Int8Type> as ParsedBuffer>::parse_bytes(
817 buf,
818 bytes,
819 ignore_errors,
820 needs_escaping,
821 missing_is_null,
822 None,
823 ),
824 #[cfg(feature = "dtype-i16")]
825 Int16(buf) => <PrimitiveChunkedBuilder<Int16Type> as ParsedBuffer>::parse_bytes(
826 buf,
827 bytes,
828 ignore_errors,
829 needs_escaping,
830 missing_is_null,
831 None,
832 ),
833 Int32(buf) => <PrimitiveChunkedBuilder<Int32Type> as ParsedBuffer>::parse_bytes(
834 buf,
835 bytes,
836 ignore_errors,
837 needs_escaping,
838 missing_is_null,
839 None,
840 ),
841 Int64(buf) => <PrimitiveChunkedBuilder<Int64Type> as ParsedBuffer>::parse_bytes(
842 buf,
843 bytes,
844 ignore_errors,
845 needs_escaping,
846 missing_is_null,
847 None,
848 ),
849 #[cfg(feature = "dtype-i128")]
850 Int128(buf) => <PrimitiveChunkedBuilder<Int128Type> as ParsedBuffer>::parse_bytes(
851 buf,
852 bytes,
853 ignore_errors,
854 needs_escaping,
855 missing_is_null,
856 None,
857 ),
858 #[cfg(feature = "dtype-u8")]
859 UInt8(buf) => <PrimitiveChunkedBuilder<UInt8Type> as ParsedBuffer>::parse_bytes(
860 buf,
861 bytes,
862 ignore_errors,
863 needs_escaping,
864 missing_is_null,
865 None,
866 ),
867 #[cfg(feature = "dtype-u16")]
868 UInt16(buf) => <PrimitiveChunkedBuilder<UInt16Type> as ParsedBuffer>::parse_bytes(
869 buf,
870 bytes,
871 ignore_errors,
872 needs_escaping,
873 missing_is_null,
874 None,
875 ),
876 UInt32(buf) => <PrimitiveChunkedBuilder<UInt32Type> as ParsedBuffer>::parse_bytes(
877 buf,
878 bytes,
879 ignore_errors,
880 needs_escaping,
881 missing_is_null,
882 None,
883 ),
884 UInt64(buf) => <PrimitiveChunkedBuilder<UInt64Type> as ParsedBuffer>::parse_bytes(
885 buf,
886 bytes,
887 ignore_errors,
888 needs_escaping,
889 missing_is_null,
890 None,
891 ),
892 Float32(buf) => <PrimitiveChunkedBuilder<Float32Type> as ParsedBuffer>::parse_bytes(
893 buf,
894 bytes,
895 ignore_errors,
896 needs_escaping,
897 missing_is_null,
898 None,
899 ),
900 Float64(buf) => <PrimitiveChunkedBuilder<Float64Type> as ParsedBuffer>::parse_bytes(
901 buf,
902 bytes,
903 ignore_errors,
904 needs_escaping,
905 missing_is_null,
906 None,
907 ),
908 DecimalFloat32(buf, scratch) => {
909 prepare_decimal_comma(bytes, scratch);
910 <PrimitiveChunkedBuilder<Float32Type> as ParsedBuffer>::parse_bytes(
911 buf,
912 scratch,
913 ignore_errors,
914 needs_escaping,
915 missing_is_null,
916 None,
917 )
918 },
919 DecimalFloat64(buf, scratch) => {
920 prepare_decimal_comma(bytes, scratch);
921 <PrimitiveChunkedBuilder<Float64Type> as ParsedBuffer>::parse_bytes(
922 buf,
923 scratch,
924 ignore_errors,
925 needs_escaping,
926 missing_is_null,
927 None,
928 )
929 },
930 Utf8(buf) => <Utf8Field as ParsedBuffer>::parse_bytes(
931 buf,
932 bytes,
933 ignore_errors,
934 needs_escaping,
935 missing_is_null,
936 None,
937 ),
938 #[cfg(feature = "dtype-datetime")]
939 Datetime { buf, time_unit, .. } => {
940 <DatetimeField<Int64Type> as ParsedBuffer>::parse_bytes(
941 buf,
942 bytes,
943 ignore_errors,
944 needs_escaping,
945 missing_is_null,
946 Some(*time_unit),
947 )
948 },
949 #[cfg(feature = "dtype-date")]
950 Date(buf) => <DatetimeField<Int32Type> as ParsedBuffer>::parse_bytes(
951 buf,
952 bytes,
953 ignore_errors,
954 needs_escaping,
955 missing_is_null,
956 None,
957 ),
958 #[allow(unused_variables)]
959 Categorical(buf) => {
960 #[cfg(feature = "dtype-categorical")]
961 {
962 buf.parse_bytes(bytes, ignore_errors, needs_escaping, missing_is_null, None)
963 }
964
965 #[cfg(not(feature = "dtype-categorical"))]
966 {
967 panic!("activate 'dtype-categorical' feature")
968 }
969 },
970 }
971 }
972}
973
974#[inline]
975fn prepare_decimal_comma(bytes: &[u8], scratch: &mut Vec<u8>) {
976 scratch.clear();
977 scratch.reserve(bytes.len());
978
979 for &byte in bytes {
981 if byte == b',' {
982 unsafe { scratch.push_unchecked(b'.') }
983 } else {
984 unsafe { scratch.push_unchecked(byte) }
985 }
986 }
987}