1use arrow::array::MutableBinaryViewArray;
2#[cfg(feature = "dtype-categorical")]
3use polars_core::chunked_array::builder::CategoricalChunkedBuilder;
4use polars_core::prelude::*;
5use polars_error::to_compute_err;
6#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
7use polars_time::chunkedarray::string::Pattern;
8#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
9use polars_time::prelude::string::infer::{
10 DatetimeInfer, StrpTimeParser, TryFromWithUnit, infer_pattern_single,
11};
12use polars_utils::vec::PushUnchecked;
13
14use super::options::CsvEncoding;
15use super::parser::{is_whitespace, skip_whitespace};
16use super::utils::escape_field;
17
/// Parses the raw bytes of a single field into the native value of a numeric type.
pub(crate) trait PrimitiveParser: PolarsNumericType {
    /// Attempts to parse `bytes` as `Self::Native`.
    ///
    /// The implementations in this file return `None` when the underlying
    /// number parser reports an error.
    fn parse(bytes: &[u8]) -> Option<Self::Native>;
}
21
22impl PrimitiveParser for Float32Type {
23 #[inline]
24 fn parse(bytes: &[u8]) -> Option<f32> {
25 fast_float2::parse(bytes).ok()
26 }
27}
28impl PrimitiveParser for Float64Type {
29 #[inline]
30 fn parse(bytes: &[u8]) -> Option<f64> {
31 fast_float2::parse(bytes).ok()
32 }
33}
34
#[cfg(feature = "dtype-u8")]
impl PrimitiveParser for UInt8Type {
    /// Parses `bytes` as a `u8` via `atoi_simd::parse_skipped`; errors map to `None`.
    #[inline]
    fn parse(bytes: &[u8]) -> Option<u8> {
        let parsed = atoi_simd::parse_skipped(bytes);
        parsed.ok()
    }
}
#[cfg(feature = "dtype-u16")]
impl PrimitiveParser for UInt16Type {
    /// Parses `bytes` as a `u16` via `atoi_simd::parse_skipped`; errors map to `None`.
    #[inline]
    fn parse(bytes: &[u8]) -> Option<u16> {
        let parsed = atoi_simd::parse_skipped(bytes);
        parsed.ok()
    }
}
49impl PrimitiveParser for UInt32Type {
50 #[inline]
51 fn parse(bytes: &[u8]) -> Option<u32> {
52 atoi_simd::parse_skipped(bytes).ok()
53 }
54}
55impl PrimitiveParser for UInt64Type {
56 #[inline]
57 fn parse(bytes: &[u8]) -> Option<u64> {
58 atoi_simd::parse_skipped(bytes).ok()
59 }
60}
#[cfg(feature = "dtype-i8")]
impl PrimitiveParser for Int8Type {
    /// Parses `bytes` as an `i8` via `atoi_simd::parse_skipped`; errors map to `None`.
    #[inline]
    fn parse(bytes: &[u8]) -> Option<i8> {
        let parsed = atoi_simd::parse_skipped(bytes);
        parsed.ok()
    }
}
#[cfg(feature = "dtype-i16")]
impl PrimitiveParser for Int16Type {
    /// Parses `bytes` as an `i16` via `atoi_simd::parse_skipped`; errors map to `None`.
    #[inline]
    fn parse(bytes: &[u8]) -> Option<i16> {
        let parsed = atoi_simd::parse_skipped(bytes);
        parsed.ok()
    }
}
75impl PrimitiveParser for Int32Type {
76 #[inline]
77 fn parse(bytes: &[u8]) -> Option<i32> {
78 atoi_simd::parse_skipped(bytes).ok()
79 }
80}
81impl PrimitiveParser for Int64Type {
82 #[inline]
83 fn parse(bytes: &[u8]) -> Option<i64> {
84 atoi_simd::parse_skipped(bytes).ok()
85 }
86}
#[cfg(feature = "dtype-i128")]
impl PrimitiveParser for Int128Type {
    /// Parses `bytes` as an `i128` via `atoi_simd::parse_skipped`; errors map to `None`.
    #[inline]
    fn parse(bytes: &[u8]) -> Option<i128> {
        let parsed = atoi_simd::parse_skipped(bytes);
        parsed.ok()
    }
}
94
/// A column builder that can ingest one raw field at a time.
trait ParsedBuffer {
    /// Parses `bytes` and appends the resulting value (or a null) to the buffer.
    ///
    /// Meaning of the flags, as used by the implementations in this file:
    /// * `ignore_errors` - append a null instead of returning an error when the
    ///   field cannot be parsed.
    /// * `_needs_escaping` - the field is wrapped in quote characters that have to
    ///   be stripped/unescaped before the value is parsed.
    /// * `_missing_is_null` - whether an empty field is stored as null rather than
    ///   as an empty value; among the implementations here only `Utf8Field`
    ///   reads it.
    /// * `_time_unit` - time unit forwarded to the date/datetime parser; the other
    ///   implementations ignore it.
    fn parse_bytes(
        &mut self,
        bytes: &[u8],
        ignore_errors: bool,
        _needs_escaping: bool,
        _missing_is_null: bool,
        _time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()>;
}
105
106impl<T> ParsedBuffer for PrimitiveChunkedBuilder<T>
107where
108 T: PolarsNumericType + PrimitiveParser,
109{
110 #[inline]
111 fn parse_bytes(
112 &mut self,
113 bytes: &[u8],
114 ignore_errors: bool,
115 needs_escaping: bool,
116 _missing_is_null: bool,
117 _time_unit: Option<TimeUnit>,
118 ) -> PolarsResult<()> {
119 if bytes.is_empty() {
120 self.append_null()
121 } else {
122 let bytes = if needs_escaping {
123 &bytes[1..bytes.len() - 1]
124 } else {
125 bytes
126 };
127
128 match T::parse(bytes) {
133 Some(value) => self.append_value(value),
134 None => {
135 if !bytes.is_empty() && is_whitespace(bytes[0]) {
137 let bytes = skip_whitespace(bytes);
138 return self.parse_bytes(
139 bytes,
140 ignore_errors,
141 false, _missing_is_null,
143 None,
144 );
145 }
146 polars_ensure!(
147 bytes.is_empty() || ignore_errors,
148 ComputeError: "remaining bytes non-empty",
149 );
150 self.append_null()
151 },
152 };
153 }
154 Ok(())
155 }
156}
157
/// Accumulates the values of a string column as raw bytes.
pub struct Utf8Field {
    /// Column name, used when the buffer is converted into a `StringChunked`.
    name: PlSmallStr,
    /// Values pushed so far, stored in a mutable binary view array.
    mutable: MutableBinaryViewArray<[u8]>,
    /// Reusable scratch space that receives the unescaped form of quoted fields.
    scratch: Vec<u8>,
    /// Quote byte handed to `escape_field`; `new` falls back to `"` when none is given.
    quote_char: u8,
    /// Encoding mode; with `LossyUtf8`, `parse_bytes` replaces invalid UTF-8 lossily.
    encoding: CsvEncoding,
}
165
166impl Utf8Field {
167 fn new(
168 name: PlSmallStr,
169 capacity: usize,
170 quote_char: Option<u8>,
171 encoding: CsvEncoding,
172 ) -> Self {
173 Self {
174 name,
175 mutable: MutableBinaryViewArray::with_capacity(capacity),
176 scratch: vec![],
177 quote_char: quote_char.unwrap_or(b'"'),
178 encoding,
179 }
180 }
181}
182
183#[inline]
184pub fn validate_utf8(bytes: &[u8]) -> bool {
185 simdutf8::basic::from_utf8(bytes).is_ok()
186}
187
188impl ParsedBuffer for Utf8Field {
189 #[inline]
190 fn parse_bytes(
191 &mut self,
192 bytes: &[u8],
193 ignore_errors: bool,
194 needs_escaping: bool,
195 missing_is_null: bool,
196 _time_unit: Option<TimeUnit>,
197 ) -> PolarsResult<()> {
198 if bytes.is_empty() {
199 if missing_is_null {
200 self.mutable.push_null()
201 } else {
202 self.mutable.push(Some([]))
203 }
204 return Ok(());
205 }
206
207 let escaped_bytes = if needs_escaping {
209 self.scratch.clear();
210 self.scratch.reserve(bytes.len());
211 polars_ensure!(bytes.len() > 1 && bytes.last() == Some(&self.quote_char), ComputeError: "invalid csv file\n\nField `{}` is not properly escaped.", std::str::from_utf8(bytes).map_err(to_compute_err)?);
212
213 unsafe {
216 let n_written =
217 escape_field(bytes, self.quote_char, self.scratch.spare_capacity_mut());
218 self.scratch.set_len(n_written);
219 }
220
221 self.scratch.as_slice()
222 } else {
223 bytes
224 };
225
226 if matches!(self.encoding, CsvEncoding::LossyUtf8) | ignore_errors {
227 let parse_result = validate_utf8(escaped_bytes);
230
231 match parse_result {
232 true => {
233 let value = escaped_bytes;
234 self.mutable.push_value(value)
235 },
236 false => {
237 if matches!(self.encoding, CsvEncoding::LossyUtf8) {
238 let s = String::from_utf8_lossy(escaped_bytes);
240 self.mutable.push_value(s.as_ref().as_bytes())
241 } else if ignore_errors {
242 self.mutable.push_null()
243 } else {
244 if needs_escaping && validate_utf8(bytes) {
246 polars_bail!(ComputeError: "string field is not properly escaped");
247 } else {
248 polars_bail!(ComputeError: "invalid utf-8 sequence");
249 }
250 }
251 },
252 }
253 } else {
254 self.mutable.push_value(escaped_bytes)
255 }
256
257 Ok(())
258 }
259}
260
/// Accumulates the values of a categorical/enum column.
#[cfg(feature = "dtype-categorical")]
pub struct CategoricalField<T: PolarsCategoricalType> {
    /// Reusable scratch space that receives the unescaped form of quoted fields.
    escape_scratch: Vec<u8>,
    /// Quote byte handed to `escape_field`; `new` falls back to `"` when none is given.
    quote_char: u8,
    /// Builder that receives the parsed string keys and nulls.
    builder: CategoricalChunkedBuilder<T>,
}
267
#[cfg(feature = "dtype-categorical")]
impl<T: PolarsCategoricalType> CategoricalField<T> {
    /// Creates an empty categorical buffer for `dtype` with `capacity` reserved.
    ///
    /// When `quote_char` is `None`, the double quote (`"`) is used.
    fn new(name: PlSmallStr, capacity: usize, quote_char: Option<u8>, dtype: DataType) -> Self {
        let mut builder = CategoricalChunkedBuilder::new(name, dtype);
        builder.reserve(capacity);

        Self {
            escape_scratch: vec![],
            quote_char: quote_char.unwrap_or(b'"'),
            builder,
        }
    }

    /// Appends one field as a category key.
    ///
    /// * An empty field appends a null.
    /// * Invalid UTF-8 appends a null when `ignore_errors` is set.
    /// * A field flagged with `needs_escaping` is unescaped into the scratch
    ///   buffer before it is appended.
    ///
    /// # Errors
    ///
    /// Returns a `ComputeError` when the field is not valid UTF-8 (and
    /// `ignore_errors` is `false`), or when a field flagged with `needs_escaping`
    /// is shorter than two bytes or does not end with the quote character.
    /// Errors from the builder's `append_str` are propagated.
    #[inline]
    fn parse_bytes(
        &mut self,
        bytes: &[u8],
        ignore_errors: bool,
        needs_escaping: bool,
        _missing_is_null: bool,
        _time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()> {
        if bytes.is_empty() {
            self.builder.append_null();
            return Ok(());
        }

        if !validate_utf8(bytes) {
            polars_ensure!(ignore_errors, ComputeError: "invalid utf-8 sequence");
            self.builder.append_null();
            return Ok(());
        }

        let key_bytes = if needs_escaping {
            // Same precondition as the string buffer: at least two bytes and a
            // closing quote, verified before the unsafe unescaping routine runs.
            polars_ensure!(bytes.len() > 1 && bytes.last() == Some(&self.quote_char), ComputeError: "invalid csv file\n\nField `{}` is not properly escaped.", std::str::from_utf8(bytes).map_err(to_compute_err)?);
            self.escape_scratch.clear();
            self.escape_scratch.reserve(bytes.len());

            // SAFETY: the scratch buffer was cleared and has at least
            // `bytes.len()` spare slots.
            // NOTE(review): `escape_field` is defined elsewhere; this assumes it
            // writes at most `bytes.len()` bytes and returns the number of
            // initialized bytes — confirm against its safety contract.
            unsafe {
                let n_written = escape_field(
                    bytes,
                    self.quote_char,
                    self.escape_scratch.spare_capacity_mut(),
                );
                self.escape_scratch.set_len(n_written);
            }

            self.escape_scratch.as_slice()
        } else {
            bytes
        };

        // SAFETY: `bytes` was validated as UTF-8 above.
        // NOTE(review): for the escaped case this additionally assumes that
        // `escape_field` preserves UTF-8 validity — confirm.
        let key = unsafe { std::str::from_utf8_unchecked(key_bytes) };
        self.builder.append_str(key)?;
        Ok(())
    }
}
328
329impl ParsedBuffer for BooleanChunkedBuilder {
330 #[inline]
331 fn parse_bytes(
332 &mut self,
333 bytes: &[u8],
334 ignore_errors: bool,
335 needs_escaping: bool,
336 _missing_is_null: bool,
337 _time_unit: Option<TimeUnit>,
338 ) -> PolarsResult<()> {
339 let bytes = if needs_escaping {
340 &bytes[1..bytes.len() - 1]
341 } else {
342 bytes
343 };
344 if bytes.eq_ignore_ascii_case(b"false") {
345 self.append_value(false);
346 } else if bytes.eq_ignore_ascii_case(b"true") {
347 self.append_value(true);
348 } else if ignore_errors || bytes.is_empty() {
349 self.append_null();
350 } else {
351 polars_bail!(
352 ComputeError: "error while parsing value {} as boolean",
353 String::from_utf8_lossy(bytes),
354 );
355 }
356 Ok(())
357 }
358}
359
/// Accumulates the values of a date/datetime column in their physical numeric form.
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
pub struct DatetimeField<T: PolarsNumericType> {
    /// Parser state cached after the first successful slow-path parse; `None`
    /// until a pattern has been inferred.
    compiled: Option<DatetimeInfer<T>>,
    /// Builder that receives the parsed values and nulls.
    builder: PrimitiveChunkedBuilder<T>,
}
365
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
impl<T: PolarsNumericType> DatetimeField<T> {
    /// Creates an empty buffer called `name` with `capacity` reserved and no
    /// cached parser state.
    fn new(name: PlSmallStr, capacity: usize) -> Self {
        Self {
            compiled: None,
            builder: PrimitiveChunkedBuilder::<T>::new(name, capacity),
        }
    }
}
376
/// Slow path for date/datetime parsing: decodes the field as text, picks a
/// pattern (the cached one, or a freshly inferred one), builds a new parser for
/// it and parses the value.
///
/// On success the parser is cached in `buf.compiled` and the value is appended.
/// Every failure appends a null when `ignore_errors` is set and returns an error
/// otherwise.
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
fn slow_datetime_parser<T>(
    buf: &mut DatetimeField<T>,
    bytes: &[u8],
    time_unit: Option<TimeUnit>,
    ignore_errors: bool,
) -> PolarsResult<()>
where
    T: PolarsNumericType,
    DatetimeInfer<T>: TryFromWithUnit<Pattern>,
{
    let text = if bytes.is_ascii() {
        // SAFETY: pure ASCII is always valid UTF-8.
        unsafe { std::str::from_utf8_unchecked(bytes) }
    } else {
        let Ok(decoded) = std::str::from_utf8(bytes) else {
            if ignore_errors {
                buf.builder.append_null();
                return Ok(());
            }
            polars_bail!(ComputeError: "invalid utf-8 sequence")
        };
        decoded
    };

    // Prefer the pattern of the cached parser; only infer one when there is none.
    let cached_pattern = buf.compiled.as_ref().map(|compiled| compiled.pattern);
    let Some(pattern) = cached_pattern.or_else(|| infer_pattern_single(text)) else {
        if ignore_errors {
            buf.builder.append_null();
            return Ok(());
        }
        polars_bail!(ComputeError: "could not find a 'date/datetime' pattern for '{}'", text)
    };

    let mut infer = match DatetimeInfer::try_from_with_unit(pattern, time_unit) {
        Ok(infer) => infer,
        Err(err) => {
            if ignore_errors {
                buf.builder.append_null();
                return Ok(());
            }
            return Err(err);
        },
    };

    match infer.parse(text) {
        Some(parsed) => {
            buf.compiled = Some(infer);
            buf.builder.append_value(parsed);
            Ok(())
        },
        None if ignore_errors => {
            buf.builder.append_null();
            Ok(())
        },
        None => {
            polars_bail!(ComputeError: "could not parse '{}' with pattern '{:?}'", text, pattern)
        },
    }
}
446
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
impl<T> ParsedBuffer for DatetimeField<T>
where
    T: PolarsNumericType,
    DatetimeInfer<T>: TryFromWithUnit<Pattern> + StrpTimeParser<T::Native>,
{
    /// Parses a date/datetime field and appends it to the builder.
    ///
    /// Surrounding quotes are stripped when `needs_escaping` is set and the field
    /// is at least two bytes long. An empty field appends a null. The cached
    /// parser is tried first; when there is none, or it cannot parse the field,
    /// `slow_datetime_parser` takes over.
    #[inline]
    fn parse_bytes(
        &mut self,
        bytes: &[u8],
        ignore_errors: bool,
        needs_escaping: bool,
        _missing_is_null: bool,
        time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()> {
        let bytes = if needs_escaping && bytes.len() >= 2 {
            &bytes[1..bytes.len() - 1]
        } else {
            bytes
        };

        if bytes.is_empty() {
            self.builder.append_null();
            return Ok(());
        }

        // Fast path: reuse the already compiled parser, if any.
        let fast_path = self
            .compiled
            .as_mut()
            .and_then(|compiled| compiled.parse_bytes(bytes, time_unit));

        match fast_path {
            Some(parsed) => {
                self.builder.append_value(parsed);
                Ok(())
            },
            None => slow_datetime_parser(self, bytes, time_unit, ignore_errors),
        }
    }
}
489
490pub fn init_buffers(
491 projection: &[usize],
492 capacity: usize,
493 schema: &Schema,
494 quote_char: Option<u8>,
495 encoding: CsvEncoding,
496 decimal_comma: bool,
497) -> PolarsResult<Vec<Buffer>> {
498 projection
499 .iter()
500 .map(|&i| {
501 let (name, dtype) = schema.get_at_index(i).unwrap();
502 let name = name.clone();
503 let builder = match dtype {
504 &DataType::Boolean => Buffer::Boolean(BooleanChunkedBuilder::new(name, capacity)),
505 #[cfg(feature = "dtype-i8")]
506 &DataType::Int8 => Buffer::Int8(PrimitiveChunkedBuilder::new(name, capacity)),
507 #[cfg(feature = "dtype-i16")]
508 &DataType::Int16 => Buffer::Int16(PrimitiveChunkedBuilder::new(name, capacity)),
509 &DataType::Int32 => Buffer::Int32(PrimitiveChunkedBuilder::new(name, capacity)),
510 &DataType::Int64 => Buffer::Int64(PrimitiveChunkedBuilder::new(name, capacity)),
511 #[cfg(feature = "dtype-i128")]
512 &DataType::Int128 => Buffer::Int128(PrimitiveChunkedBuilder::new(name, capacity)),
513 #[cfg(feature = "dtype-u8")]
514 &DataType::UInt8 => Buffer::UInt8(PrimitiveChunkedBuilder::new(name, capacity)),
515 #[cfg(feature = "dtype-u16")]
516 &DataType::UInt16 => Buffer::UInt16(PrimitiveChunkedBuilder::new(name, capacity)),
517 &DataType::UInt32 => Buffer::UInt32(PrimitiveChunkedBuilder::new(name, capacity)),
518 &DataType::UInt64 => Buffer::UInt64(PrimitiveChunkedBuilder::new(name, capacity)),
519 &DataType::Float32 => {
520 if decimal_comma {
521 Buffer::DecimalFloat32(
522 PrimitiveChunkedBuilder::new(name, capacity),
523 Default::default(),
524 )
525 } else {
526 Buffer::Float32(PrimitiveChunkedBuilder::new(name, capacity))
527 }
528 },
529 &DataType::Float64 => {
530 if decimal_comma {
531 Buffer::DecimalFloat64(
532 PrimitiveChunkedBuilder::new(name, capacity),
533 Default::default(),
534 )
535 } else {
536 Buffer::Float64(PrimitiveChunkedBuilder::new(name, capacity))
537 }
538 },
539 &DataType::String => {
540 Buffer::Utf8(Utf8Field::new(name, capacity, quote_char, encoding))
541 },
542 #[cfg(feature = "dtype-datetime")]
543 DataType::Datetime(time_unit, time_zone) => Buffer::Datetime {
544 buf: DatetimeField::new(name, capacity),
545 time_unit: *time_unit,
546 time_zone: time_zone.clone(),
547 },
548 #[cfg(feature = "dtype-date")]
549 &DataType::Date => Buffer::Date(DatetimeField::new(name, capacity)),
550 #[cfg(feature = "dtype-categorical")]
551 DataType::Categorical(_, _) | DataType::Enum(_, _) => {
552 match dtype.cat_physical().unwrap() {
553 CategoricalPhysical::U8 => {
554 Buffer::Categorical8(CategoricalField::<Categorical8Type>::new(
555 name,
556 capacity,
557 quote_char,
558 dtype.clone(),
559 ))
560 },
561 CategoricalPhysical::U16 => {
562 Buffer::Categorical16(CategoricalField::<Categorical16Type>::new(
563 name,
564 capacity,
565 quote_char,
566 dtype.clone(),
567 ))
568 },
569 CategoricalPhysical::U32 => {
570 Buffer::Categorical32(CategoricalField::<Categorical32Type>::new(
571 name,
572 capacity,
573 quote_char,
574 dtype.clone(),
575 ))
576 },
577 }
578 },
579 dt => polars_bail!(
580 ComputeError: "unsupported data type when reading CSV: {} when reading CSV", dt,
581 ),
582 };
583 Ok(builder)
584 })
585 .collect()
586}
587
/// A typed column builder; one variant per data type that `init_buffers` supports.
#[allow(clippy::large_enum_variant)]
pub enum Buffer {
    Boolean(BooleanChunkedBuilder),
    #[cfg(feature = "dtype-i8")]
    Int8(PrimitiveChunkedBuilder<Int8Type>),
    #[cfg(feature = "dtype-i16")]
    Int16(PrimitiveChunkedBuilder<Int16Type>),
    Int32(PrimitiveChunkedBuilder<Int32Type>),
    Int64(PrimitiveChunkedBuilder<Int64Type>),
    #[cfg(feature = "dtype-i128")]
    Int128(PrimitiveChunkedBuilder<Int128Type>),
    #[cfg(feature = "dtype-u8")]
    UInt8(PrimitiveChunkedBuilder<UInt8Type>),
    #[cfg(feature = "dtype-u16")]
    UInt16(PrimitiveChunkedBuilder<UInt16Type>),
    UInt32(PrimitiveChunkedBuilder<UInt32Type>),
    UInt64(PrimitiveChunkedBuilder<UInt64Type>),
    Float32(PrimitiveChunkedBuilder<Float32Type>),
    Float64(PrimitiveChunkedBuilder<Float64Type>),
    /// String column, stored as raw bytes until `into_series` is called.
    Utf8(Utf8Field),
    /// Datetime column: values are collected as `Int64` and cast to
    /// `Datetime(time_unit, time_zone)` in `into_series`.
    #[cfg(feature = "dtype-datetime")]
    Datetime {
        buf: DatetimeField<Int64Type>,
        time_unit: TimeUnit,
        time_zone: Option<TimeZone>,
    },
    /// Date column: values are collected as `Int32` and cast to `Date` in
    /// `into_series`.
    #[cfg(feature = "dtype-date")]
    Date(DatetimeField<Int32Type>),
    #[cfg(feature = "dtype-categorical")]
    Categorical8(CategoricalField<Categorical8Type>),
    #[cfg(feature = "dtype-categorical")]
    Categorical16(CategoricalField<Categorical16Type>),
    #[cfg(feature = "dtype-categorical")]
    Categorical32(CategoricalField<Categorical32Type>),
    /// `Float32` column read with a decimal comma; the `Vec<u8>` is the scratch
    /// buffer `add` uses to hold the field with `,` rewritten to `.`.
    DecimalFloat32(PrimitiveChunkedBuilder<Float32Type>, Vec<u8>),
    /// `Float64` column read with a decimal comma; the `Vec<u8>` is the scratch
    /// buffer `add` uses to hold the field with `,` rewritten to `.`.
    DecimalFloat64(PrimitiveChunkedBuilder<Float64Type>, Vec<u8>),
}
626
627impl Buffer {
628 pub fn into_series(self) -> PolarsResult<Series> {
629 let s = match self {
630 Buffer::Boolean(v) => v.finish().into_series(),
631 #[cfg(feature = "dtype-i8")]
632 Buffer::Int8(v) => v.finish().into_series(),
633 #[cfg(feature = "dtype-i16")]
634 Buffer::Int16(v) => v.finish().into_series(),
635 Buffer::Int32(v) => v.finish().into_series(),
636 Buffer::Int64(v) => v.finish().into_series(),
637 #[cfg(feature = "dtype-i128")]
638 Buffer::Int128(v) => v.finish().into_series(),
639 #[cfg(feature = "dtype-u8")]
640 Buffer::UInt8(v) => v.finish().into_series(),
641 #[cfg(feature = "dtype-u16")]
642 Buffer::UInt16(v) => v.finish().into_series(),
643 Buffer::UInt32(v) => v.finish().into_series(),
644 Buffer::UInt64(v) => v.finish().into_series(),
645 Buffer::Float32(v) => v.finish().into_series(),
646 Buffer::Float64(v) => v.finish().into_series(),
647 Buffer::DecimalFloat32(v, _) => v.finish().into_series(),
648 Buffer::DecimalFloat64(v, _) => v.finish().into_series(),
649 #[cfg(feature = "dtype-datetime")]
650 Buffer::Datetime {
651 buf,
652 time_unit,
653 time_zone,
654 } => buf
655 .builder
656 .finish()
657 .into_series()
658 .cast(&DataType::Datetime(time_unit, time_zone))
659 .unwrap(),
660 #[cfg(feature = "dtype-date")]
661 Buffer::Date(v) => v
662 .builder
663 .finish()
664 .into_series()
665 .cast(&DataType::Date)
666 .unwrap(),
667
668 Buffer::Utf8(v) => {
669 let arr = v.mutable.freeze();
670 StringChunked::with_chunk(v.name, unsafe { arr.to_utf8view_unchecked() })
671 .into_series()
672 },
673 #[cfg(feature = "dtype-categorical")]
674 Buffer::Categorical8(buf) => buf.builder.finish().into_series(),
675 #[cfg(feature = "dtype-categorical")]
676 Buffer::Categorical16(buf) => buf.builder.finish().into_series(),
677 #[cfg(feature = "dtype-categorical")]
678 Buffer::Categorical32(buf) => buf.builder.finish().into_series(),
679 };
680 Ok(s)
681 }
682
683 pub fn add_null(&mut self, valid: bool) {
684 match self {
685 Buffer::Boolean(v) => v.append_null(),
686 #[cfg(feature = "dtype-i8")]
687 Buffer::Int8(v) => v.append_null(),
688 #[cfg(feature = "dtype-i16")]
689 Buffer::Int16(v) => v.append_null(),
690 Buffer::Int32(v) => v.append_null(),
691 Buffer::Int64(v) => v.append_null(),
692 #[cfg(feature = "dtype-i128")]
693 Buffer::Int128(v) => v.append_null(),
694 #[cfg(feature = "dtype-u8")]
695 Buffer::UInt8(v) => v.append_null(),
696 #[cfg(feature = "dtype-u16")]
697 Buffer::UInt16(v) => v.append_null(),
698 Buffer::UInt32(v) => v.append_null(),
699 Buffer::UInt64(v) => v.append_null(),
700 Buffer::Float32(v) => v.append_null(),
701 Buffer::Float64(v) => v.append_null(),
702 Buffer::DecimalFloat32(v, _) => v.append_null(),
703 Buffer::DecimalFloat64(v, _) => v.append_null(),
704 Buffer::Utf8(v) => {
705 if valid {
706 v.mutable.push_value("")
707 } else {
708 v.mutable.push_null()
709 }
710 },
711 #[cfg(feature = "dtype-datetime")]
712 Buffer::Datetime { buf, .. } => buf.builder.append_null(),
713 #[cfg(feature = "dtype-date")]
714 Buffer::Date(v) => v.builder.append_null(),
715 #[cfg(feature = "dtype-categorical")]
716 Buffer::Categorical8(buf) => buf.builder.append_null(),
717 #[cfg(feature = "dtype-categorical")]
718 Buffer::Categorical16(buf) => buf.builder.append_null(),
719 #[cfg(feature = "dtype-categorical")]
720 Buffer::Categorical32(buf) => buf.builder.append_null(),
721 };
722 }
723
724 pub fn dtype(&self) -> DataType {
725 match self {
726 Buffer::Boolean(_) => DataType::Boolean,
727 #[cfg(feature = "dtype-i8")]
728 Buffer::Int8(_) => DataType::Int8,
729 #[cfg(feature = "dtype-i16")]
730 Buffer::Int16(_) => DataType::Int16,
731 Buffer::Int32(_) => DataType::Int32,
732 Buffer::Int64(_) => DataType::Int64,
733 #[cfg(feature = "dtype-i128")]
734 Buffer::Int128(_) => DataType::Int128,
735 #[cfg(feature = "dtype-u8")]
736 Buffer::UInt8(_) => DataType::UInt8,
737 #[cfg(feature = "dtype-u16")]
738 Buffer::UInt16(_) => DataType::UInt16,
739 Buffer::UInt32(_) => DataType::UInt32,
740 Buffer::UInt64(_) => DataType::UInt64,
741 Buffer::Float32(_) | Buffer::DecimalFloat32(_, _) => DataType::Float32,
742 Buffer::Float64(_) | Buffer::DecimalFloat64(_, _) => DataType::Float64,
743 Buffer::Utf8(_) => DataType::String,
744 #[cfg(feature = "dtype-datetime")]
745 Buffer::Datetime { time_unit, .. } => DataType::Datetime(*time_unit, None),
746 #[cfg(feature = "dtype-date")]
747 Buffer::Date(_) => DataType::Date,
748 #[cfg(feature = "dtype-categorical")]
749 Buffer::Categorical8(buf) => buf.builder.dtype().clone(),
750 #[cfg(feature = "dtype-categorical")]
751 Buffer::Categorical16(buf) => buf.builder.dtype().clone(),
752 #[cfg(feature = "dtype-categorical")]
753 Buffer::Categorical32(buf) => buf.builder.dtype().clone(),
754 }
755 }
756
757 #[inline]
758 pub fn add(
759 &mut self,
760 bytes: &[u8],
761 ignore_errors: bool,
762 needs_escaping: bool,
763 missing_is_null: bool,
764 ) -> PolarsResult<()> {
765 use Buffer::*;
766 match self {
767 Boolean(buf) => <BooleanChunkedBuilder as ParsedBuffer>::parse_bytes(
768 buf,
769 bytes,
770 ignore_errors,
771 needs_escaping,
772 missing_is_null,
773 None,
774 ),
775 #[cfg(feature = "dtype-i8")]
776 Int8(buf) => <PrimitiveChunkedBuilder<Int8Type> as ParsedBuffer>::parse_bytes(
777 buf,
778 bytes,
779 ignore_errors,
780 needs_escaping,
781 missing_is_null,
782 None,
783 ),
784 #[cfg(feature = "dtype-i16")]
785 Int16(buf) => <PrimitiveChunkedBuilder<Int16Type> as ParsedBuffer>::parse_bytes(
786 buf,
787 bytes,
788 ignore_errors,
789 needs_escaping,
790 missing_is_null,
791 None,
792 ),
793 Int32(buf) => <PrimitiveChunkedBuilder<Int32Type> as ParsedBuffer>::parse_bytes(
794 buf,
795 bytes,
796 ignore_errors,
797 needs_escaping,
798 missing_is_null,
799 None,
800 ),
801 Int64(buf) => <PrimitiveChunkedBuilder<Int64Type> as ParsedBuffer>::parse_bytes(
802 buf,
803 bytes,
804 ignore_errors,
805 needs_escaping,
806 missing_is_null,
807 None,
808 ),
809 #[cfg(feature = "dtype-i128")]
810 Int128(buf) => <PrimitiveChunkedBuilder<Int128Type> as ParsedBuffer>::parse_bytes(
811 buf,
812 bytes,
813 ignore_errors,
814 needs_escaping,
815 missing_is_null,
816 None,
817 ),
818 #[cfg(feature = "dtype-u8")]
819 UInt8(buf) => <PrimitiveChunkedBuilder<UInt8Type> as ParsedBuffer>::parse_bytes(
820 buf,
821 bytes,
822 ignore_errors,
823 needs_escaping,
824 missing_is_null,
825 None,
826 ),
827 #[cfg(feature = "dtype-u16")]
828 UInt16(buf) => <PrimitiveChunkedBuilder<UInt16Type> as ParsedBuffer>::parse_bytes(
829 buf,
830 bytes,
831 ignore_errors,
832 needs_escaping,
833 missing_is_null,
834 None,
835 ),
836 UInt32(buf) => <PrimitiveChunkedBuilder<UInt32Type> as ParsedBuffer>::parse_bytes(
837 buf,
838 bytes,
839 ignore_errors,
840 needs_escaping,
841 missing_is_null,
842 None,
843 ),
844 UInt64(buf) => <PrimitiveChunkedBuilder<UInt64Type> as ParsedBuffer>::parse_bytes(
845 buf,
846 bytes,
847 ignore_errors,
848 needs_escaping,
849 missing_is_null,
850 None,
851 ),
852 Float32(buf) => <PrimitiveChunkedBuilder<Float32Type> as ParsedBuffer>::parse_bytes(
853 buf,
854 bytes,
855 ignore_errors,
856 needs_escaping,
857 missing_is_null,
858 None,
859 ),
860 Float64(buf) => <PrimitiveChunkedBuilder<Float64Type> as ParsedBuffer>::parse_bytes(
861 buf,
862 bytes,
863 ignore_errors,
864 needs_escaping,
865 missing_is_null,
866 None,
867 ),
868 DecimalFloat32(buf, scratch) => {
869 prepare_decimal_comma(bytes, scratch);
870 <PrimitiveChunkedBuilder<Float32Type> as ParsedBuffer>::parse_bytes(
871 buf,
872 scratch,
873 ignore_errors,
874 needs_escaping,
875 missing_is_null,
876 None,
877 )
878 },
879 DecimalFloat64(buf, scratch) => {
880 prepare_decimal_comma(bytes, scratch);
881 <PrimitiveChunkedBuilder<Float64Type> as ParsedBuffer>::parse_bytes(
882 buf,
883 scratch,
884 ignore_errors,
885 needs_escaping,
886 missing_is_null,
887 None,
888 )
889 },
890 Utf8(buf) => <Utf8Field as ParsedBuffer>::parse_bytes(
891 buf,
892 bytes,
893 ignore_errors,
894 needs_escaping,
895 missing_is_null,
896 None,
897 ),
898 #[cfg(feature = "dtype-datetime")]
899 Datetime { buf, time_unit, .. } => {
900 <DatetimeField<Int64Type> as ParsedBuffer>::parse_bytes(
901 buf,
902 bytes,
903 ignore_errors,
904 needs_escaping,
905 missing_is_null,
906 Some(*time_unit),
907 )
908 },
909 #[cfg(feature = "dtype-date")]
910 Date(buf) => <DatetimeField<Int32Type> as ParsedBuffer>::parse_bytes(
911 buf,
912 bytes,
913 ignore_errors,
914 needs_escaping,
915 missing_is_null,
916 None,
917 ),
918 #[cfg(feature = "dtype-categorical")]
919 Categorical8(buf) => {
920 buf.parse_bytes(bytes, ignore_errors, needs_escaping, missing_is_null, None)
921 },
922 #[cfg(feature = "dtype-categorical")]
923 Categorical16(buf) => {
924 buf.parse_bytes(bytes, ignore_errors, needs_escaping, missing_is_null, None)
925 },
926 #[cfg(feature = "dtype-categorical")]
927 Categorical32(buf) => {
928 buf.parse_bytes(bytes, ignore_errors, needs_escaping, missing_is_null, None)
929 },
930 }
931 }
932}
933
934#[inline]
935fn prepare_decimal_comma(bytes: &[u8], scratch: &mut Vec<u8>) {
936 scratch.clear();
937 scratch.reserve(bytes.len());
938
939 for &byte in bytes {
941 if byte == b',' {
942 unsafe { scratch.push_unchecked(b'.') }
943 } else {
944 unsafe { scratch.push_unchecked(byte) }
945 }
946 }
947}