1use arrow::array::MutableBinaryViewArray;
2#[cfg(feature = "dtype-decimal")]
3use polars_compute::decimal::str_to_dec128;
4#[cfg(feature = "dtype-categorical")]
5use polars_core::chunked_array::builder::CategoricalChunkedBuilder;
6use polars_core::prelude::*;
7use polars_error::to_compute_err;
8#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
9use polars_time::chunkedarray::string::Pattern;
10#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
11use polars_time::prelude::string::infer::{
12 DatetimeInfer, StrpTimeParser, TryFromWithUnit, infer_pattern_single,
13};
14use polars_utils::vec::PushUnchecked;
15
16use super::options::CsvEncoding;
17use super::parser::{could_be_whitespace_fast, skip_whitespace};
18use super::utils::escape_field;
19
/// Parsing hook for numeric CSV fields.
///
/// Implemented for the numeric Polars types so that a raw byte slice can be
/// turned into the matching native value.
pub(crate) trait PrimitiveParser: PolarsNumericType {
    /// Tries to parse `bytes` as `Self::Native`; `None` signals that parsing failed.
    fn parse(bytes: &[u8]) -> Option<Self::Native>;
}
23
24impl PrimitiveParser for Float32Type {
25 #[inline]
26 fn parse(bytes: &[u8]) -> Option<f32> {
27 fast_float2::parse(bytes).ok()
28 }
29}
30impl PrimitiveParser for Float64Type {
31 #[inline]
32 fn parse(bytes: &[u8]) -> Option<f64> {
33 fast_float2::parse(bytes).ok()
34 }
35}
36
37#[cfg(feature = "dtype-u8")]
38impl PrimitiveParser for UInt8Type {
39 #[inline]
40 fn parse(bytes: &[u8]) -> Option<u8> {
41 atoi_simd::parse_skipped(bytes).ok()
42 }
43}
44#[cfg(feature = "dtype-u16")]
45impl PrimitiveParser for UInt16Type {
46 #[inline]
47 fn parse(bytes: &[u8]) -> Option<u16> {
48 atoi_simd::parse_skipped(bytes).ok()
49 }
50}
51impl PrimitiveParser for UInt32Type {
52 #[inline]
53 fn parse(bytes: &[u8]) -> Option<u32> {
54 atoi_simd::parse_skipped(bytes).ok()
55 }
56}
57impl PrimitiveParser for UInt64Type {
58 #[inline]
59 fn parse(bytes: &[u8]) -> Option<u64> {
60 atoi_simd::parse_skipped(bytes).ok()
61 }
62}
63#[cfg(feature = "dtype-u128")]
64impl PrimitiveParser for UInt128Type {
65 #[inline]
66 fn parse(bytes: &[u8]) -> Option<u128> {
67 atoi_simd::parse_skipped(bytes).ok()
68 }
69}
70#[cfg(feature = "dtype-i8")]
71impl PrimitiveParser for Int8Type {
72 #[inline]
73 fn parse(bytes: &[u8]) -> Option<i8> {
74 atoi_simd::parse_skipped(bytes).ok()
75 }
76}
77#[cfg(feature = "dtype-i16")]
78impl PrimitiveParser for Int16Type {
79 #[inline]
80 fn parse(bytes: &[u8]) -> Option<i16> {
81 atoi_simd::parse_skipped(bytes).ok()
82 }
83}
84impl PrimitiveParser for Int32Type {
85 #[inline]
86 fn parse(bytes: &[u8]) -> Option<i32> {
87 atoi_simd::parse_skipped(bytes).ok()
88 }
89}
90impl PrimitiveParser for Int64Type {
91 #[inline]
92 fn parse(bytes: &[u8]) -> Option<i64> {
93 atoi_simd::parse_skipped(bytes).ok()
94 }
95}
96#[cfg(feature = "dtype-i128")]
97impl PrimitiveParser for Int128Type {
98 #[inline]
99 fn parse(bytes: &[u8]) -> Option<i128> {
100 atoi_simd::parse_skipped(bytes).ok()
101 }
102}
103
/// A column buffer that can parse one raw CSV field and append the outcome to itself.
trait ParsedBuffer {
    /// Parses `bytes` and appends the result to the buffer.
    ///
    /// How the implementations in this file use the arguments:
    /// * `ignore_errors` - append a null instead of returning an error when the
    ///   field cannot be parsed.
    /// * `_needs_escaping` - the field is wrapped in quote characters that have to
    ///   be removed before parsing.
    /// * `_missing_is_null` - only read by the string buffer: whether an empty
    ///   field is stored as null or as an empty string.
    /// * `_time_unit` - only read by the date/datetime buffer, which forwards it to
    ///   its parser.
    fn parse_bytes(
        &mut self,
        bytes: &[u8],
        ignore_errors: bool,
        _needs_escaping: bool,
        _missing_is_null: bool,
        _time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()>;
}
114
115impl<T> ParsedBuffer for PrimitiveChunkedBuilder<T>
116where
117 T: PolarsNumericType + PrimitiveParser,
118{
119 #[inline]
120 fn parse_bytes(
121 &mut self,
122 mut bytes: &[u8],
123 ignore_errors: bool,
124 needs_escaping: bool,
125 _missing_is_null: bool,
126 _time_unit: Option<TimeUnit>,
127 ) -> PolarsResult<()> {
128 if !bytes.is_empty() && needs_escaping {
129 bytes = &bytes[1..bytes.len() - 1];
130 }
131
132 if !bytes.is_empty() && could_be_whitespace_fast(bytes[0]) {
133 bytes = skip_whitespace(bytes);
134 }
135
136 if bytes.is_empty() {
137 self.append_null();
138 return Ok(());
139 }
140
141 match T::parse(bytes) {
142 Some(value) => self.append_value(value),
143 None => {
144 if ignore_errors {
145 self.append_null()
146 } else {
147 polars_bail!(ComputeError: "invalid primitive value found during CSV parsing")
148 }
149 },
150 }
151 Ok(())
152 }
153}
154
/// Buffer that collects the string fields of a single CSV column.
pub struct Utf8Field {
    /// Column name; used as the name of the finished string column.
    name: PlSmallStr,
    /// Accumulates the bytes of every parsed field (or a null).
    mutable: MutableBinaryViewArray<[u8]>,
    /// Reusable scratch space that receives the output of `escape_field`.
    scratch: Vec<u8>,
    /// Quote character passed to `escape_field`; defaults to `"`.
    quote_char: u8,
    /// Selects how invalid UTF-8 is handled; `CsvEncoding::LossyUtf8` replaces it lossily.
    encoding: CsvEncoding,
}
162
163impl Utf8Field {
164 fn new(
165 name: PlSmallStr,
166 capacity: usize,
167 quote_char: Option<u8>,
168 encoding: CsvEncoding,
169 ) -> Self {
170 Self {
171 name,
172 mutable: MutableBinaryViewArray::with_capacity(capacity),
173 scratch: vec![],
174 quote_char: quote_char.unwrap_or(b'"'),
175 encoding,
176 }
177 }
178}
179
180#[inline]
181pub fn validate_utf8(bytes: &[u8]) -> bool {
182 simdutf8::basic::from_utf8(bytes).is_ok()
183}
184
185impl ParsedBuffer for Utf8Field {
186 #[inline]
187 fn parse_bytes(
188 &mut self,
189 bytes: &[u8],
190 ignore_errors: bool,
191 needs_escaping: bool,
192 missing_is_null: bool,
193 _time_unit: Option<TimeUnit>,
194 ) -> PolarsResult<()> {
195 if bytes.is_empty() {
196 if missing_is_null {
197 self.mutable.push_null()
198 } else {
199 self.mutable.push(Some([]))
200 }
201 return Ok(());
202 }
203
204 let escaped_bytes = if needs_escaping {
206 self.scratch.clear();
207 self.scratch.reserve(bytes.len());
208 polars_ensure!(bytes.len() > 1 && bytes.last() == Some(&self.quote_char), ComputeError: "invalid csv file\n\nField `{}` is not properly escaped.", std::str::from_utf8(bytes).map_err(to_compute_err)?);
209
210 unsafe {
213 let n_written =
214 escape_field(bytes, self.quote_char, self.scratch.spare_capacity_mut());
215 self.scratch.set_len(n_written);
216 }
217
218 self.scratch.as_slice()
219 } else {
220 bytes
221 };
222
223 if matches!(self.encoding, CsvEncoding::LossyUtf8) | ignore_errors {
224 let parse_result = validate_utf8(escaped_bytes);
227
228 match parse_result {
229 true => {
230 let value = escaped_bytes;
231 self.mutable.push_value(value)
232 },
233 false => {
234 if matches!(self.encoding, CsvEncoding::LossyUtf8) {
235 let s = String::from_utf8_lossy(escaped_bytes);
237 self.mutable.push_value(s.as_ref().as_bytes())
238 } else if ignore_errors {
239 self.mutable.push_null()
240 } else {
241 if needs_escaping && validate_utf8(bytes) {
243 polars_bail!(ComputeError: "string field is not properly escaped");
244 } else {
245 polars_bail!(ComputeError: "invalid utf-8 sequence");
246 }
247 }
248 },
249 }
250 } else {
251 self.mutable.push_value(escaped_bytes)
252 }
253
254 Ok(())
255 }
256}
257
/// Buffer that collects the fields of a categorical/enum CSV column.
#[cfg(feature = "dtype-categorical")]
pub struct CategoricalField<T: PolarsCategoricalType> {
    /// Reusable scratch space that receives the output of `escape_field`.
    escape_scratch: Vec<u8>,
    /// Quote character passed to `escape_field`; defaults to `"`.
    quote_char: u8,
    /// Builder that receives every parsed string (or null).
    builder: CategoricalChunkedBuilder<T>,
}
264
#[cfg(feature = "dtype-categorical")]
impl<T: PolarsCategoricalType> CategoricalField<T> {
    /// Creates a buffer for column `name` of type `dtype`, reserving room for
    /// `capacity` values. A missing `quote_char` falls back to `"`.
    fn new(name: PlSmallStr, capacity: usize, quote_char: Option<u8>, dtype: DataType) -> Self {
        let mut builder = CategoricalChunkedBuilder::new(name, dtype);
        builder.reserve(capacity);
        let quote_char = quote_char.unwrap_or(b'"');

        Self {
            escape_scratch: Vec::new(),
            quote_char,
            builder,
        }
    }

    /// Appends one field to the categorical builder.
    ///
    /// Empty fields become null. Fields that are not valid UTF-8 become null when
    /// `ignore_errors` is set and are an error otherwise. Quoted fields
    /// (`needs_escaping`) are unescaped into the scratch buffer before being
    /// appended.
    #[inline]
    fn parse_bytes(
        &mut self,
        bytes: &[u8],
        ignore_errors: bool,
        needs_escaping: bool,
        _missing_is_null: bool,
        _time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()> {
        if bytes.is_empty() {
            self.builder.append_null();
            return Ok(());
        }

        if !validate_utf8(bytes) {
            if ignore_errors {
                self.builder.append_null();
                return Ok(());
            }
            polars_bail!(ComputeError: "invalid utf-8 sequence");
        }

        let key_bytes: &[u8] = if needs_escaping {
            // NOTE(review): unlike `Utf8Field`, the trailing quote character is
            // not checked here - confirm that this is intended.
            polars_ensure!(bytes.len() > 1, ComputeError: "invalid csv file\n\nField `{}` is not properly escaped.", std::str::from_utf8(bytes).map_err(to_compute_err)?);
            self.escape_scratch.clear();
            self.escape_scratch.reserve(bytes.len());
            // SAFETY: the scratch buffer is empty with at least `bytes.len()` bytes
            // of spare capacity and `bytes.len() > 1` was checked. This relies on
            // `escape_field` initialising exactly the `n_written` bytes it reports.
            // NOTE(review): confirm against `escape_field`'s safety contract.
            unsafe {
                let n_written = escape_field(
                    bytes,
                    self.quote_char,
                    self.escape_scratch.spare_capacity_mut(),
                );
                self.escape_scratch.set_len(n_written);
            }
            self.escape_scratch.as_slice()
        } else {
            bytes
        };

        // SAFETY: `bytes` was validated as UTF-8 above. For the unescaped copy this
        // assumes `escape_field` keeps the data valid UTF-8.
        // NOTE(review): confirm.
        let key = unsafe { std::str::from_utf8_unchecked(key_bytes) };
        self.builder.append_str(key)?;
        Ok(())
    }
}
325
326impl ParsedBuffer for BooleanChunkedBuilder {
327 #[inline]
328 fn parse_bytes(
329 &mut self,
330 bytes: &[u8],
331 ignore_errors: bool,
332 needs_escaping: bool,
333 _missing_is_null: bool,
334 _time_unit: Option<TimeUnit>,
335 ) -> PolarsResult<()> {
336 let bytes = if needs_escaping {
337 &bytes[1..bytes.len() - 1]
338 } else {
339 bytes
340 };
341 if bytes.eq_ignore_ascii_case(b"false") {
342 self.append_value(false);
343 } else if bytes.eq_ignore_ascii_case(b"true") {
344 self.append_value(true);
345 } else if ignore_errors || bytes.is_empty() {
346 self.append_null();
347 } else {
348 polars_bail!(
349 ComputeError: "error while parsing value {} as boolean",
350 String::from_utf8_lossy(bytes),
351 );
352 }
353 Ok(())
354 }
355}
356
/// Buffer that collects the fields of a decimal CSV column.
#[cfg(feature = "dtype-decimal")]
pub struct DecimalField {
    /// Receives the `i128` value produced by `str_to_dec128` for every field.
    builder: PrimitiveChunkedBuilder<Int128Type>,
    /// Precision passed to `str_to_dec128` and used for the resulting `DataType::Decimal`.
    precision: usize,
    /// Scale passed to `str_to_dec128` and used for the resulting `DataType::Decimal`.
    scale: usize,
    /// Forwarded to `str_to_dec128`.
    /// NOTE(review): presumably selects `,` as the decimal separator - confirm.
    decimal_comma: bool,
}
364
#[cfg(feature = "dtype-decimal")]
impl DecimalField {
    /// Creates an empty decimal buffer for column `name` with room for
    /// `capacity` values and the given `precision`, `scale` and `decimal_comma`
    /// settings.
    fn new(
        name: PlSmallStr,
        capacity: usize,
        precision: usize,
        scale: usize,
        decimal_comma: bool,
    ) -> Self {
        Self {
            builder: PrimitiveChunkedBuilder::<Int128Type>::new(name, capacity),
            precision,
            scale,
            decimal_comma,
        }
    }
}
383
#[cfg(feature = "dtype-decimal")]
impl ParsedBuffer for DecimalField {
    /// Parses a decimal field with `str_to_dec128` and appends it to the builder.
    ///
    /// * A quoted field (`needs_escaping`) has its first and last byte removed.
    /// * Leading whitespace is skipped.
    /// * An empty field (after the two steps above) is appended as null.
    ///
    /// # Errors
    /// Returns a `ComputeError` when the field cannot be parsed and
    /// `ignore_errors` is `false`; with `ignore_errors` a null is appended instead.
    #[inline]
    fn parse_bytes(
        &mut self,
        mut bytes: &[u8],
        ignore_errors: bool,
        needs_escaping: bool,
        _missing_is_null: bool,
        _time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()> {
        // Only strip the surrounding quotes when both of them can be present. A
        // one-byte field would otherwise produce the range `1..0` and panic; it is
        // left as-is so it goes through the regular "invalid value" handling below.
        if needs_escaping && bytes.len() >= 2 {
            bytes = &bytes[1..bytes.len() - 1];
        }

        if !bytes.is_empty() && could_be_whitespace_fast(bytes[0]) {
            bytes = skip_whitespace(bytes);
        }

        if bytes.is_empty() {
            self.builder.append_null();
            return Ok(());
        }

        match str_to_dec128(bytes, self.precision, self.scale, self.decimal_comma) {
            Some(value) => self.builder.append_value(value),
            None => {
                if ignore_errors {
                    self.builder.append_null()
                } else {
                    polars_bail!(ComputeError: "invalid decimal value found during CSV parsing")
                }
            },
        }

        Ok(())
    }
}
422
/// Buffer that collects the fields of a date or datetime CSV column.
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
pub struct DatetimeField<T: PolarsNumericType> {
    /// Parser for the most recently successful pattern; `None` until a field has
    /// been parsed successfully.
    compiled: Option<DatetimeInfer<T>>,
    /// Receives the parsed physical value (or a null) for every field.
    builder: PrimitiveChunkedBuilder<T>,
}
428
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
impl<T: PolarsNumericType> DatetimeField<T> {
    /// Creates an empty buffer for column `name` with room for `capacity` values.
    /// No pattern is compiled yet.
    fn new(name: PlSmallStr, capacity: usize) -> Self {
        Self {
            compiled: None,
            builder: PrimitiveChunkedBuilder::<T>::new(name, capacity),
        }
    }
}
439
/// Fallback date/datetime parser.
///
/// Uses the pattern of the already compiled parser when there is one, otherwise
/// infers a pattern from this field. A fresh `DatetimeInfer` is built from that
/// pattern and, when it parses the field, is stored in `buf.compiled` for reuse.
///
/// With `ignore_errors` every failure (invalid UTF-8, no pattern found, parser
/// construction failure, unparsable value) appends a null; otherwise it is
/// returned as an error.
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
fn slow_datetime_parser<T>(
    buf: &mut DatetimeField<T>,
    bytes: &[u8],
    time_unit: Option<TimeUnit>,
    ignore_errors: bool,
) -> PolarsResult<()>
where
    T: PolarsNumericType,
    DatetimeInfer<T>: TryFromWithUnit<Pattern>,
{
    let val = if bytes.is_ascii() {
        // SAFETY: ASCII bytes are always valid UTF-8.
        unsafe { std::str::from_utf8_unchecked(bytes) }
    } else {
        match std::str::from_utf8(bytes) {
            Ok(text) => text,
            Err(_) if ignore_errors => {
                buf.builder.append_null();
                return Ok(());
            },
            Err(_) => {
                polars_bail!(ComputeError: "invalid utf-8 sequence")
            },
        }
    };

    let pattern = if let Some(compiled) = &buf.compiled {
        compiled.pattern
    } else if let Some(inferred) = infer_pattern_single(val) {
        inferred
    } else if ignore_errors {
        buf.builder.append_null();
        return Ok(());
    } else {
        polars_bail!(ComputeError: "could not find a 'date/datetime' pattern for '{}'", val)
    };

    let mut fresh: DatetimeInfer<T> = match DatetimeInfer::try_from_with_unit(pattern, time_unit) {
        Ok(infer) => infer,
        Err(_) if ignore_errors => {
            buf.builder.append_null();
            return Ok(());
        },
        Err(err) => return Err(err),
    };

    match fresh.parse(val) {
        Some(parsed) => {
            // Keep the working parser so later fields can take the fast path.
            buf.compiled = Some(fresh);
            buf.builder.append_value(parsed);
            Ok(())
        },
        None if ignore_errors => {
            buf.builder.append_null();
            Ok(())
        },
        None => {
            polars_bail!(ComputeError: "could not parse '{}' with pattern '{:?}'", val, pattern)
        },
    }
}
509
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
impl<T> ParsedBuffer for DatetimeField<T>
where
    T: PolarsNumericType,
    DatetimeInfer<T>: TryFromWithUnit<Pattern> + StrpTimeParser<T::Native>,
{
    /// Parses a date/datetime field and appends it to the builder.
    ///
    /// A quoted field of at least two bytes has its first and last byte removed.
    /// An empty field becomes null. The compiled parser is tried first; when there
    /// is none, or it cannot parse the field, `slow_datetime_parser` takes over.
    #[inline]
    fn parse_bytes(
        &mut self,
        mut bytes: &[u8],
        ignore_errors: bool,
        needs_escaping: bool,
        _missing_is_null: bool,
        time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()> {
        if needs_escaping && bytes.len() >= 2 {
            bytes = &bytes[1..bytes.len() - 1]
        }

        if bytes.is_empty() {
            self.builder.append_null();
            return Ok(());
        }

        // Fast path: reuse the parser that worked for an earlier field.
        if let Some(compiled) = &mut self.compiled {
            if let Some(parsed) = compiled.parse_bytes(bytes, time_unit) {
                self.builder.append_value(parsed);
                return Ok(());
            }
        }

        slow_datetime_parser(self, bytes, time_unit, ignore_errors)
    }
}
552
553pub fn init_buffers(
554 projection: &[usize],
555 capacity: usize,
556 schema: &Schema,
557 quote_char: Option<u8>,
558 encoding: CsvEncoding,
559 decimal_comma: bool,
560) -> PolarsResult<Vec<Buffer>> {
561 projection
562 .iter()
563 .map(|&i| {
564 let (name, dtype) = schema.get_at_index(i).unwrap();
565 let name = name.clone();
566 let builder = match dtype {
567 &DataType::Boolean => Buffer::Boolean(BooleanChunkedBuilder::new(name, capacity)),
568 #[cfg(feature = "dtype-i8")]
569 &DataType::Int8 => Buffer::Int8(PrimitiveChunkedBuilder::new(name, capacity)),
570 #[cfg(feature = "dtype-i16")]
571 &DataType::Int16 => Buffer::Int16(PrimitiveChunkedBuilder::new(name, capacity)),
572 &DataType::Int32 => Buffer::Int32(PrimitiveChunkedBuilder::new(name, capacity)),
573 &DataType::Int64 => Buffer::Int64(PrimitiveChunkedBuilder::new(name, capacity)),
574 #[cfg(feature = "dtype-i128")]
575 &DataType::Int128 => Buffer::Int128(PrimitiveChunkedBuilder::new(name, capacity)),
576 #[cfg(feature = "dtype-u8")]
577 &DataType::UInt8 => Buffer::UInt8(PrimitiveChunkedBuilder::new(name, capacity)),
578 #[cfg(feature = "dtype-u16")]
579 &DataType::UInt16 => Buffer::UInt16(PrimitiveChunkedBuilder::new(name, capacity)),
580 &DataType::UInt32 => Buffer::UInt32(PrimitiveChunkedBuilder::new(name, capacity)),
581 &DataType::UInt64 => Buffer::UInt64(PrimitiveChunkedBuilder::new(name, capacity)),
582 #[cfg(feature = "dtype-u128")]
583 &DataType::UInt128 => Buffer::UInt128(PrimitiveChunkedBuilder::new(name, capacity)),
584 &DataType::Float32 => {
585 if decimal_comma {
586 Buffer::DecimalFloat32(
587 PrimitiveChunkedBuilder::new(name, capacity),
588 Default::default(),
589 )
590 } else {
591 Buffer::Float32(PrimitiveChunkedBuilder::new(name, capacity))
592 }
593 },
594 &DataType::Float64 => {
595 if decimal_comma {
596 Buffer::DecimalFloat64(
597 PrimitiveChunkedBuilder::new(name, capacity),
598 Default::default(),
599 )
600 } else {
601 Buffer::Float64(PrimitiveChunkedBuilder::new(name, capacity))
602 }
603 },
604 #[cfg(feature = "dtype-decimal")]
605 &DataType::Decimal(precision, scale) => Buffer::Decimal(DecimalField::new(
606 name,
607 capacity,
608 precision,
609 scale,
610 decimal_comma,
611 )),
612 &DataType::String => {
613 Buffer::Utf8(Utf8Field::new(name, capacity, quote_char, encoding))
614 },
615 #[cfg(feature = "dtype-datetime")]
616 DataType::Datetime(time_unit, time_zone) => Buffer::Datetime {
617 buf: DatetimeField::new(name, capacity),
618 time_unit: *time_unit,
619 time_zone: time_zone.clone(),
620 },
621 #[cfg(feature = "dtype-date")]
622 &DataType::Date => Buffer::Date(DatetimeField::new(name, capacity)),
623 #[cfg(feature = "dtype-categorical")]
624 DataType::Categorical(_, _) | DataType::Enum(_, _) => {
625 match dtype.cat_physical().unwrap() {
626 CategoricalPhysical::U8 => {
627 Buffer::Categorical8(CategoricalField::<Categorical8Type>::new(
628 name,
629 capacity,
630 quote_char,
631 dtype.clone(),
632 ))
633 },
634 CategoricalPhysical::U16 => {
635 Buffer::Categorical16(CategoricalField::<Categorical16Type>::new(
636 name,
637 capacity,
638 quote_char,
639 dtype.clone(),
640 ))
641 },
642 CategoricalPhysical::U32 => {
643 Buffer::Categorical32(CategoricalField::<Categorical32Type>::new(
644 name,
645 capacity,
646 quote_char,
647 dtype.clone(),
648 ))
649 },
650 }
651 },
652 dt => polars_bail!(
653 ComputeError: "unsupported data type when reading CSV: {} when reading CSV", dt,
654 ),
655 };
656 Ok(builder)
657 })
658 .collect()
659}
660
/// Typed column buffer used while parsing CSV fields; one variant per supported
/// data type.
///
/// Each variant wraps the builder (or field helper) that receives the parsed
/// values of a single column.
#[allow(clippy::large_enum_variant)]
pub enum Buffer {
    Boolean(BooleanChunkedBuilder),
    #[cfg(feature = "dtype-i8")]
    Int8(PrimitiveChunkedBuilder<Int8Type>),
    #[cfg(feature = "dtype-i16")]
    Int16(PrimitiveChunkedBuilder<Int16Type>),
    Int32(PrimitiveChunkedBuilder<Int32Type>),
    Int64(PrimitiveChunkedBuilder<Int64Type>),
    #[cfg(feature = "dtype-i128")]
    Int128(PrimitiveChunkedBuilder<Int128Type>),
    #[cfg(feature = "dtype-u8")]
    UInt8(PrimitiveChunkedBuilder<UInt8Type>),
    #[cfg(feature = "dtype-u16")]
    UInt16(PrimitiveChunkedBuilder<UInt16Type>),
    UInt32(PrimitiveChunkedBuilder<UInt32Type>),
    UInt64(PrimitiveChunkedBuilder<UInt64Type>),
    #[cfg(feature = "dtype-u128")]
    UInt128(PrimitiveChunkedBuilder<UInt128Type>),
    Float32(PrimitiveChunkedBuilder<Float32Type>),
    Float64(PrimitiveChunkedBuilder<Float64Type>),
    #[cfg(feature = "dtype-decimal")]
    Decimal(DecimalField),
    Utf8(Utf8Field),
    /// Datetime column; `time_unit` and `time_zone` describe the final data type.
    #[cfg(feature = "dtype-datetime")]
    Datetime {
        buf: DatetimeField<Int64Type>,
        time_unit: TimeUnit,
        time_zone: Option<TimeZone>,
    },
    #[cfg(feature = "dtype-date")]
    Date(DatetimeField<Int32Type>),
    #[cfg(feature = "dtype-categorical")]
    Categorical8(CategoricalField<Categorical8Type>),
    #[cfg(feature = "dtype-categorical")]
    Categorical16(CategoricalField<Categorical16Type>),
    #[cfg(feature = "dtype-categorical")]
    Categorical32(CategoricalField<Categorical32Type>),
    /// `f32` column read with a decimal comma. The `Vec<u8>` is scratch space
    /// that holds the field with `,` replaced by `.` before it is parsed.
    DecimalFloat32(PrimitiveChunkedBuilder<Float32Type>, Vec<u8>),
    /// `f64` counterpart of `DecimalFloat32`.
    DecimalFloat64(PrimitiveChunkedBuilder<Float64Type>, Vec<u8>),
}
703
704impl Buffer {
705 pub fn into_series(self) -> PolarsResult<Series> {
706 let s = match self {
707 Buffer::Boolean(v) => v.finish().into_series(),
708 #[cfg(feature = "dtype-i8")]
709 Buffer::Int8(v) => v.finish().into_series(),
710 #[cfg(feature = "dtype-i16")]
711 Buffer::Int16(v) => v.finish().into_series(),
712 Buffer::Int32(v) => v.finish().into_series(),
713 Buffer::Int64(v) => v.finish().into_series(),
714 #[cfg(feature = "dtype-i128")]
715 Buffer::Int128(v) => v.finish().into_series(),
716 #[cfg(feature = "dtype-u8")]
717 Buffer::UInt8(v) => v.finish().into_series(),
718 #[cfg(feature = "dtype-u16")]
719 Buffer::UInt16(v) => v.finish().into_series(),
720 Buffer::UInt32(v) => v.finish().into_series(),
721 Buffer::UInt64(v) => v.finish().into_series(),
722 #[cfg(feature = "dtype-u128")]
723 Buffer::UInt128(v) => v.finish().into_series(),
724 Buffer::Float32(v) => v.finish().into_series(),
725 Buffer::Float64(v) => v.finish().into_series(),
726 Buffer::DecimalFloat32(v, _) => v.finish().into_series(),
727 Buffer::DecimalFloat64(v, _) => v.finish().into_series(),
728 #[cfg(feature = "dtype-decimal")]
729 Buffer::Decimal(DecimalField {
730 builder,
731 precision,
732 scale,
733 ..
734 }) => unsafe {
735 builder
736 .finish()
737 .into_series()
738 .from_physical_unchecked(&DataType::Decimal(precision, scale))
739 .unwrap()
740 },
741 #[cfg(feature = "dtype-datetime")]
742 Buffer::Datetime {
743 buf,
744 time_unit,
745 time_zone,
746 } => buf
747 .builder
748 .finish()
749 .into_series()
750 .cast(&DataType::Datetime(time_unit, time_zone))
751 .unwrap(),
752 #[cfg(feature = "dtype-date")]
753 Buffer::Date(v) => v
754 .builder
755 .finish()
756 .into_series()
757 .cast(&DataType::Date)
758 .unwrap(),
759
760 Buffer::Utf8(v) => {
761 let arr = v.mutable.freeze();
762 StringChunked::with_chunk(v.name, unsafe { arr.to_utf8view_unchecked() })
763 .into_series()
764 },
765 #[cfg(feature = "dtype-categorical")]
766 Buffer::Categorical8(buf) => buf.builder.finish().into_series(),
767 #[cfg(feature = "dtype-categorical")]
768 Buffer::Categorical16(buf) => buf.builder.finish().into_series(),
769 #[cfg(feature = "dtype-categorical")]
770 Buffer::Categorical32(buf) => buf.builder.finish().into_series(),
771 };
772 Ok(s)
773 }
774
775 pub fn add_null(&mut self, valid: bool) {
776 match self {
777 Buffer::Boolean(v) => v.append_null(),
778 #[cfg(feature = "dtype-i8")]
779 Buffer::Int8(v) => v.append_null(),
780 #[cfg(feature = "dtype-i16")]
781 Buffer::Int16(v) => v.append_null(),
782 Buffer::Int32(v) => v.append_null(),
783 Buffer::Int64(v) => v.append_null(),
784 #[cfg(feature = "dtype-i128")]
785 Buffer::Int128(v) => v.append_null(),
786 #[cfg(feature = "dtype-u8")]
787 Buffer::UInt8(v) => v.append_null(),
788 #[cfg(feature = "dtype-u16")]
789 Buffer::UInt16(v) => v.append_null(),
790 Buffer::UInt32(v) => v.append_null(),
791 Buffer::UInt64(v) => v.append_null(),
792 #[cfg(feature = "dtype-u128")]
793 Buffer::UInt128(v) => v.append_null(),
794 Buffer::Float32(v) => v.append_null(),
795 Buffer::Float64(v) => v.append_null(),
796 #[cfg(feature = "dtype-decimal")]
797 Buffer::Decimal(buf) => buf.builder.append_null(),
798 Buffer::DecimalFloat32(v, _) => v.append_null(),
799 Buffer::DecimalFloat64(v, _) => v.append_null(),
800 Buffer::Utf8(v) => {
801 if valid {
802 v.mutable.push_value("")
803 } else {
804 v.mutable.push_null()
805 }
806 },
807 #[cfg(feature = "dtype-datetime")]
808 Buffer::Datetime { buf, .. } => buf.builder.append_null(),
809 #[cfg(feature = "dtype-date")]
810 Buffer::Date(v) => v.builder.append_null(),
811 #[cfg(feature = "dtype-categorical")]
812 Buffer::Categorical8(buf) => buf.builder.append_null(),
813 #[cfg(feature = "dtype-categorical")]
814 Buffer::Categorical16(buf) => buf.builder.append_null(),
815 #[cfg(feature = "dtype-categorical")]
816 Buffer::Categorical32(buf) => buf.builder.append_null(),
817 };
818 }
819
820 pub fn dtype(&self) -> DataType {
821 match self {
822 Buffer::Boolean(_) => DataType::Boolean,
823 #[cfg(feature = "dtype-i8")]
824 Buffer::Int8(_) => DataType::Int8,
825 #[cfg(feature = "dtype-i16")]
826 Buffer::Int16(_) => DataType::Int16,
827 Buffer::Int32(_) => DataType::Int32,
828 Buffer::Int64(_) => DataType::Int64,
829 #[cfg(feature = "dtype-i128")]
830 Buffer::Int128(_) => DataType::Int128,
831 #[cfg(feature = "dtype-u8")]
832 Buffer::UInt8(_) => DataType::UInt8,
833 #[cfg(feature = "dtype-u16")]
834 Buffer::UInt16(_) => DataType::UInt16,
835 Buffer::UInt32(_) => DataType::UInt32,
836 Buffer::UInt64(_) => DataType::UInt64,
837 #[cfg(feature = "dtype-u128")]
838 Buffer::UInt128(_) => DataType::UInt128,
839 Buffer::Float32(_) | Buffer::DecimalFloat32(_, _) => DataType::Float32,
840 Buffer::Float64(_) | Buffer::DecimalFloat64(_, _) => DataType::Float64,
841 #[cfg(feature = "dtype-decimal")]
842 Buffer::Decimal(DecimalField {
843 precision, scale, ..
844 }) => DataType::Decimal(*precision, *scale),
845 Buffer::Utf8(_) => DataType::String,
846 #[cfg(feature = "dtype-datetime")]
847 Buffer::Datetime { time_unit, .. } => DataType::Datetime(*time_unit, None),
848 #[cfg(feature = "dtype-date")]
849 Buffer::Date(_) => DataType::Date,
850 #[cfg(feature = "dtype-categorical")]
851 Buffer::Categorical8(buf) => buf.builder.dtype().clone(),
852 #[cfg(feature = "dtype-categorical")]
853 Buffer::Categorical16(buf) => buf.builder.dtype().clone(),
854 #[cfg(feature = "dtype-categorical")]
855 Buffer::Categorical32(buf) => buf.builder.dtype().clone(),
856 }
857 }
858
859 #[inline]
860 pub fn add(
861 &mut self,
862 bytes: &[u8],
863 ignore_errors: bool,
864 needs_escaping: bool,
865 missing_is_null: bool,
866 ) -> PolarsResult<()> {
867 use Buffer::*;
868 match self {
869 Boolean(buf) => <BooleanChunkedBuilder as ParsedBuffer>::parse_bytes(
870 buf,
871 bytes,
872 ignore_errors,
873 needs_escaping,
874 missing_is_null,
875 None,
876 ),
877 #[cfg(feature = "dtype-i8")]
878 Int8(buf) => <PrimitiveChunkedBuilder<Int8Type> as ParsedBuffer>::parse_bytes(
879 buf,
880 bytes,
881 ignore_errors,
882 needs_escaping,
883 missing_is_null,
884 None,
885 ),
886 #[cfg(feature = "dtype-i16")]
887 Int16(buf) => <PrimitiveChunkedBuilder<Int16Type> as ParsedBuffer>::parse_bytes(
888 buf,
889 bytes,
890 ignore_errors,
891 needs_escaping,
892 missing_is_null,
893 None,
894 ),
895 Int32(buf) => <PrimitiveChunkedBuilder<Int32Type> as ParsedBuffer>::parse_bytes(
896 buf,
897 bytes,
898 ignore_errors,
899 needs_escaping,
900 missing_is_null,
901 None,
902 ),
903 Int64(buf) => <PrimitiveChunkedBuilder<Int64Type> as ParsedBuffer>::parse_bytes(
904 buf,
905 bytes,
906 ignore_errors,
907 needs_escaping,
908 missing_is_null,
909 None,
910 ),
911 #[cfg(feature = "dtype-i128")]
912 Int128(buf) => <PrimitiveChunkedBuilder<Int128Type> as ParsedBuffer>::parse_bytes(
913 buf,
914 bytes,
915 ignore_errors,
916 needs_escaping,
917 missing_is_null,
918 None,
919 ),
920 #[cfg(feature = "dtype-u8")]
921 UInt8(buf) => <PrimitiveChunkedBuilder<UInt8Type> as ParsedBuffer>::parse_bytes(
922 buf,
923 bytes,
924 ignore_errors,
925 needs_escaping,
926 missing_is_null,
927 None,
928 ),
929 #[cfg(feature = "dtype-u16")]
930 UInt16(buf) => <PrimitiveChunkedBuilder<UInt16Type> as ParsedBuffer>::parse_bytes(
931 buf,
932 bytes,
933 ignore_errors,
934 needs_escaping,
935 missing_is_null,
936 None,
937 ),
938 UInt32(buf) => <PrimitiveChunkedBuilder<UInt32Type> as ParsedBuffer>::parse_bytes(
939 buf,
940 bytes,
941 ignore_errors,
942 needs_escaping,
943 missing_is_null,
944 None,
945 ),
946 UInt64(buf) => <PrimitiveChunkedBuilder<UInt64Type> as ParsedBuffer>::parse_bytes(
947 buf,
948 bytes,
949 ignore_errors,
950 needs_escaping,
951 missing_is_null,
952 None,
953 ),
954 #[cfg(feature = "dtype-u128")]
955 UInt128(buf) => <PrimitiveChunkedBuilder<UInt128Type> as ParsedBuffer>::parse_bytes(
956 buf,
957 bytes,
958 ignore_errors,
959 needs_escaping,
960 missing_is_null,
961 None,
962 ),
963 Float32(buf) => <PrimitiveChunkedBuilder<Float32Type> as ParsedBuffer>::parse_bytes(
964 buf,
965 bytes,
966 ignore_errors,
967 needs_escaping,
968 missing_is_null,
969 None,
970 ),
971 Float64(buf) => <PrimitiveChunkedBuilder<Float64Type> as ParsedBuffer>::parse_bytes(
972 buf,
973 bytes,
974 ignore_errors,
975 needs_escaping,
976 missing_is_null,
977 None,
978 ),
979 DecimalFloat32(buf, scratch) => {
980 prepare_decimal_comma(bytes, scratch);
981 <PrimitiveChunkedBuilder<Float32Type> as ParsedBuffer>::parse_bytes(
982 buf,
983 scratch,
984 ignore_errors,
985 needs_escaping,
986 missing_is_null,
987 None,
988 )
989 },
990 DecimalFloat64(buf, scratch) => {
991 prepare_decimal_comma(bytes, scratch);
992 <PrimitiveChunkedBuilder<Float64Type> as ParsedBuffer>::parse_bytes(
993 buf,
994 scratch,
995 ignore_errors,
996 needs_escaping,
997 missing_is_null,
998 None,
999 )
1000 },
1001 #[cfg(feature = "dtype-decimal")]
1002 Decimal(buf) => <DecimalField as ParsedBuffer>::parse_bytes(
1003 buf,
1004 bytes,
1005 ignore_errors,
1006 needs_escaping,
1007 missing_is_null,
1008 None,
1009 ),
1010 Utf8(buf) => <Utf8Field as ParsedBuffer>::parse_bytes(
1011 buf,
1012 bytes,
1013 ignore_errors,
1014 needs_escaping,
1015 missing_is_null,
1016 None,
1017 ),
1018 #[cfg(feature = "dtype-datetime")]
1019 Datetime { buf, time_unit, .. } => {
1020 <DatetimeField<Int64Type> as ParsedBuffer>::parse_bytes(
1021 buf,
1022 bytes,
1023 ignore_errors,
1024 needs_escaping,
1025 missing_is_null,
1026 Some(*time_unit),
1027 )
1028 },
1029 #[cfg(feature = "dtype-date")]
1030 Date(buf) => <DatetimeField<Int32Type> as ParsedBuffer>::parse_bytes(
1031 buf,
1032 bytes,
1033 ignore_errors,
1034 needs_escaping,
1035 missing_is_null,
1036 None,
1037 ),
1038 #[cfg(feature = "dtype-categorical")]
1039 Categorical8(buf) => {
1040 buf.parse_bytes(bytes, ignore_errors, needs_escaping, missing_is_null, None)
1041 },
1042 #[cfg(feature = "dtype-categorical")]
1043 Categorical16(buf) => {
1044 buf.parse_bytes(bytes, ignore_errors, needs_escaping, missing_is_null, None)
1045 },
1046 #[cfg(feature = "dtype-categorical")]
1047 Categorical32(buf) => {
1048 buf.parse_bytes(bytes, ignore_errors, needs_escaping, missing_is_null, None)
1049 },
1050 }
1051 }
1052}
1053
1054#[inline]
1055fn prepare_decimal_comma(bytes: &[u8], scratch: &mut Vec<u8>) {
1056 scratch.clear();
1057 scratch.reserve(bytes.len());
1058
1059 for &byte in bytes {
1061 if byte == b',' {
1062 unsafe { scratch.push_unchecked(b'.') }
1063 } else {
1064 unsafe { scratch.push_unchecked(byte) }
1065 }
1066 }
1067}