1use arrow::array::MutableBinaryViewArray;
2#[cfg(feature = "dtype-decimal")]
3use polars_compute::decimal::str_to_dec128;
4#[cfg(feature = "dtype-categorical")]
5use polars_core::chunked_array::builder::CategoricalChunkedBuilder;
6use polars_core::prelude::*;
7use polars_error::to_compute_err;
8#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
9use polars_time::chunkedarray::string::Pattern;
10#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
11use polars_time::prelude::string::infer::{
12 DatetimeInfer, StrpTimeParser, TryFromWithUnit, infer_pattern_single,
13};
14#[cfg(feature = "dtype-f16")]
15use polars_utils::float16::pf16;
16use polars_utils::vec::PushUnchecked;
17
18use super::options::CsvEncoding;
19use super::parser::{could_be_whitespace_fast, skip_whitespace};
20use super::utils::escape_field;
21
/// Parses a raw CSV field into the native value of a numeric Polars type.
pub(crate) trait PrimitiveParser: PolarsNumericType {
    /// Attempts to parse `bytes` as `Self::Native`; `None` signals that the field
    /// could not be parsed.
    fn parse(bytes: &[u8]) -> Option<Self::Native>;
}
25
#[cfg(feature = "dtype-f16")]
impl PrimitiveParser for Float16Type {
    /// Parses the field as an `f32` and then converts that value to `pf16`.
    ///
    /// Returns `None` when the text cannot be parsed or the conversion yields `None`.
    #[inline]
    fn parse(bytes: &[u8]) -> Option<pf16> {
        use num_traits::FromPrimitive;

        let wide: f32 = fast_float2::parse(bytes).ok()?;
        pf16::from_f32(wide)
    }
}
35
36impl PrimitiveParser for Float32Type {
37 #[inline]
38 fn parse(bytes: &[u8]) -> Option<f32> {
39 fast_float2::parse(bytes).ok()
40 }
41}
42impl PrimitiveParser for Float64Type {
43 #[inline]
44 fn parse(bytes: &[u8]) -> Option<f64> {
45 fast_float2::parse(bytes).ok()
46 }
47}
48
#[cfg(feature = "dtype-u8")]
impl PrimitiveParser for UInt8Type {
    /// Parses the field as a `u8` with `atoi_simd::parse_skipped`; failure yields `None`.
    #[inline]
    fn parse(bytes: &[u8]) -> Option<u8> {
        let value: u8 = atoi_simd::parse_skipped(bytes).ok()?;
        Some(value)
    }
}
#[cfg(feature = "dtype-u16")]
impl PrimitiveParser for UInt16Type {
    /// Parses the field as a `u16` with `atoi_simd::parse_skipped`; failure yields `None`.
    #[inline]
    fn parse(bytes: &[u8]) -> Option<u16> {
        let value: u16 = atoi_simd::parse_skipped(bytes).ok()?;
        Some(value)
    }
}
63impl PrimitiveParser for UInt32Type {
64 #[inline]
65 fn parse(bytes: &[u8]) -> Option<u32> {
66 atoi_simd::parse_skipped(bytes).ok()
67 }
68}
69impl PrimitiveParser for UInt64Type {
70 #[inline]
71 fn parse(bytes: &[u8]) -> Option<u64> {
72 atoi_simd::parse_skipped(bytes).ok()
73 }
74}
#[cfg(feature = "dtype-u128")]
impl PrimitiveParser for UInt128Type {
    /// Parses the field as a `u128` with `atoi_simd::parse_skipped`; failure yields `None`.
    #[inline]
    fn parse(bytes: &[u8]) -> Option<u128> {
        let value: u128 = atoi_simd::parse_skipped(bytes).ok()?;
        Some(value)
    }
}
#[cfg(feature = "dtype-i8")]
impl PrimitiveParser for Int8Type {
    /// Parses the field as an `i8` with `atoi_simd::parse_skipped`; failure yields `None`.
    #[inline]
    fn parse(bytes: &[u8]) -> Option<i8> {
        let value: i8 = atoi_simd::parse_skipped(bytes).ok()?;
        Some(value)
    }
}
#[cfg(feature = "dtype-i16")]
impl PrimitiveParser for Int16Type {
    /// Parses the field as an `i16` with `atoi_simd::parse_skipped`; failure yields `None`.
    #[inline]
    fn parse(bytes: &[u8]) -> Option<i16> {
        let value: i16 = atoi_simd::parse_skipped(bytes).ok()?;
        Some(value)
    }
}
96impl PrimitiveParser for Int32Type {
97 #[inline]
98 fn parse(bytes: &[u8]) -> Option<i32> {
99 atoi_simd::parse_skipped(bytes).ok()
100 }
101}
102impl PrimitiveParser for Int64Type {
103 #[inline]
104 fn parse(bytes: &[u8]) -> Option<i64> {
105 atoi_simd::parse_skipped(bytes).ok()
106 }
107}
#[cfg(feature = "dtype-i128")]
impl PrimitiveParser for Int128Type {
    /// Parses the field as an `i128` with `atoi_simd::parse_skipped`; failure yields `None`.
    #[inline]
    fn parse(bytes: &[u8]) -> Option<i128> {
        let value: i128 = atoi_simd::parse_skipped(bytes).ok()?;
        Some(value)
    }
}
115
/// A column buffer that can parse one raw CSV field and append the outcome to itself.
trait ParsedBuffer {
    /// Parses `bytes` and appends the resulting value (or a null) to the buffer.
    ///
    /// * `bytes` - the raw field, still including its enclosing quotes when
    ///   `_needs_escaping` is set.
    /// * `ignore_errors` - when set, implementations generally append a null for a
    ///   value that fails to parse instead of returning an error.
    /// * `_needs_escaping` - the field is quoted; implementations strip the first and
    ///   last byte (and, for string-like buffers, unescape the content).
    /// * `_missing_is_null` - only consulted by buffers that distinguish an empty
    ///   field from a null one.
    /// * `_time_unit` - forwarded to temporal parsing; ignored by other buffers.
    fn parse_bytes(
        &mut self,
        bytes: &[u8],
        ignore_errors: bool,
        _needs_escaping: bool,
        _missing_is_null: bool,
        _time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()>;
}
126
127impl<T> ParsedBuffer for PrimitiveChunkedBuilder<T>
128where
129 T: PolarsNumericType + PrimitiveParser,
130{
131 #[inline]
132 fn parse_bytes(
133 &mut self,
134 mut bytes: &[u8],
135 ignore_errors: bool,
136 needs_escaping: bool,
137 _missing_is_null: bool,
138 _time_unit: Option<TimeUnit>,
139 ) -> PolarsResult<()> {
140 if !bytes.is_empty() && needs_escaping {
141 bytes = &bytes[1..bytes.len() - 1];
142 }
143
144 if !bytes.is_empty() && could_be_whitespace_fast(bytes[0]) {
145 bytes = skip_whitespace(bytes);
146 }
147
148 if bytes.is_empty() {
149 self.append_null();
150 return Ok(());
151 }
152
153 match T::parse(bytes) {
154 Some(value) => self.append_value(value),
155 None => {
156 if ignore_errors {
157 self.append_null()
158 } else {
159 polars_bail!(ComputeError: "invalid primitive value found during CSV parsing")
160 }
161 },
162 }
163 Ok(())
164 }
165}
166
/// Column buffer that accumulates CSV string fields as binary views.
pub struct Utf8Field {
    /// Column name handed to the resulting series.
    name: PlSmallStr,
    /// Values collected so far.
    mutable: MutableBinaryViewArray<[u8]>,
    /// Reusable buffer that receives the unescaped content of quoted fields.
    scratch: Vec<u8>,
    /// Quote byte; the constructor falls back to `"` when none is supplied.
    quote_char: u8,
    /// Encoding policy; `CsvEncoding::LossyUtf8` makes invalid UTF-8 get replaced
    /// lossily instead of being rejected.
    encoding: CsvEncoding,
}
174
175impl Utf8Field {
176 fn new(
177 name: PlSmallStr,
178 capacity: usize,
179 quote_char: Option<u8>,
180 encoding: CsvEncoding,
181 ) -> Self {
182 Self {
183 name,
184 mutable: MutableBinaryViewArray::with_capacity(capacity),
185 scratch: vec![],
186 quote_char: quote_char.unwrap_or(b'"'),
187 encoding,
188 }
189 }
190}
191
192#[inline]
193pub fn validate_utf8(bytes: &[u8]) -> bool {
194 simdutf8::basic::from_utf8(bytes).is_ok()
195}
196
197impl ParsedBuffer for Utf8Field {
198 #[inline]
199 fn parse_bytes(
200 &mut self,
201 bytes: &[u8],
202 ignore_errors: bool,
203 needs_escaping: bool,
204 missing_is_null: bool,
205 _time_unit: Option<TimeUnit>,
206 ) -> PolarsResult<()> {
207 if bytes.is_empty() {
208 if missing_is_null {
209 self.mutable.push_null()
210 } else {
211 self.mutable.push(Some([]))
212 }
213 return Ok(());
214 }
215
216 let escaped_bytes = if needs_escaping {
218 self.scratch.clear();
219 self.scratch.reserve(bytes.len());
220 polars_ensure!(bytes.len() > 1 && bytes.last() == Some(&self.quote_char), ComputeError: "invalid csv file\n\nField `{}` is not properly escaped.", std::str::from_utf8(bytes).map_err(to_compute_err)?);
221
222 unsafe {
225 let n_written =
226 escape_field(bytes, self.quote_char, self.scratch.spare_capacity_mut());
227 self.scratch.set_len(n_written);
228 }
229
230 self.scratch.as_slice()
231 } else {
232 bytes
233 };
234
235 if matches!(self.encoding, CsvEncoding::LossyUtf8) | ignore_errors {
236 let parse_result = validate_utf8(escaped_bytes);
239
240 match parse_result {
241 true => {
242 let value = escaped_bytes;
243 self.mutable.push_value(value)
244 },
245 false => {
246 if matches!(self.encoding, CsvEncoding::LossyUtf8) {
247 let s = String::from_utf8_lossy(escaped_bytes);
249 self.mutable.push_value(s.as_ref().as_bytes())
250 } else if ignore_errors {
251 self.mutable.push_null()
252 } else {
253 if needs_escaping && validate_utf8(bytes) {
255 polars_bail!(ComputeError: "string field is not properly escaped");
256 } else {
257 polars_bail!(ComputeError: "invalid utf-8 sequence");
258 }
259 }
260 },
261 }
262 } else {
263 self.mutable.push_value(escaped_bytes)
264 }
265
266 Ok(())
267 }
268}
269
/// Column buffer that feeds CSV string fields into a categorical builder.
#[cfg(feature = "dtype-categorical")]
pub struct CategoricalField<T: PolarsCategoricalType> {
    /// Reusable buffer that receives the unescaped content of quoted fields.
    escape_scratch: Vec<u8>,
    /// Quote byte; the constructor falls back to `"` when none is supplied.
    quote_char: u8,
    /// Builder that collects the categorical values.
    builder: CategoricalChunkedBuilder<T>,
}
276
#[cfg(feature = "dtype-categorical")]
impl<T: PolarsCategoricalType> CategoricalField<T> {
    /// Creates a categorical buffer for column `name` of type `dtype`, reserving
    /// room for `capacity` values. A missing `quote_char` defaults to `"`.
    fn new(name: PlSmallStr, capacity: usize, quote_char: Option<u8>, dtype: DataType) -> Self {
        let mut builder = CategoricalChunkedBuilder::new(name, dtype);
        builder.reserve(capacity);

        Self {
            escape_scratch: vec![],
            quote_char: quote_char.unwrap_or(b'"'),
            builder,
        }
    }

    /// Appends one field to the categorical builder.
    ///
    /// An empty field becomes null. Invalid UTF-8 becomes null when `ignore_errors`
    /// is set. Quoted fields (`needs_escaping`) are unescaped before being appended.
    ///
    /// # Errors
    ///
    /// Returns a `ComputeError` for invalid UTF-8 (unless `ignore_errors` is set) and
    /// for a quoted field that is shorter than two bytes or does not end with the
    /// quote byte. Errors from the builder's `append_str` are propagated.
    #[inline]
    fn parse_bytes(
        &mut self,
        bytes: &[u8],
        ignore_errors: bool,
        needs_escaping: bool,
        _missing_is_null: bool,
        _time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()> {
        if bytes.is_empty() {
            self.builder.append_null();
            return Ok(());
        }

        if !validate_utf8(bytes) {
            if ignore_errors {
                self.builder.append_null();
                return Ok(());
            }
            polars_bail!(ComputeError: "invalid utf-8 sequence");
        }

        if needs_escaping {
            // Unescaping drops the first and the last byte. Requiring the last byte
            // to be the quote (as `Utf8Field` does) keeps a malformed field such as
            // `"abc"é` from having a multi-byte character cut in half, which would
            // make the unchecked UTF-8 conversion below unsound.
            polars_ensure!(bytes.len() > 1 && bytes.last() == Some(&self.quote_char), ComputeError: "invalid csv file\n\nField `{}` is not properly escaped.", std::str::from_utf8(bytes).map_err(to_compute_err)?);

            self.escape_scratch.clear();
            self.escape_scratch.reserve(bytes.len());

            // SAFETY: the scratch buffer is empty and has at least `bytes.len()` bytes
            // of spare capacity. This relies on `escape_field` writing no more bytes
            // than the input holds and returning the number of bytes it initialised —
            // its definition is not visible here, TODO confirm.
            unsafe {
                let n_written = escape_field(
                    bytes,
                    self.quote_char,
                    self.escape_scratch.spare_capacity_mut(),
                );
                self.escape_scratch.set_len(n_written);
            }

            // SAFETY: `bytes` was validated above and ends with the quote byte.
            // Assumes the quote is an ASCII byte and that the parser only flags
            // fields that also start with it, so only whole quote bytes were
            // removed — TODO confirm against the caller.
            let key = unsafe { std::str::from_utf8_unchecked(&self.escape_scratch) };
            self.builder.append_str(key)?;
        } else {
            // SAFETY: `bytes` passed `validate_utf8` above.
            let key = unsafe { std::str::from_utf8_unchecked(bytes) };
            self.builder.append_str(key)?;
        }

        Ok(())
    }
}
337
338impl ParsedBuffer for BooleanChunkedBuilder {
339 #[inline]
340 fn parse_bytes(
341 &mut self,
342 bytes: &[u8],
343 ignore_errors: bool,
344 needs_escaping: bool,
345 _missing_is_null: bool,
346 _time_unit: Option<TimeUnit>,
347 ) -> PolarsResult<()> {
348 let bytes = if needs_escaping {
349 &bytes[1..bytes.len() - 1]
350 } else {
351 bytes
352 };
353 if bytes.eq_ignore_ascii_case(b"false") {
354 self.append_value(false);
355 } else if bytes.eq_ignore_ascii_case(b"true") {
356 self.append_value(true);
357 } else if ignore_errors || bytes.is_empty() {
358 self.append_null();
359 } else {
360 polars_bail!(
361 ComputeError: "error while parsing value {} as boolean",
362 String::from_utf8_lossy(bytes),
363 );
364 }
365 Ok(())
366 }
367}
368
/// Column buffer that parses decimal CSV fields into 128-bit integer values.
#[cfg(feature = "dtype-decimal")]
pub struct DecimalField {
    /// Builder holding the parsed `i128` values.
    builder: PrimitiveChunkedBuilder<Int128Type>,
    /// Precision passed to `str_to_dec128` and used for the resulting decimal dtype.
    precision: usize,
    /// Scale passed to `str_to_dec128` and used for the resulting decimal dtype.
    scale: usize,
    /// Passed to `str_to_dec128`; presumably selects `,` as the decimal separator.
    decimal_comma: bool,
}
376
#[cfg(feature = "dtype-decimal")]
impl DecimalField {
    /// Creates a decimal buffer for column `name` with room for `capacity` values,
    /// remembering the `precision`, `scale` and `decimal_comma` settings.
    fn new(
        name: PlSmallStr,
        capacity: usize,
        precision: usize,
        scale: usize,
        decimal_comma: bool,
    ) -> Self {
        Self {
            builder: PrimitiveChunkedBuilder::<Int128Type>::new(name, capacity),
            precision,
            scale,
            decimal_comma,
        }
    }
}
395
#[cfg(feature = "dtype-decimal")]
impl ParsedBuffer for DecimalField {
    /// Parses a decimal CSV field with `str_to_dec128` and appends it to the builder.
    ///
    /// Quoted fields have their enclosing quote bytes removed, leading whitespace is
    /// skipped through `skip_whitespace`, and an empty remainder is stored as null.
    ///
    /// # Errors
    ///
    /// Returns a `ComputeError` when the value cannot be parsed and `ignore_errors`
    /// is `false`; with `ignore_errors` set a null is appended instead.
    #[inline]
    fn parse_bytes(
        &mut self,
        mut bytes: &[u8],
        ignore_errors: bool,
        needs_escaping: bool,
        _missing_is_null: bool,
        _time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()> {
        // Only strip the quotes when both of them can be present. A one-byte field
        // flagged as quoted would otherwise slice `1..0` and panic; it is left as-is
        // so that it is reported through the regular parse-failure path below.
        if needs_escaping && bytes.len() >= 2 {
            bytes = &bytes[1..bytes.len() - 1];
        }

        if !bytes.is_empty() && could_be_whitespace_fast(bytes[0]) {
            bytes = skip_whitespace(bytes);
        }

        if bytes.is_empty() {
            self.builder.append_null();
            return Ok(());
        }

        match str_to_dec128(bytes, self.precision, self.scale, self.decimal_comma) {
            Some(value) => self.builder.append_value(value),
            None if ignore_errors => self.builder.append_null(),
            None => {
                polars_bail!(ComputeError: "invalid decimal value found during CSV parsing")
            },
        }

        Ok(())
    }
}
434
/// Column buffer for date/datetime fields that caches the parser it inferred.
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
pub struct DatetimeField<T: PolarsNumericType> {
    /// Parser that last succeeded; `None` until a value has been parsed.
    compiled: Option<DatetimeInfer<T>>,
    /// Builder holding the parsed physical values.
    builder: PrimitiveChunkedBuilder<T>,
}
440
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
impl<T: PolarsNumericType> DatetimeField<T> {
    /// Creates a temporal buffer for column `name` with room for `capacity` values
    /// and no cached parser yet.
    fn new(name: PlSmallStr, capacity: usize) -> Self {
        Self {
            compiled: None,
            builder: PrimitiveChunkedBuilder::<T>::new(name, capacity),
        }
    }
}
451
/// Parses one temporal field by (re)building a parser for its pattern.
///
/// The pattern of the cached parser is reused when there is one; otherwise it is
/// inferred from the value itself. On success the freshly built parser is cached in
/// `buf.compiled` and the parsed value is appended.
///
/// # Errors
///
/// With `ignore_errors` unset, returns an error for invalid UTF-8, for a value
/// without an inferable pattern, for a parser that cannot be built, and for a value
/// the parser rejects. With `ignore_errors` set, each of these appends a null.
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
fn slow_datetime_parser<T>(
    buf: &mut DatetimeField<T>,
    bytes: &[u8],
    time_unit: Option<TimeUnit>,
    ignore_errors: bool,
) -> PolarsResult<()>
where
    T: PolarsNumericType,
    DatetimeInfer<T>: TryFromWithUnit<Pattern>,
{
    let val = if bytes.is_ascii() {
        // SAFETY: pure ASCII is always valid UTF-8.
        unsafe { std::str::from_utf8_unchecked(bytes) }
    } else {
        match std::str::from_utf8(bytes) {
            Ok(text) => text,
            Err(_) if ignore_errors => {
                buf.builder.append_null();
                return Ok(());
            },
            Err(_) => polars_bail!(ComputeError: "invalid utf-8 sequence"),
        }
    };

    // Prefer the pattern that is already cached; infer one only when needed.
    let pattern = if let Some(compiled) = &buf.compiled {
        compiled.pattern
    } else if let Some(inferred) = infer_pattern_single(val) {
        inferred
    } else if ignore_errors {
        buf.builder.append_null();
        return Ok(());
    } else {
        polars_bail!(ComputeError: "could not find a 'date/datetime' pattern for '{}'", val)
    };

    let mut infer: DatetimeInfer<T> = match DatetimeInfer::try_from_with_unit(pattern, time_unit)
    {
        Ok(infer) => infer,
        Err(_) if ignore_errors => {
            buf.builder.append_null();
            return Ok(());
        },
        Err(err) => return Err(err),
    };

    match infer.parse(val) {
        Some(parsed) => {
            buf.compiled = Some(infer);
            buf.builder.append_value(parsed);
            Ok(())
        },
        None if ignore_errors => {
            buf.builder.append_null();
            Ok(())
        },
        None => {
            polars_bail!(ComputeError: "could not parse '{}' with pattern '{:?}'", val, pattern)
        },
    }
}
521
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
impl<T> ParsedBuffer for DatetimeField<T>
where
    T: PolarsNumericType,
    DatetimeInfer<T>: TryFromWithUnit<Pattern> + StrpTimeParser<T::Native>,
{
    /// Parses a date/datetime CSV field and appends it to the builder.
    ///
    /// Quotes are stripped from quoted fields of at least two bytes and an empty
    /// field becomes null. The cached parser is tried first; when there is none, or
    /// it rejects the value, `slow_datetime_parser` takes over.
    #[inline]
    fn parse_bytes(
        &mut self,
        mut bytes: &[u8],
        ignore_errors: bool,
        needs_escaping: bool,
        _missing_is_null: bool,
        time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()> {
        if needs_escaping && bytes.len() >= 2 {
            bytes = &bytes[1..bytes.len() - 1]
        }

        if bytes.is_empty() {
            self.builder.append_null();
            return Ok(());
        }

        // Fast path: reuse the parser that worked for an earlier value.
        if let Some(compiled) = &mut self.compiled {
            if let Some(parsed) = compiled.parse_bytes(bytes, time_unit) {
                self.builder.append_value(parsed);
                return Ok(());
            }
        }

        slow_datetime_parser(self, bytes, time_unit, ignore_errors)
    }
}
564
565pub fn init_buffers(
566 projection: &[usize],
567 capacity: usize,
568 schema: &Schema,
569 quote_char: Option<u8>,
570 encoding: CsvEncoding,
571 decimal_comma: bool,
572) -> PolarsResult<Vec<Buffer>> {
573 projection
574 .iter()
575 .map(|&i| {
576 let (name, dtype) = schema.get_at_index(i).unwrap();
577 let name = name.clone();
578 let builder = match dtype {
579 &DataType::Boolean => Buffer::Boolean(BooleanChunkedBuilder::new(name, capacity)),
580 #[cfg(feature = "dtype-i8")]
581 &DataType::Int8 => Buffer::Int8(PrimitiveChunkedBuilder::new(name, capacity)),
582 #[cfg(feature = "dtype-i16")]
583 &DataType::Int16 => Buffer::Int16(PrimitiveChunkedBuilder::new(name, capacity)),
584 &DataType::Int32 => Buffer::Int32(PrimitiveChunkedBuilder::new(name, capacity)),
585 &DataType::Int64 => Buffer::Int64(PrimitiveChunkedBuilder::new(name, capacity)),
586 #[cfg(feature = "dtype-i128")]
587 &DataType::Int128 => Buffer::Int128(PrimitiveChunkedBuilder::new(name, capacity)),
588 #[cfg(feature = "dtype-u8")]
589 &DataType::UInt8 => Buffer::UInt8(PrimitiveChunkedBuilder::new(name, capacity)),
590 #[cfg(feature = "dtype-u16")]
591 &DataType::UInt16 => Buffer::UInt16(PrimitiveChunkedBuilder::new(name, capacity)),
592 &DataType::UInt32 => Buffer::UInt32(PrimitiveChunkedBuilder::new(name, capacity)),
593 &DataType::UInt64 => Buffer::UInt64(PrimitiveChunkedBuilder::new(name, capacity)),
594 #[cfg(feature = "dtype-u128")]
595 &DataType::UInt128 => Buffer::UInt128(PrimitiveChunkedBuilder::new(name, capacity)),
596 #[cfg(feature = "dtype-f16")]
597 &DataType::Float16 => {
598 if decimal_comma {
599 Buffer::DecimalFloat16(
600 PrimitiveChunkedBuilder::new(name, capacity),
601 Default::default(),
602 )
603 } else {
604 Buffer::Float16(PrimitiveChunkedBuilder::new(name, capacity))
605 }
606 },
607 &DataType::Float32 => {
608 if decimal_comma {
609 Buffer::DecimalFloat32(
610 PrimitiveChunkedBuilder::new(name, capacity),
611 Default::default(),
612 )
613 } else {
614 Buffer::Float32(PrimitiveChunkedBuilder::new(name, capacity))
615 }
616 },
617 &DataType::Float64 => {
618 if decimal_comma {
619 Buffer::DecimalFloat64(
620 PrimitiveChunkedBuilder::new(name, capacity),
621 Default::default(),
622 )
623 } else {
624 Buffer::Float64(PrimitiveChunkedBuilder::new(name, capacity))
625 }
626 },
627 #[cfg(feature = "dtype-decimal")]
628 &DataType::Decimal(precision, scale) => Buffer::Decimal(DecimalField::new(
629 name,
630 capacity,
631 precision,
632 scale,
633 decimal_comma,
634 )),
635 &DataType::String => {
636 Buffer::Utf8(Utf8Field::new(name, capacity, quote_char, encoding))
637 },
638 #[cfg(feature = "dtype-datetime")]
639 DataType::Datetime(time_unit, time_zone) => Buffer::Datetime {
640 buf: DatetimeField::new(name, capacity),
641 time_unit: *time_unit,
642 time_zone: time_zone.clone(),
643 },
644 #[cfg(feature = "dtype-date")]
645 &DataType::Date => Buffer::Date(DatetimeField::new(name, capacity)),
646 #[cfg(feature = "dtype-categorical")]
647 DataType::Categorical(_, _) | DataType::Enum(_, _) => {
648 match dtype.cat_physical().unwrap() {
649 CategoricalPhysical::U8 => {
650 Buffer::Categorical8(CategoricalField::<Categorical8Type>::new(
651 name,
652 capacity,
653 quote_char,
654 dtype.clone(),
655 ))
656 },
657 CategoricalPhysical::U16 => {
658 Buffer::Categorical16(CategoricalField::<Categorical16Type>::new(
659 name,
660 capacity,
661 quote_char,
662 dtype.clone(),
663 ))
664 },
665 CategoricalPhysical::U32 => {
666 Buffer::Categorical32(CategoricalField::<Categorical32Type>::new(
667 name,
668 capacity,
669 quote_char,
670 dtype.clone(),
671 ))
672 },
673 }
674 },
675 dt => polars_bail!(
676 ComputeError: "unsupported data type when reading CSV: {} when reading CSV", dt,
677 ),
678 };
679 Ok(builder)
680 })
681 .collect()
682}
683
/// A typed column buffer used while parsing CSV; one variant per supported dtype.
#[allow(clippy::large_enum_variant)]
pub enum Buffer {
    Boolean(BooleanChunkedBuilder),
    #[cfg(feature = "dtype-i8")]
    Int8(PrimitiveChunkedBuilder<Int8Type>),
    #[cfg(feature = "dtype-i16")]
    Int16(PrimitiveChunkedBuilder<Int16Type>),
    Int32(PrimitiveChunkedBuilder<Int32Type>),
    Int64(PrimitiveChunkedBuilder<Int64Type>),
    #[cfg(feature = "dtype-i128")]
    Int128(PrimitiveChunkedBuilder<Int128Type>),
    #[cfg(feature = "dtype-u8")]
    UInt8(PrimitiveChunkedBuilder<UInt8Type>),
    #[cfg(feature = "dtype-u16")]
    UInt16(PrimitiveChunkedBuilder<UInt16Type>),
    UInt32(PrimitiveChunkedBuilder<UInt32Type>),
    UInt64(PrimitiveChunkedBuilder<UInt64Type>),
    #[cfg(feature = "dtype-u128")]
    UInt128(PrimitiveChunkedBuilder<UInt128Type>),
    #[cfg(feature = "dtype-f16")]
    Float16(PrimitiveChunkedBuilder<Float16Type>),
    Float32(PrimitiveChunkedBuilder<Float32Type>),
    Float64(PrimitiveChunkedBuilder<Float64Type>),
    /// Fixed-precision decimal values stored as `i128`.
    #[cfg(feature = "dtype-decimal")]
    Decimal(DecimalField),
    /// String values.
    Utf8(Utf8Field),
    /// Datetime values together with the time unit and time zone of the column.
    #[cfg(feature = "dtype-datetime")]
    Datetime {
        buf: DatetimeField<Int64Type>,
        time_unit: TimeUnit,
        time_zone: Option<TimeZone>,
    },
    #[cfg(feature = "dtype-date")]
    Date(DatetimeField<Int32Type>),
    #[cfg(feature = "dtype-categorical")]
    Categorical8(CategoricalField<Categorical8Type>),
    #[cfg(feature = "dtype-categorical")]
    Categorical16(CategoricalField<Categorical16Type>),
    #[cfg(feature = "dtype-categorical")]
    Categorical32(CategoricalField<Categorical32Type>),
    /// Float buffers for input that uses `,` as decimal separator; the `Vec<u8>` is a
    /// scratch buffer that receives the field with commas replaced by dots.
    #[cfg(feature = "dtype-f16")]
    DecimalFloat16(PrimitiveChunkedBuilder<Float16Type>, Vec<u8>),
    DecimalFloat32(PrimitiveChunkedBuilder<Float32Type>, Vec<u8>),
    DecimalFloat64(PrimitiveChunkedBuilder<Float64Type>, Vec<u8>),
}
730
731impl Buffer {
732 pub fn into_series(self) -> PolarsResult<Series> {
733 let s = match self {
734 Buffer::Boolean(v) => v.finish().into_series(),
735 #[cfg(feature = "dtype-i8")]
736 Buffer::Int8(v) => v.finish().into_series(),
737 #[cfg(feature = "dtype-i16")]
738 Buffer::Int16(v) => v.finish().into_series(),
739 Buffer::Int32(v) => v.finish().into_series(),
740 Buffer::Int64(v) => v.finish().into_series(),
741 #[cfg(feature = "dtype-i128")]
742 Buffer::Int128(v) => v.finish().into_series(),
743 #[cfg(feature = "dtype-u8")]
744 Buffer::UInt8(v) => v.finish().into_series(),
745 #[cfg(feature = "dtype-u16")]
746 Buffer::UInt16(v) => v.finish().into_series(),
747 Buffer::UInt32(v) => v.finish().into_series(),
748 Buffer::UInt64(v) => v.finish().into_series(),
749 #[cfg(feature = "dtype-u128")]
750 Buffer::UInt128(v) => v.finish().into_series(),
751 #[cfg(feature = "dtype-f16")]
752 Buffer::Float16(v) => v.finish().into_series(),
753 Buffer::Float32(v) => v.finish().into_series(),
754 Buffer::Float64(v) => v.finish().into_series(),
755 #[cfg(feature = "dtype-f16")]
756 Buffer::DecimalFloat16(v, _) => v.finish().into_series(),
757 Buffer::DecimalFloat32(v, _) => v.finish().into_series(),
758 Buffer::DecimalFloat64(v, _) => v.finish().into_series(),
759 #[cfg(feature = "dtype-decimal")]
760 Buffer::Decimal(DecimalField {
761 builder,
762 precision,
763 scale,
764 ..
765 }) => unsafe {
766 builder
767 .finish()
768 .into_series()
769 .from_physical_unchecked(&DataType::Decimal(precision, scale))
770 .unwrap()
771 },
772 #[cfg(feature = "dtype-datetime")]
773 Buffer::Datetime {
774 buf,
775 time_unit,
776 time_zone,
777 } => buf
778 .builder
779 .finish()
780 .into_series()
781 .cast(&DataType::Datetime(time_unit, time_zone))
782 .unwrap(),
783 #[cfg(feature = "dtype-date")]
784 Buffer::Date(v) => v
785 .builder
786 .finish()
787 .into_series()
788 .cast(&DataType::Date)
789 .unwrap(),
790
791 Buffer::Utf8(v) => {
792 let arr = v.mutable.freeze();
793 StringChunked::with_chunk(v.name, unsafe { arr.to_utf8view_unchecked() })
794 .into_series()
795 },
796 #[cfg(feature = "dtype-categorical")]
797 Buffer::Categorical8(buf) => buf.builder.finish().into_series(),
798 #[cfg(feature = "dtype-categorical")]
799 Buffer::Categorical16(buf) => buf.builder.finish().into_series(),
800 #[cfg(feature = "dtype-categorical")]
801 Buffer::Categorical32(buf) => buf.builder.finish().into_series(),
802 };
803 Ok(s)
804 }
805
806 pub fn add_null(&mut self, valid: bool) {
807 match self {
808 Buffer::Boolean(v) => v.append_null(),
809 #[cfg(feature = "dtype-i8")]
810 Buffer::Int8(v) => v.append_null(),
811 #[cfg(feature = "dtype-i16")]
812 Buffer::Int16(v) => v.append_null(),
813 Buffer::Int32(v) => v.append_null(),
814 Buffer::Int64(v) => v.append_null(),
815 #[cfg(feature = "dtype-i128")]
816 Buffer::Int128(v) => v.append_null(),
817 #[cfg(feature = "dtype-u8")]
818 Buffer::UInt8(v) => v.append_null(),
819 #[cfg(feature = "dtype-u16")]
820 Buffer::UInt16(v) => v.append_null(),
821 Buffer::UInt32(v) => v.append_null(),
822 Buffer::UInt64(v) => v.append_null(),
823 #[cfg(feature = "dtype-u128")]
824 Buffer::UInt128(v) => v.append_null(),
825 #[cfg(feature = "dtype-f16")]
826 Buffer::Float16(v) => v.append_null(),
827 Buffer::Float32(v) => v.append_null(),
828 Buffer::Float64(v) => v.append_null(),
829 #[cfg(feature = "dtype-decimal")]
830 Buffer::Decimal(buf) => buf.builder.append_null(),
831 #[cfg(feature = "dtype-f16")]
832 Buffer::DecimalFloat16(v, _) => v.append_null(),
833 Buffer::DecimalFloat32(v, _) => v.append_null(),
834 Buffer::DecimalFloat64(v, _) => v.append_null(),
835 Buffer::Utf8(v) => {
836 if valid {
837 v.mutable.push_value("")
838 } else {
839 v.mutable.push_null()
840 }
841 },
842 #[cfg(feature = "dtype-datetime")]
843 Buffer::Datetime { buf, .. } => buf.builder.append_null(),
844 #[cfg(feature = "dtype-date")]
845 Buffer::Date(v) => v.builder.append_null(),
846 #[cfg(feature = "dtype-categorical")]
847 Buffer::Categorical8(buf) => buf.builder.append_null(),
848 #[cfg(feature = "dtype-categorical")]
849 Buffer::Categorical16(buf) => buf.builder.append_null(),
850 #[cfg(feature = "dtype-categorical")]
851 Buffer::Categorical32(buf) => buf.builder.append_null(),
852 };
853 }
854
855 pub fn dtype(&self) -> DataType {
856 match self {
857 Buffer::Boolean(_) => DataType::Boolean,
858 #[cfg(feature = "dtype-i8")]
859 Buffer::Int8(_) => DataType::Int8,
860 #[cfg(feature = "dtype-i16")]
861 Buffer::Int16(_) => DataType::Int16,
862 Buffer::Int32(_) => DataType::Int32,
863 Buffer::Int64(_) => DataType::Int64,
864 #[cfg(feature = "dtype-i128")]
865 Buffer::Int128(_) => DataType::Int128,
866 #[cfg(feature = "dtype-u8")]
867 Buffer::UInt8(_) => DataType::UInt8,
868 #[cfg(feature = "dtype-u16")]
869 Buffer::UInt16(_) => DataType::UInt16,
870 Buffer::UInt32(_) => DataType::UInt32,
871 Buffer::UInt64(_) => DataType::UInt64,
872 #[cfg(feature = "dtype-u128")]
873 Buffer::UInt128(_) => DataType::UInt128,
874 #[cfg(feature = "dtype-f16")]
875 Buffer::Float16(_) | Buffer::DecimalFloat16(_, _) => DataType::Float16,
876 Buffer::Float32(_) | Buffer::DecimalFloat32(_, _) => DataType::Float32,
877 Buffer::Float64(_) | Buffer::DecimalFloat64(_, _) => DataType::Float64,
878 #[cfg(feature = "dtype-decimal")]
879 Buffer::Decimal(DecimalField {
880 precision, scale, ..
881 }) => DataType::Decimal(*precision, *scale),
882 Buffer::Utf8(_) => DataType::String,
883 #[cfg(feature = "dtype-datetime")]
884 Buffer::Datetime { time_unit, .. } => DataType::Datetime(*time_unit, None),
885 #[cfg(feature = "dtype-date")]
886 Buffer::Date(_) => DataType::Date,
887 #[cfg(feature = "dtype-categorical")]
888 Buffer::Categorical8(buf) => buf.builder.dtype().clone(),
889 #[cfg(feature = "dtype-categorical")]
890 Buffer::Categorical16(buf) => buf.builder.dtype().clone(),
891 #[cfg(feature = "dtype-categorical")]
892 Buffer::Categorical32(buf) => buf.builder.dtype().clone(),
893 }
894 }
895
896 #[inline]
897 pub fn add(
898 &mut self,
899 bytes: &[u8],
900 ignore_errors: bool,
901 needs_escaping: bool,
902 missing_is_null: bool,
903 ) -> PolarsResult<()> {
904 use Buffer::*;
905 match self {
906 Boolean(buf) => <BooleanChunkedBuilder as ParsedBuffer>::parse_bytes(
907 buf,
908 bytes,
909 ignore_errors,
910 needs_escaping,
911 missing_is_null,
912 None,
913 ),
914 #[cfg(feature = "dtype-i8")]
915 Int8(buf) => <PrimitiveChunkedBuilder<Int8Type> as ParsedBuffer>::parse_bytes(
916 buf,
917 bytes,
918 ignore_errors,
919 needs_escaping,
920 missing_is_null,
921 None,
922 ),
923 #[cfg(feature = "dtype-i16")]
924 Int16(buf) => <PrimitiveChunkedBuilder<Int16Type> as ParsedBuffer>::parse_bytes(
925 buf,
926 bytes,
927 ignore_errors,
928 needs_escaping,
929 missing_is_null,
930 None,
931 ),
932 Int32(buf) => <PrimitiveChunkedBuilder<Int32Type> as ParsedBuffer>::parse_bytes(
933 buf,
934 bytes,
935 ignore_errors,
936 needs_escaping,
937 missing_is_null,
938 None,
939 ),
940 Int64(buf) => <PrimitiveChunkedBuilder<Int64Type> as ParsedBuffer>::parse_bytes(
941 buf,
942 bytes,
943 ignore_errors,
944 needs_escaping,
945 missing_is_null,
946 None,
947 ),
948 #[cfg(feature = "dtype-i128")]
949 Int128(buf) => <PrimitiveChunkedBuilder<Int128Type> as ParsedBuffer>::parse_bytes(
950 buf,
951 bytes,
952 ignore_errors,
953 needs_escaping,
954 missing_is_null,
955 None,
956 ),
957 #[cfg(feature = "dtype-u8")]
958 UInt8(buf) => <PrimitiveChunkedBuilder<UInt8Type> as ParsedBuffer>::parse_bytes(
959 buf,
960 bytes,
961 ignore_errors,
962 needs_escaping,
963 missing_is_null,
964 None,
965 ),
966 #[cfg(feature = "dtype-u16")]
967 UInt16(buf) => <PrimitiveChunkedBuilder<UInt16Type> as ParsedBuffer>::parse_bytes(
968 buf,
969 bytes,
970 ignore_errors,
971 needs_escaping,
972 missing_is_null,
973 None,
974 ),
975 UInt32(buf) => <PrimitiveChunkedBuilder<UInt32Type> as ParsedBuffer>::parse_bytes(
976 buf,
977 bytes,
978 ignore_errors,
979 needs_escaping,
980 missing_is_null,
981 None,
982 ),
983 UInt64(buf) => <PrimitiveChunkedBuilder<UInt64Type> as ParsedBuffer>::parse_bytes(
984 buf,
985 bytes,
986 ignore_errors,
987 needs_escaping,
988 missing_is_null,
989 None,
990 ),
991 #[cfg(feature = "dtype-u128")]
992 UInt128(buf) => <PrimitiveChunkedBuilder<UInt128Type> as ParsedBuffer>::parse_bytes(
993 buf,
994 bytes,
995 ignore_errors,
996 needs_escaping,
997 missing_is_null,
998 None,
999 ),
1000 #[cfg(feature = "dtype-f16")]
1001 Float16(buf) => <PrimitiveChunkedBuilder<Float16Type> as ParsedBuffer>::parse_bytes(
1002 buf,
1003 bytes,
1004 ignore_errors,
1005 needs_escaping,
1006 missing_is_null,
1007 None,
1008 ),
1009 Float32(buf) => <PrimitiveChunkedBuilder<Float32Type> as ParsedBuffer>::parse_bytes(
1010 buf,
1011 bytes,
1012 ignore_errors,
1013 needs_escaping,
1014 missing_is_null,
1015 None,
1016 ),
1017 Float64(buf) => <PrimitiveChunkedBuilder<Float64Type> as ParsedBuffer>::parse_bytes(
1018 buf,
1019 bytes,
1020 ignore_errors,
1021 needs_escaping,
1022 missing_is_null,
1023 None,
1024 ),
1025 #[cfg(feature = "dtype-f16")]
1026 DecimalFloat16(buf, scratch) => {
1027 prepare_decimal_comma(bytes, scratch);
1028 <PrimitiveChunkedBuilder<Float16Type> as ParsedBuffer>::parse_bytes(
1029 buf,
1030 scratch,
1031 ignore_errors,
1032 needs_escaping,
1033 missing_is_null,
1034 None,
1035 )
1036 },
1037 DecimalFloat32(buf, scratch) => {
1038 prepare_decimal_comma(bytes, scratch);
1039 <PrimitiveChunkedBuilder<Float32Type> as ParsedBuffer>::parse_bytes(
1040 buf,
1041 scratch,
1042 ignore_errors,
1043 needs_escaping,
1044 missing_is_null,
1045 None,
1046 )
1047 },
1048 DecimalFloat64(buf, scratch) => {
1049 prepare_decimal_comma(bytes, scratch);
1050 <PrimitiveChunkedBuilder<Float64Type> as ParsedBuffer>::parse_bytes(
1051 buf,
1052 scratch,
1053 ignore_errors,
1054 needs_escaping,
1055 missing_is_null,
1056 None,
1057 )
1058 },
1059 #[cfg(feature = "dtype-decimal")]
1060 Decimal(buf) => <DecimalField as ParsedBuffer>::parse_bytes(
1061 buf,
1062 bytes,
1063 ignore_errors,
1064 needs_escaping,
1065 missing_is_null,
1066 None,
1067 ),
1068 Utf8(buf) => <Utf8Field as ParsedBuffer>::parse_bytes(
1069 buf,
1070 bytes,
1071 ignore_errors,
1072 needs_escaping,
1073 missing_is_null,
1074 None,
1075 ),
1076 #[cfg(feature = "dtype-datetime")]
1077 Datetime { buf, time_unit, .. } => {
1078 <DatetimeField<Int64Type> as ParsedBuffer>::parse_bytes(
1079 buf,
1080 bytes,
1081 ignore_errors,
1082 needs_escaping,
1083 missing_is_null,
1084 Some(*time_unit),
1085 )
1086 },
1087 #[cfg(feature = "dtype-date")]
1088 Date(buf) => <DatetimeField<Int32Type> as ParsedBuffer>::parse_bytes(
1089 buf,
1090 bytes,
1091 ignore_errors,
1092 needs_escaping,
1093 missing_is_null,
1094 None,
1095 ),
1096 #[cfg(feature = "dtype-categorical")]
1097 Categorical8(buf) => {
1098 buf.parse_bytes(bytes, ignore_errors, needs_escaping, missing_is_null, None)
1099 },
1100 #[cfg(feature = "dtype-categorical")]
1101 Categorical16(buf) => {
1102 buf.parse_bytes(bytes, ignore_errors, needs_escaping, missing_is_null, None)
1103 },
1104 #[cfg(feature = "dtype-categorical")]
1105 Categorical32(buf) => {
1106 buf.parse_bytes(bytes, ignore_errors, needs_escaping, missing_is_null, None)
1107 },
1108 }
1109 }
1110}
1111
1112#[inline]
1113fn prepare_decimal_comma(bytes: &[u8], scratch: &mut Vec<u8>) {
1114 scratch.clear();
1115 scratch.reserve(bytes.len());
1116
1117 for &byte in bytes {
1119 if byte == b',' {
1120 unsafe { scratch.push_unchecked(b'.') }
1121 } else {
1122 unsafe { scratch.push_unchecked(byte) }
1123 }
1124 }
1125}