1use arrow::array::MutableBinaryViewArray;
2#[cfg(feature = "dtype-decimal")]
3use polars_compute::decimal::str_to_dec128;
4#[cfg(feature = "dtype-categorical")]
5use polars_core::chunked_array::builder::CategoricalChunkedBuilder;
6use polars_core::prelude::*;
7use polars_error::to_compute_err;
8#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
9use polars_time::chunkedarray::string::Pattern;
10#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
11use polars_time::prelude::string::infer::{
12 DatetimeInfer, StrpTimeParser, TryFromWithUnit, infer_pattern_single,
13};
14#[cfg(feature = "dtype-f16")]
15use polars_utils::float16::pf16;
16use polars_utils::vec::PushUnchecked;
17
18use super::options::CsvEncoding;
19use super::parser::{could_be_whitespace_fast, skip_whitespace};
20use super::utils::escape_field;
21
/// Parses the raw bytes of a single CSV field into the native value of a numeric Polars type.
pub(crate) trait PrimitiveParser: PolarsNumericType {
    /// Attempts to parse `bytes` as `Self::Native`; the implementations in this file return
    /// `None` when the underlying parser reports an error.
    fn parse(bytes: &[u8]) -> Option<Self::Native>;
}
25
#[cfg(feature = "dtype-f16")]
impl PrimitiveParser for Float16Type {
    /// Parses the field as an `f32` and then converts that value to `pf16`.
    ///
    /// Returns `None` when either the parse or the conversion fails.
    #[inline]
    fn parse(bytes: &[u8]) -> Option<pf16> {
        use num_traits::FromPrimitive;

        let wide: f32 = fast_float2::parse(bytes).ok()?;
        pf16::from_f32(wide)
    }
}
35
36impl PrimitiveParser for Float32Type {
37 #[inline]
38 fn parse(bytes: &[u8]) -> Option<f32> {
39 fast_float2::parse(bytes).ok()
40 }
41}
42impl PrimitiveParser for Float64Type {
43 #[inline]
44 fn parse(bytes: &[u8]) -> Option<f64> {
45 fast_float2::parse(bytes).ok()
46 }
47}
48
49#[cfg(feature = "dtype-u8")]
50impl PrimitiveParser for UInt8Type {
51 #[inline]
52 fn parse(bytes: &[u8]) -> Option<u8> {
53 atoi_simd::parse_skipped(bytes).ok()
54 }
55}
56#[cfg(feature = "dtype-u16")]
57impl PrimitiveParser for UInt16Type {
58 #[inline]
59 fn parse(bytes: &[u8]) -> Option<u16> {
60 atoi_simd::parse_skipped(bytes).ok()
61 }
62}
63impl PrimitiveParser for UInt32Type {
64 #[inline]
65 fn parse(bytes: &[u8]) -> Option<u32> {
66 atoi_simd::parse_skipped(bytes).ok()
67 }
68}
69impl PrimitiveParser for UInt64Type {
70 #[inline]
71 fn parse(bytes: &[u8]) -> Option<u64> {
72 atoi_simd::parse_skipped(bytes).ok()
73 }
74}
75#[cfg(feature = "dtype-u128")]
76impl PrimitiveParser for UInt128Type {
77 #[inline]
78 fn parse(bytes: &[u8]) -> Option<u128> {
79 atoi_simd::parse_skipped(bytes).ok()
80 }
81}
82#[cfg(feature = "dtype-i8")]
83impl PrimitiveParser for Int8Type {
84 #[inline]
85 fn parse(bytes: &[u8]) -> Option<i8> {
86 atoi_simd::parse_skipped(bytes).ok()
87 }
88}
89#[cfg(feature = "dtype-i16")]
90impl PrimitiveParser for Int16Type {
91 #[inline]
92 fn parse(bytes: &[u8]) -> Option<i16> {
93 atoi_simd::parse_skipped(bytes).ok()
94 }
95}
96impl PrimitiveParser for Int32Type {
97 #[inline]
98 fn parse(bytes: &[u8]) -> Option<i32> {
99 atoi_simd::parse_skipped(bytes).ok()
100 }
101}
102impl PrimitiveParser for Int64Type {
103 #[inline]
104 fn parse(bytes: &[u8]) -> Option<i64> {
105 atoi_simd::parse_skipped(bytes).ok()
106 }
107}
108#[cfg(feature = "dtype-i128")]
109impl PrimitiveParser for Int128Type {
110 #[inline]
111 fn parse(bytes: &[u8]) -> Option<i128> {
112 atoi_simd::parse_skipped(bytes).ok()
113 }
114}
115
/// Common interface of the column builders in this file: each call consumes the raw bytes of
/// one CSV field and appends one entry to the builder.
trait ParsedBuilder {
    /// Parses `bytes` and appends the result.
    ///
    /// * `bytes` - raw bytes of the field.
    /// * `ignore_errors` - the implementations in this file append a null instead of returning
    ///   an error when the field cannot be parsed.
    /// * `_needs_escaping` - the implementations in this file treat this as "the first and last
    ///   byte of the field are quote characters that must be removed".
    /// * `_missing_is_null` - only consulted by the string builder, which uses it to choose
    ///   between a null and an empty string for an empty field.
    /// * `_time_unit` - only consulted by the date/datetime builder.
    fn parse_bytes(
        &mut self,
        bytes: &[u8],
        ignore_errors: bool,
        _needs_escaping: bool,
        _missing_is_null: bool,
        _time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()>;
}
126
127impl<T> ParsedBuilder for PrimitiveChunkedBuilder<T>
128where
129 T: PolarsNumericType + PrimitiveParser,
130{
131 #[inline]
132 fn parse_bytes(
133 &mut self,
134 mut bytes: &[u8],
135 ignore_errors: bool,
136 needs_escaping: bool,
137 _missing_is_null: bool,
138 _time_unit: Option<TimeUnit>,
139 ) -> PolarsResult<()> {
140 if !bytes.is_empty() && needs_escaping {
141 bytes = &bytes[1..bytes.len() - 1];
142 }
143
144 if !bytes.is_empty() && could_be_whitespace_fast(bytes[0]) {
145 bytes = skip_whitespace(bytes);
146 }
147
148 if bytes.is_empty() {
149 self.append_null();
150 return Ok(());
151 }
152
153 match T::parse(bytes) {
154 Some(value) => self.append_value(value),
155 None => {
156 if ignore_errors {
157 self.append_null()
158 } else {
159 polars_bail!(ComputeError: "invalid primitive value found during CSV parsing")
160 }
161 },
162 }
163 Ok(())
164 }
165}
166
167pub struct Utf8Field {
168 name: PlSmallStr,
169 mutable: MutableBinaryViewArray<[u8]>,
170 scratch: Vec<u8>,
171 quote_char: u8,
172 encoding: CsvEncoding,
173}
174
175impl Utf8Field {
176 fn new(
177 name: PlSmallStr,
178 capacity: usize,
179 quote_char: Option<u8>,
180 encoding: CsvEncoding,
181 ) -> Self {
182 Self {
183 name,
184 mutable: MutableBinaryViewArray::with_capacity(capacity),
185 scratch: vec![],
186 quote_char: quote_char.unwrap_or(b'"'),
187 encoding,
188 }
189 }
190}
191
192#[inline]
193pub fn validate_utf8(bytes: &[u8]) -> bool {
194 simdutf8::basic::from_utf8(bytes).is_ok()
195}
196
197impl ParsedBuilder for Utf8Field {
198 #[inline]
199 fn parse_bytes(
200 &mut self,
201 bytes: &[u8],
202 ignore_errors: bool,
203 needs_escaping: bool,
204 missing_is_null: bool,
205 _time_unit: Option<TimeUnit>,
206 ) -> PolarsResult<()> {
207 if bytes.is_empty() {
208 if missing_is_null {
209 self.mutable.push_null()
210 } else {
211 self.mutable.push(Some([]))
212 }
213 return Ok(());
214 }
215
216 let escaped_bytes = if needs_escaping {
218 self.scratch.clear();
219 self.scratch.reserve(bytes.len());
220 polars_ensure!(bytes.len() > 1 && bytes.last() == Some(&self.quote_char), ComputeError: "invalid csv file\n\nField `{}` is not properly escaped.", std::str::from_utf8(bytes).map_err(to_compute_err)?);
221
222 unsafe {
225 let n_written =
226 escape_field(bytes, self.quote_char, self.scratch.spare_capacity_mut());
227 self.scratch.set_len(n_written);
228 }
229
230 self.scratch.as_slice()
231 } else {
232 bytes
233 };
234
235 if matches!(self.encoding, CsvEncoding::LossyUtf8) | ignore_errors {
236 let parse_result = validate_utf8(escaped_bytes);
239
240 match parse_result {
241 true => {
242 let value = escaped_bytes;
243 self.mutable.push_value(value)
244 },
245 false => {
246 if matches!(self.encoding, CsvEncoding::LossyUtf8) {
247 let s = String::from_utf8_lossy(escaped_bytes);
249 self.mutable.push_value(s.as_ref().as_bytes())
250 } else if ignore_errors {
251 self.mutable.push_null()
252 } else {
253 if needs_escaping && validate_utf8(bytes) {
255 polars_bail!(ComputeError: "string field is not properly escaped");
256 } else {
257 polars_bail!(ComputeError: "invalid utf-8 sequence");
258 }
259 }
260 },
261 }
262 } else {
263 self.mutable.push_value(escaped_bytes)
264 }
265
266 Ok(())
267 }
268}
269
/// Builder for categorical/enum columns backed by a [`CategoricalChunkedBuilder`].
#[cfg(feature = "dtype-categorical")]
pub struct CategoricalField<T: PolarsCategoricalType> {
    // Reusable buffer that receives the output of `escape_field` for quoted fields.
    escape_scratch: Vec<u8>,
    quote_char: u8,
    builder: CategoricalChunkedBuilder<T>,
}

#[cfg(feature = "dtype-categorical")]
impl<T: PolarsCategoricalType> CategoricalField<T> {
    /// Creates a builder for `dtype` and reserves `capacity` entries.
    ///
    /// When `quote_char` is `None`, the double quote (`"`) is used.
    fn new(name: PlSmallStr, capacity: usize, quote_char: Option<u8>, dtype: DataType) -> Self {
        let mut builder = CategoricalChunkedBuilder::new(name, dtype);
        builder.reserve(capacity);
        let quote_char = quote_char.unwrap_or(b'"');

        Self {
            escape_scratch: Vec::new(),
            quote_char,
            builder,
        }
    }

    /// Appends one categorical field.
    ///
    /// An empty field becomes null. Invalid UTF-8 becomes null when `ignore_errors` is set and
    /// an error otherwise. When `needs_escaping` is set, the field is passed through
    /// `escape_field` before it is appended.
    #[inline]
    fn parse_bytes(
        &mut self,
        bytes: &[u8],
        ignore_errors: bool,
        needs_escaping: bool,
        _missing_is_null: bool,
        _time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()> {
        if bytes.is_empty() {
            self.builder.append_null();
            return Ok(());
        }

        if !validate_utf8(bytes) {
            if ignore_errors {
                self.builder.append_null();
                return Ok(());
            }
            polars_bail!(ComputeError: "invalid utf-8 sequence");
        }

        let key = if needs_escaping {
            polars_ensure!(
                bytes.len() > 1,
                ComputeError: "invalid csv file\n\nField `{}` is not properly escaped.",
                std::str::from_utf8(bytes).map_err(to_compute_err)?
            );
            self.escape_scratch.clear();
            self.escape_scratch.reserve(bytes.len());
            // SAFETY (NOTE(review)): relies on `escape_field` writing at most `bytes.len()`
            // bytes into the reserved spare capacity and returning the number of bytes it
            // initialized. Confirm against the contract of `escape_field`.
            unsafe {
                let n_written = escape_field(
                    bytes,
                    self.quote_char,
                    self.escape_scratch.spare_capacity_mut(),
                );
                self.escape_scratch.set_len(n_written);
            }

            // SAFETY (NOTE(review)): the unescaped input was validated above; this presumes
            // that escaping cannot turn valid UTF-8 into invalid UTF-8. Confirm.
            unsafe { std::str::from_utf8_unchecked(&self.escape_scratch) }
        } else {
            // SAFETY: `bytes` passed `validate_utf8` above.
            unsafe { std::str::from_utf8_unchecked(bytes) }
        };

        self.builder.append_str(key)?;
        Ok(())
    }
}
337
338impl ParsedBuilder for BooleanChunkedBuilder {
339 #[inline]
340 fn parse_bytes(
341 &mut self,
342 bytes: &[u8],
343 ignore_errors: bool,
344 needs_escaping: bool,
345 _missing_is_null: bool,
346 _time_unit: Option<TimeUnit>,
347 ) -> PolarsResult<()> {
348 let bytes = if needs_escaping {
349 &bytes[1..bytes.len() - 1]
350 } else {
351 bytes
352 };
353 if bytes.eq_ignore_ascii_case(b"false") {
354 self.append_value(false);
355 } else if bytes.eq_ignore_ascii_case(b"true") {
356 self.append_value(true);
357 } else if ignore_errors || bytes.is_empty() {
358 self.append_null();
359 } else {
360 polars_bail!(
361 ComputeError: "error while parsing value {} as boolean",
362 String::from_utf8_lossy(bytes),
363 );
364 }
365 Ok(())
366 }
367}
368
/// Builder for decimal columns; values are stored as `i128` and parsed with the configured
/// precision, scale and decimal separator.
#[cfg(feature = "dtype-decimal")]
pub struct DecimalField {
    builder: PrimitiveChunkedBuilder<Int128Type>,
    precision: usize,
    scale: usize,
    decimal_comma: bool,
}

#[cfg(feature = "dtype-decimal")]
impl DecimalField {
    /// Creates an empty decimal builder named `name` with room for `capacity` values.
    fn new(
        name: PlSmallStr,
        capacity: usize,
        precision: usize,
        scale: usize,
        decimal_comma: bool,
    ) -> Self {
        Self {
            builder: PrimitiveChunkedBuilder::new(name, capacity),
            precision,
            scale,
            decimal_comma,
        }
    }
}
395
#[cfg(feature = "dtype-decimal")]
impl ParsedBuilder for DecimalField {
    /// Parses a decimal field with `str_to_dec128` and appends it to the builder.
    ///
    /// The first and last byte are dropped when `needs_escaping` is set, leading whitespace is
    /// skipped, and a field that is empty afterwards is appended as null.
    ///
    /// # Errors
    /// Returns a `ComputeError` when the field cannot be parsed and `ignore_errors` is `false`;
    /// with `ignore_errors` set a null is appended instead.
    #[inline]
    fn parse_bytes(
        &mut self,
        mut bytes: &[u8],
        ignore_errors: bool,
        needs_escaping: bool,
        _missing_is_null: bool,
        _time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()> {
        // Only strip when both surrounding bytes exist: for a 1-byte field the range `1..0`
        // is inverted and slicing would panic.
        if needs_escaping && bytes.len() >= 2 {
            bytes = &bytes[1..bytes.len() - 1];
        }

        if !bytes.is_empty() && could_be_whitespace_fast(bytes[0]) {
            bytes = skip_whitespace(bytes);
        }

        if bytes.is_empty() {
            self.builder.append_null();
            return Ok(());
        }

        match str_to_dec128(bytes, self.precision, self.scale, self.decimal_comma) {
            Some(value) => self.builder.append_value(value),
            None if ignore_errors => self.builder.append_null(),
            None => polars_bail!(ComputeError: "invalid decimal value found during CSV parsing"),
        }

        Ok(())
    }
}
434
/// Builder for date/datetime columns.
///
/// `compiled` caches the parser of the last successfully parsed value; it starts out as `None`.
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
pub struct DatetimeField<T: PolarsNumericType> {
    compiled: Option<DatetimeInfer<T>>,
    builder: PrimitiveChunkedBuilder<T>,
}

#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
impl<T: PolarsNumericType> DatetimeField<T> {
    /// Creates an empty builder named `name` with room for `capacity` values and no cached
    /// parser.
    fn new(name: PlSmallStr, capacity: usize) -> Self {
        Self {
            compiled: None,
            builder: PrimitiveChunkedBuilder::new(name, capacity),
        }
    }
}
451
/// Fallback parser used when no parser is cached yet or the cached one rejected the value.
///
/// Uses the cached pattern if there is one, otherwise infers a pattern from `bytes`, builds a
/// new `DatetimeInfer` for it and parses the value. On success the new parser is cached in
/// `buf.compiled` and the value is appended.
///
/// Every failure (invalid UTF-8, no pattern found, parser construction error, value not
/// matching the pattern) appends a null when `ignore_errors` is set and returns an error
/// otherwise.
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
fn slow_datetime_parser<T>(
    buf: &mut DatetimeField<T>,
    bytes: &[u8],
    time_unit: Option<TimeUnit>,
    ignore_errors: bool,
) -> PolarsResult<()>
where
    T: PolarsNumericType,
    DatetimeInfer<T>: TryFromWithUnit<Pattern>,
{
    let val = if bytes.is_ascii() {
        // SAFETY: ASCII-only bytes are always valid UTF-8.
        unsafe { std::str::from_utf8_unchecked(bytes) }
    } else {
        match std::str::from_utf8(bytes) {
            Ok(text) => text,
            Err(_) if ignore_errors => {
                buf.builder.append_null();
                return Ok(());
            },
            Err(_) => polars_bail!(ComputeError: "invalid utf-8 sequence"),
        }
    };

    // Prefer the pattern of the cached parser; only infer one when nothing is cached.
    let known_pattern = buf
        .compiled
        .as_ref()
        .map(|compiled| compiled.pattern)
        .or_else(|| infer_pattern_single(val));
    let pattern = match known_pattern {
        Some(pattern) => pattern,
        None if ignore_errors => {
            buf.builder.append_null();
            return Ok(());
        },
        None => {
            polars_bail!(ComputeError: "could not find a 'date/datetime' pattern for '{}'", val)
        },
    };

    let mut infer: DatetimeInfer<T> = match DatetimeInfer::try_from_with_unit(pattern, time_unit)
    {
        Ok(infer) => infer,
        Err(_) if ignore_errors => {
            buf.builder.append_null();
            return Ok(());
        },
        Err(err) => return Err(err),
    };

    match infer.parse(val) {
        Some(parsed) => {
            buf.compiled = Some(infer);
            buf.builder.append_value(parsed);
            Ok(())
        },
        None if ignore_errors => {
            buf.builder.append_null();
            Ok(())
        },
        None => {
            polars_bail!(ComputeError: "could not parse '{}' with pattern '{:?}'", val, pattern)
        },
    }
}
521
#[cfg(any(feature = "dtype-datetime", feature = "dtype-date"))]
impl<T> ParsedBuilder for DatetimeField<T>
where
    T: PolarsNumericType,
    DatetimeInfer<T>: TryFromWithUnit<Pattern> + StrpTimeParser<T::Native>,
{
    /// Appends one date/datetime field.
    ///
    /// The first and last byte are dropped when `needs_escaping` is set and the field has at
    /// least two bytes. An empty field becomes null. Otherwise the cached parser is tried
    /// first; if there is none or it rejects the value, `slow_datetime_parser` takes over.
    #[inline]
    fn parse_bytes(
        &mut self,
        bytes: &[u8],
        ignore_errors: bool,
        needs_escaping: bool,
        _missing_is_null: bool,
        time_unit: Option<TimeUnit>,
    ) -> PolarsResult<()> {
        // The slice pattern only matches fields of length >= 2.
        let field = match bytes {
            [_, inner @ .., _] if needs_escaping => inner,
            untouched => untouched,
        };

        if field.is_empty() {
            self.builder.append_null();
            return Ok(());
        }

        let fast_path = self
            .compiled
            .as_mut()
            .and_then(|compiled| compiled.parse_bytes(field, time_unit));

        match fast_path {
            Some(parsed) => {
                self.builder.append_value(parsed);
                Ok(())
            },
            None => slow_datetime_parser(self, field, time_unit, ignore_errors),
        }
    }
}
564
565pub fn init_builders(
566 projection: &[usize],
567 capacity: usize,
568 schema: &Schema,
569 quote_char: Option<u8>,
570 encoding: CsvEncoding,
571 decimal_comma: bool,
572) -> PolarsResult<Vec<Builder>> {
573 projection
574 .iter()
575 .map(|&i| {
576 let (name, dtype) = schema.get_at_index(i).unwrap();
577 let name = name.clone();
578 let builder = match dtype {
579 &DataType::Boolean => Builder::Boolean(BooleanChunkedBuilder::new(name, capacity)),
580 #[cfg(feature = "dtype-i8")]
581 &DataType::Int8 => Builder::Int8(PrimitiveChunkedBuilder::new(name, capacity)),
582 #[cfg(feature = "dtype-i16")]
583 &DataType::Int16 => Builder::Int16(PrimitiveChunkedBuilder::new(name, capacity)),
584 &DataType::Int32 => Builder::Int32(PrimitiveChunkedBuilder::new(name, capacity)),
585 &DataType::Int64 => Builder::Int64(PrimitiveChunkedBuilder::new(name, capacity)),
586 #[cfg(feature = "dtype-i128")]
587 &DataType::Int128 => Builder::Int128(PrimitiveChunkedBuilder::new(name, capacity)),
588 #[cfg(feature = "dtype-u8")]
589 &DataType::UInt8 => Builder::UInt8(PrimitiveChunkedBuilder::new(name, capacity)),
590 #[cfg(feature = "dtype-u16")]
591 &DataType::UInt16 => Builder::UInt16(PrimitiveChunkedBuilder::new(name, capacity)),
592 &DataType::UInt32 => Builder::UInt32(PrimitiveChunkedBuilder::new(name, capacity)),
593 &DataType::UInt64 => Builder::UInt64(PrimitiveChunkedBuilder::new(name, capacity)),
594 #[cfg(feature = "dtype-u128")]
595 &DataType::UInt128 => {
596 Builder::UInt128(PrimitiveChunkedBuilder::new(name, capacity))
597 },
598 #[cfg(feature = "dtype-f16")]
599 &DataType::Float16 => {
600 if decimal_comma {
601 Builder::DecimalFloat16(
602 PrimitiveChunkedBuilder::new(name, capacity),
603 Default::default(),
604 )
605 } else {
606 Builder::Float16(PrimitiveChunkedBuilder::new(name, capacity))
607 }
608 },
609 &DataType::Float32 => {
610 if decimal_comma {
611 Builder::DecimalFloat32(
612 PrimitiveChunkedBuilder::new(name, capacity),
613 Default::default(),
614 )
615 } else {
616 Builder::Float32(PrimitiveChunkedBuilder::new(name, capacity))
617 }
618 },
619 &DataType::Float64 => {
620 if decimal_comma {
621 Builder::DecimalFloat64(
622 PrimitiveChunkedBuilder::new(name, capacity),
623 Default::default(),
624 )
625 } else {
626 Builder::Float64(PrimitiveChunkedBuilder::new(name, capacity))
627 }
628 },
629 #[cfg(feature = "dtype-decimal")]
630 &DataType::Decimal(precision, scale) => Builder::Decimal(DecimalField::new(
631 name,
632 capacity,
633 precision,
634 scale,
635 decimal_comma,
636 )),
637 &DataType::String => {
638 Builder::Utf8(Utf8Field::new(name, capacity, quote_char, encoding))
639 },
640 #[cfg(feature = "dtype-datetime")]
641 DataType::Datetime(time_unit, time_zone) => Builder::Datetime {
642 buf: DatetimeField::new(name, capacity),
643 time_unit: *time_unit,
644 time_zone: time_zone.clone(),
645 },
646 #[cfg(feature = "dtype-date")]
647 &DataType::Date => Builder::Date(DatetimeField::new(name, capacity)),
648 #[cfg(feature = "dtype-categorical")]
649 DataType::Categorical(_, _) | DataType::Enum(_, _) => {
650 match dtype.cat_physical().unwrap() {
651 CategoricalPhysical::U8 => {
652 Builder::Categorical8(CategoricalField::<Categorical8Type>::new(
653 name,
654 capacity,
655 quote_char,
656 dtype.clone(),
657 ))
658 },
659 CategoricalPhysical::U16 => {
660 Builder::Categorical16(CategoricalField::<Categorical16Type>::new(
661 name,
662 capacity,
663 quote_char,
664 dtype.clone(),
665 ))
666 },
667 CategoricalPhysical::U32 => {
668 Builder::Categorical32(CategoricalField::<Categorical32Type>::new(
669 name,
670 capacity,
671 quote_char,
672 dtype.clone(),
673 ))
674 },
675 }
676 },
677 dt => polars_bail!(
678 ComputeError: "unsupported data type when reading CSV: {} when reading CSV", dt,
679 ),
680 };
681 Ok(builder)
682 })
683 .collect()
684}
685
/// Column builder for one CSV column; there is one variant per supported dtype.
// NOTE(review): the size difference between variants is presumably accepted on purpose
// (no boxing) — confirm before changing the layout.
#[allow(clippy::large_enum_variant)]
pub enum Builder {
    Boolean(BooleanChunkedBuilder),
    #[cfg(feature = "dtype-i8")]
    Int8(PrimitiveChunkedBuilder<Int8Type>),
    #[cfg(feature = "dtype-i16")]
    Int16(PrimitiveChunkedBuilder<Int16Type>),
    Int32(PrimitiveChunkedBuilder<Int32Type>),
    Int64(PrimitiveChunkedBuilder<Int64Type>),
    #[cfg(feature = "dtype-i128")]
    Int128(PrimitiveChunkedBuilder<Int128Type>),
    #[cfg(feature = "dtype-u8")]
    UInt8(PrimitiveChunkedBuilder<UInt8Type>),
    #[cfg(feature = "dtype-u16")]
    UInt16(PrimitiveChunkedBuilder<UInt16Type>),
    UInt32(PrimitiveChunkedBuilder<UInt32Type>),
    UInt64(PrimitiveChunkedBuilder<UInt64Type>),
    #[cfg(feature = "dtype-u128")]
    UInt128(PrimitiveChunkedBuilder<UInt128Type>),
    #[cfg(feature = "dtype-f16")]
    Float16(PrimitiveChunkedBuilder<Float16Type>),
    Float32(PrimitiveChunkedBuilder<Float32Type>),
    Float64(PrimitiveChunkedBuilder<Float64Type>),
    #[cfg(feature = "dtype-decimal")]
    Decimal(DecimalField),
    Utf8(Utf8Field),
    /// Datetime column; `time_unit` and `time_zone` are applied when the builder is turned
    /// into a series.
    #[cfg(feature = "dtype-datetime")]
    Datetime {
        buf: DatetimeField<Int64Type>,
        time_unit: TimeUnit,
        time_zone: Option<TimeZone>,
    },
    #[cfg(feature = "dtype-date")]
    Date(DatetimeField<Int32Type>),
    #[cfg(feature = "dtype-categorical")]
    Categorical8(CategoricalField<Categorical8Type>),
    #[cfg(feature = "dtype-categorical")]
    Categorical16(CategoricalField<Categorical16Type>),
    #[cfg(feature = "dtype-categorical")]
    Categorical32(CategoricalField<Categorical32Type>),
    /// Float column whose fields use `,` as decimal separator; the `Vec<u8>` is a scratch
    /// buffer that holds the field with every `,` replaced by `.`.
    #[cfg(feature = "dtype-f16")]
    DecimalFloat16(PrimitiveChunkedBuilder<Float16Type>, Vec<u8>),
    /// See `DecimalFloat16`.
    DecimalFloat32(PrimitiveChunkedBuilder<Float32Type>, Vec<u8>),
    /// See `DecimalFloat16`.
    DecimalFloat64(PrimitiveChunkedBuilder<Float64Type>, Vec<u8>),
}
732
733impl Builder {
734 pub fn into_series(self) -> PolarsResult<Series> {
735 let s = match self {
736 Builder::Boolean(v) => v.finish().into_series(),
737 #[cfg(feature = "dtype-i8")]
738 Builder::Int8(v) => v.finish().into_series(),
739 #[cfg(feature = "dtype-i16")]
740 Builder::Int16(v) => v.finish().into_series(),
741 Builder::Int32(v) => v.finish().into_series(),
742 Builder::Int64(v) => v.finish().into_series(),
743 #[cfg(feature = "dtype-i128")]
744 Builder::Int128(v) => v.finish().into_series(),
745 #[cfg(feature = "dtype-u8")]
746 Builder::UInt8(v) => v.finish().into_series(),
747 #[cfg(feature = "dtype-u16")]
748 Builder::UInt16(v) => v.finish().into_series(),
749 Builder::UInt32(v) => v.finish().into_series(),
750 Builder::UInt64(v) => v.finish().into_series(),
751 #[cfg(feature = "dtype-u128")]
752 Builder::UInt128(v) => v.finish().into_series(),
753 #[cfg(feature = "dtype-f16")]
754 Builder::Float16(v) => v.finish().into_series(),
755 Builder::Float32(v) => v.finish().into_series(),
756 Builder::Float64(v) => v.finish().into_series(),
757 #[cfg(feature = "dtype-f16")]
758 Builder::DecimalFloat16(v, _) => v.finish().into_series(),
759 Builder::DecimalFloat32(v, _) => v.finish().into_series(),
760 Builder::DecimalFloat64(v, _) => v.finish().into_series(),
761 #[cfg(feature = "dtype-decimal")]
762 Builder::Decimal(DecimalField {
763 builder,
764 precision,
765 scale,
766 ..
767 }) => unsafe {
768 builder
769 .finish()
770 .into_series()
771 .from_physical_unchecked(&DataType::Decimal(precision, scale))
772 .unwrap()
773 },
774 #[cfg(feature = "dtype-datetime")]
775 Builder::Datetime {
776 buf,
777 time_unit,
778 time_zone,
779 } => buf
780 .builder
781 .finish()
782 .into_series()
783 .cast(&DataType::Datetime(time_unit, time_zone))
784 .unwrap(),
785 #[cfg(feature = "dtype-date")]
786 Builder::Date(v) => v
787 .builder
788 .finish()
789 .into_series()
790 .cast(&DataType::Date)
791 .unwrap(),
792
793 Builder::Utf8(v) => {
794 let arr = v.mutable.freeze();
795 StringChunked::with_chunk(v.name, unsafe { arr.to_utf8view_unchecked() })
796 .into_series()
797 },
798 #[cfg(feature = "dtype-categorical")]
799 Builder::Categorical8(buf) => buf.builder.finish().into_series(),
800 #[cfg(feature = "dtype-categorical")]
801 Builder::Categorical16(buf) => buf.builder.finish().into_series(),
802 #[cfg(feature = "dtype-categorical")]
803 Builder::Categorical32(buf) => buf.builder.finish().into_series(),
804 };
805 Ok(s)
806 }
807
808 pub fn add_null(&mut self, valid: bool) {
809 match self {
810 Builder::Boolean(v) => v.append_null(),
811 #[cfg(feature = "dtype-i8")]
812 Builder::Int8(v) => v.append_null(),
813 #[cfg(feature = "dtype-i16")]
814 Builder::Int16(v) => v.append_null(),
815 Builder::Int32(v) => v.append_null(),
816 Builder::Int64(v) => v.append_null(),
817 #[cfg(feature = "dtype-i128")]
818 Builder::Int128(v) => v.append_null(),
819 #[cfg(feature = "dtype-u8")]
820 Builder::UInt8(v) => v.append_null(),
821 #[cfg(feature = "dtype-u16")]
822 Builder::UInt16(v) => v.append_null(),
823 Builder::UInt32(v) => v.append_null(),
824 Builder::UInt64(v) => v.append_null(),
825 #[cfg(feature = "dtype-u128")]
826 Builder::UInt128(v) => v.append_null(),
827 #[cfg(feature = "dtype-f16")]
828 Builder::Float16(v) => v.append_null(),
829 Builder::Float32(v) => v.append_null(),
830 Builder::Float64(v) => v.append_null(),
831 #[cfg(feature = "dtype-decimal")]
832 Builder::Decimal(buf) => buf.builder.append_null(),
833 #[cfg(feature = "dtype-f16")]
834 Builder::DecimalFloat16(v, _) => v.append_null(),
835 Builder::DecimalFloat32(v, _) => v.append_null(),
836 Builder::DecimalFloat64(v, _) => v.append_null(),
837 Builder::Utf8(v) => {
838 if valid {
839 v.mutable.push_value("")
840 } else {
841 v.mutable.push_null()
842 }
843 },
844 #[cfg(feature = "dtype-datetime")]
845 Builder::Datetime { buf, .. } => buf.builder.append_null(),
846 #[cfg(feature = "dtype-date")]
847 Builder::Date(v) => v.builder.append_null(),
848 #[cfg(feature = "dtype-categorical")]
849 Builder::Categorical8(buf) => buf.builder.append_null(),
850 #[cfg(feature = "dtype-categorical")]
851 Builder::Categorical16(buf) => buf.builder.append_null(),
852 #[cfg(feature = "dtype-categorical")]
853 Builder::Categorical32(buf) => buf.builder.append_null(),
854 };
855 }
856
857 pub fn dtype(&self) -> DataType {
858 match self {
859 Builder::Boolean(_) => DataType::Boolean,
860 #[cfg(feature = "dtype-i8")]
861 Builder::Int8(_) => DataType::Int8,
862 #[cfg(feature = "dtype-i16")]
863 Builder::Int16(_) => DataType::Int16,
864 Builder::Int32(_) => DataType::Int32,
865 Builder::Int64(_) => DataType::Int64,
866 #[cfg(feature = "dtype-i128")]
867 Builder::Int128(_) => DataType::Int128,
868 #[cfg(feature = "dtype-u8")]
869 Builder::UInt8(_) => DataType::UInt8,
870 #[cfg(feature = "dtype-u16")]
871 Builder::UInt16(_) => DataType::UInt16,
872 Builder::UInt32(_) => DataType::UInt32,
873 Builder::UInt64(_) => DataType::UInt64,
874 #[cfg(feature = "dtype-u128")]
875 Builder::UInt128(_) => DataType::UInt128,
876 #[cfg(feature = "dtype-f16")]
877 Builder::Float16(_) | Builder::DecimalFloat16(_, _) => DataType::Float16,
878 Builder::Float32(_) | Builder::DecimalFloat32(_, _) => DataType::Float32,
879 Builder::Float64(_) | Builder::DecimalFloat64(_, _) => DataType::Float64,
880 #[cfg(feature = "dtype-decimal")]
881 Builder::Decimal(DecimalField {
882 precision, scale, ..
883 }) => DataType::Decimal(*precision, *scale),
884 Builder::Utf8(_) => DataType::String,
885 #[cfg(feature = "dtype-datetime")]
886 Builder::Datetime { time_unit, .. } => DataType::Datetime(*time_unit, None),
887 #[cfg(feature = "dtype-date")]
888 Builder::Date(_) => DataType::Date,
889 #[cfg(feature = "dtype-categorical")]
890 Builder::Categorical8(buf) => buf.builder.dtype().clone(),
891 #[cfg(feature = "dtype-categorical")]
892 Builder::Categorical16(buf) => buf.builder.dtype().clone(),
893 #[cfg(feature = "dtype-categorical")]
894 Builder::Categorical32(buf) => buf.builder.dtype().clone(),
895 }
896 }
897
898 #[inline]
899 pub fn add(
900 &mut self,
901 bytes: &[u8],
902 ignore_errors: bool,
903 needs_escaping: bool,
904 missing_is_null: bool,
905 ) -> PolarsResult<()> {
906 use Builder::*;
907 match self {
908 Boolean(buf) => <BooleanChunkedBuilder as ParsedBuilder>::parse_bytes(
909 buf,
910 bytes,
911 ignore_errors,
912 needs_escaping,
913 missing_is_null,
914 None,
915 ),
916 #[cfg(feature = "dtype-i8")]
917 Int8(buf) => <PrimitiveChunkedBuilder<Int8Type> as ParsedBuilder>::parse_bytes(
918 buf,
919 bytes,
920 ignore_errors,
921 needs_escaping,
922 missing_is_null,
923 None,
924 ),
925 #[cfg(feature = "dtype-i16")]
926 Int16(buf) => <PrimitiveChunkedBuilder<Int16Type> as ParsedBuilder>::parse_bytes(
927 buf,
928 bytes,
929 ignore_errors,
930 needs_escaping,
931 missing_is_null,
932 None,
933 ),
934 Int32(buf) => <PrimitiveChunkedBuilder<Int32Type> as ParsedBuilder>::parse_bytes(
935 buf,
936 bytes,
937 ignore_errors,
938 needs_escaping,
939 missing_is_null,
940 None,
941 ),
942 Int64(buf) => <PrimitiveChunkedBuilder<Int64Type> as ParsedBuilder>::parse_bytes(
943 buf,
944 bytes,
945 ignore_errors,
946 needs_escaping,
947 missing_is_null,
948 None,
949 ),
950 #[cfg(feature = "dtype-i128")]
951 Int128(buf) => <PrimitiveChunkedBuilder<Int128Type> as ParsedBuilder>::parse_bytes(
952 buf,
953 bytes,
954 ignore_errors,
955 needs_escaping,
956 missing_is_null,
957 None,
958 ),
959 #[cfg(feature = "dtype-u8")]
960 UInt8(buf) => <PrimitiveChunkedBuilder<UInt8Type> as ParsedBuilder>::parse_bytes(
961 buf,
962 bytes,
963 ignore_errors,
964 needs_escaping,
965 missing_is_null,
966 None,
967 ),
968 #[cfg(feature = "dtype-u16")]
969 UInt16(buf) => <PrimitiveChunkedBuilder<UInt16Type> as ParsedBuilder>::parse_bytes(
970 buf,
971 bytes,
972 ignore_errors,
973 needs_escaping,
974 missing_is_null,
975 None,
976 ),
977 UInt32(buf) => <PrimitiveChunkedBuilder<UInt32Type> as ParsedBuilder>::parse_bytes(
978 buf,
979 bytes,
980 ignore_errors,
981 needs_escaping,
982 missing_is_null,
983 None,
984 ),
985 UInt64(buf) => <PrimitiveChunkedBuilder<UInt64Type> as ParsedBuilder>::parse_bytes(
986 buf,
987 bytes,
988 ignore_errors,
989 needs_escaping,
990 missing_is_null,
991 None,
992 ),
993 #[cfg(feature = "dtype-u128")]
994 UInt128(buf) => <PrimitiveChunkedBuilder<UInt128Type> as ParsedBuilder>::parse_bytes(
995 buf,
996 bytes,
997 ignore_errors,
998 needs_escaping,
999 missing_is_null,
1000 None,
1001 ),
1002 #[cfg(feature = "dtype-f16")]
1003 Float16(buf) => <PrimitiveChunkedBuilder<Float16Type> as ParsedBuilder>::parse_bytes(
1004 buf,
1005 bytes,
1006 ignore_errors,
1007 needs_escaping,
1008 missing_is_null,
1009 None,
1010 ),
1011 Float32(buf) => <PrimitiveChunkedBuilder<Float32Type> as ParsedBuilder>::parse_bytes(
1012 buf,
1013 bytes,
1014 ignore_errors,
1015 needs_escaping,
1016 missing_is_null,
1017 None,
1018 ),
1019 Float64(buf) => <PrimitiveChunkedBuilder<Float64Type> as ParsedBuilder>::parse_bytes(
1020 buf,
1021 bytes,
1022 ignore_errors,
1023 needs_escaping,
1024 missing_is_null,
1025 None,
1026 ),
1027 #[cfg(feature = "dtype-f16")]
1028 DecimalFloat16(buf, scratch) => {
1029 prepare_decimal_comma(bytes, scratch);
1030 <PrimitiveChunkedBuilder<Float16Type> as ParsedBuilder>::parse_bytes(
1031 buf,
1032 scratch,
1033 ignore_errors,
1034 needs_escaping,
1035 missing_is_null,
1036 None,
1037 )
1038 },
1039 DecimalFloat32(buf, scratch) => {
1040 prepare_decimal_comma(bytes, scratch);
1041 <PrimitiveChunkedBuilder<Float32Type> as ParsedBuilder>::parse_bytes(
1042 buf,
1043 scratch,
1044 ignore_errors,
1045 needs_escaping,
1046 missing_is_null,
1047 None,
1048 )
1049 },
1050 DecimalFloat64(buf, scratch) => {
1051 prepare_decimal_comma(bytes, scratch);
1052 <PrimitiveChunkedBuilder<Float64Type> as ParsedBuilder>::parse_bytes(
1053 buf,
1054 scratch,
1055 ignore_errors,
1056 needs_escaping,
1057 missing_is_null,
1058 None,
1059 )
1060 },
1061 #[cfg(feature = "dtype-decimal")]
1062 Decimal(buf) => <DecimalField as ParsedBuilder>::parse_bytes(
1063 buf,
1064 bytes,
1065 ignore_errors,
1066 needs_escaping,
1067 missing_is_null,
1068 None,
1069 ),
1070 Utf8(buf) => <Utf8Field as ParsedBuilder>::parse_bytes(
1071 buf,
1072 bytes,
1073 ignore_errors,
1074 needs_escaping,
1075 missing_is_null,
1076 None,
1077 ),
1078 #[cfg(feature = "dtype-datetime")]
1079 Datetime { buf, time_unit, .. } => {
1080 <DatetimeField<Int64Type> as ParsedBuilder>::parse_bytes(
1081 buf,
1082 bytes,
1083 ignore_errors,
1084 needs_escaping,
1085 missing_is_null,
1086 Some(*time_unit),
1087 )
1088 },
1089 #[cfg(feature = "dtype-date")]
1090 Date(buf) => <DatetimeField<Int32Type> as ParsedBuilder>::parse_bytes(
1091 buf,
1092 bytes,
1093 ignore_errors,
1094 needs_escaping,
1095 missing_is_null,
1096 None,
1097 ),
1098 #[cfg(feature = "dtype-categorical")]
1099 Categorical8(buf) => {
1100 buf.parse_bytes(bytes, ignore_errors, needs_escaping, missing_is_null, None)
1101 },
1102 #[cfg(feature = "dtype-categorical")]
1103 Categorical16(buf) => {
1104 buf.parse_bytes(bytes, ignore_errors, needs_escaping, missing_is_null, None)
1105 },
1106 #[cfg(feature = "dtype-categorical")]
1107 Categorical32(buf) => {
1108 buf.parse_bytes(bytes, ignore_errors, needs_escaping, missing_is_null, None)
1109 },
1110 }
1111 }
1112}
1113
1114#[inline]
1115fn prepare_decimal_comma(bytes: &[u8], scratch: &mut Vec<u8>) {
1116 scratch.clear();
1117 scratch.reserve(bytes.len());
1118
1119 for &byte in bytes {
1121 if byte == b',' {
1122 unsafe { scratch.push_unchecked(b'.') }
1123 } else {
1124 unsafe { scratch.push_unchecked(byte) }
1125 }
1126 }
1127}