polars_core/series/
mod.rs

1#![allow(unsafe_op_in_unsafe_fn)]
2//! Type agnostic columnar data structure.
3use crate::chunked_array::flags::StatisticsFlags;
4pub use crate::prelude::ChunkCompareEq;
5use crate::prelude::*;
6use crate::{HEAD_DEFAULT_LENGTH, TAIL_DEFAULT_LENGTH};
7
// Panics with a uniform "operation not supported" message.
//
// `$op` is the operation identifier (rendered through `stringify!`) and `$s`
// is any expression exposing a `_dtype()` method whose result is `Display`.
macro_rules! invalid_operation_panic {
    ($op:ident, $s:expr) => {{
        let op_name = stringify!($op);
        panic!("`{}` operation not supported for dtype `{}`", op_name, $s._dtype())
    }};
}
17
18pub mod amortized_iter;
19mod any_value;
20pub mod arithmetic;
21pub mod builder;
22mod comparison;
23mod from;
24pub mod implementations;
25mod into;
26pub(crate) mod iterator;
27pub mod ops;
28mod series_trait;
29
30use std::borrow::Cow;
31use std::hash::{Hash, Hasher};
32use std::ops::Deref;
33
34use arrow::compute::aggregate::estimated_bytes_size;
35use arrow::offset::Offsets;
36pub use from::*;
37pub use iterator::{SeriesIter, SeriesPhysIter};
38use num_traits::NumCast;
39use polars_error::feature_gated;
40pub use series_trait::{IsSorted, *};
41
42use crate::POOL;
43use crate::chunked_array::cast::CastOptions;
44#[cfg(feature = "zip_with")]
45use crate::series::arithmetic::coerce_lhs_rhs;
46use crate::utils::{Wrap, handle_casting_failures, materialize_dyn_int};
47
48/// # Series
49/// The columnar data type for a DataFrame.
50///
51/// Most of the available functions are defined in the [SeriesTrait trait](crate::series::SeriesTrait).
52///
53/// The `Series` struct consists
54/// of typed [ChunkedArray]'s. To quickly cast
55/// a `Series` to a `ChunkedArray` you can call the method with the name of the type:
56///
57/// ```
58/// # use polars_core::prelude::*;
59/// let s: Series = [1, 2, 3].iter().collect();
60/// // Quickly obtain the ChunkedArray wrapped by the Series.
61/// let chunked_array = s.i32().unwrap();
62/// ```
63///
64/// ## Arithmetic
65///
66/// You can do standard arithmetic on series.
67/// ```
68/// # use polars_core::prelude::*;
69/// let s = Series::new("a".into(), [1 , 2, 3]);
70/// let out_add = &s + &s;
71/// let out_sub = &s - &s;
72/// let out_div = &s / &s;
73/// let out_mul = &s * &s;
74/// ```
75///
76/// Or with series and numbers.
77///
78/// ```
79/// # use polars_core::prelude::*;
80/// let s: Series = (1..3).collect();
81/// let out_add_one = &s + 1;
82/// let out_multiply = &s * 10;
83///
84/// // Could not overload left hand side operator.
85/// let out_divide = 1.div(&s);
86/// let out_add = 1.add(&s);
87/// let out_subtract = 1.sub(&s);
88/// let out_multiply = 1.mul(&s);
89/// ```
90///
91/// ## Comparison
92/// You can obtain boolean mask by comparing series.
93///
94/// ```
95/// # use polars_core::prelude::*;
96/// let s = Series::new("dollars".into(), &[1, 2, 3]);
97/// let mask = s.equal(1).unwrap();
98/// let valid = [true, false, false].iter();
99/// assert!(mask
100///     .into_iter()
101///     .map(|opt_bool| opt_bool.unwrap()) // option, because series can be null
102///     .zip(valid)
103///     .all(|(a, b)| a == *b))
104/// ```
105///
106/// See all the comparison operators in the [ChunkCompareEq trait](crate::chunked_array::ops::ChunkCompareEq) and
107/// [ChunkCompareIneq trait](crate::chunked_array::ops::ChunkCompareIneq).
108///
109/// ## Iterators
110/// The Series variants contain differently typed [ChunkedArray]s.
111/// These structs can be turned into iterators, making it possible to use any function/ closure you want
112/// on a Series.
113///
114/// These iterators return an `Option<T>` because the values of a series may be null.
115///
116/// ```
117/// use polars_core::prelude::*;
118/// let pi = 3.14;
119/// let s = Series::new("angle".into(), [2f32 * pi, pi, 1.5 * pi].as_ref());
120/// let s_cos: Series = s.f32()
121///                     .expect("series was not an f32 dtype")
122///                     .into_iter()
123///                     .map(|opt_angle| opt_angle.map(|angle| angle.cos()))
124///                     .collect();
125/// ```
126///
127/// ## Creation
/// Series can be created from different data structures. Below we'll show a few ways we can create
/// a Series object.
130///
131/// ```
132/// # use polars_core::prelude::*;
133/// // Series can be created from Vec's, slices and arrays
134/// Series::new("boolean series".into(), &[true, false, true]);
135/// Series::new("int series".into(), &[1, 2, 3]);
136/// // And can be nullable
137/// Series::new("got nulls".into(), &[Some(1), None, Some(2)]);
138///
139/// // Series can also be collected from iterators
140/// let from_iter: Series = (0..10)
141///     .into_iter()
142///     .collect();
143///
144/// ```
// `Clone` is derived over an `Arc` field, so cloning a `Series` clones the
// pointer rather than the underlying data.
#[derive(Clone)]
#[must_use]
// The single public field is the shared, type-erased `SeriesTrait` object.
pub struct Series(pub Arc<dyn SeriesTrait>);
148
149impl PartialEq for Wrap<Series> {
150    fn eq(&self, other: &Self) -> bool {
151        self.0.equals_missing(other)
152    }
153}
154
155impl Eq for Wrap<Series> {}
156
157impl Hash for Wrap<Series> {
158    fn hash<H: Hasher>(&self, state: &mut H) {
159        let rs = PlSeedableRandomStateQuality::fixed();
160        let mut h = vec![];
161        if self.0.vec_hash(rs, &mut h).is_ok() {
162            let h = h.into_iter().fold(0, |a: u64, b| a.wrapping_add(b));
163            h.hash(state)
164        } else {
165            self.len().hash(state);
166            self.null_count().hash(state);
167            self.dtype().hash(state);
168        }
169    }
170}
171
172impl Series {
173    /// Create a new empty Series.
174    pub fn new_empty(name: PlSmallStr, dtype: &DataType) -> Series {
175        Series::full_null(name, 0, dtype)
176    }
177
178    pub fn clear(&self) -> Series {
179        if self.is_empty() {
180            self.clone()
181        } else {
182            match self.dtype() {
183                #[cfg(feature = "object")]
184                DataType::Object(_) => self
185                    .take(&ChunkedArray::<IdxType>::new_vec(PlSmallStr::EMPTY, vec![]))
186                    .unwrap(),
187                dt => Series::new_empty(self.name().clone(), dt),
188            }
189        }
190    }
191
192    #[doc(hidden)]
193    pub fn _get_inner_mut(&mut self) -> &mut dyn SeriesTrait {
194        if Arc::weak_count(&self.0) + Arc::strong_count(&self.0) != 1 {
195            self.0 = self.0.clone_inner();
196        }
197        Arc::get_mut(&mut self.0).expect("implementation error")
198    }
199
200    /// Take or clone a owned copy of the inner [`ChunkedArray`].
201    pub fn take_inner<T>(self) -> ChunkedArray<T>
202    where
203        T: 'static + PolarsDataType<IsLogical = FalseT>,
204    {
205        let arc_any = self.0.as_arc_any();
206        let downcast = arc_any
207            .downcast::<implementations::SeriesWrap<ChunkedArray<T>>>()
208            .unwrap();
209
210        match Arc::try_unwrap(downcast) {
211            Ok(ca) => ca.0,
212            Err(ca) => ca.as_ref().as_ref().clone(),
213        }
214    }
215
216    /// # Safety
217    /// The caller must ensure the length and the data types of `ArrayRef` does not change.
218    /// And that the null_count is updated (e.g. with a `compute_len()`)
219    pub unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
220        #[allow(unused_mut)]
221        let mut ca = self._get_inner_mut();
222        ca.chunks_mut()
223    }
224
225    pub fn into_chunks(mut self) -> Vec<ArrayRef> {
226        let ca = self._get_inner_mut();
227        let chunks = std::mem::take(unsafe { ca.chunks_mut() });
228        ca.compute_len();
229        chunks
230    }
231
232    // TODO! this probably can now be removed, now we don't have special case for structs.
233    pub fn select_chunk(&self, i: usize) -> Self {
234        let mut new = self.clear();
235        let mut flags = self.get_flags();
236
237        use StatisticsFlags as F;
238        flags &= F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST;
239
240        // Assign mut so we go through arc only once.
241        let mut_new = new._get_inner_mut();
242        let chunks = unsafe { mut_new.chunks_mut() };
243        let chunk = self.chunks()[i].clone();
244        chunks.clear();
245        chunks.push(chunk);
246        mut_new.compute_len();
247        mut_new._set_flags(flags);
248        new
249    }
250
251    pub fn is_sorted_flag(&self) -> IsSorted {
252        if self.len() <= 1 {
253            return IsSorted::Ascending;
254        }
255        self.get_flags().is_sorted()
256    }
257
258    pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
259        let mut flags = self.get_flags();
260        flags.set_sorted(sorted);
261        self.set_flags(flags);
262    }
263
264    pub(crate) fn clear_flags(&mut self) {
265        self.set_flags(StatisticsFlags::empty());
266    }
267    pub fn get_flags(&self) -> StatisticsFlags {
268        self.0._get_flags()
269    }
270
271    pub(crate) fn set_flags(&mut self, flags: StatisticsFlags) {
272        self._get_inner_mut()._set_flags(flags)
273    }
274
275    pub fn into_frame(self) -> DataFrame {
276        // SAFETY: A single-column dataframe cannot have length mismatches or duplicate names
277        unsafe { DataFrame::new_no_checks(self.len(), vec![self.into()]) }
278    }
279
280    /// Rename series.
281    pub fn rename(&mut self, name: PlSmallStr) -> &mut Series {
282        self._get_inner_mut().rename(name);
283        self
284    }
285
286    /// Return this Series with a new name.
287    pub fn with_name(mut self, name: PlSmallStr) -> Series {
288        self.rename(name);
289        self
290    }
291
292    pub fn from_arrow_chunks(name: PlSmallStr, arrays: Vec<ArrayRef>) -> PolarsResult<Series> {
293        Self::try_from((name, arrays))
294    }
295
296    pub fn from_arrow(name: PlSmallStr, array: ArrayRef) -> PolarsResult<Series> {
297        Self::try_from((name, array))
298    }
299
300    /// Shrink the capacity of this array to fit its length.
301    pub fn shrink_to_fit(&mut self) {
302        self._get_inner_mut().shrink_to_fit()
303    }
304
305    /// Append in place. This is done by adding the chunks of `other` to this [`Series`].
306    ///
307    /// See [`ChunkedArray::append`] and [`ChunkedArray::extend`].
308    pub fn append(&mut self, other: &Series) -> PolarsResult<&mut Self> {
309        let must_cast = other.dtype().matches_schema_type(self.dtype())?;
310        if must_cast {
311            let other = other.cast(self.dtype())?;
312            self.append_owned(other)?;
313        } else {
314            self._get_inner_mut().append(other)?;
315        }
316        Ok(self)
317    }
318
319    /// Append in place. This is done by adding the chunks of `other` to this [`Series`].
320    ///
321    /// See [`ChunkedArray::append_owned`] and [`ChunkedArray::extend`].
322    pub fn append_owned(&mut self, other: Series) -> PolarsResult<&mut Self> {
323        let must_cast = other.dtype().matches_schema_type(self.dtype())?;
324        if must_cast {
325            let other = other.cast(self.dtype())?;
326            self._get_inner_mut().append_owned(other)?;
327        } else {
328            self._get_inner_mut().append_owned(other)?;
329        }
330        Ok(self)
331    }
332
333    /// Redo a length and null_count compute
334    pub fn compute_len(&mut self) {
335        self._get_inner_mut().compute_len()
336    }
337
338    /// Extend the memory backed by this array with the values from `other`.
339    ///
340    /// See [`ChunkedArray::extend`] and [`ChunkedArray::append`].
341    pub fn extend(&mut self, other: &Series) -> PolarsResult<&mut Self> {
342        let must_cast = other.dtype().matches_schema_type(self.dtype())?;
343        if must_cast {
344            let other = other.cast(self.dtype())?;
345            self._get_inner_mut().extend(&other)?;
346        } else {
347            self._get_inner_mut().extend(other)?;
348        }
349        Ok(self)
350    }
351
352    /// Sort the series with specific options.
353    ///
354    /// # Example
355    ///
356    /// ```rust
357    /// # use polars_core::prelude::*;
358    /// # fn main() -> PolarsResult<()> {
359    /// let s = Series::new("foo".into(), [2, 1, 3]);
360    /// let sorted = s.sort(SortOptions::default())?;
361    /// assert_eq!(sorted, Series::new("foo".into(), [1, 2, 3]));
362    /// # Ok(())
363    /// }
364    /// ```
365    ///
366    /// See [`SortOptions`] for more options.
367    pub fn sort(&self, sort_options: SortOptions) -> PolarsResult<Self> {
368        self.sort_with(sort_options)
369    }
370
371    /// Only implemented for numeric types
372    pub fn as_single_ptr(&mut self) -> PolarsResult<usize> {
373        self._get_inner_mut().as_single_ptr()
374    }
375
376    pub fn cast(&self, dtype: &DataType) -> PolarsResult<Self> {
377        self.cast_with_options(dtype, CastOptions::NonStrict)
378    }
379
380    /// Cast [`Series`] to another [`DataType`].
381    pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {
382        use DataType as D;
383
384        let do_clone = match dtype {
385            D::Unknown(UnknownKind::Any) => true,
386            D::Unknown(UnknownKind::Int(_)) if self.dtype().is_integer() => true,
387            D::Unknown(UnknownKind::Float) if self.dtype().is_float() => true,
388            D::Unknown(UnknownKind::Str)
389                if self.dtype().is_string() | self.dtype().is_categorical() =>
390            {
391                true
392            },
393            dt if dt.is_primitive() && dt == self.dtype() => true,
394            #[cfg(feature = "dtype-categorical")]
395            D::Enum(None, _) => {
396                polars_bail!(InvalidOperation: "cannot cast / initialize Enum without categories present");
397            },
398            _ => false,
399        };
400
401        if do_clone {
402            return Ok(self.clone());
403        }
404
405        pub fn cast_dtype(dtype: &DataType) -> Option<DataType> {
406            match dtype {
407                D::Unknown(UnknownKind::Int(v)) => Some(materialize_dyn_int(*v).dtype()),
408                D::Unknown(UnknownKind::Float) => Some(DataType::Float64),
409                D::Unknown(UnknownKind::Str) => Some(DataType::String),
410                // Best leave as is.
411                D::List(inner) => cast_dtype(inner.as_ref()).map(Box::new).map(D::List),
412                #[cfg(feature = "dtype-struct")]
413                D::Struct(fields) => {
414                    // @NOTE: We only allocate if we really need to.
415
416                    let mut field_iter = fields.iter().enumerate();
417                    let mut new_fields = loop {
418                        let (i, field) = field_iter.next()?;
419
420                        if let Some(dtype) = cast_dtype(&field.dtype) {
421                            let mut new_fields = Vec::with_capacity(fields.len());
422                            new_fields.extend(fields.iter().take(i).cloned());
423                            new_fields.push(Field {
424                                name: field.name.clone(),
425                                dtype,
426                            });
427                            break new_fields;
428                        }
429                    };
430
431                    new_fields.extend(fields.iter().skip(new_fields.len()).cloned().map(|field| {
432                        let dtype = cast_dtype(&field.dtype).unwrap_or(field.dtype);
433                        Field {
434                            name: field.name.clone(),
435                            dtype,
436                        }
437                    }));
438
439                    Some(D::Struct(new_fields))
440                },
441                _ => None,
442            }
443        }
444
445        let casted = cast_dtype(dtype);
446        let dtype = match casted {
447            None => dtype,
448            Some(ref dtype) => dtype,
449        };
450
451        // Always allow casting all nulls to other all nulls.
452        let len = self.len();
453        if self.null_count() == len {
454            return Ok(Series::full_null(self.name().clone(), len, dtype));
455        }
456
457        let new_options = match options {
458            // Strictness is handled on this level to improve error messages.
459            CastOptions::Strict => CastOptions::NonStrict,
460            opt => opt,
461        };
462
463        let ret = self.0.cast(dtype, new_options);
464
465        match options {
466            CastOptions::NonStrict | CastOptions::Overflowing => ret,
467            CastOptions::Strict => {
468                let ret = ret?;
469                if self.null_count() != ret.null_count() {
470                    handle_casting_failures(self, &ret)?;
471                }
472                Ok(ret)
473            },
474        }
475    }
476
477    /// Cast from physical to logical types without any checks on the validity of the cast.
478    ///
479    /// # Safety
480    ///
481    /// This can lead to invalid memory access in downstream code.
482    pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Self> {
483        match self.dtype() {
484            #[cfg(feature = "dtype-struct")]
485            DataType::Struct(_) => self.struct_().unwrap().cast_unchecked(dtype),
486            DataType::List(_) => self.list().unwrap().cast_unchecked(dtype),
487            dt if dt.is_primitive_numeric() => {
488                with_match_physical_numeric_polars_type!(dt, |$T| {
489                    let ca: &ChunkedArray<$T> = self.as_ref().as_ref().as_ref();
490                        ca.cast_unchecked(dtype)
491                })
492            },
493            DataType::Binary => self.binary().unwrap().cast_unchecked(dtype),
494            _ => self.cast_with_options(dtype, CastOptions::Overflowing),
495        }
496    }
497
498    /// Convert a non-logical series back into a logical series without casting.
499    ///
500    /// # Safety
501    ///
502    /// This can lead to invalid memory access in downstream code.
503    pub unsafe fn from_physical_unchecked(&self, dtype: &DataType) -> PolarsResult<Self> {
504        debug_assert!(!self.dtype().is_logical());
505
506        if self.dtype() == dtype {
507            return Ok(self.clone());
508        }
509
510        use DataType as D;
511        match (self.dtype(), dtype) {
512            #[cfg(feature = "dtype-decimal")]
513            (D::Int128, D::Decimal(precision, scale)) => {
514                self.clone().into_decimal(*precision, scale.unwrap())
515            },
516
517            #[cfg(feature = "dtype-categorical")]
518            (D::UInt32, D::Categorical(revmap, ordering)) => match revmap {
519                Some(revmap) => Ok(unsafe {
520                    CategoricalChunked::from_cats_and_rev_map_unchecked(
521                        self.u32().unwrap().clone(),
522                        revmap.clone(),
523                        false,
524                        *ordering,
525                    )
526                }
527                .into_series()),
528                // In the streaming engine this is `None` and the global string cache is turned on
529                // for the duration of the query.
530                None => Ok(unsafe {
531                    CategoricalChunked::from_global_indices_unchecked(
532                        self.u32().unwrap().clone(),
533                        *ordering,
534                    )
535                    .into_series()
536                }),
537            },
538            #[cfg(feature = "dtype-categorical")]
539            (D::UInt32, D::Enum(revmap, ordering)) => Ok(unsafe {
540                CategoricalChunked::from_cats_and_rev_map_unchecked(
541                    self.u32().unwrap().clone(),
542                    revmap.as_ref().unwrap().clone(),
543                    true,
544                    *ordering,
545                )
546            }
547            .into_series()),
548
549            (D::Int32, D::Date) => feature_gated!("dtype-time", Ok(self.clone().into_date())),
550            (D::Int64, D::Datetime(tu, tz)) => feature_gated!(
551                "dtype-datetime",
552                Ok(self.clone().into_datetime(*tu, tz.clone()))
553            ),
554            (D::Int64, D::Duration(tu)) => {
555                feature_gated!("dtype-duration", Ok(self.clone().into_duration(*tu)))
556            },
557            (D::Int64, D::Time) => feature_gated!("dtype-time", Ok(self.clone().into_time())),
558
559            (D::List(_), D::List(to)) => unsafe {
560                self.list()
561                    .unwrap()
562                    .from_physical_unchecked(to.as_ref().clone())
563                    .map(|ca| ca.into_series())
564            },
565            #[cfg(feature = "dtype-array")]
566            (D::Array(_, lw), D::Array(to, rw)) if lw == rw => unsafe {
567                self.array()
568                    .unwrap()
569                    .from_physical_unchecked(to.as_ref().clone())
570                    .map(|ca| ca.into_series())
571            },
572            #[cfg(feature = "dtype-struct")]
573            (D::Struct(_), D::Struct(to)) => unsafe {
574                self.struct_()
575                    .unwrap()
576                    .from_physical_unchecked(to.as_slice())
577                    .map(|ca| ca.into_series())
578            },
579
580            _ => panic!("invalid from_physical({dtype:?}) for {:?}", self.dtype()),
581        }
582    }
583
584    /// Cast numerical types to f64, and keep floats as is.
585    pub fn to_float(&self) -> PolarsResult<Series> {
586        match self.dtype() {
587            DataType::Float32 | DataType::Float64 => Ok(self.clone()),
588            _ => self.cast_with_options(&DataType::Float64, CastOptions::Overflowing),
589        }
590    }
591
592    /// Compute the sum of all values in this Series.
593    /// Returns `Some(0)` if the array is empty, and `None` if the array only
594    /// contains null values.
595    ///
596    /// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
597    /// first cast to `Int64` to prevent overflow issues.
598    pub fn sum<T>(&self) -> PolarsResult<T>
599    where
600        T: NumCast,
601    {
602        let sum = self.sum_reduce()?;
603        let sum = sum.value().extract().unwrap();
604        Ok(sum)
605    }
606
607    /// Returns the minimum value in the array, according to the natural order.
608    /// Returns an option because the array is nullable.
609    pub fn min<T>(&self) -> PolarsResult<Option<T>>
610    where
611        T: NumCast,
612    {
613        let min = self.min_reduce()?;
614        let min = min.value().extract::<T>();
615        Ok(min)
616    }
617
618    /// Returns the maximum value in the array, according to the natural order.
619    /// Returns an option because the array is nullable.
620    pub fn max<T>(&self) -> PolarsResult<Option<T>>
621    where
622        T: NumCast,
623    {
624        let max = self.max_reduce()?;
625        let max = max.value().extract::<T>();
626        Ok(max)
627    }
628
629    /// Explode a list Series. This expands every item to a new row..
630    pub fn explode(&self) -> PolarsResult<Series> {
631        match self.dtype() {
632            DataType::List(_) => self.list().unwrap().explode(),
633            #[cfg(feature = "dtype-array")]
634            DataType::Array(_, _) => self.array().unwrap().explode(),
635            _ => Ok(self.clone()),
636        }
637    }
638
639    /// Check if numeric value is NaN (note this is different than missing/ null)
640    pub fn is_nan(&self) -> PolarsResult<BooleanChunked> {
641        match self.dtype() {
642            DataType::Float32 => Ok(self.f32().unwrap().is_nan()),
643            DataType::Float64 => Ok(self.f64().unwrap().is_nan()),
644            DataType::Null => Ok(BooleanChunked::full_null(self.name().clone(), self.len())),
645            dt if dt.is_primitive_numeric() => {
646                let arr = BooleanArray::full(self.len(), false, ArrowDataType::Boolean)
647                    .with_validity(self.rechunk_validity());
648                Ok(BooleanChunked::with_chunk(self.name().clone(), arr))
649            },
650            _ => polars_bail!(opq = is_nan, self.dtype()),
651        }
652    }
653
654    /// Check if numeric value is NaN (note this is different than missing/null)
655    pub fn is_not_nan(&self) -> PolarsResult<BooleanChunked> {
656        match self.dtype() {
657            DataType::Float32 => Ok(self.f32().unwrap().is_not_nan()),
658            DataType::Float64 => Ok(self.f64().unwrap().is_not_nan()),
659            dt if dt.is_primitive_numeric() => {
660                let arr = BooleanArray::full(self.len(), true, ArrowDataType::Boolean)
661                    .with_validity(self.rechunk_validity());
662                Ok(BooleanChunked::with_chunk(self.name().clone(), arr))
663            },
664            _ => polars_bail!(opq = is_not_nan, self.dtype()),
665        }
666    }
667
668    /// Check if numeric value is finite
669    pub fn is_finite(&self) -> PolarsResult<BooleanChunked> {
670        match self.dtype() {
671            DataType::Float32 => Ok(self.f32().unwrap().is_finite()),
672            DataType::Float64 => Ok(self.f64().unwrap().is_finite()),
673            DataType::Null => Ok(BooleanChunked::full_null(self.name().clone(), self.len())),
674            dt if dt.is_primitive_numeric() => {
675                let arr = BooleanArray::full(self.len(), true, ArrowDataType::Boolean)
676                    .with_validity(self.rechunk_validity());
677                Ok(BooleanChunked::with_chunk(self.name().clone(), arr))
678            },
679            _ => polars_bail!(opq = is_finite, self.dtype()),
680        }
681    }
682
683    /// Check if numeric value is infinite
684    pub fn is_infinite(&self) -> PolarsResult<BooleanChunked> {
685        match self.dtype() {
686            DataType::Float32 => Ok(self.f32().unwrap().is_infinite()),
687            DataType::Float64 => Ok(self.f64().unwrap().is_infinite()),
688            DataType::Null => Ok(BooleanChunked::full_null(self.name().clone(), self.len())),
689            dt if dt.is_primitive_numeric() => {
690                let arr = BooleanArray::full(self.len(), false, ArrowDataType::Boolean)
691                    .with_validity(self.rechunk_validity());
692                Ok(BooleanChunked::with_chunk(self.name().clone(), arr))
693            },
694            _ => polars_bail!(opq = is_infinite, self.dtype()),
695        }
696    }
697
698    /// Create a new ChunkedArray with values from self where the mask evaluates `true` and values
699    /// from `other` where the mask evaluates `false`. This function automatically broadcasts unit
700    /// length inputs.
701    #[cfg(feature = "zip_with")]
702    pub fn zip_with(&self, mask: &BooleanChunked, other: &Series) -> PolarsResult<Series> {
703        let (lhs, rhs) = coerce_lhs_rhs(self, other)?;
704        lhs.zip_with_same_type(mask, rhs.as_ref())
705    }
706
707    /// Converts a Series to their physical representation, if they have one,
708    /// otherwise the series is left unchanged.
709    ///
710    /// * Date -> Int32
711    /// * Datetime -> Int64
712    /// * Duration -> Int64
713    /// * Decimal -> Int128
714    /// * Time -> Int64
715    /// * Categorical -> UInt32
716    /// * List(inner) -> List(physical of inner)
717    /// * Array(inner) -> Array(physical of inner)
718    /// * Struct -> Struct with physical repr of each struct column
719    pub fn to_physical_repr(&self) -> Cow<Series> {
720        use DataType::*;
721        match self.dtype() {
722            // NOTE: Don't use cast here, as it might rechunk (if all nulls)
723            // which is not allowed in a phys repr.
724            #[cfg(feature = "dtype-date")]
725            Date => Cow::Owned(self.date().unwrap().0.clone().into_series()),
726            #[cfg(feature = "dtype-datetime")]
727            Datetime(_, _) => Cow::Owned(self.datetime().unwrap().0.clone().into_series()),
728            #[cfg(feature = "dtype-duration")]
729            Duration(_) => Cow::Owned(self.duration().unwrap().0.clone().into_series()),
730            #[cfg(feature = "dtype-time")]
731            Time => Cow::Owned(self.time().unwrap().0.clone().into_series()),
732            #[cfg(feature = "dtype-categorical")]
733            Categorical(_, _) | Enum(_, _) => {
734                let ca = self.categorical().unwrap();
735                Cow::Owned(ca.physical().clone().into_series())
736            },
737            #[cfg(feature = "dtype-decimal")]
738            Decimal(_, _) => Cow::Owned(self.decimal().unwrap().0.clone().into_series()),
739            List(_) => match self.list().unwrap().to_physical_repr() {
740                Cow::Borrowed(_) => Cow::Borrowed(self),
741                Cow::Owned(ca) => Cow::Owned(ca.into_series()),
742            },
743            #[cfg(feature = "dtype-array")]
744            Array(_, _) => match self.array().unwrap().to_physical_repr() {
745                Cow::Borrowed(_) => Cow::Borrowed(self),
746                Cow::Owned(ca) => Cow::Owned(ca.into_series()),
747            },
748            #[cfg(feature = "dtype-struct")]
749            Struct(_) => match self.struct_().unwrap().to_physical_repr() {
750                Cow::Borrowed(_) => Cow::Borrowed(self),
751                Cow::Owned(ca) => Cow::Owned(ca.into_series()),
752            },
753            _ => Cow::Borrowed(self),
754        }
755    }
756
757    /// Traverse and collect every nth element in a new array.
758    pub fn gather_every(&self, n: usize, offset: usize) -> Series {
759        let idx = ((offset as IdxSize)..self.len() as IdxSize)
760            .step_by(n)
761            .collect_ca(PlSmallStr::EMPTY);
762        // SAFETY: we stay in-bounds.
763        unsafe { self.take_unchecked(&idx) }
764    }
765
766    #[cfg(feature = "dot_product")]
767    pub fn dot(&self, other: &Series) -> PolarsResult<f64> {
768        std::ops::Mul::mul(self, other)?.sum::<f64>()
769    }
770
771    /// Get the sum of the Series as a new Series of length 1.
772    /// Returns a Series with a single zeroed entry if self is an empty numeric series.
773    ///
774    /// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
775    /// first cast to `Int64` to prevent overflow issues.
776    pub fn sum_reduce(&self) -> PolarsResult<Scalar> {
777        use DataType::*;
778        match self.dtype() {
779            Int8 | UInt8 | Int16 | UInt16 => self.cast(&Int64).unwrap().sum_reduce(),
780            _ => self.0.sum_reduce(),
781        }
782    }
783
784    /// Get the product of an array.
785    ///
786    /// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
787    /// first cast to `Int64` to prevent overflow issues.
788    pub fn product(&self) -> PolarsResult<Scalar> {
789        #[cfg(feature = "product")]
790        {
791            use DataType::*;
792            match self.dtype() {
793                Boolean => self.cast(&DataType::Int64).unwrap().product(),
794                Int8 | UInt8 | Int16 | UInt16 | Int32 | UInt32 => {
795                    let s = self.cast(&Int64).unwrap();
796                    s.product()
797                },
798                Int64 => Ok(self.i64().unwrap().prod_reduce()),
799                UInt64 => Ok(self.u64().unwrap().prod_reduce()),
800                #[cfg(feature = "dtype-i128")]
801                Int128 => Ok(self.i128().unwrap().prod_reduce()),
802                Float32 => Ok(self.f32().unwrap().prod_reduce()),
803                Float64 => Ok(self.f64().unwrap().prod_reduce()),
804                dt => {
805                    polars_bail!(InvalidOperation: "`product` operation not supported for dtype `{dt}`")
806                },
807            }
808        }
809        #[cfg(not(feature = "product"))]
810        {
811            panic!("activate 'product' feature")
812        }
813    }
814
815    /// Cast throws an error if conversion had overflows
816    pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Series> {
817        self.cast_with_options(dtype, CastOptions::Strict)
818    }
819
820    #[cfg(feature = "dtype-decimal")]
821    pub(crate) fn into_decimal(
822        self,
823        precision: Option<usize>,
824        scale: usize,
825    ) -> PolarsResult<Series> {
826        match self.dtype() {
827            DataType::Int128 => Ok(self
828                .i128()
829                .unwrap()
830                .clone()
831                .into_decimal(precision, scale)?
832                .into_series()),
833            DataType::Decimal(cur_prec, cur_scale)
834                if (cur_prec.is_none() || precision.is_none() || *cur_prec == precision)
835                    && *cur_scale == Some(scale) =>
836            {
837                Ok(self)
838            },
839            dt => panic!("into_decimal({precision:?}, {scale}) not implemented for {dt:?}"),
840        }
841    }
842
843    #[cfg(feature = "dtype-time")]
844    pub(crate) fn into_time(self) -> Series {
845        match self.dtype() {
846            DataType::Int64 => self.i64().unwrap().clone().into_time().into_series(),
847            DataType::Time => self
848                .time()
849                .unwrap()
850                .as_ref()
851                .clone()
852                .into_time()
853                .into_series(),
854            dt => panic!("date not implemented for {dt:?}"),
855        }
856    }
857
858    pub(crate) fn into_date(self) -> Series {
859        #[cfg(not(feature = "dtype-date"))]
860        {
861            panic!("activate feature dtype-date")
862        }
863        #[cfg(feature = "dtype-date")]
864        match self.dtype() {
865            DataType::Int32 => self.i32().unwrap().clone().into_date().into_series(),
866            DataType::Date => self
867                .date()
868                .unwrap()
869                .as_ref()
870                .clone()
871                .into_date()
872                .into_series(),
873            dt => panic!("date not implemented for {dt:?}"),
874        }
875    }
876
877    #[allow(unused_variables)]
878    pub(crate) fn into_datetime(self, timeunit: TimeUnit, tz: Option<TimeZone>) -> Series {
879        #[cfg(not(feature = "dtype-datetime"))]
880        {
881            panic!("activate feature dtype-datetime")
882        }
883
884        #[cfg(feature = "dtype-datetime")]
885        match self.dtype() {
886            DataType::Int64 => self
887                .i64()
888                .unwrap()
889                .clone()
890                .into_datetime(timeunit, tz)
891                .into_series(),
892            DataType::Datetime(_, _) => self
893                .datetime()
894                .unwrap()
895                .as_ref()
896                .clone()
897                .into_datetime(timeunit, tz)
898                .into_series(),
899            dt => panic!("into_datetime not implemented for {dt:?}"),
900        }
901    }
902
903    #[allow(unused_variables)]
904    pub(crate) fn into_duration(self, timeunit: TimeUnit) -> Series {
905        #[cfg(not(feature = "dtype-duration"))]
906        {
907            panic!("activate feature dtype-duration")
908        }
909        #[cfg(feature = "dtype-duration")]
910        match self.dtype() {
911            DataType::Int64 => self
912                .i64()
913                .unwrap()
914                .clone()
915                .into_duration(timeunit)
916                .into_series(),
917            DataType::Duration(_) => self
918                .duration()
919                .unwrap()
920                .as_ref()
921                .clone()
922                .into_duration(timeunit)
923                .into_series(),
924            dt => panic!("into_duration not implemented for {dt:?}"),
925        }
926    }
927
928    // used for formatting
929    pub fn str_value(&self, index: usize) -> PolarsResult<Cow<str>> {
930        Ok(self.0.get(index)?.str_value())
931    }
932    /// Get the head of the Series.
933    pub fn head(&self, length: Option<usize>) -> Series {
934        let len = length.unwrap_or(HEAD_DEFAULT_LENGTH);
935        self.slice(0, std::cmp::min(len, self.len()))
936    }
937
938    /// Get the tail of the Series.
939    pub fn tail(&self, length: Option<usize>) -> Series {
940        let len = length.unwrap_or(TAIL_DEFAULT_LENGTH);
941        let len = std::cmp::min(len, self.len());
942        self.slice(-(len as i64), len)
943    }
944
945    pub fn mean_reduce(&self) -> Scalar {
946        crate::scalar::reduce::mean_reduce(self.mean(), self.dtype().clone())
947    }
948
949    /// Compute the unique elements, but maintain order. This requires more work
950    /// than a naive [`Series::unique`](SeriesTrait::unique).
951    pub fn unique_stable(&self) -> PolarsResult<Series> {
952        let idx = self.arg_unique()?;
953        // SAFETY: Indices are in bounds.
954        unsafe { Ok(self.take_unchecked(&idx)) }
955    }
956
957    pub fn try_idx(&self) -> Option<&IdxCa> {
958        #[cfg(feature = "bigidx")]
959        {
960            self.try_u64()
961        }
962        #[cfg(not(feature = "bigidx"))]
963        {
964            self.try_u32()
965        }
966    }
967
968    pub fn idx(&self) -> PolarsResult<&IdxCa> {
969        #[cfg(feature = "bigidx")]
970        {
971            self.u64()
972        }
973        #[cfg(not(feature = "bigidx"))]
974        {
975            self.u32()
976        }
977    }
978
979    /// Returns an estimation of the total (heap) allocated size of the `Series` in bytes.
980    ///
981    /// # Implementation
982    /// This estimation is the sum of the size of its buffers, validity, including nested arrays.
983    /// Multiple arrays may share buffers and bitmaps. Therefore, the size of 2 arrays is not the
984    /// sum of the sizes computed from this function. In particular, [`StructArray`]'s size is an upper bound.
985    ///
986    /// When an array is sliced, its allocated size remains constant because the buffer unchanged.
987    /// However, this function will yield a smaller number. This is because this function returns
988    /// the visible size of the buffer, not its total capacity.
989    ///
990    /// FFI buffers are included in this estimation.
991    pub fn estimated_size(&self) -> usize {
992        let mut size = 0;
993        match self.dtype() {
994            #[cfg(feature = "dtype-categorical")]
995            DataType::Categorical(Some(rv), _) | DataType::Enum(Some(rv), _) => match &**rv {
996                RevMapping::Local(arr, _) => size += estimated_bytes_size(arr),
997                RevMapping::Global(map, arr, _) => {
998                    size += map.capacity() * size_of::<u32>() * 2 + estimated_bytes_size(arr);
999                },
1000            },
1001            #[cfg(feature = "object")]
1002            DataType::Object(_) => {
1003                let ArrowDataType::FixedSizeBinary(size) = self.chunks()[0].dtype() else {
1004                    unreachable!()
1005                };
1006                // This is only the pointer size in python. So will be a huge underestimation.
1007                return self.len() * *size;
1008            },
1009            _ => {},
1010        }
1011
1012        size += self
1013            .chunks()
1014            .iter()
1015            .map(|arr| estimated_bytes_size(&**arr))
1016            .sum::<usize>();
1017
1018        size
1019    }
1020
1021    /// Packs every element into a list.
1022    pub fn as_list(&self) -> ListChunked {
1023        let s = self.rechunk();
1024        // don't  use `to_arrow` as we need the physical types
1025        let values = s.chunks()[0].clone();
1026        let offsets = (0i64..(s.len() as i64 + 1)).collect::<Vec<_>>();
1027        let offsets = unsafe { Offsets::new_unchecked(offsets) };
1028
1029        let dtype = LargeListArray::default_datatype(
1030            s.dtype().to_physical().to_arrow(CompatLevel::newest()),
1031        );
1032        let new_arr = LargeListArray::new(dtype, offsets.into(), values, None);
1033        let mut out = ListChunked::with_chunk(s.name().clone(), new_arr);
1034        out.set_inner_dtype(s.dtype().clone());
1035        out
1036    }
1037}
1038
1039impl Deref for Series {
1040    type Target = dyn SeriesTrait;
1041
1042    fn deref(&self) -> &Self::Target {
1043        self.0.as_ref()
1044    }
1045}
1046
1047impl<'a> AsRef<(dyn SeriesTrait + 'a)> for Series {
1048    fn as_ref(&self) -> &(dyn SeriesTrait + 'a) {
1049        self.0.as_ref()
1050    }
1051}
1052
1053impl Default for Series {
1054    fn default() -> Self {
1055        Int64Chunked::default().into_series()
1056    }
1057}
1058
1059impl<T> AsRef<ChunkedArray<T>> for dyn SeriesTrait + '_
1060where
1061    T: 'static + PolarsDataType<IsLogical = FalseT>,
1062{
1063    fn as_ref(&self) -> &ChunkedArray<T> {
1064        // @NOTE: SeriesTrait `as_any` returns a std::any::Any for the underlying ChunkedArray /
1065        // Logical (so not the SeriesWrap).
1066        let Some(ca) = self.as_any().downcast_ref::<ChunkedArray<T>>() else {
1067            panic!(
1068                "implementation error, cannot get ref {:?} from {:?}",
1069                T::get_dtype(),
1070                self.dtype()
1071            );
1072        };
1073
1074        ca
1075    }
1076}
1077
1078impl<T> AsMut<ChunkedArray<T>> for dyn SeriesTrait + '_
1079where
1080    T: 'static + PolarsDataType<IsLogical = FalseT>,
1081{
1082    fn as_mut(&mut self) -> &mut ChunkedArray<T> {
1083        if !self.as_any_mut().is::<ChunkedArray<T>>() {
1084            panic!(
1085                "implementation error, cannot get ref {:?} from {:?}",
1086                T::get_dtype(),
1087                self.dtype()
1088            );
1089        }
1090
1091        // @NOTE: SeriesTrait `as_any` returns a std::any::Any for the underlying ChunkedArray /
1092        // Logical (so not the SeriesWrap).
1093        self.as_any_mut().downcast_mut::<ChunkedArray<T>>().unwrap()
1094    }
1095}
1096
#[cfg(test)]
mod test {
    use crate::prelude::*;
    use crate::series::*;

    /// Casting a `UInt32` series yields a series of the requested dtype.
    #[test]
    fn cast() {
        let source = UInt32Chunked::new("a".into(), &[1, 2]).into_series();

        let as_i64 = source.cast(&DataType::Int64).unwrap();
        assert!(as_i64.i64().is_ok());

        let as_f32 = source.cast(&DataType::Float32).unwrap();
        assert!(as_f32.f32().is_ok());
    }

    /// A `Series` can be built from slices, vectors and chunked arrays.
    #[test]
    fn new_series() {
        let _ = Series::new("boolean series".into(), &vec![true, false, true]);
        let _ = Series::new("int series".into(), &[1, 2, 3]);
        let _ = Int32Chunked::new("a".into(), &[1, 2, 3]).into_series();
    }

    /// A physical `List(Int32)` can be turned into a logical `List(Date)`.
    #[test]
    #[cfg(feature = "dtype-date")]
    fn roundtrip_list_logical_20311() {
        let item_field = ArrowField::new(
            PlSmallStr::from_static("item"),
            ArrowDataType::Int32,
            true,
        );
        let list_dtype = ArrowDataType::LargeList(Box::new(item_field));
        // A single list covering the one value below.
        let offsets = unsafe { Offsets::new_unchecked(vec![0, 1]) };
        let values = PrimitiveArray::new(ArrowDataType::Int32, vec![1i32].into(), None).to_boxed();
        let chunk = ListArray::new(list_dtype, offsets.into(), values, None);

        let physical = ListChunked::from_chunk_iter(PlSmallStr::from_static("a"), [chunk]);
        let logical = unsafe { physical.from_physical_unchecked(DataType::Date) }.unwrap();
        assert_eq!(logical.dtype(), &DataType::List(Box::new(DataType::Date)));
    }

    /// Structs without fields still produce one row per input value.
    #[test]
    #[cfg(feature = "dtype-struct")]
    fn new_series_from_empty_structs() {
        let struct_dtype = DataType::Struct(vec![]);
        let values = vec![AnyValue::StructOwned(Box::new((vec![], vec![]))); 3];
        let series =
            Series::from_any_values_and_dtype("".into(), &values, &struct_dtype, false).unwrap();
        assert_eq!(series.len(), 3);
    }

    /// A boxed arrow primitive array converts into a `Series`.
    #[test]
    fn new_series_from_arrow_primitive_array() {
        let array_ref: ArrayRef = Box::new(UInt32Array::from_slice([1, 2, 3, 4, 5]));

        let _ = Series::try_new("foo".into(), array_ref).unwrap();
    }

    /// Appending works for equal dtypes and fails for mismatching ones.
    #[test]
    fn series_append() {
        let mut base = Series::new("a".into(), &[1, 2]);
        let same_dtype = Series::new("b".into(), &[3]);
        base.append(&same_dtype).unwrap();
        assert_eq!(base.len(), 3);

        // add wrong type
        let other_dtype = Series::new("b".into(), &[3.0]);
        assert!(base.append(&other_dtype).is_err());
    }

    /// Appending/extending decimals of different scales keeps the scale of the left-hand side.
    #[test]
    #[cfg(feature = "dtype-decimal")]
    fn series_append_decimal() {
        let scale_two = Series::new("a".into(), &[1.1, 2.3])
            .cast(&DataType::Decimal(None, Some(2)))
            .unwrap();
        let scale_zero = Series::new("b".into(), &[3])
            .cast(&DataType::Decimal(None, Some(0)))
            .unwrap();

        // `3` appended to a scale-2 decimal is stored as `300`.
        let mut appended = scale_two.clone();
        appended.append(&scale_zero).unwrap();
        assert_eq!(appended.len(), 3);
        assert_eq!(appended.get(2).unwrap(), AnyValue::Decimal(300, 2));

        // `2.3` added to a scale-0 decimal is stored as `2`.
        let mut extended = scale_zero.clone();
        extended.extend(&scale_two).unwrap();
        assert_eq!(extended.get(2).unwrap(), AnyValue::Decimal(2, 0));
    }

    /// Positive and negative offsets select the expected first element.
    #[test]
    fn series_slice_works() {
        let series = Series::new("a".into(), &[1i64, 2, 3, 4, 5]);

        // (offset, length, expected first value)
        for (offset, length, first) in [(-3, 3, 3i64), (-5, 5, 1), (0, 5, 1)] {
            let sliced = series.slice(offset, length);
            assert_eq!(sliced.get(0).unwrap(), AnyValue::Int64(first));
        }
    }

    /// Slices reaching beyond either end of the series must not panic.
    #[test]
    fn out_of_range_slice_does_not_panic() {
        let series = Series::new("a".into(), &[1i64, 2, 3, 4, 5]);

        for (offset, length) in [(-3, 4), (-6, 2), (4, 2)] {
            let _ = series.slice(offset, length);
        }
    }
}