polars_core/series/
mod.rs

1#![allow(unsafe_op_in_unsafe_fn)]
2//! Type agnostic columnar data structure.
3use crate::chunked_array::flags::StatisticsFlags;
4pub use crate::prelude::ChunkCompareEq;
5use crate::prelude::*;
6use crate::{HEAD_DEFAULT_LENGTH, TAIL_DEFAULT_LENGTH};
7
// Panics with a message that names the unsupported operation and the dtype of the
// offending series.
//
// * `$op` - identifier of the operation; it is only stringified for the message.
// * `$s`  - expression on which `_dtype()` is called to obtain the dtype shown in the message.
macro_rules! invalid_operation_panic {
    ($op:ident, $s:expr) => {
        panic!(
            "`{}` operation not supported for dtype `{}`",
            stringify!($op),
            $s._dtype()
        )
    };
}
17
18pub mod amortized_iter;
19mod any_value;
20pub mod arithmetic;
21pub mod builder;
22mod comparison;
23mod from;
24pub mod implementations;
25mod into;
26pub(crate) mod iterator;
27pub mod ops;
28mod series_trait;
29
30use std::borrow::Cow;
31use std::hash::{Hash, Hasher};
32use std::ops::Deref;
33
34use arrow::compute::aggregate::estimated_bytes_size;
35use arrow::offset::Offsets;
36pub use from::*;
37pub use iterator::{SeriesIter, SeriesPhysIter};
38use num_traits::NumCast;
39use polars_error::feature_gated;
40use polars_utils::float::IsFloat;
41pub use series_trait::{IsSorted, *};
42
43use crate::POOL;
44use crate::chunked_array::cast::CastOptions;
45#[cfg(feature = "zip_with")]
46use crate::series::arithmetic::coerce_lhs_rhs;
47use crate::utils::{Wrap, handle_casting_failures, materialize_dyn_int};
48
/// # Series
/// The columnar data type for a DataFrame.
///
/// Most of the available functions are defined in the [SeriesTrait trait](crate::series::SeriesTrait).
///
/// The `Series` struct consists
/// of typed [ChunkedArray]'s. To quickly cast
/// a `Series` to a `ChunkedArray` you can call the method with the name of the type:
///
/// ```
/// # use polars_core::prelude::*;
/// let s: Series = [1, 2, 3].iter().collect();
/// // Quickly obtain the ChunkedArray wrapped by the Series.
/// let chunked_array = s.i32().unwrap();
/// ```
///
/// ## Arithmetic
///
/// You can do standard arithmetic on series.
/// ```
/// # use polars_core::prelude::*;
/// let s = Series::new("a".into(), [1 , 2, 3]);
/// let out_add = &s + &s;
/// let out_sub = &s - &s;
/// let out_div = &s / &s;
/// let out_mul = &s * &s;
/// ```
///
/// Or with series and numbers.
///
/// ```
/// # use polars_core::prelude::*;
/// let s: Series = (1..3).collect();
/// let out_add_one = &s + 1;
/// let out_multiply = &s * 10;
///
/// // Could not overload left hand side operator.
/// let out_divide = 1.div(&s);
/// let out_add = 1.add(&s);
/// let out_subtract = 1.sub(&s);
/// let out_multiply = 1.mul(&s);
/// ```
///
/// ## Comparison
/// You can obtain a boolean mask by comparing series.
///
/// ```
/// # use polars_core::prelude::*;
/// let s = Series::new("dollars".into(), &[1, 2, 3]);
/// let mask = s.equal(1).unwrap();
/// let valid = [true, false, false].iter();
/// assert!(mask
///     .into_iter()
///     .map(|opt_bool| opt_bool.unwrap()) // option, because series can be null
///     .zip(valid)
///     .all(|(a, b)| a == *b))
/// ```
///
/// See all the comparison operators in the [ChunkCompareEq trait](crate::chunked_array::ops::ChunkCompareEq) and
/// [ChunkCompareIneq trait](crate::chunked_array::ops::ChunkCompareIneq).
///
/// ## Iterators
/// The Series variants contain differently typed [ChunkedArray]s.
/// These structs can be turned into iterators, making it possible to use any function/closure you want
/// on a Series.
///
/// These iterators return an `Option<T>` because the values of a series may be null.
///
/// ```
/// use polars_core::prelude::*;
/// let pi = 3.14;
/// let s = Series::new("angle".into(), [2f32 * pi, pi, 1.5 * pi].as_ref());
/// let s_cos: Series = s.f32()
///                     .expect("series was not an f32 dtype")
///                     .into_iter()
///                     .map(|opt_angle| opt_angle.map(|angle| angle.cos()))
///                     .collect();
/// ```
///
/// ## Creation
/// Series can be created from different data structures. Below we'll show a few ways we can create
/// a Series object.
///
/// ```
/// # use polars_core::prelude::*;
/// // Series can be created from Vec's, slices and arrays
/// Series::new("boolean series".into(), &[true, false, true]);
/// Series::new("int series".into(), &[1, 2, 3]);
/// // And can be nullable
/// Series::new("got nulls".into(), &[Some(1), None, Some(2)]);
///
/// // Series can also be collected from iterators
/// let from_iter: Series = (0..10)
///     .into_iter()
///     .collect();
///
/// ```
// The single field is a shared, type-erased handle to the underlying implementation;
// cloning a `Series` therefore clones the `Arc`, not the data.
#[derive(Clone)]
#[must_use]
pub struct Series(pub Arc<dyn SeriesTrait>);
149
150impl PartialEq for Wrap<Series> {
151    fn eq(&self, other: &Self) -> bool {
152        self.0.equals_missing(other)
153    }
154}
155
// Marker impl only: equality itself comes from the `PartialEq` impl for `Wrap<Series>`,
// which compares through `equals_missing`.
impl Eq for Wrap<Series> {}
157
158impl Hash for Wrap<Series> {
159    fn hash<H: Hasher>(&self, state: &mut H) {
160        let rs = PlSeedableRandomStateQuality::fixed();
161        let mut h = vec![];
162        if self.0.vec_hash(rs, &mut h).is_ok() {
163            let h = h.into_iter().fold(0, |a: u64, b| a.wrapping_add(b));
164            h.hash(state)
165        } else {
166            self.len().hash(state);
167            self.null_count().hash(state);
168            self.dtype().hash(state);
169        }
170    }
171}
172
173impl Series {
174    /// Create a new empty Series.
175    pub fn new_empty(name: PlSmallStr, dtype: &DataType) -> Series {
176        Series::full_null(name, 0, dtype)
177    }
178
179    pub fn clear(&self) -> Series {
180        if self.is_empty() {
181            self.clone()
182        } else {
183            match self.dtype() {
184                #[cfg(feature = "object")]
185                DataType::Object(_) => self
186                    .take(&ChunkedArray::<IdxType>::new_vec(PlSmallStr::EMPTY, vec![]))
187                    .unwrap(),
188                dt => Series::new_empty(self.name().clone(), dt),
189            }
190        }
191    }
192
193    #[doc(hidden)]
194    pub fn _get_inner_mut(&mut self) -> &mut dyn SeriesTrait {
195        if Arc::weak_count(&self.0) + Arc::strong_count(&self.0) != 1 {
196            self.0 = self.0.clone_inner();
197        }
198        Arc::get_mut(&mut self.0).expect("implementation error")
199    }
200
201    /// Take or clone a owned copy of the inner [`ChunkedArray`].
202    pub fn take_inner<T: PolarsPhysicalType>(self) -> ChunkedArray<T> {
203        let arc_any = self.0.as_arc_any();
204        let downcast = arc_any
205            .downcast::<implementations::SeriesWrap<ChunkedArray<T>>>()
206            .unwrap();
207
208        match Arc::try_unwrap(downcast) {
209            Ok(ca) => ca.0,
210            Err(ca) => ca.as_ref().as_ref().clone(),
211        }
212    }
213
214    /// # Safety
215    /// The caller must ensure the length and the data types of `ArrayRef` does not change.
216    /// And that the null_count is updated (e.g. with a `compute_len()`)
217    pub unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
218        #[allow(unused_mut)]
219        let mut ca = self._get_inner_mut();
220        ca.chunks_mut()
221    }
222
223    pub fn into_chunks(mut self) -> Vec<ArrayRef> {
224        let ca = self._get_inner_mut();
225        let chunks = std::mem::take(unsafe { ca.chunks_mut() });
226        ca.compute_len();
227        chunks
228    }
229
230    // TODO! this probably can now be removed, now we don't have special case for structs.
231    pub fn select_chunk(&self, i: usize) -> Self {
232        let mut new = self.clear();
233        let mut flags = self.get_flags();
234
235        use StatisticsFlags as F;
236        flags &= F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST;
237
238        // Assign mut so we go through arc only once.
239        let mut_new = new._get_inner_mut();
240        let chunks = unsafe { mut_new.chunks_mut() };
241        let chunk = self.chunks()[i].clone();
242        chunks.clear();
243        chunks.push(chunk);
244        mut_new.compute_len();
245        mut_new._set_flags(flags);
246        new
247    }
248
249    pub fn is_sorted_flag(&self) -> IsSorted {
250        if self.len() <= 1 {
251            return IsSorted::Ascending;
252        }
253        self.get_flags().is_sorted()
254    }
255
256    pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
257        let mut flags = self.get_flags();
258        flags.set_sorted(sorted);
259        self.set_flags(flags);
260    }
261
262    pub(crate) fn clear_flags(&mut self) {
263        self.set_flags(StatisticsFlags::empty());
264    }
265    pub fn get_flags(&self) -> StatisticsFlags {
266        self.0._get_flags()
267    }
268
269    pub(crate) fn set_flags(&mut self, flags: StatisticsFlags) {
270        self._get_inner_mut()._set_flags(flags)
271    }
272
273    pub fn into_frame(self) -> DataFrame {
274        // SAFETY: A single-column dataframe cannot have length mismatches or duplicate names
275        unsafe { DataFrame::new_no_checks(self.len(), vec![self.into()]) }
276    }
277
278    /// Rename series.
279    pub fn rename(&mut self, name: PlSmallStr) -> &mut Series {
280        self._get_inner_mut().rename(name);
281        self
282    }
283
284    /// Return this Series with a new name.
285    pub fn with_name(mut self, name: PlSmallStr) -> Series {
286        self.rename(name);
287        self
288    }
289
290    pub fn from_arrow_chunks(name: PlSmallStr, arrays: Vec<ArrayRef>) -> PolarsResult<Series> {
291        Self::try_from((name, arrays))
292    }
293
294    pub fn from_arrow(name: PlSmallStr, array: ArrayRef) -> PolarsResult<Series> {
295        Self::try_from((name, array))
296    }
297
298    /// Shrink the capacity of this array to fit its length.
299    pub fn shrink_to_fit(&mut self) {
300        self._get_inner_mut().shrink_to_fit()
301    }
302
303    /// Append in place. This is done by adding the chunks of `other` to this [`Series`].
304    ///
305    /// See [`ChunkedArray::append`] and [`ChunkedArray::extend`].
306    pub fn append(&mut self, other: &Series) -> PolarsResult<&mut Self> {
307        let must_cast = other.dtype().matches_schema_type(self.dtype())?;
308        if must_cast {
309            let other = other.cast(self.dtype())?;
310            self.append_owned(other)?;
311        } else {
312            self._get_inner_mut().append(other)?;
313        }
314        Ok(self)
315    }
316
317    /// Append in place. This is done by adding the chunks of `other` to this [`Series`].
318    ///
319    /// See [`ChunkedArray::append_owned`] and [`ChunkedArray::extend`].
320    pub fn append_owned(&mut self, other: Series) -> PolarsResult<&mut Self> {
321        let must_cast = other.dtype().matches_schema_type(self.dtype())?;
322        if must_cast {
323            let other = other.cast(self.dtype())?;
324            self._get_inner_mut().append_owned(other)?;
325        } else {
326            self._get_inner_mut().append_owned(other)?;
327        }
328        Ok(self)
329    }
330
331    /// Redo a length and null_count compute
332    pub fn compute_len(&mut self) {
333        self._get_inner_mut().compute_len()
334    }
335
336    /// Extend the memory backed by this array with the values from `other`.
337    ///
338    /// See [`ChunkedArray::extend`] and [`ChunkedArray::append`].
339    pub fn extend(&mut self, other: &Series) -> PolarsResult<&mut Self> {
340        let must_cast = other.dtype().matches_schema_type(self.dtype())?;
341        if must_cast {
342            let other = other.cast(self.dtype())?;
343            self._get_inner_mut().extend(&other)?;
344        } else {
345            self._get_inner_mut().extend(other)?;
346        }
347        Ok(self)
348    }
349
350    /// Sort the series with specific options.
351    ///
352    /// # Example
353    ///
354    /// ```rust
355    /// # use polars_core::prelude::*;
356    /// # fn main() -> PolarsResult<()> {
357    /// let s = Series::new("foo".into(), [2, 1, 3]);
358    /// let sorted = s.sort(SortOptions::default())?;
359    /// assert_eq!(sorted, Series::new("foo".into(), [1, 2, 3]));
360    /// # Ok(())
361    /// }
362    /// ```
363    ///
364    /// See [`SortOptions`] for more options.
365    pub fn sort(&self, sort_options: SortOptions) -> PolarsResult<Self> {
366        self.sort_with(sort_options)
367    }
368
369    /// Only implemented for numeric types
370    pub fn as_single_ptr(&mut self) -> PolarsResult<usize> {
371        self._get_inner_mut().as_single_ptr()
372    }
373
374    pub fn cast(&self, dtype: &DataType) -> PolarsResult<Self> {
375        self.cast_with_options(dtype, CastOptions::NonStrict)
376    }
377
    /// Cast [`Series`] to another [`DataType`].
    ///
    /// Steps, in order:
    /// 1. Work on the result of `trim_lists_to_normalized_offsets` / `propagate_nulls` when
    ///    those return `Some`, otherwise on `self`.
    /// 2. Return that series unchanged when the target dtype is an `Unknown` kind already
    ///    satisfied by the source dtype, or a primitive dtype equal to the source dtype.
    /// 3. Replace `Unknown` placeholders in the target dtype with concrete dtypes.
    /// 4. If every value is null, return a full-null series of the target dtype.
    /// 5. Otherwise run the cast; with `CastOptions::Strict` the cast itself is done
    ///    non-strict and `handle_casting_failures` is called on the result afterwards.
    pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {
        // Borrow `self` unless one of the helpers hands back a new series.
        let slf = self
            .trim_lists_to_normalized_offsets()
            .map_or(Cow::Borrowed(self), Cow::Owned);
        let slf = slf.propagate_nulls().map_or(slf, Cow::Owned);

        use DataType as D;
        // Cases in which no cast is needed and the (possibly normalized) input is returned.
        let do_clone = match dtype {
            D::Unknown(UnknownKind::Any | UnknownKind::Ufunc) => true,
            D::Unknown(UnknownKind::Int(_)) if slf.dtype().is_integer() => true,
            D::Unknown(UnknownKind::Float) if slf.dtype().is_float() => true,
            D::Unknown(UnknownKind::Str)
                if slf.dtype().is_string() | slf.dtype().is_categorical() =>
            {
                true
            },
            dt if dt.is_primitive() && dt == slf.dtype() => true,
            _ => false,
        };

        if do_clone {
            return Ok(slf.into_owned());
        }

        // Maps `Unknown` int/float/str dtypes, also when nested in `List` or `Struct`, to
        // concrete dtypes. Returns `None` when nothing in `dtype` has to be replaced.
        pub fn cast_dtype(dtype: &DataType) -> Option<DataType> {
            match dtype {
                D::Unknown(UnknownKind::Int(v)) => Some(materialize_dyn_int(*v).dtype()),
                D::Unknown(UnknownKind::Float) => Some(DataType::Float64),
                D::Unknown(UnknownKind::Str) => Some(DataType::String),
                // Best leave as is.
                D::List(inner) => cast_dtype(inner.as_ref()).map(Box::new).map(D::List),
                #[cfg(feature = "dtype-struct")]
                D::Struct(fields) => {
                    // @NOTE: We only allocate if we really need to.

                    // Find the first field that needs replacing; `?` returns `None` from
                    // `cast_dtype` when the fields run out without finding one.
                    let mut field_iter = fields.iter().enumerate();
                    let mut new_fields = loop {
                        let (i, field) = field_iter.next()?;

                        if let Some(dtype) = cast_dtype(&field.dtype) {
                            let mut new_fields = Vec::with_capacity(fields.len());
                            // Fields before index `i` are copied over as they are.
                            new_fields.extend(fields.iter().take(i).cloned());
                            new_fields.push(Field {
                                name: field.name.clone(),
                                dtype,
                            });
                            break new_fields;
                        }
                    };

                    // Remaining fields: replace the dtype where needed, keep it otherwise.
                    new_fields.extend(fields.iter().skip(new_fields.len()).cloned().map(|field| {
                        let dtype = cast_dtype(&field.dtype).unwrap_or(field.dtype);
                        Field {
                            name: field.name,
                            dtype,
                        }
                    }));

                    Some(D::Struct(new_fields))
                },
                _ => None,
            }
        }

        let mut casted = cast_dtype(dtype);
        // Target is a list with a null inner dtype: use the source's inner dtype instead,
        // if the source has one.
        if dtype.is_list() && dtype.inner_dtype().is_some_and(|dt| dt.is_null()) {
            if let Some(from_inner_dtype) = slf.dtype().inner_dtype() {
                casted = Some(DataType::List(Box::new(from_inner_dtype.clone())));
            }
        }
        // From here on `dtype` is the resolved target dtype.
        let dtype = match casted {
            None => dtype,
            Some(ref dtype) => dtype,
        };

        // Always allow casting all nulls to other all nulls.
        let len = slf.len();
        if slf.null_count() == len {
            return Ok(Series::full_null(slf.name().clone(), len, dtype));
        }

        let new_options = match options {
            // Strictness is handled on this level to improve error messages.
            CastOptions::Strict => CastOptions::NonStrict,
            opt => opt,
        };

        let out = slf.0.cast(dtype, new_options)?;
        if options.is_strict() {
            handle_casting_failures(slf.as_ref(), &out)?;
        }
        Ok(out)
    }
472
    /// Cast from physical to logical types without any checks on the validity of the cast.
    ///
    /// Struct (with the `dtype-struct` feature), list, primitive numeric and binary series
    /// forward to the `cast_unchecked` of their chunked array. Every other dtype falls back
    /// to [`Series::cast_with_options`] with [`CastOptions::Overflowing`].
    ///
    /// # Safety
    ///
    /// This can lead to invalid memory access in downstream code.
    pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Self> {
        match self.dtype() {
            #[cfg(feature = "dtype-struct")]
            DataType::Struct(_) => self.struct_().unwrap().cast_unchecked(dtype),
            DataType::List(_) => self.list().unwrap().cast_unchecked(dtype),
            dt if dt.is_primitive_numeric() => {
                // The macro binds `$T` to the polars type matching `dt`, so the series can
                // be viewed as the corresponding `ChunkedArray`.
                with_match_physical_numeric_polars_type!(dt, |$T| {
                    let ca: &ChunkedArray<$T> = self.as_ref().as_ref().as_ref();
                        ca.cast_unchecked(dtype)
                })
            },
            DataType::Binary => self.binary().unwrap().cast_unchecked(dtype),
            // No unchecked path for the remaining dtypes: use a regular overflowing cast.
            _ => self.cast_with_options(dtype, CastOptions::Overflowing),
        }
    }
493
494    /// Convert a non-logical series back into a logical series without casting.
495    ///
496    /// # Safety
497    ///
498    /// This can lead to invalid memory access in downstream code.
499    pub unsafe fn from_physical_unchecked(&self, dtype: &DataType) -> PolarsResult<Self> {
500        debug_assert!(!self.dtype().is_logical(), "{:?}", self.dtype());
501
502        if self.dtype() == dtype {
503            return Ok(self.clone());
504        }
505
506        use DataType as D;
507        match (self.dtype(), dtype) {
508            #[cfg(feature = "dtype-decimal")]
509            (D::Int128, D::Decimal(precision, scale)) => {
510                let ca = self.i128().unwrap();
511                Ok(ca
512                    .clone()
513                    .into_decimal_unchecked(*precision, *scale)
514                    .into_series())
515            },
516
517            #[cfg(feature = "dtype-categorical")]
518            (phys, D::Categorical(cats, _)) if &cats.physical().dtype() == phys => {
519                with_match_categorical_physical_type!(cats.physical(), |$C| {
520                    type CA = ChunkedArray<<$C as PolarsCategoricalType>::PolarsPhysical>;
521                    let ca = self.as_ref().as_any().downcast_ref::<CA>().unwrap();
522                    Ok(CategoricalChunked::<$C>::from_cats_and_dtype_unchecked(
523                        ca.clone(),
524                        dtype.clone(),
525                    )
526                    .into_series())
527                })
528            },
529            #[cfg(feature = "dtype-categorical")]
530            (phys, D::Enum(fcats, _)) if &fcats.physical().dtype() == phys => {
531                with_match_categorical_physical_type!(fcats.physical(), |$C| {
532                    type CA = ChunkedArray<<$C as PolarsCategoricalType>::PolarsPhysical>;
533                    let ca = self.as_ref().as_any().downcast_ref::<CA>().unwrap();
534                    Ok(CategoricalChunked::<$C>::from_cats_and_dtype_unchecked(
535                        ca.clone(),
536                        dtype.clone(),
537                    )
538                    .into_series())
539                })
540            },
541
542            (D::Int32, D::Date) => feature_gated!("dtype-time", Ok(self.clone().into_date())),
543            (D::Int64, D::Datetime(tu, tz)) => feature_gated!(
544                "dtype-datetime",
545                Ok(self.clone().into_datetime(*tu, tz.clone()))
546            ),
547            (D::Int64, D::Duration(tu)) => {
548                feature_gated!("dtype-duration", Ok(self.clone().into_duration(*tu)))
549            },
550            (D::Int64, D::Time) => feature_gated!("dtype-time", Ok(self.clone().into_time())),
551
552            (D::List(_), D::List(to)) => unsafe {
553                self.list()
554                    .unwrap()
555                    .from_physical_unchecked(to.as_ref().clone())
556                    .map(|ca| ca.into_series())
557            },
558            #[cfg(feature = "dtype-array")]
559            (D::Array(_, lw), D::Array(to, rw)) if lw == rw => unsafe {
560                self.array()
561                    .unwrap()
562                    .from_physical_unchecked(to.as_ref().clone())
563                    .map(|ca| ca.into_series())
564            },
565            #[cfg(feature = "dtype-struct")]
566            (D::Struct(_), D::Struct(to)) => unsafe {
567                self.struct_()
568                    .unwrap()
569                    .from_physical_unchecked(to.as_slice())
570                    .map(|ca| ca.into_series())
571            },
572
573            _ => panic!("invalid from_physical({dtype:?}) for {:?}", self.dtype()),
574        }
575    }
576
577    /// Cast numerical types to f64, and keep floats as is.
578    pub fn to_float(&self) -> PolarsResult<Series> {
579        match self.dtype() {
580            DataType::Float32 | DataType::Float64 => Ok(self.clone()),
581            _ => self.cast_with_options(&DataType::Float64, CastOptions::Overflowing),
582        }
583    }
584
585    /// Compute the sum of all values in this Series.
586    /// Returns `Some(0)` if the array is empty, and `None` if the array only
587    /// contains null values.
588    ///
589    /// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
590    /// first cast to `Int64` to prevent overflow issues.
591    pub fn sum<T>(&self) -> PolarsResult<T>
592    where
593        T: NumCast + IsFloat,
594    {
595        let sum = self.sum_reduce()?;
596        let sum = sum.value().extract().unwrap();
597        Ok(sum)
598    }
599
600    /// Returns the minimum value in the array, according to the natural order.
601    /// Returns an option because the array is nullable.
602    pub fn min<T>(&self) -> PolarsResult<Option<T>>
603    where
604        T: NumCast + IsFloat,
605    {
606        let min = self.min_reduce()?;
607        let min = min.value().extract::<T>();
608        Ok(min)
609    }
610
611    /// Returns the maximum value in the array, according to the natural order.
612    /// Returns an option because the array is nullable.
613    pub fn max<T>(&self) -> PolarsResult<Option<T>>
614    where
615        T: NumCast + IsFloat,
616    {
617        let max = self.max_reduce()?;
618        let max = max.value().extract::<T>();
619        Ok(max)
620    }
621
622    /// Explode a list Series. This expands every item to a new row..
623    pub fn explode(&self, skip_empty: bool) -> PolarsResult<Series> {
624        match self.dtype() {
625            DataType::List(_) => self.list().unwrap().explode(skip_empty),
626            #[cfg(feature = "dtype-array")]
627            DataType::Array(_, _) => self.array().unwrap().explode(skip_empty),
628            _ => Ok(self.clone()),
629        }
630    }
631
632    /// Check if numeric value is NaN (note this is different than missing/ null)
633    pub fn is_nan(&self) -> PolarsResult<BooleanChunked> {
634        match self.dtype() {
635            DataType::Float32 => Ok(self.f32().unwrap().is_nan()),
636            DataType::Float64 => Ok(self.f64().unwrap().is_nan()),
637            DataType::Null => Ok(BooleanChunked::full_null(self.name().clone(), self.len())),
638            dt if dt.is_primitive_numeric() => {
639                let arr = BooleanArray::full(self.len(), false, ArrowDataType::Boolean)
640                    .with_validity(self.rechunk_validity());
641                Ok(BooleanChunked::with_chunk(self.name().clone(), arr))
642            },
643            _ => polars_bail!(opq = is_nan, self.dtype()),
644        }
645    }
646
647    /// Check if numeric value is NaN (note this is different than missing/null)
648    pub fn is_not_nan(&self) -> PolarsResult<BooleanChunked> {
649        match self.dtype() {
650            DataType::Float32 => Ok(self.f32().unwrap().is_not_nan()),
651            DataType::Float64 => Ok(self.f64().unwrap().is_not_nan()),
652            dt if dt.is_primitive_numeric() => {
653                let arr = BooleanArray::full(self.len(), true, ArrowDataType::Boolean)
654                    .with_validity(self.rechunk_validity());
655                Ok(BooleanChunked::with_chunk(self.name().clone(), arr))
656            },
657            _ => polars_bail!(opq = is_not_nan, self.dtype()),
658        }
659    }
660
661    /// Check if numeric value is finite
662    pub fn is_finite(&self) -> PolarsResult<BooleanChunked> {
663        match self.dtype() {
664            DataType::Float32 => Ok(self.f32().unwrap().is_finite()),
665            DataType::Float64 => Ok(self.f64().unwrap().is_finite()),
666            DataType::Null => Ok(BooleanChunked::full_null(self.name().clone(), self.len())),
667            dt if dt.is_primitive_numeric() => {
668                let arr = BooleanArray::full(self.len(), true, ArrowDataType::Boolean)
669                    .with_validity(self.rechunk_validity());
670                Ok(BooleanChunked::with_chunk(self.name().clone(), arr))
671            },
672            _ => polars_bail!(opq = is_finite, self.dtype()),
673        }
674    }
675
676    /// Check if numeric value is infinite
677    pub fn is_infinite(&self) -> PolarsResult<BooleanChunked> {
678        match self.dtype() {
679            DataType::Float32 => Ok(self.f32().unwrap().is_infinite()),
680            DataType::Float64 => Ok(self.f64().unwrap().is_infinite()),
681            DataType::Null => Ok(BooleanChunked::full_null(self.name().clone(), self.len())),
682            dt if dt.is_primitive_numeric() => {
683                let arr = BooleanArray::full(self.len(), false, ArrowDataType::Boolean)
684                    .with_validity(self.rechunk_validity());
685                Ok(BooleanChunked::with_chunk(self.name().clone(), arr))
686            },
687            _ => polars_bail!(opq = is_infinite, self.dtype()),
688        }
689    }
690
691    /// Create a new ChunkedArray with values from self where the mask evaluates `true` and values
692    /// from `other` where the mask evaluates `false`. This function automatically broadcasts unit
693    /// length inputs.
694    #[cfg(feature = "zip_with")]
695    pub fn zip_with(&self, mask: &BooleanChunked, other: &Series) -> PolarsResult<Series> {
696        let (lhs, rhs) = coerce_lhs_rhs(self, other)?;
697        lhs.zip_with_same_type(mask, rhs.as_ref())
698    }
699
    /// Converts a Series to their physical representation, if they have one,
    /// otherwise the series is left unchanged.
    ///
    /// * Date -> Int32
    /// * Datetime -> Int64
    /// * Duration -> Int64
    /// * Decimal -> Int128
    /// * Time -> Int64
    /// * Categorical -> U8/U16/U32
    /// * List(inner) -> List(physical of inner)
    /// * Array(inner) -> Array(physical of inner)
    /// * Struct -> Struct with physical repr of each struct column
    ///
    /// The result borrows `self` (`Cow::Borrowed`) when no conversion is needed and is an
    /// owned series otherwise.
    pub fn to_physical_repr(&self) -> Cow<'_, Series> {
        use DataType::*;
        match self.dtype() {
            // NOTE: Don't use cast here, as it might rechunk (if all nulls)
            // which is not allowed in a phys repr.
            #[cfg(feature = "dtype-date")]
            Date => Cow::Owned(self.date().unwrap().phys.clone().into_series()),
            #[cfg(feature = "dtype-datetime")]
            Datetime(_, _) => Cow::Owned(self.datetime().unwrap().phys.clone().into_series()),
            #[cfg(feature = "dtype-duration")]
            Duration(_) => Cow::Owned(self.duration().unwrap().phys.clone().into_series()),
            #[cfg(feature = "dtype-time")]
            Time => Cow::Owned(self.time().unwrap().phys.clone().into_series()),
            #[cfg(feature = "dtype-categorical")]
            dt @ (Categorical(_, _) | Enum(_, _)) => {
                // The macro binds `$C` to the categorical type matching the physical dtype.
                with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {
                    let ca = self.cat::<$C>().unwrap();
                    Cow::Owned(ca.physical().clone().into_series())
                })
            },
            #[cfg(feature = "dtype-decimal")]
            Decimal(_, _) => Cow::Owned(self.decimal().unwrap().phys.clone().into_series()),
            // Nested dtypes: when the chunked array reports a borrowed result, hand back
            // `self` itself rather than wrapping the borrowed array in a new series.
            List(_) => match self.list().unwrap().to_physical_repr() {
                Cow::Borrowed(_) => Cow::Borrowed(self),
                Cow::Owned(ca) => Cow::Owned(ca.into_series()),
            },
            #[cfg(feature = "dtype-array")]
            Array(_, _) => match self.array().unwrap().to_physical_repr() {
                Cow::Borrowed(_) => Cow::Borrowed(self),
                Cow::Owned(ca) => Cow::Owned(ca.into_series()),
            },
            #[cfg(feature = "dtype-struct")]
            Struct(_) => match self.struct_().unwrap().to_physical_repr() {
                Cow::Borrowed(_) => Cow::Borrowed(self),
                Cow::Owned(ca) => Cow::Owned(ca.into_series()),
            },
            // Everything else is returned as is.
            _ => Cow::Borrowed(self),
        }
    }
751
752    /// Traverse and collect every nth element in a new array.
753    pub fn gather_every(&self, n: usize, offset: usize) -> PolarsResult<Series> {
754        polars_ensure!(n > 0, ComputeError: "cannot perform gather every for `n=0`");
755        let idx = ((offset as IdxSize)..self.len() as IdxSize)
756            .step_by(n)
757            .collect_ca(PlSmallStr::EMPTY);
758        // SAFETY: we stay in-bounds.
759        Ok(unsafe { self.take_unchecked(&idx) })
760    }
761
762    #[cfg(feature = "dot_product")]
763    pub fn dot(&self, other: &Series) -> PolarsResult<f64> {
764        std::ops::Mul::mul(self, other)?.sum::<f64>()
765    }
766
767    /// Get the sum of the Series as a new Series of length 1.
768    /// Returns a Series with a single zeroed entry if self is an empty numeric series.
769    ///
770    /// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
771    /// first cast to `Int64` to prevent overflow issues.
772    pub fn sum_reduce(&self) -> PolarsResult<Scalar> {
773        use DataType::*;
774        match self.dtype() {
775            Int8 | UInt8 | Int16 | UInt16 => self.cast(&Int64).unwrap().sum_reduce(),
776            _ => self.0.sum_reduce(),
777        }
778    }
779
780    /// Get the product of an array.
781    ///
782    /// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
783    /// first cast to `Int64` to prevent overflow issues.
784    pub fn product(&self) -> PolarsResult<Scalar> {
785        #[cfg(feature = "product")]
786        {
787            use DataType::*;
788            match self.dtype() {
789                Boolean => self.cast(&DataType::Int64).unwrap().product(),
790                Int8 | UInt8 | Int16 | UInt16 | Int32 | UInt32 => {
791                    let s = self.cast(&Int64).unwrap();
792                    s.product()
793                },
794                Int64 => Ok(self.i64().unwrap().prod_reduce()),
795                UInt64 => Ok(self.u64().unwrap().prod_reduce()),
796                #[cfg(feature = "dtype-i128")]
797                Int128 => Ok(self.i128().unwrap().prod_reduce()),
798                #[cfg(feature = "dtype-u128")]
799                UInt128 => Ok(self.u128().unwrap().prod_reduce()),
800                Float32 => Ok(self.f32().unwrap().prod_reduce()),
801                Float64 => Ok(self.f64().unwrap().prod_reduce()),
802                dt => {
803                    polars_bail!(InvalidOperation: "`product` operation not supported for dtype `{dt}`")
804                },
805            }
806        }
807        #[cfg(not(feature = "product"))]
808        {
809            panic!("activate 'product' feature")
810        }
811    }
812
813    /// Cast throws an error if conversion had overflows
814    pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Series> {
815        self.cast_with_options(dtype, CastOptions::Strict)
816    }
817
818    #[cfg(feature = "dtype-decimal")]
819    pub(crate) fn into_decimal(self, precision: usize, scale: usize) -> PolarsResult<Series> {
820        match self.dtype() {
821            DataType::Int128 => Ok(self
822                .i128()
823                .unwrap()
824                .clone()
825                .into_decimal(precision, scale)?
826                .into_series()),
827            DataType::Decimal(cur_prec, cur_scale)
828                if scale == *cur_scale && precision >= *cur_prec =>
829            {
830                Ok(self)
831            },
832            dt => panic!("into_decimal({precision:?}, {scale}) not implemented for {dt:?}"),
833        }
834    }
835
836    #[cfg(feature = "dtype-time")]
837    pub(crate) fn into_time(self) -> Series {
838        match self.dtype() {
839            DataType::Int64 => self.i64().unwrap().clone().into_time().into_series(),
840            DataType::Time => self
841                .time()
842                .unwrap()
843                .physical()
844                .clone()
845                .into_time()
846                .into_series(),
847            dt => panic!("date not implemented for {dt:?}"),
848        }
849    }
850
851    pub(crate) fn into_date(self) -> Series {
852        #[cfg(not(feature = "dtype-date"))]
853        {
854            panic!("activate feature dtype-date")
855        }
856        #[cfg(feature = "dtype-date")]
857        match self.dtype() {
858            DataType::Int32 => self.i32().unwrap().clone().into_date().into_series(),
859            DataType::Date => self
860                .date()
861                .unwrap()
862                .physical()
863                .clone()
864                .into_date()
865                .into_series(),
866            dt => panic!("date not implemented for {dt:?}"),
867        }
868    }
869
870    #[allow(unused_variables)]
871    pub(crate) fn into_datetime(self, timeunit: TimeUnit, tz: Option<TimeZone>) -> Series {
872        #[cfg(not(feature = "dtype-datetime"))]
873        {
874            panic!("activate feature dtype-datetime")
875        }
876
877        #[cfg(feature = "dtype-datetime")]
878        match self.dtype() {
879            DataType::Int64 => self
880                .i64()
881                .unwrap()
882                .clone()
883                .into_datetime(timeunit, tz)
884                .into_series(),
885            DataType::Datetime(_, _) => self
886                .datetime()
887                .unwrap()
888                .physical()
889                .clone()
890                .into_datetime(timeunit, tz)
891                .into_series(),
892            dt => panic!("into_datetime not implemented for {dt:?}"),
893        }
894    }
895
896    #[allow(unused_variables)]
897    pub(crate) fn into_duration(self, timeunit: TimeUnit) -> Series {
898        #[cfg(not(feature = "dtype-duration"))]
899        {
900            panic!("activate feature dtype-duration")
901        }
902        #[cfg(feature = "dtype-duration")]
903        match self.dtype() {
904            DataType::Int64 => self
905                .i64()
906                .unwrap()
907                .clone()
908                .into_duration(timeunit)
909                .into_series(),
910            DataType::Duration(_) => self
911                .duration()
912                .unwrap()
913                .physical()
914                .clone()
915                .into_duration(timeunit)
916                .into_series(),
917            dt => panic!("into_duration not implemented for {dt:?}"),
918        }
919    }
920
921    // used for formatting
922    pub fn str_value(&self, index: usize) -> PolarsResult<Cow<'_, str>> {
923        Ok(self.0.get(index)?.str_value())
924    }
925    /// Get the head of the Series.
926    pub fn head(&self, length: Option<usize>) -> Series {
927        let len = length.unwrap_or(HEAD_DEFAULT_LENGTH);
928        self.slice(0, std::cmp::min(len, self.len()))
929    }
930
931    /// Get the tail of the Series.
932    pub fn tail(&self, length: Option<usize>) -> Series {
933        let len = length.unwrap_or(TAIL_DEFAULT_LENGTH);
934        let len = std::cmp::min(len, self.len());
935        self.slice(-(len as i64), len)
936    }
937
938    pub fn mean_reduce(&self) -> Scalar {
939        crate::scalar::reduce::mean_reduce(self.mean(), self.dtype().clone())
940    }
941
942    /// Compute the unique elements, but maintain order. This requires more work
943    /// than a naive [`Series::unique`](SeriesTrait::unique).
944    pub fn unique_stable(&self) -> PolarsResult<Series> {
945        let idx = self.arg_unique()?;
946        // SAFETY: Indices are in bounds.
947        unsafe { Ok(self.take_unchecked(&idx)) }
948    }
949
950    pub fn try_idx(&self) -> Option<&IdxCa> {
951        #[cfg(feature = "bigidx")]
952        {
953            self.try_u64()
954        }
955        #[cfg(not(feature = "bigidx"))]
956        {
957            self.try_u32()
958        }
959    }
960
961    pub fn idx(&self) -> PolarsResult<&IdxCa> {
962        #[cfg(feature = "bigidx")]
963        {
964            self.u64()
965        }
966        #[cfg(not(feature = "bigidx"))]
967        {
968            self.u32()
969        }
970    }
971
972    /// Returns an estimation of the total (heap) allocated size of the `Series` in bytes.
973    ///
974    /// # Implementation
975    /// This estimation is the sum of the size of its buffers, validity, including nested arrays.
976    /// Multiple arrays may share buffers and bitmaps. Therefore, the size of 2 arrays is not the
977    /// sum of the sizes computed from this function. In particular, [`StructArray`]'s size is an upper bound.
978    ///
979    /// When an array is sliced, its allocated size remains constant because the buffer unchanged.
980    /// However, this function will yield a smaller number. This is because this function returns
981    /// the visible size of the buffer, not its total capacity.
982    ///
983    /// FFI buffers are included in this estimation.
984    pub fn estimated_size(&self) -> usize {
985        let mut size = 0;
986        match self.dtype() {
987            // TODO @ cat-rework: include mapping size here?
988            #[cfg(feature = "object")]
989            DataType::Object(_) => {
990                let ArrowDataType::FixedSizeBinary(size) = self.chunks()[0].dtype() else {
991                    unreachable!()
992                };
993                // This is only the pointer size in python. So will be a huge underestimation.
994                return self.len() * *size;
995            },
996            _ => {},
997        }
998
999        size += self
1000            .chunks()
1001            .iter()
1002            .map(|arr| estimated_bytes_size(&**arr))
1003            .sum::<usize>();
1004
1005        size
1006    }
1007
1008    /// Packs every element into a list.
1009    pub fn as_list(&self) -> ListChunked {
1010        let s = self.rechunk();
1011        // don't  use `to_arrow` as we need the physical types
1012        let values = s.chunks()[0].clone();
1013        let offsets = (0i64..(s.len() as i64 + 1)).collect::<Vec<_>>();
1014        let offsets = unsafe { Offsets::new_unchecked(offsets) };
1015
1016        let dtype = LargeListArray::default_datatype(
1017            s.dtype().to_physical().to_arrow(CompatLevel::newest()),
1018        );
1019        let new_arr = LargeListArray::new(dtype, offsets.into(), values, None);
1020        let mut out = ListChunked::with_chunk(s.name().clone(), new_arr);
1021        out.set_inner_dtype(s.dtype().clone());
1022        out
1023    }
1024
1025    pub fn row_encode_unordered(&self) -> PolarsResult<BinaryOffsetChunked> {
1026        row_encode::_get_rows_encoded_ca_unordered(
1027            self.name().clone(),
1028            &[self.clone().into_column()],
1029        )
1030    }
1031
1032    pub fn row_encode_ordered(
1033        &self,
1034        descending: bool,
1035        nulls_last: bool,
1036    ) -> PolarsResult<BinaryOffsetChunked> {
1037        row_encode::_get_rows_encoded_ca(
1038            self.name().clone(),
1039            &[self.clone().into_column()],
1040            &[descending],
1041            &[nulls_last],
1042        )
1043    }
1044}
1045
1046impl Deref for Series {
1047    type Target = dyn SeriesTrait;
1048
1049    fn deref(&self) -> &Self::Target {
1050        self.0.as_ref()
1051    }
1052}
1053
1054impl<'a> AsRef<dyn SeriesTrait + 'a> for Series {
1055    fn as_ref(&self) -> &(dyn SeriesTrait + 'a) {
1056        self.0.as_ref()
1057    }
1058}
1059
1060impl Default for Series {
1061    fn default() -> Self {
1062        Int64Chunked::default().into_series()
1063    }
1064}
1065
1066impl<T: PolarsPhysicalType> AsRef<ChunkedArray<T>> for dyn SeriesTrait + '_ {
1067    fn as_ref(&self) -> &ChunkedArray<T> {
1068        // @NOTE: SeriesTrait `as_any` returns a std::any::Any for the underlying ChunkedArray /
1069        // Logical (so not the SeriesWrap).
1070        let Some(ca) = self.as_any().downcast_ref::<ChunkedArray<T>>() else {
1071            panic!(
1072                "implementation error, cannot get ref {:?} from {:?}",
1073                T::get_static_dtype(),
1074                self.dtype()
1075            );
1076        };
1077
1078        ca
1079    }
1080}
1081
1082impl<T: PolarsPhysicalType> AsMut<ChunkedArray<T>> for dyn SeriesTrait + '_ {
1083    fn as_mut(&mut self) -> &mut ChunkedArray<T> {
1084        if !self.as_any_mut().is::<ChunkedArray<T>>() {
1085            panic!(
1086                "implementation error, cannot get ref {:?} from {:?}",
1087                T::get_static_dtype(),
1088                self.dtype()
1089            );
1090        }
1091
1092        // @NOTE: SeriesTrait `as_any` returns a std::any::Any for the underlying ChunkedArray /
1093        // Logical (so not the SeriesWrap).
1094        self.as_any_mut().downcast_mut::<ChunkedArray<T>>().unwrap()
1095    }
1096}
1097
// Unit tests for `Series` construction, casting, appending and slicing.
#[cfg(test)]
mod test {
    use crate::prelude::*;
    use crate::series::*;

    // Casting a `UInt32` series to `Int64` and to `Float32` must yield series
    // that can be accessed as those dtypes.
    #[test]
    fn cast() {
        let ar = UInt32Chunked::new("a".into(), &[1, 2]);
        let s = ar.into_series();
        let s2 = s.cast(&DataType::Int64).unwrap();

        assert!(s2.i64().is_ok());
        let s2 = s.cast(&DataType::Float32).unwrap();
        assert!(s2.f32().is_ok());
    }

    // Smoke test: constructing series from a bool vec, an int slice and a
    // chunked array must not panic.
    #[test]
    fn new_series() {
        let _ = Series::new("boolean series".into(), &vec![true, false, true]);
        let _ = Series::new("int series".into(), &[1, 2, 3]);
        let ca = Int32Chunked::new("a".into(), &[1, 2, 3]);
        let _ = ca.into_series();
    }

    // Regression test (20311): a physical `List<Int32>` reinterpreted with inner
    // dtype `Date` must report `List<Date>` as its dtype.
    #[test]
    #[cfg(feature = "dtype-date")]
    fn roundtrip_list_logical_20311() {
        let list = ListChunked::from_chunk_iter(
            PlSmallStr::from_static("a"),
            [ListArray::new(
                ArrowDataType::LargeList(Box::new(ArrowField::new(
                    LIST_VALUES_NAME,
                    ArrowDataType::Int32,
                    true,
                ))),
                unsafe { Offsets::new_unchecked(vec![0, 1]) }.into(),
                PrimitiveArray::new(ArrowDataType::Int32, vec![1i32].into(), None).to_boxed(),
                None,
            )],
        );
        let list = unsafe { list.from_physical_unchecked(DataType::Date) }.unwrap();
        assert_eq!(list.dtype(), &DataType::List(Box::new(DataType::Date)));
    }

    // Three field-less struct values must produce a series of length 3.
    #[test]
    #[cfg(feature = "dtype-struct")]
    fn new_series_from_empty_structs() {
        let dtype = DataType::Struct(vec![]);
        let empties = vec![AnyValue::StructOwned(Box::new((vec![], vec![]))); 3];
        let s = Series::from_any_values_and_dtype("".into(), &empties, &dtype, false).unwrap();
        assert_eq!(s.len(), 3);
    }
    // A boxed arrow `UInt32Array` must be accepted by `Series::try_new`.
    #[test]
    fn new_series_from_arrow_primitive_array() {
        let array = UInt32Array::from_slice([1, 2, 3, 4, 5]);
        let array_ref: ArrayRef = Box::new(array);

        let _ = Series::try_new("foo".into(), array_ref).unwrap();
    }

    // Appending a series of the same dtype grows the length; appending a
    // series of a different dtype must return an error.
    #[test]
    fn series_append() {
        let mut s1 = Series::new("a".into(), &[1, 2]);
        let s2 = Series::new("b".into(), &[3]);
        s1.append(&s2).unwrap();
        assert_eq!(s1.len(), 3);

        // add wrong type
        let s2 = Series::new("b".into(), &[3.0]);
        assert!(s1.append(&s2).is_err())
    }

    // Appending/extending decimals with different scales: the incoming values
    // are expected to take on the scale of the series they are added to.
    #[test]
    #[cfg(feature = "dtype-decimal")]
    fn series_append_decimal() {
        let s1 = Series::new("a".into(), &[1.1, 2.3])
            .cast(&DataType::Decimal(38, 2))
            .unwrap();
        let s2 = Series::new("b".into(), &[3])
            .cast(&DataType::Decimal(38, 0))
            .unwrap();

        {
            // Scale-0 value `3` appended to a scale-2 series is expected as `300`.
            let mut s1 = s1.clone();
            s1.append(&s2).unwrap();
            assert_eq!(s1.len(), 3);
            assert_eq!(s1.get(2).unwrap(), AnyValue::Decimal(300, 38, 2));
        }

        {
            // Scale-2 value `2.3` added to a scale-0 series is expected as `2`.
            let mut s2 = s2;
            s2.extend(&s1).unwrap();
            assert_eq!(s2.get(2).unwrap(), AnyValue::Decimal(2, 38, 0));
        }
    }

    // Negative offsets count from the end; check the first element of each slice.
    #[test]
    fn series_slice_works() {
        let series = Series::new("a".into(), &[1i64, 2, 3, 4, 5]);

        let slice_1 = series.slice(-3, 3);
        let slice_2 = series.slice(-5, 5);
        let slice_3 = series.slice(0, 5);

        assert_eq!(slice_1.get(0).unwrap(), AnyValue::Int64(3));
        assert_eq!(slice_2.get(0).unwrap(), AnyValue::Int64(1));
        assert_eq!(slice_3.get(0).unwrap(), AnyValue::Int64(1));
    }

    // Slices whose offset or length reach outside the series must not panic.
    #[test]
    fn out_of_range_slice_does_not_panic() {
        let series = Series::new("a".into(), &[1i64, 2, 3, 4, 5]);

        let _ = series.slice(-3, 4);
        let _ = series.slice(-6, 2);
        let _ = series.slice(4, 2);
    }
}