polars_core/series/
mod.rs

1#![allow(unsafe_op_in_unsafe_fn)]
2//! Type agnostic columnar data structure.
3use crate::chunked_array::flags::StatisticsFlags;
4pub use crate::prelude::ChunkCompareEq;
5use crate::prelude::*;
6use crate::{HEAD_DEFAULT_LENGTH, TAIL_DEFAULT_LENGTH};
7
/// Panic with a message saying that an operation is not supported for the
/// dtype of the given value.
///
/// `$operation` is an identifier that is stringified into the message, and
/// `$series` is any expression exposing a `_dtype()` method whose result can
/// be formatted with `{}`.
macro_rules! invalid_operation_panic {
    ($operation:ident, $series:expr) => {
        panic!(
            "`{}` operation not supported for dtype `{}`",
            stringify!($operation),
            $series._dtype()
        )
    };
}
17
18pub mod amortized_iter;
19mod any_value;
20pub mod arithmetic;
21pub mod builder;
22#[cfg(feature = "dtype-categorical")]
23pub mod categorical_to_arrow;
24mod comparison;
25mod from;
26pub mod implementations;
27mod into;
28pub use into::ToArrowConverter;
29pub(crate) mod iterator;
30pub mod ops;
31#[cfg(feature = "proptest")]
32pub mod proptest;
33mod series_trait;
34
35use std::borrow::Cow;
36use std::hash::{Hash, Hasher};
37use std::ops::Deref;
38
39use arrow::compute::aggregate::estimated_bytes_size;
40use arrow::offset::Offsets;
41pub use from::*;
42pub use iterator::{SeriesIter, SeriesPhysIter};
43use num_traits::NumCast;
44use polars_error::feature_gated;
45use polars_utils::float::IsFloat;
46pub use series_trait::{IsSorted, *};
47
48use crate::POOL;
49use crate::chunked_array::cast::CastOptions;
50#[cfg(feature = "zip_with")]
51use crate::series::arithmetic::coerce_lhs_rhs;
52use crate::utils::{Wrap, handle_casting_failures, materialize_dyn_int};
53
54/// # Series
55/// The columnar data type for a DataFrame.
56///
57/// Most of the available functions are defined in the [SeriesTrait trait](crate::series::SeriesTrait).
58///
59/// The `Series` struct consists
60/// of typed [ChunkedArray]'s. To quickly cast
61/// a `Series` to a `ChunkedArray` you can call the method with the name of the type:
62///
63/// ```
64/// # use polars_core::prelude::*;
65/// let s: Series = [1, 2, 3].iter().collect();
66/// // Quickly obtain the ChunkedArray wrapped by the Series.
67/// let chunked_array = s.i32().unwrap();
68/// ```
69///
70/// ## Arithmetic
71///
72/// You can do standard arithmetic on series.
73/// ```
74/// # use polars_core::prelude::*;
75/// let s = Series::new("a".into(), [1 , 2, 3]);
76/// let out_add = &s + &s;
77/// let out_sub = &s - &s;
78/// let out_div = &s / &s;
79/// let out_mul = &s * &s;
80/// ```
81///
82/// Or with series and numbers.
83///
84/// ```
85/// # use polars_core::prelude::*;
86/// let s: Series = (1..3).collect();
87/// let out_add_one = &s + 1;
88/// let out_multiply = &s * 10;
89///
90/// // Could not overload left hand side operator.
91/// let out_divide = 1.div(&s);
92/// let out_add = 1.add(&s);
93/// let out_subtract = 1.sub(&s);
94/// let out_multiply = 1.mul(&s);
95/// ```
96///
97/// ## Comparison
98/// You can obtain boolean mask by comparing series.
99///
100/// ```
101/// # use polars_core::prelude::*;
102/// let s = Series::new("dollars".into(), &[1, 2, 3]);
103/// let mask = s.equal(1).unwrap();
104/// let valid = [true, false, false].iter();
105/// assert!(mask
106///     .into_iter()
107///     .map(|opt_bool| opt_bool.unwrap()) // option, because series can be null
108///     .zip(valid)
109///     .all(|(a, b)| a == *b))
110/// ```
111///
112/// See all the comparison operators in the [ChunkCompareEq trait](crate::chunked_array::ops::ChunkCompareEq) and
113/// [ChunkCompareIneq trait](crate::chunked_array::ops::ChunkCompareIneq).
114///
115/// ## Iterators
116/// The Series variants contain differently typed [ChunkedArray]s.
/// These structs can be turned into iterators, making it possible to use any function or closure you want
/// on a Series.
119///
120/// These iterators return an `Option<T>` because the values of a series may be null.
121///
122/// ```
123/// use polars_core::prelude::*;
124/// let pi = 3.14;
125/// let s = Series::new("angle".into(), [2f32 * pi, pi, 1.5 * pi].as_ref());
126/// let s_cos: Series = s.f32()
127///                     .expect("series was not an f32 dtype")
128///                     .into_iter()
129///                     .map(|opt_angle| opt_angle.map(|angle| angle.cos()))
130///                     .collect();
131/// ```
132///
133/// ## Creation
/// Series can be created from different data structures. Below we'll show a few ways we can create
/// a Series object.
136///
137/// ```
138/// # use polars_core::prelude::*;
139/// // Series can be created from Vec's, slices and arrays
140/// Series::new("boolean series".into(), &[true, false, true]);
141/// Series::new("int series".into(), &[1, 2, 3]);
142/// // And can be nullable
143/// Series::new("got nulls".into(), &[Some(1), None, Some(2)]);
144///
145/// // Series can also be collected from iterators
146/// let from_iter: Series = (0..10)
147///     .into_iter()
148///     .collect();
149///
150/// ```
#[derive(Clone)]
#[must_use]
// `Clone` is derived over the single `Arc` field, so cloning a `Series`
// clones the `Arc` handle to the `dyn SeriesTrait` object.
pub struct Series(pub Arc<dyn SeriesTrait>);
154
155impl PartialEq for Wrap<Series> {
156    fn eq(&self, other: &Self) -> bool {
157        self.0.equals_missing(other)
158    }
159}
160
161impl Eq for Wrap<Series> {}
162
163impl Hash for Wrap<Series> {
164    fn hash<H: Hasher>(&self, state: &mut H) {
165        let rs = PlSeedableRandomStateQuality::fixed();
166        let mut h = vec![];
167        if self.0.vec_hash(rs, &mut h).is_ok() {
168            let h = h.into_iter().fold(0, |a: u64, b| a.wrapping_add(b));
169            h.hash(state)
170        } else {
171            self.len().hash(state);
172            self.null_count().hash(state);
173            self.dtype().hash(state);
174        }
175    }
176}
177
178impl Series {
179    /// Create a new empty Series.
180    pub fn new_empty(name: PlSmallStr, dtype: &DataType) -> Series {
181        Series::full_null(name, 0, dtype)
182    }
183
184    pub fn clear(&self) -> Series {
185        if self.is_empty() {
186            self.clone()
187        } else {
188            match self.dtype() {
189                #[cfg(feature = "object")]
190                DataType::Object(_) => self
191                    .take(&ChunkedArray::<IdxType>::new_vec(PlSmallStr::EMPTY, vec![]))
192                    .unwrap(),
193                dt => Series::new_empty(self.name().clone(), dt),
194            }
195        }
196    }
197
198    #[doc(hidden)]
199    pub fn _get_inner_mut(&mut self) -> &mut dyn SeriesTrait {
200        if Arc::weak_count(&self.0) + Arc::strong_count(&self.0) != 1 {
201            self.0 = self.0.clone_inner();
202        }
203        Arc::get_mut(&mut self.0).expect("implementation error")
204    }
205
206    /// Take or clone a owned copy of the inner [`ChunkedArray`].
207    pub fn take_inner<T: PolarsPhysicalType>(self) -> ChunkedArray<T> {
208        let arc_any = self.0.as_arc_any();
209        let downcast = arc_any
210            .downcast::<implementations::SeriesWrap<ChunkedArray<T>>>()
211            .unwrap();
212
213        match Arc::try_unwrap(downcast) {
214            Ok(ca) => ca.0,
215            Err(ca) => ca.as_ref().as_ref().clone(),
216        }
217    }
218
219    /// # Safety
220    /// The caller must ensure the length and the data types of `ArrayRef` does not change.
221    /// And that the null_count is updated (e.g. with a `compute_len()`)
222    pub unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
223        #[allow(unused_mut)]
224        let mut ca = self._get_inner_mut();
225        ca.chunks_mut()
226    }
227
228    pub fn into_chunks(mut self) -> Vec<ArrayRef> {
229        let ca = self._get_inner_mut();
230        let chunks = std::mem::take(unsafe { ca.chunks_mut() });
231        ca.compute_len();
232        chunks
233    }
234
235    // TODO! this probably can now be removed, now we don't have special case for structs.
236    pub fn select_chunk(&self, i: usize) -> Self {
237        let mut new = self.clear();
238        let mut flags = self.get_flags();
239
240        use StatisticsFlags as F;
241        flags &= F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST;
242
243        // Assign mut so we go through arc only once.
244        let mut_new = new._get_inner_mut();
245        let chunks = unsafe { mut_new.chunks_mut() };
246        let chunk = self.chunks()[i].clone();
247        chunks.clear();
248        chunks.push(chunk);
249        mut_new.compute_len();
250        mut_new._set_flags(flags);
251        new
252    }
253
254    pub fn is_sorted_flag(&self) -> IsSorted {
255        if self.len() <= 1 {
256            return IsSorted::Ascending;
257        }
258        self.get_flags().is_sorted()
259    }
260
261    pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
262        let mut flags = self.get_flags();
263        flags.set_sorted(sorted);
264        self.set_flags(flags);
265    }
266
267    pub(crate) fn clear_flags(&mut self) {
268        self.set_flags(StatisticsFlags::empty());
269    }
270    pub fn get_flags(&self) -> StatisticsFlags {
271        self.0._get_flags()
272    }
273
274    pub(crate) fn set_flags(&mut self, flags: StatisticsFlags) {
275        self._get_inner_mut()._set_flags(flags)
276    }
277
278    pub fn into_frame(self) -> DataFrame {
279        // SAFETY: A single-column dataframe cannot have length mismatches or duplicate names
280        unsafe { DataFrame::new_unchecked(self.len(), vec![self.into()]) }
281    }
282
283    /// Rename series.
284    pub fn rename(&mut self, name: PlSmallStr) -> &mut Series {
285        self._get_inner_mut().rename(name);
286        self
287    }
288
289    /// Return this Series with a new name.
290    pub fn with_name(mut self, name: PlSmallStr) -> Series {
291        self.rename(name);
292        self
293    }
294
295    pub fn from_arrow_chunks(name: PlSmallStr, arrays: Vec<ArrayRef>) -> PolarsResult<Series> {
296        Self::try_from((name, arrays))
297    }
298
299    pub fn from_arrow(name: PlSmallStr, array: ArrayRef) -> PolarsResult<Series> {
300        Self::try_from((name, array))
301    }
302
303    /// Shrink the capacity of this array to fit its length.
304    pub fn shrink_to_fit(&mut self) {
305        self._get_inner_mut().shrink_to_fit()
306    }
307
308    /// Append in place. This is done by adding the chunks of `other` to this [`Series`].
309    ///
310    /// See [`ChunkedArray::append`] and [`ChunkedArray::extend`].
311    pub fn append(&mut self, other: &Series) -> PolarsResult<&mut Self> {
312        let must_cast = other.dtype().matches_schema_type(self.dtype())?;
313        if must_cast {
314            let other = other.cast(self.dtype())?;
315            self.append_owned(other)?;
316        } else {
317            self._get_inner_mut().append(other)?;
318        }
319        Ok(self)
320    }
321
322    /// Append in place. This is done by adding the chunks of `other` to this [`Series`].
323    ///
324    /// See [`ChunkedArray::append_owned`] and [`ChunkedArray::extend`].
325    pub fn append_owned(&mut self, other: Series) -> PolarsResult<&mut Self> {
326        let must_cast = other.dtype().matches_schema_type(self.dtype())?;
327        if must_cast {
328            let other = other.cast(self.dtype())?;
329            self._get_inner_mut().append_owned(other)?;
330        } else {
331            self._get_inner_mut().append_owned(other)?;
332        }
333        Ok(self)
334    }
335
336    /// Redo a length and null_count compute
337    pub fn compute_len(&mut self) {
338        self._get_inner_mut().compute_len()
339    }
340
341    /// Extend the memory backed by this array with the values from `other`.
342    ///
343    /// See [`ChunkedArray::extend`] and [`ChunkedArray::append`].
344    pub fn extend(&mut self, other: &Series) -> PolarsResult<&mut Self> {
345        let must_cast = other.dtype().matches_schema_type(self.dtype())?;
346        if must_cast {
347            let other = other.cast(self.dtype())?;
348            self._get_inner_mut().extend(&other)?;
349        } else {
350            self._get_inner_mut().extend(other)?;
351        }
352        Ok(self)
353    }
354
355    /// Sort the series with specific options.
356    ///
357    /// # Example
358    ///
359    /// ```rust
360    /// # use polars_core::prelude::*;
361    /// # fn main() -> PolarsResult<()> {
362    /// let s = Series::new("foo".into(), [2, 1, 3]);
363    /// let sorted = s.sort(SortOptions::default())?;
364    /// assert_eq!(sorted, Series::new("foo".into(), [1, 2, 3]));
365    /// # Ok(())
366    /// }
367    /// ```
368    ///
369    /// See [`SortOptions`] for more options.
370    pub fn sort(&self, sort_options: SortOptions) -> PolarsResult<Self> {
371        self.sort_with(sort_options)
372    }
373
374    /// Only implemented for numeric types
375    pub fn as_single_ptr(&mut self) -> PolarsResult<usize> {
376        self._get_inner_mut().as_single_ptr()
377    }
378
379    pub fn cast(&self, dtype: &DataType) -> PolarsResult<Self> {
380        self.cast_with_options(dtype, CastOptions::NonStrict)
381    }
382
383    /// Cast [`Series`] to another [`DataType`].
384    pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {
385        let slf = self
386            .trim_lists_to_normalized_offsets()
387            .map_or(Cow::Borrowed(self), Cow::Owned);
388        let slf = slf.propagate_nulls().map_or(slf, Cow::Owned);
389
390        use DataType as D;
391        let do_clone = match dtype {
392            D::Unknown(UnknownKind::Any) => true,
393            D::Unknown(UnknownKind::Int(_)) if slf.dtype().is_integer() => true,
394            D::Unknown(UnknownKind::Float) if slf.dtype().is_float() => true,
395            D::Unknown(UnknownKind::Str)
396                if slf.dtype().is_string() | slf.dtype().is_categorical() =>
397            {
398                true
399            },
400            dt if (dt.is_primitive() || dt.is_extension()) && dt == slf.dtype() => true,
401            _ => false,
402        };
403
404        if do_clone {
405            return Ok(slf.into_owned());
406        }
407
408        pub fn cast_dtype(dtype: &DataType) -> Option<DataType> {
409            match dtype {
410                D::Unknown(UnknownKind::Int(v)) => Some(materialize_dyn_int(*v).dtype()),
411                D::Unknown(UnknownKind::Float) => Some(DataType::Float64),
412                D::Unknown(UnknownKind::Str) => Some(DataType::String),
413                // Best leave as is.
414                D::List(inner) => cast_dtype(inner.as_ref()).map(Box::new).map(D::List),
415                #[cfg(feature = "dtype-struct")]
416                D::Struct(fields) => {
417                    // @NOTE: We only allocate if we really need to.
418
419                    let mut field_iter = fields.iter().enumerate();
420                    let mut new_fields = loop {
421                        let (i, field) = field_iter.next()?;
422
423                        if let Some(dtype) = cast_dtype(&field.dtype) {
424                            let mut new_fields = Vec::with_capacity(fields.len());
425                            new_fields.extend(fields.iter().take(i).cloned());
426                            new_fields.push(Field {
427                                name: field.name.clone(),
428                                dtype,
429                            });
430                            break new_fields;
431                        }
432                    };
433
434                    new_fields.extend(fields.iter().skip(new_fields.len()).cloned().map(|field| {
435                        let dtype = cast_dtype(&field.dtype).unwrap_or(field.dtype);
436                        Field {
437                            name: field.name,
438                            dtype,
439                        }
440                    }));
441
442                    Some(D::Struct(new_fields))
443                },
444                _ => None,
445            }
446        }
447
448        let mut casted = cast_dtype(dtype);
449        if dtype.is_list() && dtype.inner_dtype().is_some_and(|dt| dt.is_null()) {
450            if let Some(from_inner_dtype) = slf.dtype().inner_dtype() {
451                casted = Some(DataType::List(Box::new(from_inner_dtype.clone())));
452            }
453        }
454        let dtype = match casted {
455            None => dtype,
456            Some(ref dtype) => dtype,
457        };
458
459        // Always allow casting all nulls to other all nulls.
460        let len = slf.len();
461        if slf.null_count() == len {
462            return Ok(Series::full_null(slf.name().clone(), len, dtype));
463        }
464
465        let new_options = match options {
466            // Strictness is handled on this level to improve error messages, if not nested.
467            // Nested types could hide cast errors, so have to be done internally.
468            CastOptions::Strict if !dtype.is_nested() => CastOptions::NonStrict,
469            opt => opt,
470        };
471
472        let out = slf.0.cast(dtype, new_options)?;
473        if options.is_strict() {
474            handle_casting_failures(slf.as_ref(), &out)?;
475        }
476        Ok(out)
477    }
478
479    /// Cast from physical to logical types without any checks on the validity of the cast.
480    ///
481    /// # Safety
482    ///
483    /// This can lead to invalid memory access in downstream code.
484    pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Self> {
485        match self.dtype() {
486            #[cfg(feature = "dtype-struct")]
487            DataType::Struct(_) => self.struct_().unwrap().cast_unchecked(dtype),
488            DataType::List(_) => self.list().unwrap().cast_unchecked(dtype),
489            dt if dt.is_primitive_numeric() => {
490                with_match_physical_numeric_polars_type!(dt, |$T| {
491                    let ca: &ChunkedArray<$T> = self.as_ref().as_ref().as_ref();
492                        ca.cast_unchecked(dtype)
493                })
494            },
495            DataType::Binary => self.binary().unwrap().cast_unchecked(dtype),
496            _ => self.cast_with_options(dtype, CastOptions::Overflowing),
497        }
498    }
499
500    /// Convert a non-logical series back into a logical series without casting.
501    ///
502    /// # Safety
503    ///
504    /// This can lead to invalid memory access in downstream code.
505    pub unsafe fn from_physical_unchecked(&self, dtype: &DataType) -> PolarsResult<Self> {
506        debug_assert!(!self.dtype().is_logical(), "{:?}", self.dtype());
507
508        if self.dtype() == dtype {
509            return Ok(self.clone());
510        }
511
512        use DataType as D;
513        match (self.dtype(), dtype) {
514            #[cfg(feature = "dtype-decimal")]
515            (D::Int128, D::Decimal(precision, scale)) => {
516                let ca = self.i128().unwrap();
517                Ok(ca
518                    .clone()
519                    .into_decimal_unchecked(*precision, *scale)
520                    .into_series())
521            },
522
523            #[cfg(feature = "dtype-categorical")]
524            (phys, D::Categorical(cats, _)) if &cats.physical().dtype() == phys => {
525                with_match_categorical_physical_type!(cats.physical(), |$C| {
526                    type CA = ChunkedArray<<$C as PolarsCategoricalType>::PolarsPhysical>;
527                    let ca = self.as_ref().as_any().downcast_ref::<CA>().unwrap();
528                    Ok(CategoricalChunked::<$C>::from_cats_and_dtype_unchecked(
529                        ca.clone(),
530                        dtype.clone(),
531                    )
532                    .into_series())
533                })
534            },
535            #[cfg(feature = "dtype-categorical")]
536            (phys, D::Enum(fcats, _)) if &fcats.physical().dtype() == phys => {
537                with_match_categorical_physical_type!(fcats.physical(), |$C| {
538                    type CA = ChunkedArray<<$C as PolarsCategoricalType>::PolarsPhysical>;
539                    let ca = self.as_ref().as_any().downcast_ref::<CA>().unwrap();
540                    Ok(CategoricalChunked::<$C>::from_cats_and_dtype_unchecked(
541                        ca.clone(),
542                        dtype.clone(),
543                    )
544                    .into_series())
545                })
546            },
547
548            (D::Int32, D::Date) => feature_gated!("dtype-time", Ok(self.clone().into_date())),
549            (D::Int64, D::Datetime(tu, tz)) => feature_gated!(
550                "dtype-datetime",
551                Ok(self.clone().into_datetime(*tu, tz.clone()))
552            ),
553            (D::Int64, D::Duration(tu)) => {
554                feature_gated!("dtype-duration", Ok(self.clone().into_duration(*tu)))
555            },
556            (D::Int64, D::Time) => feature_gated!("dtype-time", Ok(self.clone().into_time())),
557
558            (D::List(_), D::List(to)) => unsafe {
559                self.list()
560                    .unwrap()
561                    .from_physical_unchecked(to.as_ref().clone())
562                    .map(|ca| ca.into_series())
563            },
564            #[cfg(feature = "dtype-array")]
565            (D::Array(_, lw), D::Array(to, rw)) if lw == rw => unsafe {
566                self.array()
567                    .unwrap()
568                    .from_physical_unchecked(to.as_ref().clone())
569                    .map(|ca| ca.into_series())
570            },
571            #[cfg(feature = "dtype-struct")]
572            (D::Struct(_), D::Struct(to)) => unsafe {
573                self.struct_()
574                    .unwrap()
575                    .from_physical_unchecked(to.as_slice())
576                    .map(|ca| ca.into_series())
577            },
578
579            #[cfg(feature = "dtype-extension")]
580            (_, D::Extension(typ, storage)) => {
581                let storage_series = self.from_physical_unchecked(storage.as_ref())?;
582                let ext = ExtensionChunked::from_storage(typ.clone(), storage_series);
583                Ok(ext.into_series())
584            },
585
586            _ => panic!("invalid from_physical({dtype:?}) for {:?}", self.dtype()),
587        }
588    }
589
590    #[cfg(feature = "dtype-extension")]
591    pub fn into_extension(self, typ: ExtensionTypeInstance) -> Series {
592        assert!(!self.dtype().is_extension());
593        let ext = ExtensionChunked::from_storage(typ, self);
594        ext.into_series()
595    }
596
597    /// Cast numerical types to f64, and keep floats as is.
598    pub fn to_float(&self) -> PolarsResult<Series> {
599        match self.dtype() {
600            DataType::Float32 | DataType::Float64 => Ok(self.clone()),
601            _ => self.cast_with_options(&DataType::Float64, CastOptions::Overflowing),
602        }
603    }
604
605    /// Compute the sum of all values in this Series.
606    /// Returns `Some(0)` if the array is empty, and `None` if the array only
607    /// contains null values.
608    ///
609    /// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
610    /// first cast to `Int64` to prevent overflow issues.
611    pub fn sum<T>(&self) -> PolarsResult<T>
612    where
613        T: NumCast + IsFloat,
614    {
615        let sum = self.sum_reduce()?;
616        let sum = sum.value().extract().unwrap();
617        Ok(sum)
618    }
619
620    /// Returns the minimum value in the array, according to the natural order.
621    /// Returns an option because the array is nullable.
622    pub fn min<T>(&self) -> PolarsResult<Option<T>>
623    where
624        T: NumCast + IsFloat,
625    {
626        let min = self.min_reduce()?;
627        let min = min.value().extract::<T>();
628        Ok(min)
629    }
630
631    /// Returns the maximum value in the array, according to the natural order.
632    /// Returns an option because the array is nullable.
633    pub fn max<T>(&self) -> PolarsResult<Option<T>>
634    where
635        T: NumCast + IsFloat,
636    {
637        let max = self.max_reduce()?;
638        let max = max.value().extract::<T>();
639        Ok(max)
640    }
641
642    /// Explode a list Series. This expands every item to a new row..
643    pub fn explode(&self, options: ExplodeOptions) -> PolarsResult<Series> {
644        match self.dtype() {
645            DataType::List(_) => self.list().unwrap().explode(options),
646            #[cfg(feature = "dtype-array")]
647            DataType::Array(_, _) => self.array().unwrap().explode(options),
648            _ => Ok(self.clone()),
649        }
650    }
651
652    /// Check if numeric value is NaN (note this is different than missing/ null)
653    pub fn is_nan(&self) -> PolarsResult<BooleanChunked> {
654        match self.dtype() {
655            #[cfg(feature = "dtype-f16")]
656            DataType::Float16 => Ok(self.f16().unwrap().is_nan()),
657            DataType::Float32 => Ok(self.f32().unwrap().is_nan()),
658            DataType::Float64 => Ok(self.f64().unwrap().is_nan()),
659            DataType::Null => Ok(BooleanChunked::full_null(self.name().clone(), self.len())),
660            dt if dt.is_primitive_numeric() => {
661                let arr = BooleanArray::full(self.len(), false, ArrowDataType::Boolean)
662                    .with_validity(self.rechunk_validity());
663                Ok(BooleanChunked::with_chunk(self.name().clone(), arr))
664            },
665            _ => polars_bail!(opq = is_nan, self.dtype()),
666        }
667    }
668
669    /// Check if numeric value is NaN (note this is different than missing/null)
670    pub fn is_not_nan(&self) -> PolarsResult<BooleanChunked> {
671        match self.dtype() {
672            #[cfg(feature = "dtype-f16")]
673            DataType::Float16 => Ok(self.f16().unwrap().is_not_nan()),
674            DataType::Float32 => Ok(self.f32().unwrap().is_not_nan()),
675            DataType::Float64 => Ok(self.f64().unwrap().is_not_nan()),
676            dt if dt.is_primitive_numeric() => {
677                let arr = BooleanArray::full(self.len(), true, ArrowDataType::Boolean)
678                    .with_validity(self.rechunk_validity());
679                Ok(BooleanChunked::with_chunk(self.name().clone(), arr))
680            },
681            _ => polars_bail!(opq = is_not_nan, self.dtype()),
682        }
683    }
684
685    /// Check if numeric value is finite
686    pub fn is_finite(&self) -> PolarsResult<BooleanChunked> {
687        match self.dtype() {
688            #[cfg(feature = "dtype-f16")]
689            DataType::Float16 => Ok(self.f16().unwrap().is_finite()),
690            DataType::Float32 => Ok(self.f32().unwrap().is_finite()),
691            DataType::Float64 => Ok(self.f64().unwrap().is_finite()),
692            DataType::Null => Ok(BooleanChunked::full_null(self.name().clone(), self.len())),
693            dt if dt.is_primitive_numeric() => {
694                let arr = BooleanArray::full(self.len(), true, ArrowDataType::Boolean)
695                    .with_validity(self.rechunk_validity());
696                Ok(BooleanChunked::with_chunk(self.name().clone(), arr))
697            },
698            _ => polars_bail!(opq = is_finite, self.dtype()),
699        }
700    }
701
702    /// Check if numeric value is infinite
703    pub fn is_infinite(&self) -> PolarsResult<BooleanChunked> {
704        match self.dtype() {
705            #[cfg(feature = "dtype-f16")]
706            DataType::Float16 => Ok(self.f16().unwrap().is_infinite()),
707            DataType::Float32 => Ok(self.f32().unwrap().is_infinite()),
708            DataType::Float64 => Ok(self.f64().unwrap().is_infinite()),
709            DataType::Null => Ok(BooleanChunked::full_null(self.name().clone(), self.len())),
710            dt if dt.is_primitive_numeric() => {
711                let arr = BooleanArray::full(self.len(), false, ArrowDataType::Boolean)
712                    .with_validity(self.rechunk_validity());
713                Ok(BooleanChunked::with_chunk(self.name().clone(), arr))
714            },
715            _ => polars_bail!(opq = is_infinite, self.dtype()),
716        }
717    }
718
719    /// Create a new ChunkedArray with values from self where the mask evaluates `true` and values
720    /// from `other` where the mask evaluates `false`. This function automatically broadcasts unit
721    /// length inputs.
722    #[cfg(feature = "zip_with")]
723    pub fn zip_with(&self, mask: &BooleanChunked, other: &Series) -> PolarsResult<Series> {
724        let (lhs, rhs) = coerce_lhs_rhs(self, other)?;
725        lhs.zip_with_same_type(mask, rhs.as_ref())
726    }
727
728    /// Converts a Series to their physical representation, if they have one,
729    /// otherwise the series is left unchanged.
730    ///
731    /// * Date -> Int32
732    /// * Datetime -> Int64
733    /// * Duration -> Int64
734    /// * Decimal -> Int128
735    /// * Time -> Int64
736    /// * Categorical -> U8/U16/U32
737    /// * List(inner) -> List(physical of inner)
738    /// * Array(inner) -> Array(physical of inner)
739    /// * Struct -> Struct with physical repr of each struct column
740    /// * Extension -> physical of storage type
741    pub fn to_physical_repr(&self) -> Cow<'_, Series> {
742        use DataType::*;
743        match self.dtype() {
744            // NOTE: Don't use cast here, as it might rechunk (if all nulls)
745            // which is not allowed in a phys repr.
746            #[cfg(feature = "dtype-date")]
747            Date => Cow::Owned(self.date().unwrap().phys.clone().into_series()),
748            #[cfg(feature = "dtype-datetime")]
749            Datetime(_, _) => Cow::Owned(self.datetime().unwrap().phys.clone().into_series()),
750            #[cfg(feature = "dtype-duration")]
751            Duration(_) => Cow::Owned(self.duration().unwrap().phys.clone().into_series()),
752            #[cfg(feature = "dtype-time")]
753            Time => Cow::Owned(self.time().unwrap().phys.clone().into_series()),
754            #[cfg(feature = "dtype-categorical")]
755            dt @ (Categorical(_, _) | Enum(_, _)) => {
756                with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {
757                    let ca = self.cat::<$C>().unwrap();
758                    Cow::Owned(ca.physical().clone().into_series())
759                })
760            },
761            #[cfg(feature = "dtype-decimal")]
762            Decimal(_, _) => Cow::Owned(self.decimal().unwrap().phys.clone().into_series()),
763            List(_) => match self.list().unwrap().to_physical_repr() {
764                Cow::Borrowed(_) => Cow::Borrowed(self),
765                Cow::Owned(ca) => Cow::Owned(ca.into_series()),
766            },
767            #[cfg(feature = "dtype-array")]
768            Array(_, _) => match self.array().unwrap().to_physical_repr() {
769                Cow::Borrowed(_) => Cow::Borrowed(self),
770                Cow::Owned(ca) => Cow::Owned(ca.into_series()),
771            },
772            #[cfg(feature = "dtype-struct")]
773            Struct(_) => match self.struct_().unwrap().to_physical_repr() {
774                Cow::Borrowed(_) => Cow::Borrowed(self),
775                Cow::Owned(ca) => Cow::Owned(ca.into_series()),
776            },
777            #[cfg(feature = "dtype-extension")]
778            Extension(_, _) => self.ext().unwrap().storage().to_physical_repr(),
779            _ => Cow::Borrowed(self),
780        }
781    }
782
783    /// If the Series is an Extension type, return its storage Series.
784    /// Otherwise, return itself.
785    pub fn to_storage(&self) -> &Series {
786        #[cfg(feature = "dtype-extension")]
787        {
788            if let DataType::Extension(_, _) = self.dtype() {
789                return self.ext().unwrap().storage();
790            }
791        }
792        self
793    }
794
795    /// Traverse and collect every nth element in a new array.
796    pub fn gather_every(&self, n: usize, offset: usize) -> PolarsResult<Series> {
797        polars_ensure!(n > 0, ComputeError: "cannot perform gather every for `n=0`");
798        let idx = ((offset as IdxSize)..self.len() as IdxSize)
799            .step_by(n)
800            .collect_ca(PlSmallStr::EMPTY);
801        // SAFETY: we stay in-bounds.
802        Ok(unsafe { self.take_unchecked(&idx) })
803    }
804
805    #[cfg(feature = "dot_product")]
806    pub fn dot(&self, other: &Series) -> PolarsResult<f64> {
807        std::ops::Mul::mul(self, other)?.sum::<f64>()
808    }
809
810    /// Get the sum of the Series as a new Series of length 1.
811    /// Returns a Series with a single zeroed entry if self is an empty numeric series.
812    ///
813    /// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
814    /// first cast to `Int64` to prevent overflow issues.
815    pub fn sum_reduce(&self) -> PolarsResult<Scalar> {
816        use DataType::*;
817        match self.dtype() {
818            Int8 | UInt8 | Int16 | UInt16 => self.cast(&Int64).unwrap().sum_reduce(),
819            _ => self.0.sum_reduce(),
820        }
821    }
822
823    /// Get the mean of the Series as a new Series of length 1.
824    /// Returns a Series with a single null entry if self is an empty numeric series.
825    pub fn mean_reduce(&self) -> PolarsResult<Scalar> {
826        self.0.mean_reduce()
827    }
828
829    /// Get the product of an array.
830    ///
831    /// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
832    /// first cast to `Int64` to prevent overflow issues.
833    pub fn product(&self) -> PolarsResult<Scalar> {
834        #[cfg(feature = "product")]
835        {
836            use DataType::*;
837            match self.dtype() {
838                Boolean => self.cast(&DataType::Int64).unwrap().product(),
839                Int8 | UInt8 | Int16 | UInt16 | Int32 | UInt32 => {
840                    let s = self.cast(&Int64).unwrap();
841                    s.product()
842                },
843                Int64 => Ok(self.i64().unwrap().prod_reduce()),
844                UInt64 => Ok(self.u64().unwrap().prod_reduce()),
845                #[cfg(feature = "dtype-i128")]
846                Int128 => Ok(self.i128().unwrap().prod_reduce()),
847                #[cfg(feature = "dtype-u128")]
848                UInt128 => Ok(self.u128().unwrap().prod_reduce()),
849                #[cfg(feature = "dtype-f16")]
850                Float16 => Ok(self.f16().unwrap().prod_reduce()),
851                Float32 => Ok(self.f32().unwrap().prod_reduce()),
852                Float64 => Ok(self.f64().unwrap().prod_reduce()),
853                dt => {
854                    polars_bail!(InvalidOperation: "`product` operation not supported for dtype `{dt}`")
855                },
856            }
857        }
858        #[cfg(not(feature = "product"))]
859        {
860            panic!("activate 'product' feature")
861        }
862    }
863
864    /// Cast throws an error if conversion had overflows
865    pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Series> {
866        self.cast_with_options(dtype, CastOptions::Strict)
867    }
868
869    #[cfg(feature = "dtype-decimal")]
870    pub fn into_decimal(self, precision: usize, scale: usize) -> PolarsResult<Series> {
871        match self.dtype() {
872            DataType::Int128 => Ok(self
873                .i128()
874                .unwrap()
875                .clone()
876                .into_decimal(precision, scale)?
877                .into_series()),
878            DataType::Decimal(cur_prec, cur_scale)
879                if scale == *cur_scale && precision >= *cur_prec =>
880            {
881                Ok(self)
882            },
883            dt => panic!("into_decimal({precision:?}, {scale}) not implemented for {dt:?}"),
884        }
885    }
886
887    #[cfg(feature = "dtype-time")]
888    pub fn into_time(self) -> Series {
889        match self.dtype() {
890            DataType::Int64 => self.i64().unwrap().clone().into_time().into_series(),
891            DataType::Time => self
892                .time()
893                .unwrap()
894                .physical()
895                .clone()
896                .into_time()
897                .into_series(),
898            dt => panic!("date not implemented for {dt:?}"),
899        }
900    }
901
902    pub fn into_date(self) -> Series {
903        #[cfg(not(feature = "dtype-date"))]
904        {
905            panic!("activate feature dtype-date")
906        }
907        #[cfg(feature = "dtype-date")]
908        match self.dtype() {
909            DataType::Int32 => self.i32().unwrap().clone().into_date().into_series(),
910            DataType::Date => self
911                .date()
912                .unwrap()
913                .physical()
914                .clone()
915                .into_date()
916                .into_series(),
917            dt => panic!("date not implemented for {dt:?}"),
918        }
919    }
920
921    #[allow(unused_variables)]
922    pub fn into_datetime(self, timeunit: TimeUnit, tz: Option<TimeZone>) -> Series {
923        #[cfg(not(feature = "dtype-datetime"))]
924        {
925            panic!("activate feature dtype-datetime")
926        }
927
928        #[cfg(feature = "dtype-datetime")]
929        match self.dtype() {
930            DataType::Int64 => self
931                .i64()
932                .unwrap()
933                .clone()
934                .into_datetime(timeunit, tz)
935                .into_series(),
936            DataType::Datetime(_, _) => self
937                .datetime()
938                .unwrap()
939                .physical()
940                .clone()
941                .into_datetime(timeunit, tz)
942                .into_series(),
943            dt => panic!("into_datetime not implemented for {dt:?}"),
944        }
945    }
946
947    #[allow(unused_variables)]
948    pub fn into_duration(self, timeunit: TimeUnit) -> Series {
949        #[cfg(not(feature = "dtype-duration"))]
950        {
951            panic!("activate feature dtype-duration")
952        }
953        #[cfg(feature = "dtype-duration")]
954        match self.dtype() {
955            DataType::Int64 => self
956                .i64()
957                .unwrap()
958                .clone()
959                .into_duration(timeunit)
960                .into_series(),
961            DataType::Duration(_) => self
962                .duration()
963                .unwrap()
964                .physical()
965                .clone()
966                .into_duration(timeunit)
967                .into_series(),
968            dt => panic!("into_duration not implemented for {dt:?}"),
969        }
970    }
971
972    // used for formatting
973    pub fn str_value(&self, index: usize) -> PolarsResult<Cow<'_, str>> {
974        Ok(self.0.get(index)?.str_value())
975    }
976    /// Get the head of the Series.
977    pub fn head(&self, length: Option<usize>) -> Series {
978        let len = length.unwrap_or(HEAD_DEFAULT_LENGTH);
979        self.slice(0, std::cmp::min(len, self.len()))
980    }
981
982    /// Get the tail of the Series.
983    pub fn tail(&self, length: Option<usize>) -> Series {
984        let len = length.unwrap_or(TAIL_DEFAULT_LENGTH);
985        let len = std::cmp::min(len, self.len());
986        self.slice(-(len as i64), len)
987    }
988
989    /// Compute the unique elements, but maintain order. This requires more work
990    /// than a naive [`Series::unique`](SeriesTrait::unique).
991    pub fn unique_stable(&self) -> PolarsResult<Series> {
992        let idx = self.arg_unique()?;
993        // SAFETY: Indices are in bounds.
994        unsafe { Ok(self.take_unchecked(&idx)) }
995    }
996
997    pub fn try_idx(&self) -> Option<&IdxCa> {
998        #[cfg(feature = "bigidx")]
999        {
1000            self.try_u64()
1001        }
1002        #[cfg(not(feature = "bigidx"))]
1003        {
1004            self.try_u32()
1005        }
1006    }
1007
1008    pub fn idx(&self) -> PolarsResult<&IdxCa> {
1009        #[cfg(feature = "bigidx")]
1010        {
1011            self.u64()
1012        }
1013        #[cfg(not(feature = "bigidx"))]
1014        {
1015            self.u32()
1016        }
1017    }
1018
1019    /// Returns an estimation of the total (heap) allocated size of the `Series` in bytes.
1020    ///
1021    /// # Implementation
1022    /// This estimation is the sum of the size of its buffers, validity, including nested arrays.
1023    /// Multiple arrays may share buffers and bitmaps. Therefore, the size of 2 arrays is not the
1024    /// sum of the sizes computed from this function. In particular, [`StructArray`]'s size is an upper bound.
1025    ///
1026    /// When an array is sliced, its allocated size remains constant because the buffer unchanged.
1027    /// However, this function will yield a smaller number. This is because this function returns
1028    /// the visible size of the buffer, not its total capacity.
1029    ///
1030    /// FFI buffers are included in this estimation.
1031    pub fn estimated_size(&self) -> usize {
1032        let mut size = 0;
1033        match self.dtype() {
1034            // TODO @ cat-rework: include mapping size here?
1035            #[cfg(feature = "object")]
1036            DataType::Object(_) => {
1037                let ArrowDataType::FixedSizeBinary(size) = self.chunks()[0].dtype() else {
1038                    unreachable!()
1039                };
1040                // This is only the pointer size in python. So will be a huge underestimation.
1041                return self.len() * *size;
1042            },
1043            _ => {},
1044        }
1045
1046        size += self
1047            .chunks()
1048            .iter()
1049            .map(|arr| estimated_bytes_size(&**arr))
1050            .sum::<usize>();
1051
1052        size
1053    }
1054
1055    /// Packs every element into a list.
1056    pub fn as_list(&self) -> ListChunked {
1057        let s = self.rechunk();
1058        // don't  use `to_arrow` as we need the physical types
1059        let values = s.chunks()[0].clone();
1060        let offsets = (0i64..(s.len() as i64 + 1)).collect::<Vec<_>>();
1061        let offsets = unsafe { Offsets::new_unchecked(offsets) };
1062
1063        let dtype = LargeListArray::default_datatype(
1064            s.dtype().to_physical().to_arrow(CompatLevel::newest()),
1065        );
1066        let new_arr = LargeListArray::new(dtype, offsets.into(), values, None);
1067        let mut out = ListChunked::with_chunk(s.name().clone(), new_arr);
1068        out.set_inner_dtype(s.dtype().clone());
1069        out
1070    }
1071
1072    pub fn row_encode_unordered(&self) -> PolarsResult<BinaryOffsetChunked> {
1073        row_encode::_get_rows_encoded_ca_unordered(
1074            self.name().clone(),
1075            &[self.clone().into_column()],
1076        )
1077    }
1078
1079    pub fn row_encode_ordered(
1080        &self,
1081        descending: bool,
1082        nulls_last: bool,
1083    ) -> PolarsResult<BinaryOffsetChunked> {
1084        row_encode::_get_rows_encoded_ca(
1085            self.name().clone(),
1086            &[self.clone().into_column()],
1087            &[descending],
1088            &[nulls_last],
1089            false,
1090        )
1091    }
1092}
1093
1094impl Default for Series {
1095    fn default() -> Self {
1096        NullChunked::new(PlSmallStr::EMPTY, 0).into_series()
1097    }
1098}
1099
1100impl Deref for Series {
1101    type Target = dyn SeriesTrait;
1102
1103    fn deref(&self) -> &Self::Target {
1104        self.0.as_ref()
1105    }
1106}
1107
1108impl<'a> AsRef<dyn SeriesTrait + 'a> for Series {
1109    fn as_ref(&self) -> &(dyn SeriesTrait + 'a) {
1110        self.0.as_ref()
1111    }
1112}
1113
1114impl<T: PolarsPhysicalType> AsRef<ChunkedArray<T>> for dyn SeriesTrait + '_ {
1115    fn as_ref(&self) -> &ChunkedArray<T> {
1116        // @NOTE: SeriesTrait `as_any` returns a std::any::Any for the underlying ChunkedArray /
1117        // Logical (so not the SeriesWrap).
1118        let Some(ca) = self.as_any().downcast_ref::<ChunkedArray<T>>() else {
1119            panic!(
1120                "implementation error, cannot get ref {:?} from {:?}",
1121                T::get_static_dtype(),
1122                self.dtype()
1123            );
1124        };
1125
1126        ca
1127    }
1128}
1129
1130impl<T: PolarsPhysicalType> AsMut<ChunkedArray<T>> for dyn SeriesTrait + '_ {
1131    fn as_mut(&mut self) -> &mut ChunkedArray<T> {
1132        if !self.as_any_mut().is::<ChunkedArray<T>>() {
1133            panic!(
1134                "implementation error, cannot get ref {:?} from {:?}",
1135                T::get_static_dtype(),
1136                self.dtype()
1137            );
1138        }
1139
1140        // @NOTE: SeriesTrait `as_any` returns a std::any::Any for the underlying ChunkedArray /
1141        // Logical (so not the SeriesWrap).
1142        self.as_any_mut().downcast_mut::<ChunkedArray<T>>().unwrap()
1143    }
1144}
1145
#[cfg(test)]
mod test {
    use crate::prelude::*;
    use crate::series::*;

    /// A `UInt32` series can be cast to both `Int64` and `Float32`.
    #[test]
    fn cast() {
        let source = UInt32Chunked::new("a".into(), &[1, 2]).into_series();

        let as_i64 = source.cast(&DataType::Int64).unwrap();
        assert!(as_i64.i64().is_ok());

        let as_f32 = source.cast(&DataType::Float32).unwrap();
        assert!(as_f32.f32().is_ok());
    }

    /// Constructing a series from a vector, from a slice and from a chunked array works.
    #[test]
    fn new_series() {
        let _ = Series::new("boolean series".into(), &vec![true, false, true]);
        let _ = Series::new("int series".into(), &[1, 2, 3]);
        let _ = Int32Chunked::new("a".into(), &[1, 2, 3]).into_series();
    }

    /// A physical `List(Int32)` converted to logical `Date` reports `List(Date)` as dtype.
    #[test]
    #[cfg(feature = "dtype-date")]
    fn roundtrip_list_logical_20311() {
        let physical_dtype = ArrowDataType::LargeList(Box::new(ArrowField::new(
            LIST_VALUES_NAME,
            ArrowDataType::Int32,
            true,
        )));
        let values = PrimitiveArray::new(ArrowDataType::Int32, vec![1i32].into(), None).to_boxed();
        let physical = ListChunked::from_chunk_iter(
            PlSmallStr::from_static("a"),
            [ListArray::new(
                physical_dtype,
                unsafe { Offsets::new_unchecked(vec![0, 1]) }.into(),
                values,
                None,
            )],
        );

        let logical = unsafe { physical.from_physical_unchecked(DataType::Date) }.unwrap();
        assert_eq!(logical.dtype(), &DataType::List(Box::new(DataType::Date)));
    }

    /// Three field-less struct values produce a series of length three.
    #[test]
    #[cfg(feature = "dtype-struct")]
    fn new_series_from_empty_structs() {
        let struct_dtype = DataType::Struct(vec![]);
        let values = vec![AnyValue::StructOwned(Box::new((vec![], vec![]))); 3];
        let series =
            Series::from_any_values_and_dtype("".into(), &values, &struct_dtype, false).unwrap();
        assert_eq!(series.len(), 3);
    }

    /// A boxed arrow primitive array is accepted by `Series::try_new`.
    #[test]
    fn new_series_from_arrow_primitive_array() {
        let array_ref: ArrayRef = Box::new(UInt32Array::from_slice([1, 2, 3, 4, 5]));

        let _ = Series::try_new("foo".into(), array_ref).unwrap();
    }

    /// Appending works for a matching dtype and errors for a mismatching one.
    #[test]
    fn series_append() {
        let mut target = Series::new("a".into(), &[1, 2]);
        let same_dtype = Series::new("b".into(), &[3]);
        target.append(&same_dtype).unwrap();
        assert_eq!(target.len(), 3);

        // add wrong type
        let other_dtype = Series::new("b".into(), &[3.0]);
        assert!(target.append(&other_dtype).is_err())
    }

    /// Appending / extending decimals of different scales keeps the scale of the left side.
    #[test]
    #[cfg(feature = "dtype-decimal")]
    fn series_append_decimal() {
        let scale_two = Series::new("a".into(), &[1.1, 2.3])
            .cast(&DataType::Decimal(38, 2))
            .unwrap();
        let scale_zero = Series::new("b".into(), &[3])
            .cast(&DataType::Decimal(38, 0))
            .unwrap();

        {
            let mut lhs = scale_two.clone();
            lhs.append(&scale_zero).unwrap();
            assert_eq!(lhs.len(), 3);
            assert_eq!(lhs.get(2).unwrap(), AnyValue::Decimal(300, 38, 2));
        }

        {
            let mut lhs = scale_zero;
            lhs.extend(&scale_two).unwrap();
            assert_eq!(lhs.get(2).unwrap(), AnyValue::Decimal(2, 38, 0));
        }
    }

    /// Negative and zero offsets select the expected first element.
    #[test]
    fn series_slice_works() {
        let series = Series::new("a".into(), &[1i64, 2, 3, 4, 5]);

        let last_three = series.slice(-3, 3);
        assert_eq!(last_three.get(0).unwrap(), AnyValue::Int64(3));

        let all_from_back = series.slice(-5, 5);
        assert_eq!(all_from_back.get(0).unwrap(), AnyValue::Int64(1));

        let all_from_front = series.slice(0, 5);
        assert_eq!(all_from_front.get(0).unwrap(), AnyValue::Int64(1));
    }

    /// Slices that reach past either end of the series must not panic.
    #[test]
    fn out_of_range_slice_does_not_panic() {
        let series = Series::new("a".into(), &[1i64, 2, 3, 4, 5]);

        for (offset, length) in [(-3i64, 4usize), (-6, 2), (4, 2)] {
            let _ = series.slice(offset, length);
        }
    }
}