polars/docs/
eager.rs

1//!
2//! # Polars Eager cookbook
3//!
4//! This page serves as a cookbook to quickly get you started with the most fundamental operations
5//! executed on a [`ChunkedArray`], [`Series`] or [`DataFrame`].
6//!
7//! [`ChunkedArray`]: crate::chunked_array::ChunkedArray
8//! [`Series`]: crate::series::Series
9//! [`DataFrame`]: crate::frame::DataFrame
10//!
11//! ## Table of Contents
12//!
13//! * [Creation of data structures](#creation-of-data-structures)
14//!     - [ChunkedArray](#chunkedarray)
15//!     - [Series](#series)
16//!     - [DataFrame](#dataframe)
17//! * [Arithmetic](#arithmetic)
18//! * [Comparisons](#comparisons)
19//! * [Apply functions/ closures](#apply-functions-closures)
20//!     - [Series / ChunkedArrays](#series--chunkedarrays)
21//!     - [DataFrame](#dataframe-1)
22//! * [Filter](#filter)
23//! * [Sort](#sort)
24//! * [Joins](#joins)
25//! * [GroupBy](#groupby)
26//! * [Pivot](#pivot)
27//! * [Unpivot](#unpivot)
28//! * [Explode](#explode)
29//! * [IO](#io)
30//!     - [Read CSV](#read-csv)
31//!     - [Write CSV](#write-csv)
32//!     - [Read IPC](#read-ipc)
33//!     - [Write IPC](#write-ipc)
34//!     - [Read Parquet](#read-parquet)
35//!     - [Write Parquet](#write-parquet)
36//! * [Various](#various)
37//!     - [Replace NaN with Missing](#replace-nan-with-missing)
38//!     - [Extracting data](#extracting-data)
39//!
40//! ## Creation of data structures
41//!
42//! ### ChunkedArray
43//!
44//! ```
45//! use polars::prelude::*;
46//!
47//! // use iterators
48//! let ca: UInt32Chunked = (0..10).map(Some).collect();
49//!
50//! // from slices
51//! let ca = UInt32Chunked::new("foo".into(), &[1, 2, 3]);
52//!
53//! // use builders
54//! let mut builder = PrimitiveChunkedBuilder::<UInt32Type>::new("foo".into(), 10);
55//! for value in 0..10 {
56//!     builder.append_value(value);
57//! }
58//! let ca = builder.finish();
59//! ```
60//!
61//! ### Series
62//!
63//! ```
64//! use polars::prelude::*;
65//!
66//! // use iterators
67//! let s: Series = (0..10).map(Some).collect();
68//!
69//! // from slices
70//! let s = Series::new("foo".into(), &[1, 2, 3]);
71//!
72//! // from a chunked-array
73//! let ca = UInt32Chunked::new("foo".into(), &[Some(1), None, Some(3)]);
74//! let s = ca.into_series();
75//!
76//! // into a Column
77//! let s = s.into_column();
78//! ```
79//!
80//! ### DataFrame
81//!
82//! ```
83//! use polars::prelude::*;
84//! use polars::df;
85//! # fn example() -> PolarsResult<()> {
86//!
87//! // use macro
88//! let df = df! [
89//!     "names" => ["a", "b", "c"],
90//!     "values" => [1, 2, 3],
91//!     "values_nulls" => [Some(1), None, Some(3)]
92//! ]?;
93//!
94//! // from a Vec<Column>
95//! let c1 = Column::new("names".into(), &["a", "b", "c"]);
96//! let c2 = Column::new("values".into(), &[Some(1), None, Some(3)]);
97//! let df = DataFrame::new(vec![c1, c2])?;
98//! # Ok(())
99//! # }
100//! ```
101//!
102//! ## Arithmetic
103//! Arithmetic can be done on both [`Series`] and [`ChunkedArray`]. The most notable difference is that
104//! arithmetic on [`Series`] coerces the operands to a common data type, whereas [`ChunkedArray`]
104//! operands must be cast to the same type first.
105//!
106//! ```
107//! use polars::prelude::*;
108//! # fn example() -> PolarsResult<()> {
109//! let s_int = Series::new("a".into(), &[1, 2, 3]);
110//! let s_flt = Series::new("b".into(), &[1.0f32, 2.0, 3.0]);
111//!
112//! let added = &s_int + &s_flt;
113//! let subtracted = &s_int - &s_flt;
114//! let multiplied = &s_int * &s_flt;
115//! let divided = &s_int / &s_flt;
116//! let moduloed = &s_int % &s_flt;
117//!
118//!
119//! // on chunked-arrays we first need to cast to the same type
120//! let ca_int = s_int.i32()?;
121//! let ca_flt = s_flt.f32()?;
122//!
123//! ca_int.cast(&DataType::Float32)?.f32()? * ca_flt;
124//! ca_flt.cast(&DataType::Int32)?.i32()? * ca_int;
125//!
126//! // we can also do arithmetic with numeric values
127//! let multiplied = ca_int * 2.0;
128//! let multiplied = s_flt * 2.0;
129//!
130//! // or broadcast a Series to match the operand's type
131//! let added = &s_int * &Series::new("broadcast_me".into(), &[10]);
132//!
133//! # Ok(())
134//! # }
135//! ```
136//!
137//! Because Rust's Orphan Rule doesn't allow us to implement left side operations, we need to call
138//! such operations directly.
139//!
140//! ```rust
141//! # use polars::prelude::*;
142//! let series = Series::new("foo".into(), [1, 2, 3]);
143//!
144//! // 1 / s
145//! let divide_one_by_s = 1.div(&series);
146//!
147//! // 1 - s
148//! let subtract_one_by_s = 1.sub(&series);
149//! ```
150//!
151//! For [`ChunkedArray`] left hand side operations can be done with the [`apply_values`] method.
152//!
153//! [`apply_values`]: crate::chunked_array::ops::ChunkApply::apply_values
154//!
155//! ```rust
156//! # use polars::prelude::*;
157//! let ca = UInt32Chunked::new("foo".into(), &[1, 2, 3]);
158//!
159//! // 1 / ca
160//! let divide_one_by_ca = ca.apply_values(|rhs| 1 / rhs);
161//! ```
162//!
163//! ## Comparisons
164//!
165//! [`Series`] and [`ChunkedArray`] can be used in comparison operations to create _boolean_ masks/predicates.
166//!
167//! ```
168//! use polars::prelude::*;
169//! # fn example() -> PolarsResult<()> {
170//!
171//! let s = Series::new("a".into(), &[1, 2, 3]);
172//! let ca = UInt32Chunked::new("b".into(), &[Some(3), None, Some(1)]);
173//!
174//! // compare Series with numeric values
175//! // ==
176//! s.equal(2);
177//! // !=
178//! s.not_equal(2);
179//! // >
180//! s.gt(2);
181//! // >=
182//! s.gt_eq(2);
183//! // <
184//! s.lt(2);
185//! // <=
186//! s.lt_eq(2);
187//!
188//!
189//! // compare Series with Series
190//! // ==
191//! s.equal(&s);
192//! // !=
193//! s.not_equal(&s);
194//! // >
195//! s.gt(&s);
196//! // >=
197//! s.gt_eq(&s);
198//! // <
199//! s.lt(&s);
200//! // <=
201//! s.lt_eq(&s);
202//!
203//!
204//! // compare chunked-array with numeric values
205//! // ==
206//! ca.equal(2);
207//! // !=
208//! ca.not_equal(2);
209//! // >
210//! ca.gt(2);
211//! // >=
212//! ca.gt_eq(2);
213//! // <
214//! ca.lt(2);
215//! // <=
216//! ca.lt_eq(2);
217//!
218//! // compare chunked-array with chunked-array
219//! // ==
220//! ca.equal(&ca);
221//! // !=
222//! ca.not_equal(&ca);
223//! // >
224//! ca.gt(&ca);
225//! // >=
226//! ca.gt_eq(&ca);
227//! // <
228//! ca.lt(&ca);
229//! // <=
230//! ca.lt_eq(&ca);
231//!
232//! // use iterators
233//! let a: BooleanChunked = ca.iter()
234//!     .map(|opt_value| {
235//!          match opt_value {
236//!          Some(value) => value < 10,
237//!          None => false
238//! }}).collect();
239//!
240//! # Ok(())
241//! # }
242//! ```
243//!
244//!
245//! ## Apply functions/ closures
246//!
247//! See all possible [apply methods here](crate::chunked_array::ops::ChunkApply).
248//!
249//! ### Series / ChunkedArrays
250//!
251//! ```
252//! use polars::prelude::*;
253//! use polars::prelude::arity::unary_elementwise_values;
254//! # fn example() -> PolarsResult<()> {
255//!
256//! // apply a closure over all values
257//! let s = Series::new("foo".into(), &[Some(1), Some(2), None]);
258//! s.i32()?.apply_values(|value| value * 20);
259//!
260//! // count string lengths
261//! let s = Series::new("foo".into(), &["foo", "bar", "foobar"]);
262//! unary_elementwise_values::<StringType, UInt64Type, _>(s.str()?, |str_val| str_val.len() as u64);
263//!
264//! # Ok(())
265//! # }
266//! ```
267//!
268//!
269//! ### Multiple columns
270//!
271//! ```
272//! use polars::prelude::*;
273//! fn my_black_box_function(a: f32, b: f32) -> f32 {
274//!     // do something
275//!     a
276//! }
277//!
278//! fn apply_multiples(col_a: &Series, col_b: &Series) -> Float32Chunked {
279//!     match (col_a.dtype(), col_b.dtype()) {
280//!         (DataType::Float32, DataType::Float32) => {
281//!             // downcast to `ChunkedArray`
282//!             let a = col_a.f32().unwrap();
283//!             let b = col_b.f32().unwrap();
284//!
285//!             a.into_iter()
286//!                 .zip(b.into_iter())
287//!                 .map(|(opt_a, opt_b)| match (opt_a, opt_b) {
288//!                     (Some(a), Some(b)) => Some(my_black_box_function(a, b)),
289//!                     // if either value is `None` we propagate that null
290//!                     _ => None,
291//!                 })
292//!                 .collect()
293//!         }
294//!         _ => panic!("unexpected dtypes"),
295//!     }
296//! }
297//! ```
298//!
299//! ### DataFrame
300//!
301//! ```
302//! use polars::prelude::*;
303//! use polars::df;
304//! # fn example() -> PolarsResult<()> {
305//!
306//! let mut df = df![
307//!     "letters" => ["a", "b", "c", "d"],
308//!     "numbers" => [1, 2, 3, 4]
309//! ]?;
310//!
311//!
312//! // coerce numbers to floats
313//! df.try_apply("numbers", |s: &Series| s.cast(&DataType::Float64))?;
314//!
315//! // transform letters to uppercase letters
316//! df.try_apply("letters", |s: &Series| {
317//!     Ok(s.str()?.to_uppercase())
318//! });
319//!
320//! # Ok(())
321//! # }
322//! ```
323//!
324//! ## Filter
325//! ```
326//! use polars::prelude::*;
327//!
328//! # fn example(df: &DataFrame) -> PolarsResult<()> {
329//! // create a mask to filter out null values
330//! let mask = df.column("sepal_width")?.is_not_null();
331//!
332//! // select column
333//! let s = df.column("sepal_length")?;
334//!
335//! // apply filter on a Series
336//! let filtered_series = s.filter(&mask);
337//!
338//! // apply the filter on a DataFrame
339//! let filtered_df = df.filter(&mask)?;
340//!
341//! # Ok(())
342//! # }
343//! ```
344//!
345//! ## Sort
346//! ```
347//! use polars::prelude::*;
348//! use polars::df;
349//!
350//! # fn example() -> PolarsResult<()> {
351//! let df = df![
352//!     "a" => [1, 2, 3],
353//!     "b" => ["a", "a", "b"]
354//! ]?;
355//! // sort this DataFrame by multiple columns
356//!
357//! // sort order per column in `by`: `true` means descending
358//! let descending = vec![true, false];
359//! // columns to sort by
360//! let by = [PlSmallStr::from_static("b"), PlSmallStr::from_static("a")];
361//! // do the sort operation
362//! let sorted = df.sort(
363//!     by,
364//!     SortMultipleOptions::default()
365//!         .with_order_descending_multi(descending)
366//!         .with_maintain_order(true)
367//! )?;
368//!
369//! // sorted:
370//!
371//! // ╭─────┬─────╮
372//! // │ a   ┆ b   │
373//! // │ --- ┆ --- │
374//! // │ i32 ┆ str │
375//! // ╞═════╪═════╡
376//! // │ 3   ┆ "b" │
377//! // │ 1   ┆ "a" │
378//! // │ 2   ┆ "a" │
379//! // ╰─────┴─────╯
380//!
381//! # Ok(())
382//! # }
383//! ```
384//!
385//! ## Joins
386//!
387//! ```
388//! use polars::prelude::*;
389//! use polars::df;
390//!
391//! # fn example() -> PolarsResult<()> {
392//! // Create first df.
393//! let temp = df!("days" => &[0, 1, 2, 3, 4],
394//!                "temp" => &[22.1, 19.9, 7., 2., 3.],
395//!                "other" => &[1, 2, 3, 4, 5]
396//! )?;
397//!
398//! // Create second df.
399//! let rain = df!("days" => &[1, 2],
400//!                "rain" => &[0.1, 0.2],
401//!                "other" => &[1, 2]
402//! )?;
403//!
404//! // join on a single column
405//! temp.left_join(&rain, ["days"], ["days"]);
406//! temp.inner_join(&rain, ["days"], ["days"]);
407//! temp.full_join(&rain, ["days"], ["days"]);
408//!
409//! // join on multiple columns
410//! temp.join(&rain, vec!["days", "other"], vec!["days", "other"], JoinArgs::new(JoinType::Left), None);
411//!
412//! # Ok(())
413//! # }
414//! ```
415//!
416//! ## Groupby
417//!
418//! Note that Polars lazy is a lot more powerful and more performant in group_by operations.
419//! In lazy, a myriad of aggregations can be combined from expressions.
420//!
421//! See more in:
422//!
423//! * [`GroupBy`](crate::frame::group_by::GroupBy)
424//!
425//! ### GroupBy
426//! ```
427//! use polars::prelude::*;
428//!
429//! # fn example(df: &DataFrame) -> PolarsResult<()> {
430//!  // group_by "groups" | sum "foo"
431//!  let out = df.group_by(["groups"])?
432//!     .select(["foo"])
433//!     .sum();
434//!
435//! # Ok(())
436//! # }
437//!
438//! ```
439//!
440//! ### Pivot
441//!
442//! ```
443//! use polars::prelude::*;
444//! use polars::df;
445//!
446//! # fn example(df: &DataFrame) -> PolarsResult<()> {
447//!  let df = df!("foo" => ["A", "A", "B", "B", "C"],
448//!      "N" => [1, 2, 2, 4, 2],
449//!      "bar" => ["k", "l", "m", "n", "o"]
450//!      )?;
451//!
452//! // group_by "foo" | pivot "bar" column | aggregate "N"
453//!  let pivoted = pivot::pivot(
454//!     &df,
455//!     [PlSmallStr::from_static("foo")],
456//!     Some([PlSmallStr::from_static("bar")]),
457//!     Some([PlSmallStr::from_static("N")]),
458//!     false, Some(first()),
459//!     None
460//! );
461//!
462//! // pivoted:
463//! // +-----+------+------+------+------+------+
464//! // | foo | o    | n    | m    | l    | k    |
465//! // | --- | ---  | ---  | ---  | ---  | ---  |
466//! // | str | i32  | i32  | i32  | i32  | i32  |
467//! // +=====+======+======+======+======+======+
468//! // | "A" | null | null | null | 2    | 1    |
469//! // +-----+------+------+------+------+------+
470//! // | "B" | null | 4    | 2    | null | null |
471//! // +-----+------+------+------+------+------+
472//! // | "C" | 2    | null | null | null | null |
473//! // +-----+------+------+------+------+------+
474//!
475//! # Ok(())
476//! # }
477//! ```
478//!
479//! ## Unpivot
480//!
481//! ```
482//! use polars::prelude::*;
483//! use polars::df;
484//!
485//! # fn example(df: &DataFrame) -> PolarsResult<()> {
486//! let df = df!["A" => &["a", "b", "a"],
487//!              "B" => &[1, 3, 5],
488//!              "C" => &[10, 11, 12],
489//!              "D" => &[2, 4, 6]
490//!     ]?;
491//!
492//! let unpivoted = df.unpivot(
493//!     [PlSmallStr::from_static("C"), PlSmallStr::from_static("D")], // on: value columns
494//!     [PlSmallStr::from_static("A"), PlSmallStr::from_static("B")], // index: identifier columns
495//! ).unwrap();
496//! // unpivoted:
497//!
498//! // +-----+-----+----------+-------+
499//! // | A   | B   | variable | value |
500//! // | --- | --- | ---      | ---   |
501//! // | str | i32 | str      | i32   |
502//! // +=====+=====+==========+=======+
503//! // | "a" | 1   | "C"      | 10    |
504//! // +-----+-----+----------+-------+
505//! // | "b" | 3   | "C"      | 11    |
506//! // +-----+-----+----------+-------+
507//! // | "a" | 5   | "C"      | 12    |
508//! // +-----+-----+----------+-------+
509//! // | "a" | 1   | "D"      | 2     |
510//! // +-----+-----+----------+-------+
511//! // | "b" | 3   | "D"      | 4     |
512//! // +-----+-----+----------+-------+
513//! // | "a" | 5   | "D"      | 6     |
514//! // +-----+-----+----------+-------+
515//!
516//! # Ok(())
517//! # }
518//! ```
519//!
520//! ## Explode
521//!
522//! ```
523//! use polars::prelude::*;
524//! use polars::df;
525//!
526//! # fn example(df: &DataFrame) -> PolarsResult<()> {
527//! let s0 = Series::new("a".into(), &[1i64, 2, 3]);
528//! let s1 = Series::new("b".into(), &[1i64, 1, 1]);
529//! let s2 = Series::new("c".into(), &[2i64, 2, 2]);
530//! // construct a new ListChunked for a slice of Series.
531//! let list = Column::new("foo".into(), &[s0, s1, s2]);
532//!
533//! // construct a few more Series.
534//! let s0 = Column::new("B".into(), [1, 2, 3]);
535//! let s1 = Column::new("C".into(), [1, 1, 1]);
536//! let df = DataFrame::new(vec![list, s0, s1])?;
537//!
538//! let exploded = df.explode([PlSmallStr::from("foo")])?;
539//! // exploded:
540//!
541//! // +-----+-----+-----+
542//! // | foo | B   | C   |
543//! // | --- | --- | --- |
544//! // | i64 | i32 | i32 |
545//! // +=====+=====+=====+
546//! // | 1   | 1   | 1   |
547//! // +-----+-----+-----+
548//! // | 2   | 1   | 1   |
549//! // +-----+-----+-----+
550//! // | 3   | 1   | 1   |
551//! // +-----+-----+-----+
552//! // | 1   | 2   | 1   |
553//! // +-----+-----+-----+
554//! // | 1   | 2   | 1   |
555//! // +-----+-----+-----+
556//! // | 1   | 2   | 1   |
557//! // +-----+-----+-----+
558//! // | 2   | 3   | 1   |
559//! // +-----+-----+-----+
560//! // | 2   | 3   | 1   |
561//! // +-----+-----+-----+
562//! // | 2   | 3   | 1   |
563//! // +-----+-----+-----+
564//!
565//! # Ok(())
566//! # }
567//! ```
568//!
569//! ## IO
570//!
571//! ### Read CSV
572//!
573//! ```
574//! use polars::prelude::*;
575//!
576//! # fn example(df: &DataFrame) -> PolarsResult<()> {
577//! // open the file and read it into a DataFrame
578//! let mut file = std::fs::File::open("iris.csv")?;
579//! let df = CsvReader::new(file).finish()?;
580//! # Ok(())
581//! # }
582//! ```
583//!
584//! ### Write CSV
585//!
586//! ```
587//! use polars::prelude::*;
588//! use std::fs::File;
589//!
590//! # fn example(df: &mut DataFrame) -> PolarsResult<()> {
591//! // create a file
592//! let mut file = File::create("example.csv").expect("could not create file");
593//!
594//! // write DataFrame to file
595//! CsvWriter::new(&mut file)
596//!     .include_header(true)
597//!     .with_separator(b',')
598//!     .finish(df)?;
599//! # Ok(())
600//! # }
601//! ```
602//!
603//! ### Read IPC
604//! ```
605//! use polars::prelude::*;
606//! use std::fs::File;
607//!
608//! # fn example(df: &DataFrame) -> PolarsResult<()> {
609//! // open file
610//! let file = File::open("file.ipc").expect("file not found");
611//!
612//! // read to DataFrame
613//! let df = IpcReader::new(file)
614//!    .finish()?;
615//! # Ok(())
616//! # }
617//! ```
618//!
619//! ### Write IPC
620//! ```
621//! use polars::prelude::*;
622//! use std::fs::File;
623//!
624//! # fn example(df: &mut DataFrame) -> PolarsResult<()> {
625//! // create a file
626//! let mut file = File::create("file.ipc").expect("could not create file");
627//!
628//! // write DataFrame to file
629//! IpcWriter::new(&mut file)
630//!     .finish(df)
631//! # }
632//! ```
633//!
634//! ### Read Parquet
635//!
636//! ```
637//! use polars::prelude::*;
638//! use std::fs::File;
639//!
640//! # fn example(df: &DataFrame) -> PolarsResult<()> {
641//! // open file
642//! let file = File::open("some_file.parquet").unwrap();
643//!
644//! // read to DataFrame
645//! let df = ParquetReader::new(file).finish()?;
646//! # Ok(())
647//! # }
648//! ```
649//!
650//! ### Write Parquet
651//! ```
652//! use polars::prelude::*;
653//! use std::fs::File;
654//!
655//! # fn example(df: &mut DataFrame) -> PolarsResult<u64> {
656//! // create a file
657//! let file = File::create("example.parquet").expect("could not create file");
658//!
659//! ParquetWriter::new(file)
660//!     .finish(df)
661//! # }
662//! ```
663//!
664//! # Various
665//!
666//! ## Replace NaN with Missing
667//! The floating point [Not a Number: NaN](https://en.wikipedia.org/wiki/NaN) is conceptually different
668//! from missing data in Polars. In the snippet below we show how to replace [`NaN`] values with
669//! missing values by setting them to [`None`].
670//!
671//! [`NaN`]: https://doc.rust-lang.org/std/primitive.f64.html#associatedconstant.NAN
672//!
673//! ```
674//! use polars::prelude::*;
675//! use polars::df;
676//!
677//! /// Replaces NaN with missing values.
678//! fn fill_nan_with_nulls() -> PolarsResult<DataFrame> {
679//!     let nan = f64::NAN;
680//!
681//!     let mut df = df! {
682//!        "a" => [nan, 1.0, 2.0],
683//!        "b" => [nan, 1.0, 2.0]
684//!     }
685//!     .unwrap();
686//!
687//!     for idx in 0..df.width() {
688//!         df.try_apply_at_idx(idx, |series| {
689//!             let mask = series.is_nan()?;
690//!             let ca = series.f64()?;
691//!             ca.set(&mask, None)
692//!         })?;
693//!     }
694//!     Ok(df)
695//! }
696//! ```
697//!
698//! ## Extracting data
699//!
700//! To iterate over the values of a [`Series`], or to convert the [`Series`] into another structure
701//! such as a [`Vec<T>`], we must first downcast to a data type aware [`ChunkedArray<T>`].
702//!
703//! [`ChunkedArray<T>`]: crate::chunked_array::ChunkedArray
704//!
705//! ```
706//! use polars::prelude::*;
707//! use polars::df;
708//!
709//! fn extract_data() -> PolarsResult<()> {
710//!     let df = df! [
711//!        "a" => [None, Some(1.0f32), Some(2.0)],
712//!        "str" => ["foo", "bar", "ham"]
713//!     ]?;
714//!
715//!     // first extract ChunkedArray to get the inner type.
716//!     let ca = df.column("a")?.f32()?;
717//!
718//!     // Then convert to vec
719//!     let to_vec: Vec<Option<f32>> = Vec::from(ca);
720//!
721//!     // We can also do this with iterators
722//!     let ca = df.column("str")?.str()?;
723//!     let to_vec: Vec<Option<&str>> = ca.into_iter().collect();
724//!     let to_vec_no_options: Vec<&str> = ca.into_no_null_iter().collect();
725//!
726//!     Ok(())
727//! }
728//! ```
729//!
730//!