Array#

Function

Description

ARRAY_AGG

Aggregate a column/expression values as an array.

ARRAY_CONTAINS

Returns true if the array contains the value.

ARRAY_GET

Returns the value at the given index in the array.

ARRAY_LENGTH

Returns the length of the array.

ARRAY_LOWER

Returns the lower bound (min value) in an array.

ARRAY_MEAN

Returns the mean of all values in an array.

ARRAY_REVERSE

Returns the array with the elements in reverse order.

ARRAY_SUM

Returns the sum of all values in an array.

ARRAY_TO_STRING

Takes all elements of the array and joins them into one string.

ARRAY_UNIQUE

Returns the array with the unique elements.

ARRAY_UPPER

Returns the upper bound (max value) in an array.

UNNEST

Unnests (explodes) an array column into multiple rows.

ARRAY_AGG#

Aggregate a column/expression as an array (equivalent to implode).

Supports optional inline ORDER BY and LIMIT clauses.

Example:

df = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
df.sql("""
  SELECT
    ARRAY_AGG(foo ORDER BY foo DESC) AS arr_foo,
    ARRAY_AGG(bar LIMIT 2) AS arr_bar
  FROM self
""")
# shape: (1, 2)
# ┌───────────┬───────────┐
# │ arr_foo   ┆ arr_bar   │
# │ ---       ┆ ---       │
# │ list[i64] ┆ list[i64] │
# ╞═══════════╪═══════════╡
# │ [3, 2, 1] ┆ [4, 5]    │
# └───────────┴───────────┘

ARRAY_CONTAINS#

Returns true if the array contains the value.

Example:

df = pl.DataFrame({"foo": [[1, 2], [4, 3]]})
df.sql("""
  SELECT foo, ARRAY_CONTAINS(foo, 2) AS has_two FROM self
""")
# shape: (2, 2)
# ┌───────────┬─────────┐
# │ foo       ┆ has_two │
# │ ---       ┆ ---     │
# │ list[i64] ┆ bool    │
# ╞═══════════╪═════════╡
# │ [1, 2]    ┆ true    │
# │ [4, 3]    ┆ false   │
# └───────────┴─────────┘

ARRAY_GET#

Returns the value at the given index in the array.

Example:

df = pl.DataFrame(
  {
    "foo": [[1, 2], [4, 3, 2]],
    "bar": [[6, 7], [8, 9, 10]]
  }
)
df.sql("""
  SELECT
    foo, bar,
    ARRAY_GET(foo, 1) AS foo_at_1,
    ARRAY_GET(bar, 3) AS bar_at_2
  FROM self
""")
# shape: (2, 4)
# ┌───────────┬────────────┬──────────┬──────────┐
# │ foo       ┆ bar        ┆ foo_at_1 ┆ bar_at_2 │
# │ ---       ┆ ---        ┆ ---      ┆ ---      │
# │ list[i64] ┆ list[i64]  ┆ i64      ┆ i64      │
# ╞═══════════╪════════════╪══════════╪══════════╡
# │ [1, 2]    ┆ [6, 7]     ┆ 1        ┆ null     │
# │ [4, 3, 2] ┆ [8, 9, 10] ┆ 4        ┆ 10       │
# └───────────┴────────────┴──────────┴──────────┘

ARRAY_LENGTH#

Returns the length of the array.

Example:

df = pl.DataFrame({"foo": [[1, 2], [4, 3, 2]]})
df.sql("""
  SELECT foo, ARRAY_LENGTH(foo) AS n_elems FROM self
""")
# shape: (2, 2)
# ┌───────────┬─────────┐
# │ foo       ┆ n_elems │
# │ ---       ┆ ---     │
# │ list[i64] ┆ u32     │
# ╞═══════════╪═════════╡
# │ [1, 2]    ┆ 2       │
# │ [4, 3, 2] ┆ 3       │
# └───────────┴─────────┘

ARRAY_LOWER#

Returns the lower bound (min value) in an array.

Example:

df = pl.DataFrame({"foo": [[1, 2], [4, -2, 8]]})
df.sql("""
  SELECT foo, ARRAY_LOWER(foo) AS min_elem FROM self
""")
# shape: (2, 2)
# ┌────────────┬──────────┐
# │ foo        ┆ min_elem │
# │ ---        ┆ ---      │
# │ list[i64]  ┆ i64      │
# ╞════════════╪══════════╡
# │ [1, 2]     ┆ 1        │
# │ [4, -2, 8] ┆ -2       │
# └────────────┴──────────┘

ARRAY_MEAN#

Returns the mean of all values in an array.

Example:

df = pl.DataFrame({"foo": [[1, 2], [4, 3, -1]]})
df.sql("""
  SELECT foo, ARRAY_MEAN(foo) AS foo_mean FROM self
""")
# shape: (2, 2)
# ┌────────────┬──────────┐
# │ foo        ┆ foo_mean │
# │ ---        ┆ ---      │
# │ list[i64]  ┆ f64      │
# ╞════════════╪══════════╡
# │ [1, 2]     ┆ 1.5      │
# │ [4, 3, -1] ┆ 2.0      │
# └────────────┴──────────┘

ARRAY_REVERSE#

Returns the array with the elements in reverse order.

Example:

df = pl.DataFrame(
  {
    "foo": [[1, 2], [4, 3, 2]],
    "bar": [[6, 7], [8, 9, 10]]
  }
)
df.sql("""
  SELECT
    foo,
    ARRAY_REVERSE(foo) AS oof,
    ARRAY_REVERSE(bar) AS rab
  FROM self
""")
# shape: (2, 3)
# ┌───────────┬───────────┬────────────┐
# │ foo       ┆ oof       ┆ rab        │
# │ ---       ┆ ---       ┆ ---        │
# │ list[i64] ┆ list[i64] ┆ list[i64]  │
# ╞═══════════╪═══════════╪════════════╡
# │ [1, 2]    ┆ [2, 1]    ┆ [7, 6]     │
# │ [4, 3, 2] ┆ [2, 3, 4] ┆ [10, 9, 8] │
# └───────────┴───────────┴────────────┘

ARRAY_SUM#

Returns the sum of all values in an array.

Example:

df = pl.DataFrame({"foo": [[1, -2], [10, 3, -2]]})
df.sql("""
  SELECT
    foo,
    ARRAY_SUM(foo) AS foo_sum
  FROM self
""")
# shape: (2, 2)
# ┌─────────────┬─────────┐
# │ foo         ┆ foo_sum │
# │ ---         ┆ ---     │
# │ list[i64]   ┆ i64     │
# ╞═════════════╪═════════╡
# │ [1, -2]     ┆ -1      │
# │ [10, 3, -2] ┆ 11      │
# └─────────────┴─────────┘

ARRAY_TO_STRING#

Takes all elements of the array and joins them into one string.

Example:

df = pl.DataFrame(
    {
        "foo": [["a", "b"], ["c", "d", "e"]],
        "bar": [[8, None, 8], [3, 2, 1, 0]],
    }
)
df.sql("""
  SELECT
    ARRAY_TO_STRING(foo,':') AS s_foo,
    ARRAY_TO_STRING(bar,':') AS s_bar
  FROM self
""")
# shape: (2, 2)
# ┌───────┬─────────┐
# │ s_foo ┆ s_bar   │
# │ ---   ┆ ---     │
# │ str   ┆ str     │
# ╞═══════╪═════════╡
# │ a:b   ┆ 8:8     │
# │ c:d:e ┆ 3:2:1:0 │
# └───────┴─────────┘

ARRAY_UNIQUE#

Returns the array with the unique elements.

Example:

df = pl.DataFrame({"foo": [["a", "b"], ["b", "b", "e"]]})
df.sql("""
  SELECT ARRAY_UNIQUE(foo) AS foo_unique FROM self
""")
# shape: (2, 1)
# ┌────────────┐
# │ foo_unique │
# │ ---        │
# │ list[str]  │
# ╞════════════╡
# │ ["a", "b"] │
# │ ["e", "b"] │
# └────────────┘

ARRAY_UPPER#

Returns the upper bound (max value) in an array.

Example:

df = pl.DataFrame({"foo": [[5, 0], [4, 8, -2]]})
df.sql("""
  SELECT foo, ARRAY_UPPER(foo) AS max_elem FROM self
""")
# shape: (2, 2)
# ┌────────────┬──────────┐
# │ foo        ┆ max_elem │
# │ ---        ┆ ---      │
# │ list[i64]  ┆ i64      │
# ╞════════════╪══════════╡
# │ [5, 0]     ┆ 5        │
# │ [4, 8, -2] ┆ 8        │
# └────────────┴──────────┘

UNNEST#

Unnest/explode an array column into multiple rows.

Example:

df = pl.DataFrame(
  {
    "foo": [["a", "b"], ["c", "d", "e"]],
    "bar": [[6, 7, 8], [9, 10]]
  }
)
df.sql("""
  SELECT
    UNNEST(foo) AS f,
    UNNEST(bar) AS b
  FROM self
""")
# shape: (5, 2)
# ┌─────┬─────┐
# │ f   ┆ b   │
# │ --- ┆ --- │
# │ str ┆ i64 │
# ╞═════╪═════╡
# │ a   ┆ 6   │
# │ b   ┆ 7   │
# │ c   ┆ 8   │
# │ d   ┆ 9   │
# │ e   ┆ 10  │
# └─────┴─────┘