Array#
Function |
Description |
---|---|
Aggregate a column/expression values as an array. |
|
Returns true if the array contains the value. |
|
Returns the value at the given index in the array. |
|
Returns the length of the array. |
|
Returns the lower bound (min value) in an array. |
|
Returns the mean of all values in an array. |
|
Returns the array with the elements in reverse order. |
|
Returns the sum of all values in an array. |
|
Takes all elements of the array and joins them into one string. |
|
Returns the array with the unique elements. |
|
Returns the upper bound (max value) in an array. |
|
Unnests (explodes) an array column into multiple rows. |
ARRAY_AGG#
Aggregate a column/expression as an array (equivalent to implode
).
Supports optional inline ORDER BY
and LIMIT
clauses.
Example:
df = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
df.sql("""
SELECT
ARRAY_AGG(foo ORDER BY foo DESC) AS arr_foo,
ARRAY_AGG(bar LIMIT 2) AS arr_bar
FROM self
""")
# shape: (1, 2)
# ┌───────────┬───────────┐
# │ arr_foo ┆ arr_bar │
# │ --- ┆ --- │
# │ list[i64] ┆ list[i64] │
# ╞═══════════╪═══════════╡
# │ [3, 2, 1] ┆ [4, 5] │
# └───────────┴───────────┘
ARRAY_CONTAINS#
Returns true if the array contains the value.
Example:
df = pl.DataFrame({"foo": [[1, 2], [4, 3]]})
df.sql("""
SELECT foo, ARRAY_CONTAINS(foo, 2) AS has_two FROM self
""")
# shape: (2, 2)
# ┌───────────┬─────────┐
# │ foo ┆ has_two │
# │ --- ┆ --- │
# │ list[i64] ┆ bool │
# ╞═══════════╪═════════╡
# │ [1, 2] ┆ true │
# │ [4, 3] ┆ false │
# └───────────┴─────────┘
ARRAY_GET#
Returns the value at the given index in the array.
Example:
df = pl.DataFrame(
{
"foo": [[1, 2], [4, 3, 2]],
"bar": [[6, 7], [8, 9, 10]]
}
)
df.sql("""
SELECT
foo, bar,
ARRAY_GET(foo, 1) AS foo_at_1,
ARRAY_GET(bar, 3) AS bar_at_2
FROM self
""")
# shape: (2, 4)
# ┌───────────┬────────────┬──────────┬──────────┐
# │ foo ┆ bar ┆ foo_at_1 ┆ bar_at_2 │
# │ --- ┆ --- ┆ --- ┆ --- │
# │ list[i64] ┆ list[i64] ┆ i64 ┆ i64 │
# ╞═══════════╪════════════╪══════════╪══════════╡
# │ [1, 2] ┆ [6, 7] ┆ 1 ┆ null │
# │ [4, 3, 2] ┆ [8, 9, 10] ┆ 4 ┆ 10 │
# └───────────┴────────────┴──────────┴──────────┘
ARRAY_LENGTH#
Returns the length of the array.
Example:
df = pl.DataFrame({"foo": [[1, 2], [4, 3, 2]]})
df.sql("""
SELECT foo, ARRAY_LENGTH(foo) AS n_elems FROM self
""")
# shape: (2, 2)
# ┌───────────┬─────────┐
# │ foo ┆ n_elems │
# │ --- ┆ --- │
# │ list[i64] ┆ u32 │
# ╞═══════════╪═════════╡
# │ [1, 2] ┆ 2 │
# │ [4, 3, 2] ┆ 3 │
# └───────────┴─────────┘
ARRAY_LOWER#
Returns the lower bound (min value) in an array.
Example:
df = pl.DataFrame({"foo": [[1, 2], [4, -2, 8]]})
df.sql("""
SELECT foo, ARRAY_LOWER(foo) AS min_elem FROM self
""")
# shape: (2, 2)
# ┌────────────┬──────────┐
# │ foo ┆ min_elem │
# │ --- ┆ --- │
# │ list[i64] ┆ i64 │
# ╞════════════╪══════════╡
# │ [1, 2] ┆ 1 │
# │ [4, -2, 8] ┆ -2 │
# └────────────┴──────────┘
ARRAY_MEAN#
Returns the mean of all values in an array.
Example:
df = pl.DataFrame({"foo": [[1, 2], [4, 3, -1]]})
df.sql("""
SELECT foo, ARRAY_MEAN(foo) AS foo_mean FROM self
""")
# shape: (2, 2)
# ┌────────────┬──────────┐
# │ foo ┆ foo_mean │
# │ --- ┆ --- │
# │ list[i64] ┆ f64 │
# ╞════════════╪══════════╡
# │ [1, 2] ┆ 1.5 │
# │ [4, 3, -1] ┆ 2.0 │
# └────────────┴──────────┘
ARRAY_REVERSE#
Returns the array with the elements in reverse order.
Example:
df = pl.DataFrame(
{
"foo": [[1, 2], [4, 3, 2]],
"bar": [[6, 7], [8, 9, 10]]
}
)
df.sql("""
SELECT
foo,
ARRAY_REVERSE(foo) AS oof,
ARRAY_REVERSE(bar) AS rab
FROM self
""")
# shape: (2, 3)
# ┌───────────┬───────────┬────────────┐
# │ foo ┆ oof ┆ rab │
# │ --- ┆ --- ┆ --- │
# │ list[i64] ┆ list[i64] ┆ list[i64] │
# ╞═══════════╪═══════════╪════════════╡
# │ [1, 2] ┆ [2, 1] ┆ [7, 6] │
# │ [4, 3, 2] ┆ [2, 3, 4] ┆ [10, 9, 8] │
# └───────────┴───────────┴────────────┘
ARRAY_SUM#
Returns the sum of all values in an array.
Example:
df = pl.DataFrame({"foo": [[1, -2], [10, 3, -2]]})
df.sql("""
SELECT
foo,
ARRAY_SUM(foo) AS foo_sum
FROM self
""")
# shape: (2, 2)
# ┌─────────────┬─────────┐
# │ foo ┆ foo_sum │
# │ --- ┆ --- │
# │ list[i64] ┆ i64 │
# ╞═════════════╪═════════╡
# │ [1, -2] ┆ -1 │
# │ [10, 3, -2] ┆ 11 │
# └─────────────┴─────────┘
ARRAY_TO_STRING#
Takes all elements of the array and joins them into one string.
Example:
df = pl.DataFrame(
{
"foo": [["a", "b"], ["c", "d", "e"]],
"bar": [[8, None, 8], [3, 2, 1, 0]],
}
)
df.sql("""
SELECT
ARRAY_TO_STRING(foo,':') AS s_foo,
ARRAY_TO_STRING(bar,':') AS s_bar
FROM self
""")
# shape: (2, 2)
# ┌───────┬─────────┐
# │ s_foo ┆ s_bar │
# │ --- ┆ --- │
# │ str ┆ str │
# ╞═══════╪═════════╡
# │ a:b ┆ 8:8 │
# │ c:d:e ┆ 3:2:1:0 │
# └───────┴─────────┘
ARRAY_UNIQUE#
Returns the array with the unique elements.
Example:
df = pl.DataFrame({"foo": [["a", "b"], ["b", "b", "e"]]})
df.sql("""
SELECT ARRAY_UNIQUE(foo) AS foo_unique FROM self
""")
# shape: (2, 1)
# ┌────────────┐
# │ foo_unique │
# │ --- │
# │ list[str] │
# ╞════════════╡
# │ ["a", "b"] │
# │ ["e", "b"] │
# └────────────┘
ARRAY_UPPER#
Returns the upper bound (max value) in an array.
Example:
df = pl.DataFrame({"foo": [[5, 0], [4, 8, -2]]})
df.sql("""
SELECT foo, ARRAY_UPPER(foo) AS max_elem FROM self
""")
# shape: (2, 2)
# ┌────────────┬──────────┐
# │ foo ┆ max_elem │
# │ --- ┆ --- │
# │ list[i64] ┆ i64 │
# ╞════════════╪══════════╡
# │ [5, 0] ┆ 5 │
# │ [4, 8, -2] ┆ 8 │
# └────────────┴──────────┘
UNNEST#
Unnest/explode an array column into multiple rows.
Example:
df = pl.DataFrame(
{
"foo": [["a", "b"], ["c", "d", "e"]],
"bar": [[6, 7, 8], [9, 10]]
}
)
df.sql("""
SELECT
UNNEST(foo) AS f,
UNNEST(bar) AS b
FROM self
""")
# shape: (5, 2)
# ┌─────┬─────┐
# │ f ┆ b │
# │ --- ┆ --- │
# │ str ┆ i64 │
# ╞═════╪═════╡
# │ a ┆ 6 │
# │ b ┆ 7 │
# │ c ┆ 8 │
# │ d ┆ 9 │
# │ e ┆ 10 │
# └─────┴─────┘