Type Alias LargeStringArray

pub type LargeStringArray = Utf8Array<i64>;

Aliased Type§

struct LargeStringArray { /* private fields */ }

Implementations

§

impl<O> Utf8Array<O>
where O: Offset,

pub fn try_new( dtype: ArrowDataType, offsets: OffsetsBuffer<O>, values: Buffer<u8>, validity: Option<Bitmap>, ) -> Result<Utf8Array<O>, PolarsError>

Returns a [Utf8Array] created from its internal representation.

§Errors

This function returns an error iff:

  • The last offset is greater than the values’ length.
  • the validity’s length is not equal to offsets.len_proxy().
  • The dtype’s [crate::datatypes::PhysicalType] is not equal to either Utf8 or LargeUtf8.
  • The values between two consecutive offsets are not valid utf8
§Implementation

This function is O(N) - checking utf8 is O(N)

pub fn from_slice<T, P>(slice: P) -> Utf8Array<O>
where T: AsRef<str>, P: AsRef<[T]>,

Returns a [Utf8Array] from a slice of &str.

A convenience method that uses [Self::from_trusted_len_values_iter].

pub fn from<T, P>(slice: P) -> Utf8Array<O>
where T: AsRef<str>, P: AsRef<[Option<T>]>,

Returns a new [Utf8Array] from a slice of Option<&str>.

A convenience method that uses [Self::from_trusted_len_iter].

pub fn iter( &self, ) -> ZipValidity<&str, ArrayValuesIter<'_, Utf8Array<O>>, BitmapIter<'_>>

Returns an iterator of Option<&str>

pub fn values_iter(&self) -> ArrayValuesIter<'_, Utf8Array<O>>

Returns an iterator of &str

pub fn non_null_values_iter(&self) -> NonNullValuesIter<'_, Utf8Array<O>>

Returns an iterator of the non-null values &str.

pub fn len(&self) -> usize

Returns the length of this array

pub fn value(&self, i: usize) -> &str

Returns the value of the element at index i, ignoring the array’s validity.

§Panic

This function panics iff i >= self.len().

pub unsafe fn value_unchecked(&self, i: usize) -> &str

Returns the value of the element at index i, ignoring the array’s validity.

§Safety

This function is safe iff i < self.len().

pub fn get(&self, i: usize) -> Option<&str>

Returns the element at index i or None if it is null

§Panics

iff i >= self.len()

pub fn dtype(&self) -> &ArrowDataType

Returns the ArrowDataType of this array.

pub fn values(&self) -> &Buffer<u8>

Returns the values of this [Utf8Array].

pub fn offsets(&self) -> &OffsetsBuffer<O>

Returns the offsets of this [Utf8Array].

pub fn validity(&self) -> Option<&Bitmap>

The optional validity.

pub fn slice(&mut self, offset: usize, length: usize)

Slices this [Utf8Array].

§Implementation

This function is O(1).

§Panics

iff offset + length > self.len().

pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize)

Slices this [Utf8Array].

§Implementation

This function is O(1)

§Safety

The caller must ensure that offset + length <= self.len().

pub fn sliced(self, offset: usize, length: usize) -> Utf8Array<O>

Returns this array sliced.

§Implementation

This function is O(1).

§Panics

iff offset + length > self.len().

pub unsafe fn sliced_unchecked( self, offset: usize, length: usize, ) -> Utf8Array<O>

Returns this array sliced.

§Implementation

This function is O(1).

§Safety

The caller must ensure that offset + length <= self.len().

pub fn with_validity(self, validity: Option<Bitmap>) -> Utf8Array<O>

Returns this array with a new validity.

§Panic

Panics iff validity.len() != self.len().

pub fn set_validity(&mut self, validity: Option<Bitmap>)

Sets the validity of this array.

§Panics

This function panics iff validity.len() != self.len().

pub fn take_validity(&mut self) -> Option<Bitmap>

Takes the validity of this array, leaving it without a validity mask.

pub fn boxed(self) -> Box<dyn Array>

Boxes this array into a Box<dyn Array>.

pub fn arced(self) -> Arc<dyn Array>

Arcs this array into a std::sync::Arc<dyn Array>.

pub fn into_inner( self, ) -> (ArrowDataType, OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>)

Returns its internal representation

pub fn into_mut(self) -> Either<Utf8Array<O>, MutableUtf8Array<O>>

Try to convert this Utf8Array to a MutableUtf8Array

pub fn new_empty(dtype: ArrowDataType) -> Utf8Array<O>

Returns a new empty [Utf8Array].

The array is guaranteed to have no elements nor validity.

pub fn new_null(dtype: ArrowDataType, length: usize) -> Utf8Array<O>

Returns a new [Utf8Array] in which all slots are null / None.

pub fn default_dtype() -> ArrowDataType

Returns a default ArrowDataType of this array, which depends on the generic parameter O: DataType::Utf8 or DataType::LargeUtf8

pub unsafe fn new_unchecked( dtype: ArrowDataType, offsets: OffsetsBuffer<O>, values: Buffer<u8>, validity: Option<Bitmap>, ) -> Utf8Array<O>

Creates a new [Utf8Array] without checking for offsets monotonicity nor utf8-validity

§Panic

This function panics (in debug mode only) iff:

  • The last offset is greater than the values’ length.
  • the validity’s length is not equal to offsets.len_proxy().
  • The dtype’s [crate::datatypes::PhysicalType] is not equal to either Utf8 or LargeUtf8.
§Safety

This function is unsound iff:

  • The values between two consecutive offsets are not valid utf8
§Implementation

This function is O(1)

pub fn new( dtype: ArrowDataType, offsets: OffsetsBuffer<O>, values: Buffer<u8>, validity: Option<Bitmap>, ) -> Utf8Array<O>

Creates a new [Utf8Array].

§Panics

This function panics iff:

  • offsets.last() is greater than values.len().
  • the validity’s length is not equal to offsets.len_proxy().
  • The dtype’s [crate::datatypes::PhysicalType] is not equal to either Utf8 or LargeUtf8.
  • The values between two consecutive offsets are not valid utf8
§Implementation

This function is O(N) - checking utf8 is O(N)

pub fn from_trusted_len_values_iter<T, I>(iterator: I) -> Utf8Array<O>
where T: AsRef<str>, I: TrustedLen<Item = T>,

Returns a (non-null) [Utf8Array] created from a [TrustedLen] of &str.

§Implementation

This function is O(N)

pub fn from_iter_values<T, I>(iterator: I) -> Utf8Array<O>
where T: AsRef<str>, I: Iterator<Item = T>,

Creates a new [Utf8Array] from an Iterator of &str.

pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Utf8Array<O>
where P: AsRef<str>, I: Iterator<Item = Option<P>>,

Creates a [Utf8Array] from an iterator of trusted length.

§Safety

The iterator must be TrustedLen, i.e. its size_hint().1 must correctly report its length.

pub fn from_trusted_len_iter<I, P>(iterator: I) -> Utf8Array<O>
where P: AsRef<str>, I: TrustedLen<Item = Option<P>>,

Creates a [Utf8Array] from an iterator of trusted length.

pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>( iterator: I, ) -> Result<Utf8Array<O>, E>
where P: AsRef<str>, I: IntoIterator<Item = Result<Option<P>, E>>,

Creates a [Utf8Array] from a fallible iterator of trusted length.

§Safety

The iterator must be TrustedLen, i.e. its size_hint().1 must correctly report its length.

pub fn try_from_trusted_len_iter<E, I, P>(iter: I) -> Result<Utf8Array<O>, E>
where P: AsRef<str>, I: TrustedLen<Item = Result<Option<P>, E>>,

Creates a [Utf8Array] from a fallible iterator of trusted length.

pub fn apply_validity<F>(&mut self, f: F)
where F: FnOnce(Bitmap) -> Bitmap,

Applies a function f to the validity of this array.

This is an API to leverage clone-on-write

§Panics

This function panics if the function f modifies the length of the [Bitmap].

pub fn to_binary(&self) -> BinaryArray<O>

Trait Implementations

§

impl<O> Array for Utf8Array<O>
where O: Offset,

§

fn as_any(&self) -> &(dyn Any + 'static)

Converts itself to a reference of Any, which enables downcasting to concrete types.
§

fn as_any_mut(&mut self) -> &mut (dyn Any + 'static)

Converts itself to a mutable reference of Any, which enables mutable downcasting to concrete types.
§

fn len(&self) -> usize

The length of the [Array]. Every array has a length corresponding to the number of elements (slots).
§

fn dtype(&self) -> &ArrowDataType

The ArrowDataType of the [Array]. In combination with [Array::as_any], this can be used to downcast trait objects (dyn Array) to concrete arrays.
§

fn split_at_boxed(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>)

Split [Self] at offset into two boxed [Array]s where offset <= self.len().
§

unsafe fn split_at_boxed_unchecked( &self, offset: usize, ) -> (Box<dyn Array>, Box<dyn Array>)

Split [Self] at offset into two boxed [Array]s without checking offset <= self.len(). Read more
§

fn slice(&mut self, offset: usize, length: usize)

Slices this [Array]. Read more
§

unsafe fn slice_unchecked(&mut self, offset: usize, length: usize)

Slices the [Array]. Read more
§

fn to_boxed(&self) -> Box<dyn Array>

Clone a &dyn Array to an owned Box<dyn Array>.
§

fn validity(&self) -> Option<&Bitmap>

The validity of the [Array]: every array has an optional [Bitmap] that, when available, specifies whether the array slot is valid or not (null). When the validity is None, all slots are valid.
§

fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array>

Clones this [Array] with a new assigned bitmap. Read more
§

fn is_empty(&self) -> bool

whether the array is empty
§

fn null_count(&self) -> usize

The number of null slots on this [Array]. Read more
§

fn has_nulls(&self) -> bool

§

fn is_null(&self, i: usize) -> bool

Returns whether slot i is null. Read more
§

unsafe fn is_null_unchecked(&self, i: usize) -> bool

Returns whether slot i is null. Read more
§

fn is_valid(&self, i: usize) -> bool

Returns whether slot i is valid. Read more
§

fn sliced(&self, offset: usize, length: usize) -> Box<dyn Array>

Returns a slice of this [Array]. Read more
§

unsafe fn sliced_unchecked( &self, offset: usize, length: usize, ) -> Box<dyn Array>

Returns a slice of this [Array]. Read more
§

impl<'a, O> ArrayAccessor<'a> for Utf8Array<O>
where O: Offset,

§

type Item = &'a str

§

unsafe fn value_unchecked( &'a self, index: usize, ) -> <Utf8Array<O> as ArrayAccessor<'a>>::Item

Safety Read more
§

fn len(&self) -> usize

§

impl<T> ArrayFromIter<Option<T>> for Utf8Array<i64>
where T: StrIntoBytes,

§

fn arr_from_iter<I>(iter: I) -> Utf8Array<i64>
where I: IntoIterator<Item = Option<T>>,

§

fn arr_from_iter_trusted<I>(iter: I) -> Utf8Array<i64>
where I: IntoIterator<Item = Option<T>>, <I as IntoIterator>::IntoIter: TrustedLen,

§

fn try_arr_from_iter<E, I>(iter: I) -> Result<Utf8Array<i64>, E>
where I: IntoIterator<Item = Result<Option<T>, E>>,

§

fn try_arr_from_iter_trusted<E, I>(iter: I) -> Result<Utf8Array<i64>, E>
where I: IntoIterator<Item = Result<Option<T>, E>>,

§

impl<T> ArrayFromIter<T> for Utf8Array<i64>
where T: StrIntoBytes,

§

fn arr_from_iter<I>(iter: I) -> Utf8Array<i64>
where I: IntoIterator<Item = T>,

§

fn arr_from_iter_trusted<I>(iter: I) -> Utf8Array<i64>
where I: IntoIterator<Item = T>, <I as IntoIterator>::IntoIter: TrustedLen,

§

fn try_arr_from_iter<E, I>(iter: I) -> Result<Utf8Array<i64>, E>
where I: IntoIterator<Item = Result<T, E>>,

§

fn try_arr_from_iter_trusted<E, I>(iter: I) -> Result<Utf8Array<i64>, E>
where I: IntoIterator<Item = Result<T, E>>,

§

impl<O> Clone for Utf8Array<O>
where O: Clone + Offset,

§

fn clone(&self) -> Utf8Array<O>

Returns a copy of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
§

impl<O> Debug for Utf8Array<O>
where O: Offset,

§

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Formats the value using the given formatter. Read more
§

impl<O> Default for Utf8Array<O>
where O: Offset,

§

fn default() -> Utf8Array<O>

Returns the “default value” for a type. Read more
§

impl<O> From<MutableUtf8Array<O>> for Utf8Array<O>
where O: Offset,

§

fn from(other: MutableUtf8Array<O>) -> Utf8Array<O>

Converts to this type from the input type.
§

impl<O> From<MutableUtf8ValuesArray<O>> for Utf8Array<O>
where O: Offset,

§

fn from(other: MutableUtf8ValuesArray<O>) -> Utf8Array<O>

Converts to this type from the input type.
§

impl FromDataUtf8 for Utf8Array<i64>

§

unsafe fn from_data_unchecked_default( offsets: Buffer<i64>, values: Buffer<u8>, validity: Option<Bitmap>, ) -> Utf8Array<i64>

Safety Read more
§

impl<O, P> FromIterator<Option<P>> for Utf8Array<O>
where O: Offset, P: AsRef<str>,

§

fn from_iter<I>(iter: I) -> Utf8Array<O>
where I: IntoIterator<Item = Option<P>>,

Creates a value from an iterator. Read more
§

impl<O> GenericBinaryArray<O> for Utf8Array<O>
where O: Offset,

§

fn values(&self) -> &[u8]

The values of the array
§

fn offsets(&self) -> &[O]

The offsets of the array
§

impl<O> MinMaxKernel for Utf8Array<O>
where O: Offset,

§

type Scalar<'a> = &'a str

§

fn min_ignore_nan_kernel( &self, ) -> Option<<Utf8Array<O> as MinMaxKernel>::Scalar<'_>>

§

fn max_ignore_nan_kernel( &self, ) -> Option<<Utf8Array<O> as MinMaxKernel>::Scalar<'_>>

§

fn min_propagate_nan_kernel( &self, ) -> Option<<Utf8Array<O> as MinMaxKernel>::Scalar<'_>>

§

fn max_propagate_nan_kernel( &self, ) -> Option<<Utf8Array<O> as MinMaxKernel>::Scalar<'_>>

§

fn min_max_ignore_nan_kernel( &self, ) -> Option<(Self::Scalar<'_>, Self::Scalar<'_>)>

§

fn min_max_propagate_nan_kernel( &self, ) -> Option<(Self::Scalar<'_>, Self::Scalar<'_>)>

§

impl ParameterFreeDtypeStaticArray for Utf8Array<i64>

§

impl<O> PartialEq<&(dyn Array + 'static)> for Utf8Array<O>
where O: Offset,

§

fn eq(&self, other: &&(dyn Array + 'static)) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · Source§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
§

impl<O> PartialEq for Utf8Array<O>
where O: Offset,

§

fn eq(&self, other: &Utf8Array<O>) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · Source§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
§

impl<O> Splitable for Utf8Array<O>
where O: Offset,

§

fn check_bound(&self, offset: usize) -> bool

§

unsafe fn _split_at_unchecked( &self, offset: usize, ) -> (Utf8Array<O>, Utf8Array<O>)

Internal implementation of split_at_unchecked. For any usage, prefer using split_at or split_at_unchecked. Read more
§

fn split_at(&self, offset: usize) -> (Self, Self)

Split [Self] at offset where offset <= self.len().
§

unsafe fn split_at_unchecked(&self, offset: usize) -> (Self, Self)

Split [Self] at offset without checking offset <= self.len(). Read more
§

impl StaticArray for Utf8Array<i64>

§

type ValueT<'a> = &'a str

§

type ZeroableValueT<'a> = Option<&'a str>

§

type ValueIterT<'a> = ArrayValuesIter<'a, Utf8Array<i64>>

§

unsafe fn value_unchecked( &self, idx: usize, ) -> <Utf8Array<i64> as StaticArray>::ValueT<'_>

Safety Read more
§

fn values_iter(&self) -> <Utf8Array<i64> as StaticArray>::ValueIterT<'_>

§

fn iter( &self, ) -> ZipValidity<<Utf8Array<i64> as StaticArray>::ValueT<'_>, <Utf8Array<i64> as StaticArray>::ValueIterT<'_>, BitmapIter<'_>>

§

fn with_validity_typed(self, validity: Option<Bitmap>) -> Utf8Array<i64>

§

fn full_null(length: usize, dtype: ArrowDataType) -> Utf8Array<i64>

§

fn get(&self, idx: usize) -> Option<Self::ValueT<'_>>

§

unsafe fn get_unchecked(&self, idx: usize) -> Option<Self::ValueT<'_>>

Safety Read more
§

fn last(&self) -> Option<Self::ValueT<'_>>

§

fn value(&self, idx: usize) -> Self::ValueT<'_>

§

fn as_slice(&self) -> Option<&[Self::ValueT<'_>]>

§

fn from_vec(v: Vec<Self::ValueT<'_>>, dtype: ArrowDataType) -> Self

§

fn from_zeroable_vec( v: Vec<Self::ZeroableValueT<'_>>, dtype: ArrowDataType, ) -> Self

§

fn full(length: usize, value: Self::ValueT<'_>, dtype: ArrowDataType) -> Self

§

impl<O> TotalEqKernel for Utf8Array<O>
where O: Offset,

§

type Scalar = str

§

fn tot_eq_kernel(&self, other: &Utf8Array<O>) -> Bitmap

§

fn tot_ne_kernel(&self, other: &Utf8Array<O>) -> Bitmap

§

fn tot_eq_kernel_broadcast( &self, other: &<Utf8Array<O> as TotalEqKernel>::Scalar, ) -> Bitmap

§

fn tot_ne_kernel_broadcast( &self, other: &<Utf8Array<O> as TotalEqKernel>::Scalar, ) -> Bitmap

§

fn tot_eq_missing_kernel(&self, other: &Self) -> Bitmap

§

fn tot_ne_missing_kernel(&self, other: &Self) -> Bitmap

§

fn tot_eq_missing_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap

§

fn tot_ne_missing_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap

§

impl<O> TotalOrdKernel for Utf8Array<O>
where O: Offset,

§

type Scalar = str

§

fn tot_lt_kernel(&self, other: &Utf8Array<O>) -> Bitmap

§

fn tot_le_kernel(&self, other: &Utf8Array<O>) -> Bitmap

§

fn tot_lt_kernel_broadcast( &self, other: &<Utf8Array<O> as TotalOrdKernel>::Scalar, ) -> Bitmap

§

fn tot_le_kernel_broadcast( &self, other: &<Utf8Array<O> as TotalOrdKernel>::Scalar, ) -> Bitmap

§

fn tot_gt_kernel_broadcast( &self, other: &<Utf8Array<O> as TotalOrdKernel>::Scalar, ) -> Bitmap

§

fn tot_ge_kernel_broadcast( &self, other: &<Utf8Array<O> as TotalOrdKernel>::Scalar, ) -> Bitmap

§

fn tot_gt_kernel(&self, other: &Self) -> Bitmap

§

fn tot_ge_kernel(&self, other: &Self) -> Bitmap

§

impl Utf8FromIter for Utf8Array<i64>

§

fn from_values_iter<I, S>( iter: I, len: usize, size_hint: usize, ) -> Utf8Array<i64>
where S: AsRef<str>, I: Iterator<Item = S>,

§

impl<O> ValueSize for Utf8Array<O>
where O: Offset,

§

fn get_values_size(&self) -> usize

Get the values size that is still “visible” to the underlying array. E.g. take the offsets into account.
§

impl ArrowArray for Utf8Array<i64>