polars_io/utils/mod.rs
1pub mod compression;
2mod other;
3
4pub use other::*;
5#[cfg(feature = "cloud")]
6pub mod byte_source;
7pub mod file;
8pub mod mkdir;
9pub mod slice;
10pub mod sync_on_close;
11
12/// Excludes only the unreserved URI characters in RFC-3986:
13///
14/// <https://datatracker.ietf.org/doc/html/rfc3986#section-2.3>
15///
16/// Characters that are allowed in a URI but do not have a reserved
17/// purpose are called unreserved. These include uppercase and lowercase
18/// letters, decimal digits, hyphen, period, underscore, and tilde.
19///
20/// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
21pub const URL_ENCODE_CHARSET: &percent_encoding::AsciiSet = &percent_encoding::NON_ALPHANUMERIC
22 .remove(b'-')
23 .remove(b'.')
24 .remove(b'_')
25 .remove(b'~');
26
27/// Characters to percent-encode for hive values such that they round-trip from bucket storage.
28///
29/// This is much more relaxed than the RFC-3986 URI spec as bucket storage is more permissive of allowed
30/// characters.
31pub const HIVE_VALUE_ENCODE_CHARSET: &percent_encoding::AsciiSet = &percent_encoding::CONTROLS
32 .add(b'/') // Exclude path separator
33 .add(b'=') // Exclude hive `key=value` separator
34 .add(b'%') // Percent itself.
35 // Colon and space are supported by object storage, but are encoded to mimic
36 // the datetime output format from pyarrow:
37 // * i.e. 'date2=2023-01-01 00:00:00.000000' becomes 'date2=2023-01-01%2000%3A00%3A00.000000'
38 .add(b':')
39 .add(b' ');