polars_io/utils/
compression.rs

1use std::io::Read;
2
3use polars_core::prelude::*;
4use polars_error::{feature_gated, to_compute_err};
5
/// Compression formats for which a decoder is available.
///
/// The derived traits allow detection results to be debug-printed, copied and
/// compared directly (e.g. `check(bytes) == Some(SupportedCompression::GZIP)`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SupportedCompression {
    /// Input starting with the bytes `1f 8b`.
    GZIP,
    /// Input starting with `78` followed by one of `01`, `5e`, `9c` or `da`.
    ZLIB,
    /// Input starting with the bytes `28 b5 2f fd`.
    ZSTD,
}

impl SupportedCompression {
    /// Identifies the compression family of `bytes` from its leading "magic" bytes.
    ///
    /// Returns `Some(family)` when the slice starts with a recognised signature and
    /// `None` for anything else, including slices shorter than four bytes.
    /// Signatures are based on <https://en.wikipedia.org/wiki/List_of_file_signatures>.
    pub fn check(bytes: &[u8]) -> Option<Self> {
        // Every pattern names four leading bytes before the rest pattern (`..`), so
        // inputs with fewer than four bytes fall through to `None` without any
        // explicit length check or panicking index.
        match bytes {
            [0x1f, 0x8b, _, _, ..] => Some(Self::GZIP),
            // Different zlib compression levels without preset dictionary.
            [0x78, 0x01 | 0x5e | 0x9c | 0xda, _, _, ..] => Some(Self::ZLIB),
            [0x28, 0xb5, 0x2f, 0xfd, ..] => Some(Self::ZSTD),
            _ => None,
        }
    }
}
34
35/// Decompress `bytes` if compression is detected, otherwise simply return it.
36/// An `out` vec must be given for ownership of the decompressed data.
37#[allow(clippy::ptr_arg)]
38pub fn maybe_decompress_bytes<'a>(bytes: &'a [u8], out: &'a mut Vec<u8>) -> PolarsResult<&'a [u8]> {
39    assert!(out.is_empty());
40
41    if let Some(algo) = SupportedCompression::check(bytes) {
42        feature_gated!("decompress", {
43            match algo {
44                SupportedCompression::GZIP => {
45                    flate2::read::MultiGzDecoder::new(bytes)
46                        .read_to_end(out)
47                        .map_err(to_compute_err)?;
48                },
49                SupportedCompression::ZLIB => {
50                    flate2::read::ZlibDecoder::new(bytes)
51                        .read_to_end(out)
52                        .map_err(to_compute_err)?;
53                },
54                SupportedCompression::ZSTD => {
55                    zstd::Decoder::with_buffer(bytes)?.read_to_end(out)?;
56                },
57            }
58
59            Ok(out)
60        })
61    } else {
62        Ok(bytes)
63    }
64}