polars_io/utils/
compression.rs

1use std::io::Read;
2
3use polars_core::prelude::*;
4use polars_error::{feature_gated, to_compute_err};
5
/// Compression formats for which a decoder is available.
///
/// Values are produced by [`SupportedCompression::check`], which inspects the
/// leading "magic" bytes of a buffer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SupportedCompression {
    /// gzip container.
    GZIP,
    /// zlib stream.
    ZLIB,
    /// Zstandard frame.
    ZSTD,
}
12
13impl SupportedCompression {
14    /// If the given byte slice starts with the "magic" bytes for a supported compression family, return
15    /// that family, for unsupported/uncompressed slices, return None
16    pub fn check(bytes: &[u8]) -> Option<Self> {
17        if bytes.len() < 4 {
18            // not enough bytes to perform prefix checks
19            return None;
20        }
21        match bytes[..4] {
22            [31, 139, _, _]          => Some(Self::GZIP),
23            [0x78, 0x01, _, _] | // ZLIB0
24            [0x78, 0x9C, _, _] | // ZLIB1
25            [0x78, 0xDA, _, _]   // ZLIB2
26                                     => Some(Self::ZLIB),
27            [0x28, 0xB5, 0x2F, 0xFD] => Some(Self::ZSTD),
28            _ => None,
29        }
30    }
31}
32
33/// Decompress `bytes` if compression is detected, otherwise simply return it.
34/// An `out` vec must be given for ownership of the decompressed data.
35pub fn maybe_decompress_bytes<'a>(bytes: &'a [u8], out: &'a mut Vec<u8>) -> PolarsResult<&'a [u8]> {
36    assert!(out.is_empty());
37
38    if let Some(algo) = SupportedCompression::check(bytes) {
39        feature_gated!("decompress", {
40            match algo {
41                SupportedCompression::GZIP => {
42                    flate2::read::MultiGzDecoder::new(bytes)
43                        .read_to_end(out)
44                        .map_err(to_compute_err)?;
45                },
46                SupportedCompression::ZLIB => {
47                    flate2::read::ZlibDecoder::new(bytes)
48                        .read_to_end(out)
49                        .map_err(to_compute_err)?;
50                },
51                SupportedCompression::ZSTD => {
52                    zstd::Decoder::with_buffer(bytes)?.read_to_end(out)?;
53                },
54            }
55
56            Ok(out)
57        })
58    } else {
59        Ok(bytes)
60    }
61}