polars_io/
mmap.rs

1use std::fs::File;
2use std::io::{BufReader, Cursor, Read, Seek};
3
4use polars_core::config::verbose;
5use polars_utils::file::ClosableFile;
6use polars_utils::mmap::MemSlice;
7
/// Readers that may be able to expose their backing storage directly.
///
/// An implementor can hand out the [`File`] it wraps or the in-memory bytes it holds, so that a
/// caller can reach the data without going through [`Read`]. Both accessors default to `None`;
/// implementors override whichever one applies to them.
pub trait MmapBytesReader: Read + Seek + Send + Sync {
    /// Returns the underlying file handle, or `None` if this reader does not expose one.
    fn to_file(&self) -> Option<&File> {
        None
    }

    /// Returns the underlying in-memory bytes, or `None` if this reader does not expose any.
    fn to_bytes(&self) -> Option<&[u8]> {
        None
    }
}
19
20impl MmapBytesReader for File {
21    fn to_file(&self) -> Option<&File> {
22        Some(self)
23    }
24}
25
26impl MmapBytesReader for ClosableFile {
27    fn to_file(&self) -> Option<&File> {
28        Some(self.as_ref())
29    }
30}
31
32impl MmapBytesReader for BufReader<File> {
33    fn to_file(&self) -> Option<&File> {
34        Some(self.get_ref())
35    }
36}
37
38impl MmapBytesReader for BufReader<&File> {
39    fn to_file(&self) -> Option<&File> {
40        Some(self.get_ref())
41    }
42}
43
44impl<T> MmapBytesReader for Cursor<T>
45where
46    T: AsRef<[u8]> + Send + Sync,
47{
48    fn to_bytes(&self) -> Option<&[u8]> {
49        Some(self.get_ref().as_ref())
50    }
51}
52
53impl<T: MmapBytesReader + ?Sized> MmapBytesReader for Box<T> {
54    fn to_file(&self) -> Option<&File> {
55        T::to_file(self)
56    }
57
58    fn to_bytes(&self) -> Option<&[u8]> {
59        T::to_bytes(self)
60    }
61}
62
63impl<T: MmapBytesReader> MmapBytesReader for &mut T {
64    fn to_file(&self) -> Option<&File> {
65        T::to_file(self)
66    }
67
68    fn to_bytes(&self) -> Option<&[u8]> {
69        T::to_bytes(self)
70    }
71}
72
73// Handle various forms of input bytes
74pub enum ReaderBytes<'a> {
75    Borrowed(&'a [u8]),
76    Owned(MemSlice),
77}
78
79impl std::ops::Deref for ReaderBytes<'_> {
80    type Target = [u8];
81    fn deref(&self) -> &[u8] {
82        match self {
83            Self::Borrowed(ref_bytes) => ref_bytes,
84            Self::Owned(vec) => vec,
85        }
86    }
87}
88
89/// There are some places that perform manual lifetime management after transmuting `ReaderBytes`
90/// to have a `'static` inner lifetime. The advantage to doing this is that it lets you construct a
91/// `MemSlice` from the `ReaderBytes` in a zero-copy manner regardless of the underlying enum
92/// variant.
93impl ReaderBytes<'static> {
94    /// Construct a `MemSlice` in a zero-copy manner from the underlying bytes, with the assumption
95    /// that the underlying bytes have a `'static` lifetime.
96    pub fn to_memslice(&self) -> MemSlice {
97        match self {
98            ReaderBytes::Borrowed(v) => MemSlice::from_static(v),
99            ReaderBytes::Owned(v) => v.clone(),
100        }
101    }
102}
103
104impl<'a, T: 'a + MmapBytesReader> From<&'a mut T> for ReaderBytes<'a> {
105    fn from(m: &'a mut T) -> Self {
106        match m.to_bytes() {
107            // , but somehow bchk doesn't see that lifetime is 'a.
108            Some(s) => {
109                let s = unsafe { std::mem::transmute::<&[u8], &'a [u8]>(s) };
110                ReaderBytes::Borrowed(s)
111            },
112            None => {
113                if let Some(f) = m.to_file() {
114                    ReaderBytes::Owned(MemSlice::from_file(f).unwrap())
115                } else {
116                    if verbose() {
117                        eprintln!("could not memory map file; read to buffer.")
118                    }
119                    let mut buf = vec![];
120                    m.read_to_end(&mut buf).expect("could not read");
121                    ReaderBytes::Owned(MemSlice::from_vec(buf))
122                }
123            },
124        }
125    }
126}