polars_io/
mmap.rs

1use std::fs::File;
2use std::io::{BufReader, Cursor, Read, Seek};
3
4use polars_core::config::verbose;
5use polars_utils::mmap::MemSlice;
6
/// A seekable, thread-safe reader that can optionally expose its backing storage.
///
/// Implementors may hand out the underlying [`File`] handle or an in-memory byte slice so that
/// callers can reach the data without performing a [`Read`]. Both accessors are optional and
/// default to `None`.
pub trait MmapBytesReader: Read + Seek + Send + Sync {
    /// Returns the file handle behind this reader, if it has one to expose.
    ///
    /// The default implementation returns `None`.
    fn to_file(&self) -> Option<&File> {
        None
    }

    /// Returns the bytes behind this reader as a slice, if it can expose them.
    ///
    /// The default implementation returns `None`.
    fn to_bytes(&self) -> Option<&[u8]> {
        None
    }
}
18
19impl MmapBytesReader for File {
20    fn to_file(&self) -> Option<&File> {
21        Some(self)
22    }
23}
24
25impl MmapBytesReader for BufReader<File> {
26    fn to_file(&self) -> Option<&File> {
27        Some(self.get_ref())
28    }
29}
30
31impl MmapBytesReader for BufReader<&File> {
32    fn to_file(&self) -> Option<&File> {
33        Some(self.get_ref())
34    }
35}
36
37impl<T> MmapBytesReader for Cursor<T>
38where
39    T: AsRef<[u8]> + Send + Sync,
40{
41    fn to_bytes(&self) -> Option<&[u8]> {
42        Some(self.get_ref().as_ref())
43    }
44}
45
46impl<T: MmapBytesReader + ?Sized> MmapBytesReader for Box<T> {
47    fn to_file(&self) -> Option<&File> {
48        T::to_file(self)
49    }
50
51    fn to_bytes(&self) -> Option<&[u8]> {
52        T::to_bytes(self)
53    }
54}
55
56impl<T: MmapBytesReader> MmapBytesReader for &mut T {
57    fn to_file(&self) -> Option<&File> {
58        T::to_file(self)
59    }
60
61    fn to_bytes(&self) -> Option<&[u8]> {
62        T::to_bytes(self)
63    }
64}
65
66// Handle various forms of input bytes
67pub enum ReaderBytes<'a> {
68    Borrowed(&'a [u8]),
69    Owned(MemSlice),
70}
71
72impl std::ops::Deref for ReaderBytes<'_> {
73    type Target = [u8];
74    fn deref(&self) -> &[u8] {
75        match self {
76            Self::Borrowed(ref_bytes) => ref_bytes,
77            Self::Owned(vec) => vec,
78        }
79    }
80}
81
82/// There are some places that perform manual lifetime management after transmuting `ReaderBytes`
83/// to have a `'static` inner lifetime. The advantage to doing this is that it lets you construct a
84/// `MemSlice` from the `ReaderBytes` in a zero-copy manner regardless of the underlying enum
85/// variant.
86impl ReaderBytes<'static> {
87    /// Construct a `MemSlice` in a zero-copy manner from the underlying bytes, with the assumption
88    /// that the underlying bytes have a `'static` lifetime.
89    pub fn to_memslice(&self) -> MemSlice {
90        match self {
91            ReaderBytes::Borrowed(v) => MemSlice::from_static(v),
92            ReaderBytes::Owned(v) => v.clone(),
93        }
94    }
95}
96
97impl<'a, T: 'a + MmapBytesReader> From<&'a mut T> for ReaderBytes<'a> {
98    fn from(m: &'a mut T) -> Self {
99        match m.to_bytes() {
100            // , but somehow bchk doesn't see that lifetime is 'a.
101            Some(s) => {
102                let s = unsafe { std::mem::transmute::<&[u8], &'a [u8]>(s) };
103                ReaderBytes::Borrowed(s)
104            },
105            None => {
106                if let Some(f) = m.to_file() {
107                    ReaderBytes::Owned(MemSlice::from_file(f).unwrap())
108                } else {
109                    if verbose() {
110                        eprintln!("could not memory map file; read to buffer.")
111                    }
112                    let mut buf = vec![];
113                    m.read_to_end(&mut buf).expect("could not read");
114                    ReaderBytes::Owned(MemSlice::from_vec(buf))
115                }
116            },
117        }
118    }
119}