polars_utils/
mem.rs

1use std::sync::LazyLock;
2
3pub static PAGE_SIZE: LazyLock<usize> = LazyLock::new(|| {
4    #[cfg(target_family = "unix")]
5    unsafe {
6        libc::sysconf(libc::_SC_PAGESIZE) as usize
7    }
8    #[cfg(not(target_family = "unix"))]
9    {
10        4096
11    }
12});
13
14pub mod prefetch {
15    use super::PAGE_SIZE;
16
17    /// # Safety
18    ///
19    /// This should only be called with pointers to valid memory.
20    unsafe fn prefetch_l2_impl(ptr: *const u8) {
21        #[cfg(target_arch = "x86_64")]
22        {
23            use std::arch::x86_64::*;
24            unsafe { _mm_prefetch(ptr as *const _, _MM_HINT_T1) };
25        }
26
27        #[cfg(all(target_arch = "aarch64", feature = "nightly"))]
28        {
29            use std::arch::aarch64::*;
30            unsafe { _prefetch(ptr as *const _, _PREFETCH_READ, _PREFETCH_LOCALITY2) };
31        }
32    }
33
34    /// Attempt to prefetch the memory in the slice to the L2 cache.
35    pub fn prefetch_l2(slice: &[u8]) {
36        if slice.is_empty() {
37            return;
38        }
39
40        // @TODO: We can play a bit more with this prefetching. Maybe introduce a maximum number of
41        // prefetches as to not overwhelm the processor. The linear prefetcher should pick it up
42        // at a certain point.
43
44        for i in (0..slice.len()).step_by(*PAGE_SIZE) {
45            unsafe { prefetch_l2_impl(slice[i..].as_ptr()) };
46        }
47
48        unsafe { prefetch_l2_impl(slice[slice.len() - 1..].as_ptr()) }
49    }
50
51    /// `madvise()` with `MADV_SEQUENTIAL` on unix systems. This is a no-op on non-unix systems.
52    pub fn madvise_sequential(#[allow(unused)] slice: &[u8]) {
53        #[cfg(target_family = "unix")]
54        madvise(slice, libc::MADV_SEQUENTIAL);
55    }
56
57    /// `madvise()` with `MADV_WILLNEED` on unix systems. This is a no-op on non-unix systems.
58    pub fn madvise_willneed(#[allow(unused)] slice: &[u8]) {
59        #[cfg(target_family = "unix")]
60        madvise(slice, libc::MADV_WILLNEED);
61    }
62
63    /// `madvise()` with `MADV_POPULATE_READ` on linux systems. This a no-op on non-linux systems.
64    pub fn madvise_populate_read(#[allow(unused)] slice: &[u8]) {
65        #[cfg(target_os = "linux")]
66        madvise(slice, libc::MADV_POPULATE_READ);
67    }
68
69    /// Forcibly reads at least one byte each page.
70    pub fn force_populate_read(slice: &[u8]) {
71        for i in (0..slice.len()).step_by(*PAGE_SIZE) {
72            std::hint::black_box(slice[i]);
73        }
74
75        std::hint::black_box(slice.last().copied());
76    }
77
78    #[cfg(target_family = "unix")]
79    fn madvise(slice: &[u8], advice: libc::c_int) {
80        if slice.is_empty() {
81            return;
82        }
83        let ptr = slice.as_ptr();
84
85        let align = ptr as usize % *PAGE_SIZE;
86        let ptr = ptr.wrapping_sub(align);
87        let len = slice.len() + align;
88
89        if unsafe { libc::madvise(ptr as *mut libc::c_void, len, advice) } != 0 {
90            let err = std::io::Error::last_os_error();
91            if let std::io::ErrorKind::InvalidInput = err.kind() {
92                panic!("{}", err);
93            }
94        }
95    }
96
97    pub fn no_prefetch(_: &[u8]) {}
98
99    /// Get the configured memory prefetch function.
100    pub fn get_memory_prefetch_func(verbose: bool) -> fn(&[u8]) -> () {
101        let memory_prefetch_func = match std::env::var("POLARS_MEMORY_PREFETCH").ok().as_deref() {
102            None => {
103                // madvise_willneed performed the best on both MacOS on Apple Silicon and Ubuntu on x86-64,
104                // using PDS-H query 3 SF=10 after clearing file cache as a benchmark.
105                #[cfg(target_family = "unix")]
106                {
107                    madvise_willneed
108                }
109                #[cfg(not(target_family = "unix"))]
110                {
111                    no_prefetch
112                }
113            },
114            Some("no_prefetch") => no_prefetch,
115            Some("prefetch_l2") => prefetch_l2,
116            Some("madvise_sequential") => {
117                #[cfg(target_family = "unix")]
118                {
119                    madvise_sequential
120                }
121                #[cfg(not(target_family = "unix"))]
122                {
123                    panic!(
124                        "POLARS_MEMORY_PREFETCH=madvise_sequential is not supported by this system"
125                    );
126                }
127            },
128            Some("madvise_willneed") => {
129                #[cfg(target_family = "unix")]
130                {
131                    madvise_willneed
132                }
133                #[cfg(not(target_family = "unix"))]
134                {
135                    panic!(
136                        "POLARS_MEMORY_PREFETCH=madvise_willneed is not supported by this system"
137                    );
138                }
139            },
140            Some("madvise_populate_read") => {
141                #[cfg(target_os = "linux")]
142                {
143                    madvise_populate_read
144                }
145                #[cfg(not(target_os = "linux"))]
146                {
147                    panic!(
148                        "POLARS_MEMORY_PREFETCH=madvise_populate_read is not supported by this system"
149                    );
150                }
151            },
152            Some("force_populate_read") => force_populate_read,
153            Some(v) => panic!("invalid value for POLARS_MEMORY_PREFETCH: {v}"),
154        };
155
156        if verbose {
157            let func_name = match memory_prefetch_func as usize {
158                v if v == no_prefetch as usize => "no_prefetch",
159                v if v == prefetch_l2 as usize => "prefetch_l2",
160                v if v == madvise_sequential as usize => "madvise_sequential",
161                v if v == madvise_willneed as usize => "madvise_willneed",
162                v if v == madvise_populate_read as usize => "madvise_populate_read",
163                v if v == force_populate_read as usize => "force_populate_read",
164                _ => unreachable!(),
165            };
166
167            eprintln!("memory prefetch function: {func_name}");
168        }
169
170        memory_prefetch_func
171    }
172}