polars_utils/
mem.rs

/// Reinterprets a shared slice as a mutable slice over the same memory.
///
/// The returned slice has the same start address and length as `s`.
///
/// # Safety
/// This may break aliasing rules, make sure you are the only owner of the
/// data behind `s` for as long as the returned slice is in use.
#[allow(clippy::mut_from_ref)]
pub unsafe fn to_mutable_slice<T: Copy>(s: &[T]) -> &mut [T] {
    let data = s.as_ptr().cast_mut();
    // SAFETY: `data` and `s.len()` describe exactly the memory `s` refers to.
    // Exclusive access is the caller's responsibility (see `# Safety`).
    unsafe { std::slice::from_raw_parts_mut(data, s.len()) }
}
9
10pub mod prefetch {
11    use std::sync::LazyLock;
12    static PAGE_SIZE: LazyLock<usize> = LazyLock::new(|| {
13        #[cfg(target_family = "unix")]
14        unsafe {
15            libc::sysconf(libc::_SC_PAGESIZE) as usize
16        }
17        #[cfg(not(target_family = "unix"))]
18        {
19            4096
20        }
21    });
22
23    /// # Safety
24    ///
25    /// This should only be called with pointers to valid memory.
26    unsafe fn prefetch_l2_impl(ptr: *const u8) {
27        #[cfg(target_arch = "x86_64")]
28        {
29            use std::arch::x86_64::*;
30            unsafe { _mm_prefetch(ptr as *const _, _MM_HINT_T1) };
31        }
32
33        #[cfg(all(target_arch = "aarch64", feature = "nightly"))]
34        {
35            use std::arch::aarch64::*;
36            unsafe { _prefetch(ptr as *const _, _PREFETCH_READ, _PREFETCH_LOCALITY2) };
37        }
38    }
39
40    /// Attempt to prefetch the memory in the slice to the L2 cache.
41    pub fn prefetch_l2(slice: &[u8]) {
42        if slice.is_empty() {
43            return;
44        }
45
46        // @TODO: We can play a bit more with this prefetching. Maybe introduce a maximum number of
47        // prefetches as to not overwhelm the processor. The linear prefetcher should pick it up
48        // at a certain point.
49
50        for i in (0..slice.len()).step_by(*PAGE_SIZE) {
51            unsafe { prefetch_l2_impl(slice[i..].as_ptr()) };
52        }
53
54        unsafe { prefetch_l2_impl(slice[slice.len() - 1..].as_ptr()) }
55    }
56
57    /// `madvise()` with `MADV_SEQUENTIAL` on unix systems. This is a no-op on non-unix systems.
58    pub fn madvise_sequential(#[allow(unused)] slice: &[u8]) {
59        #[cfg(target_family = "unix")]
60        madvise(slice, libc::MADV_SEQUENTIAL);
61    }
62
63    /// `madvise()` with `MADV_WILLNEED` on unix systems. This is a no-op on non-unix systems.
64    pub fn madvise_willneed(#[allow(unused)] slice: &[u8]) {
65        #[cfg(target_family = "unix")]
66        madvise(slice, libc::MADV_WILLNEED);
67    }
68
69    /// `madvise()` with `MADV_POPULATE_READ` on linux systems. This a no-op on non-linux systems.
70    pub fn madvise_populate_read(#[allow(unused)] slice: &[u8]) {
71        #[cfg(target_os = "linux")]
72        madvise(slice, libc::MADV_POPULATE_READ);
73    }
74
75    /// Forcibly reads at least one byte each page.
76    pub fn force_populate_read(slice: &[u8]) {
77        for i in (0..slice.len()).step_by(*PAGE_SIZE) {
78            std::hint::black_box(slice[i]);
79        }
80
81        std::hint::black_box(slice.last().copied());
82    }
83
84    #[cfg(target_family = "unix")]
85    fn madvise(slice: &[u8], advice: libc::c_int) {
86        if slice.is_empty() {
87            return;
88        }
89        let ptr = slice.as_ptr();
90
91        let align = ptr as usize % *PAGE_SIZE;
92        let ptr = ptr.wrapping_sub(align);
93        let len = slice.len() + align;
94
95        if unsafe { libc::madvise(ptr as *mut libc::c_void, len, advice) } != 0 {
96            let err = std::io::Error::last_os_error();
97            if let std::io::ErrorKind::InvalidInput = err.kind() {
98                panic!("{}", err);
99            }
100        }
101    }
102
103    pub fn no_prefetch(_: &[u8]) {}
104
105    /// Get the configured memory prefetch function.
106    pub fn get_memory_prefetch_func(verbose: bool) -> fn(&[u8]) -> () {
107        let memory_prefetch_func = match std::env::var("POLARS_MEMORY_PREFETCH").ok().as_deref() {
108            None => {
109                // madvise_willneed performed the best on both MacOS on Apple Silicon and Ubuntu on x86-64,
110                // using PDS-H query 3 SF=10 after clearing file cache as a benchmark.
111                #[cfg(target_family = "unix")]
112                {
113                    madvise_willneed
114                }
115                #[cfg(not(target_family = "unix"))]
116                {
117                    no_prefetch
118                }
119            },
120            Some("no_prefetch") => no_prefetch,
121            Some("prefetch_l2") => prefetch_l2,
122            Some("madvise_sequential") => {
123                #[cfg(target_family = "unix")]
124                {
125                    madvise_sequential
126                }
127                #[cfg(not(target_family = "unix"))]
128                {
129                    panic!(
130                        "POLARS_MEMORY_PREFETCH=madvise_sequential is not supported by this system"
131                    );
132                }
133            },
134            Some("madvise_willneed") => {
135                #[cfg(target_family = "unix")]
136                {
137                    madvise_willneed
138                }
139                #[cfg(not(target_family = "unix"))]
140                {
141                    panic!(
142                        "POLARS_MEMORY_PREFETCH=madvise_willneed is not supported by this system"
143                    );
144                }
145            },
146            Some("madvise_populate_read") => {
147                #[cfg(target_os = "linux")]
148                {
149                    madvise_populate_read
150                }
151                #[cfg(not(target_os = "linux"))]
152                {
153                    panic!(
154                        "POLARS_MEMORY_PREFETCH=madvise_populate_read is not supported by this system"
155                    );
156                }
157            },
158            Some("force_populate_read") => force_populate_read,
159            Some(v) => panic!("invalid value for POLARS_MEMORY_PREFETCH: {}", v),
160        };
161
162        if verbose {
163            let func_name = match memory_prefetch_func as usize {
164                v if v == no_prefetch as usize => "no_prefetch",
165                v if v == prefetch_l2 as usize => "prefetch_l2",
166                v if v == madvise_sequential as usize => "madvise_sequential",
167                v if v == madvise_willneed as usize => "madvise_willneed",
168                v if v == madvise_populate_read as usize => "madvise_populate_read",
169                v if v == force_populate_read as usize => "force_populate_read",
170                _ => unreachable!(),
171            };
172
173            eprintln!("memory prefetch function: {}", func_name);
174        }
175
176        memory_prefetch_func
177    }
178}