/// Reinterprets a shared slice as a mutable slice over the same memory.
///
/// The returned slice has the same start address and length as `s`.
///
/// # Safety
/// This may break aliasing rules, make sure you are the only owner.
#[allow(clippy::mut_from_ref)]
pub unsafe fn to_mutable_slice<T: Copy>(s: &[T]) -> &mut [T] {
    let data = s.as_ptr().cast_mut();
    // SAFETY: `data` and `s.len()` describe the live slice `s`; exclusive access to that memory
    // is the caller's obligation (see `# Safety`).
    unsafe { std::slice::from_raw_parts_mut(data, s.len()) }
}
910pub mod prefetch {
11use std::sync::LazyLock;
12static PAGE_SIZE: LazyLock<usize> = LazyLock::new(|| {
13#[cfg(target_family = "unix")]
14unsafe {
15 libc::sysconf(libc::_SC_PAGESIZE) as usize
16 }
17#[cfg(not(target_family = "unix"))]
18{
194096
20}
21 });
2223/// # Safety
24 ///
25 /// This should only be called with pointers to valid memory.
26unsafe fn prefetch_l2_impl(ptr: *const u8) {
27#[cfg(target_arch = "x86_64")]
28{
29use std::arch::x86_64::*;
30unsafe { _mm_prefetch(ptr as *const _, _MM_HINT_T1) };
31 }
3233#[cfg(all(target_arch = "aarch64", feature = "nightly"))]
34{
35use std::arch::aarch64::*;
36unsafe { _prefetch(ptr as *const _, _PREFETCH_READ, _PREFETCH_LOCALITY2) };
37 }
38 }
3940/// Attempt to prefetch the memory in the slice to the L2 cache.
41pub fn prefetch_l2(slice: &[u8]) {
42if slice.is_empty() {
43return;
44 }
4546// @TODO: We can play a bit more with this prefetching. Maybe introduce a maximum number of
47 // prefetches as to not overwhelm the processor. The linear prefetcher should pick it up
48 // at a certain point.
4950for i in (0..slice.len()).step_by(*PAGE_SIZE) {
51unsafe { prefetch_l2_impl(slice[i..].as_ptr()) };
52 }
5354unsafe { prefetch_l2_impl(slice[slice.len() - 1..].as_ptr()) }
55 }
5657/// `madvise()` with `MADV_SEQUENTIAL` on unix systems. This is a no-op on non-unix systems.
58pub fn madvise_sequential(#[allow(unused)] slice: &[u8]) {
59#[cfg(target_family = "unix")]
60madvise(slice, libc::MADV_SEQUENTIAL);
61 }
6263/// `madvise()` with `MADV_WILLNEED` on unix systems. This is a no-op on non-unix systems.
64pub fn madvise_willneed(#[allow(unused)] slice: &[u8]) {
65#[cfg(target_family = "unix")]
66madvise(slice, libc::MADV_WILLNEED);
67 }
6869/// `madvise()` with `MADV_POPULATE_READ` on linux systems. This a no-op on non-linux systems.
70pub fn madvise_populate_read(#[allow(unused)] slice: &[u8]) {
71#[cfg(target_os = "linux")]
72madvise(slice, libc::MADV_POPULATE_READ);
73 }
7475/// Forcibly reads at least one byte each page.
76pub fn force_populate_read(slice: &[u8]) {
77for i in (0..slice.len()).step_by(*PAGE_SIZE) {
78 std::hint::black_box(slice[i]);
79 }
8081 std::hint::black_box(slice.last().copied());
82 }
8384#[cfg(target_family = "unix")]
85fn madvise(slice: &[u8], advice: libc::c_int) {
86if slice.is_empty() {
87return;
88 }
89let ptr = slice.as_ptr();
9091let align = ptr as usize % *PAGE_SIZE;
92let ptr = ptr.wrapping_sub(align);
93let len = slice.len() + align;
9495if unsafe { libc::madvise(ptr as *mut libc::c_void, len, advice) } != 0 {
96let err = std::io::Error::last_os_error();
97if let std::io::ErrorKind::InvalidInput = err.kind() {
98panic!("{}", err);
99 }
100 }
101 }
102103pub fn no_prefetch(_: &[u8]) {}
104105/// Get the configured memory prefetch function.
106pub fn get_memory_prefetch_func(verbose: bool) -> fn(&[u8]) -> () {
107let memory_prefetch_func = match std::env::var("POLARS_MEMORY_PREFETCH").ok().as_deref() {
108None => {
109// madvise_willneed performed the best on both MacOS on Apple Silicon and Ubuntu on x86-64,
110 // using PDS-H query 3 SF=10 after clearing file cache as a benchmark.
111#[cfg(target_family = "unix")]
112{
113 madvise_willneed
114 }
115#[cfg(not(target_family = "unix"))]
116{
117 no_prefetch
118 }
119 },
120Some("no_prefetch") => no_prefetch,
121Some("prefetch_l2") => prefetch_l2,
122Some("madvise_sequential") => {
123#[cfg(target_family = "unix")]
124{
125 madvise_sequential
126 }
127#[cfg(not(target_family = "unix"))]
128{
129panic!(
130"POLARS_MEMORY_PREFETCH=madvise_sequential is not supported by this system"
131);
132 }
133 },
134Some("madvise_willneed") => {
135#[cfg(target_family = "unix")]
136{
137 madvise_willneed
138 }
139#[cfg(not(target_family = "unix"))]
140{
141panic!(
142"POLARS_MEMORY_PREFETCH=madvise_willneed is not supported by this system"
143);
144 }
145 },
146Some("madvise_populate_read") => {
147#[cfg(target_os = "linux")]
148{
149 madvise_populate_read
150 }
151#[cfg(not(target_os = "linux"))]
152{
153panic!(
154"POLARS_MEMORY_PREFETCH=madvise_populate_read is not supported by this system"
155);
156 }
157 },
158Some("force_populate_read") => force_populate_read,
159Some(v) => panic!("invalid value for POLARS_MEMORY_PREFETCH: {}", v),
160 };
161162if verbose {
163let func_name = match memory_prefetch_func as usize {
164 v if v == no_prefetch as usize => "no_prefetch",
165 v if v == prefetch_l2 as usize => "prefetch_l2",
166 v if v == madvise_sequential as usize => "madvise_sequential",
167 v if v == madvise_willneed as usize => "madvise_willneed",
168 v if v == madvise_populate_read as usize => "madvise_populate_read",
169 v if v == force_populate_read as usize => "force_populate_read",
170_ => unreachable!(),
171 };
172173eprintln!("memory prefetch function: {}", func_name);
174 }
175176 memory_prefetch_func
177 }
178}