polars_utils/
clmul.rs
1#[cfg(all(target_arch = "x86_64", target_feature = "pclmulqdq"))]
2fn intel_clmul64(x: u64, y: u64) -> u64 {
3 use core::arch::x86_64::*;
4 unsafe {
5 _mm_cvtsi128_si64(_mm_clmulepi64_si128(
7 _mm_cvtsi64_si128(x as i64),
8 _mm_cvtsi64_si128(y as i64),
9 0,
10 )) as u64
11 }
12}
13
14#[cfg(all(
15 target_arch = "aarch64",
16 target_feature = "neon",
17 target_feature = "aes"
18))]
19fn arm_clmul64(x: u64, y: u64) -> u64 {
20 unsafe {
21 use core::arch::aarch64::*;
23 vmull_p64(x, y) as u64
24 }
25}
26
27#[inline]
28pub fn portable_clmul64(x: u64, mut y: u64) -> u64 {
29 let mut out = 0;
30 while y > 0 {
31 let lsb = y & y.wrapping_neg();
32 out ^= x.wrapping_mul(lsb);
33 y ^= lsb;
34 }
35 out
36}
37
38#[inline]
40pub fn clmul64(x: u64, y: u64) -> u64 {
41 #[cfg(all(target_arch = "x86_64", target_feature = "pclmulqdq"))]
42 return intel_clmul64(x, y);
43
44 #[cfg(all(
45 target_arch = "aarch64",
46 target_feature = "neon",
47 target_feature = "aes"
48 ))]
49 return arm_clmul64(x, y);
50
51 #[allow(unreachable_code)]
52 portable_clmul64(x, y)
53}
54
55#[inline]
56pub fn portable_prefix_xorsum(x: u64) -> u64 {
57 portable_prefix_xorsum_inclusive(x << 1)
58}
59
60#[inline]
62pub fn prefix_xorsum(x: u64) -> u64 {
63 #[cfg(all(target_arch = "x86_64", target_feature = "pclmulqdq"))]
64 return intel_clmul64(x, u64::MAX ^ 1);
65
66 #[cfg(all(
67 target_arch = "aarch64",
68 target_feature = "neon",
69 target_feature = "aes"
70 ))]
71 return arm_clmul64(x, u64::MAX ^ 1);
72
73 #[allow(unreachable_code)]
74 portable_prefix_xorsum(x)
75}
76
77#[inline]
78pub fn portable_prefix_xorsum_inclusive(mut x: u64) -> u64 {
79 for i in 0..6 {
80 x ^= x << (1 << i);
81 }
82 x
83}
84
85#[inline]
87pub fn prefix_xorsum_inclusive(x: u64) -> u64 {
88 #[cfg(all(target_arch = "x86_64", target_feature = "pclmulqdq"))]
89 return intel_clmul64(x, u64::MAX);
90
91 #[cfg(all(
92 target_arch = "aarch64",
93 target_feature = "neon",
94 target_feature = "aes"
95 ))]
96 return arm_clmul64(x, u64::MAX);
97
98 #[allow(unreachable_code)]
99 portable_prefix_xorsum_inclusive(x)
100}
101
102#[cfg(test)]
103mod test {
104 use rand::prelude::*;
105
106 use super::*;
107
108 #[test]
109 fn test_clmul() {
110 let mut rng = StdRng::seed_from_u64(0xdeadbeef);
112 for _ in 0..100 {
113 let x = rng.r#gen();
114 let y = rng.r#gen();
115 assert_eq!(portable_clmul64(x, y), clmul64(x, y));
116 }
117
118 assert_eq!(
120 portable_clmul64(0x8b44729195dde0ef, 0xb976c5ae2726fab0),
121 0x4ae14eae84899290
122 );
123 assert_eq!(
124 portable_clmul64(0x399b6ed00c44b301, 0x693341db5acb2ff0),
125 0x48dfa88344823ff0
126 );
127 assert_eq!(
128 portable_clmul64(0xdf4c9f6e60deb640, 0x6d4bcdb217ac4880),
129 0x7300ffe474792000
130 );
131 assert_eq!(
132 portable_clmul64(0xa7adf3c53a200a51, 0x818cb40fe11b431e),
133 0x6a280181d521797e
134 );
135 assert_eq!(
136 portable_clmul64(0x5e78e12b744f228c, 0x4225ff19e9273266),
137 0xa48b73cafb9665a8
138 );
139 }
140
141 #[test]
142 fn test_prefix_xorsum() {
143 let mut rng = StdRng::seed_from_u64(0xdeadbeef);
145 for _ in 0..100 {
146 let x = rng.r#gen();
147 assert_eq!(portable_prefix_xorsum(x), prefix_xorsum(x));
148 }
149
150 assert_eq!(
152 portable_prefix_xorsum(0x8b44729195dde0ef),
153 0x0d87a31ee696bf4a
154 );
155 assert_eq!(
156 portable_prefix_xorsum(0xb976c5ae2726fab0),
157 0x2e5b79343a3b5320
158 );
159 assert_eq!(
160 portable_prefix_xorsum(0x399b6ed00c44b301),
161 0xd1124b600878ddfe
162 );
163 assert_eq!(
164 portable_prefix_xorsum(0x693341db5acb2ff0),
165 0x4e227e926c8dcaa0
166 );
167 assert_eq!(
168 portable_prefix_xorsum(0xdf4c9f6e60deb640),
169 0x6a7715b44094db80
170 );
171 }
172}