polars_utils/
clmul.rs

1#[cfg(all(target_arch = "x86_64", target_feature = "pclmulqdq"))]
2fn intel_clmul64(x: u64, y: u64) -> u64 {
3    use core::arch::x86_64::*;
4    unsafe {
5        // SAFETY: we have the target feature.
6        _mm_cvtsi128_si64(_mm_clmulepi64_si128(
7            _mm_cvtsi64_si128(x as i64),
8            _mm_cvtsi64_si128(y as i64),
9            0,
10        )) as u64
11    }
12}
13
14#[cfg(all(
15    target_arch = "aarch64",
16    target_feature = "neon",
17    target_feature = "aes"
18))]
19fn arm_clmul64(x: u64, y: u64) -> u64 {
20    unsafe {
21        // SAFETY: we have the target feature.
22        use core::arch::aarch64::*;
23        vmull_p64(x, y) as u64
24    }
25}
26
27#[inline]
28pub fn portable_clmul64(x: u64, mut y: u64) -> u64 {
29    let mut out = 0;
30    while y > 0 {
31        let lsb = y & y.wrapping_neg();
32        out ^= x.wrapping_mul(lsb);
33        y ^= lsb;
34    }
35    out
36}
37
38// Computes the carryless multiplication of x and y.
39#[inline]
40pub fn clmul64(x: u64, y: u64) -> u64 {
41    #[cfg(all(target_arch = "x86_64", target_feature = "pclmulqdq"))]
42    return intel_clmul64(x, y);
43
44    #[cfg(all(
45        target_arch = "aarch64",
46        target_feature = "neon",
47        target_feature = "aes"
48    ))]
49    return arm_clmul64(x, y);
50
51    #[allow(unreachable_code)]
52    portable_clmul64(x, y)
53}
54
55#[inline]
56pub fn portable_prefix_xorsum(x: u64) -> u64 {
57    portable_prefix_xorsum_inclusive(x << 1)
58}
59
60// Computes for each bit i the XOR of bits[0..i].
61#[inline]
62pub fn prefix_xorsum(x: u64) -> u64 {
63    #[cfg(all(target_arch = "x86_64", target_feature = "pclmulqdq"))]
64    return intel_clmul64(x, u64::MAX ^ 1);
65
66    #[cfg(all(
67        target_arch = "aarch64",
68        target_feature = "neon",
69        target_feature = "aes"
70    ))]
71    return arm_clmul64(x, u64::MAX ^ 1);
72
73    #[allow(unreachable_code)]
74    portable_prefix_xorsum(x)
75}
76
77#[inline]
78pub fn portable_prefix_xorsum_inclusive(mut x: u64) -> u64 {
79    for i in 0..6 {
80        x ^= x << (1 << i);
81    }
82    x
83}
84
85// Computes for each bit i the XOR of bits[0..=i].
86#[inline]
87pub fn prefix_xorsum_inclusive(x: u64) -> u64 {
88    #[cfg(all(target_arch = "x86_64", target_feature = "pclmulqdq"))]
89    return intel_clmul64(x, u64::MAX);
90
91    #[cfg(all(
92        target_arch = "aarch64",
93        target_feature = "neon",
94        target_feature = "aes"
95    ))]
96    return arm_clmul64(x, u64::MAX);
97
98    #[allow(unreachable_code)]
99    portable_prefix_xorsum_inclusive(x)
100}
101
102#[cfg(test)]
103mod test {
104    use rand::prelude::*;
105
106    use super::*;
107
108    #[test]
109    fn test_clmul() {
110        // Verify platform-specific clmul to portable.
111        let mut rng = StdRng::seed_from_u64(0xdeadbeef);
112        for _ in 0..100 {
113            let x = rng.r#gen();
114            let y = rng.r#gen();
115            assert_eq!(portable_clmul64(x, y), clmul64(x, y));
116        }
117
118        // Verify portable clmul for known test vectors.
119        assert_eq!(
120            portable_clmul64(0x8b44729195dde0ef, 0xb976c5ae2726fab0),
121            0x4ae14eae84899290
122        );
123        assert_eq!(
124            portable_clmul64(0x399b6ed00c44b301, 0x693341db5acb2ff0),
125            0x48dfa88344823ff0
126        );
127        assert_eq!(
128            portable_clmul64(0xdf4c9f6e60deb640, 0x6d4bcdb217ac4880),
129            0x7300ffe474792000
130        );
131        assert_eq!(
132            portable_clmul64(0xa7adf3c53a200a51, 0x818cb40fe11b431e),
133            0x6a280181d521797e
134        );
135        assert_eq!(
136            portable_clmul64(0x5e78e12b744f228c, 0x4225ff19e9273266),
137            0xa48b73cafb9665a8
138        );
139    }
140
141    #[test]
142    fn test_prefix_xorsum() {
143        // Verify platform-specific prefix_xorsum to portable.
144        let mut rng = StdRng::seed_from_u64(0xdeadbeef);
145        for _ in 0..100 {
146            let x = rng.r#gen();
147            assert_eq!(portable_prefix_xorsum(x), prefix_xorsum(x));
148        }
149
150        // Verify portable prefix_xorsum for known test vectors.
151        assert_eq!(
152            portable_prefix_xorsum(0x8b44729195dde0ef),
153            0x0d87a31ee696bf4a
154        );
155        assert_eq!(
156            portable_prefix_xorsum(0xb976c5ae2726fab0),
157            0x2e5b79343a3b5320
158        );
159        assert_eq!(
160            portable_prefix_xorsum(0x399b6ed00c44b301),
161            0xd1124b600878ddfe
162        );
163        assert_eq!(
164            portable_prefix_xorsum(0x693341db5acb2ff0),
165            0x4e227e926c8dcaa0
166        );
167        assert_eq!(
168            portable_prefix_xorsum(0xdf4c9f6e60deb640),
169            0x6a7715b44094db80
170        );
171    }
172}