Add SIMD shuffles for SimdType{2,4,8,16,32,64}

This const generic implementation for certain lane sizes is a more limited interface than what LLVM's shufflevector instruction can handle, since normally the length of the output vector U can differ from the length of the input vectors T. It nevertheless offers an interface whose capabilities are expected to be expandable in the future.
2021-02-04 14:39:15 -08:00 · 2021-02-04 14:39:15 -08:00 · 5424140b66
commit 5424140b66
parent 9b6b5d7142
5 changed files with 64 additions and 1 deletions
--- a/crates/core_simd/src/intrinsics.rs
+++ b/crates/core_simd/src/intrinsics.rs
@ -52,4 +52,12 @@ extern "platform-intrinsic" {
    pub(crate) fn simd_le<T, U>(x: T, y: T) -> U;
    pub(crate) fn simd_gt<T, U>(x: T, y: T) -> U;
    pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
+
+    // shufflevector
+    pub(crate) fn simd_shuffle2<T, U>(x: T, y: T, idx: [u32; 2]) -> U;
+    pub(crate) fn simd_shuffle4<T, U>(x: T, y: T, idx: [u32; 4]) -> U;
+    pub(crate) fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U;
+    pub(crate) fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U;
+    pub(crate) fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U;
+    pub(crate) fn simd_shuffle64<T, U>(x: T, y: T, idx: [u32; 64]) -> U;
 }
--- a/crates/core_simd/src/lib.rs
+++ b/crates/core_simd/src/lib.rs
@ -1,10 +1,19 @@
 #![no_std]
-#![feature(repr_simd, platform_intrinsics, link_llvm_intrinsics, simd_ffi)]
+#![allow(incomplete_features)] // `const_generics` (below) is still flagged as incomplete
+#![feature(
+    repr_simd,
+    platform_intrinsics,
+    link_llvm_intrinsics,
+    simd_ffi,
+    const_generics
+)]
 #![warn(missing_docs)]
 //! Portable SIMD module.

 #[macro_use]
 mod macros;
+#[macro_use]
+mod permute;

 mod fmt;
 mod intrinsics;
--- a/crates/core_simd/src/macros.rs
+++ b/crates/core_simd/src/macros.rs
@ -148,6 +148,8 @@ macro_rules! impl_vector {
                Self::splat(value)
            }
        }
+
+        impl_shuffle_2pow_lanes!{ $name }
    }
 }

--- a/crates/core_simd/src/permute.rs
+++ b/crates/core_simd/src/permute.rs
@ -0,0 +1,29 @@
+macro_rules! impl_shuffle_lane {
+    { $name:ident, $fn:ident, $n:literal } => { // vector type, shuffle intrinsic, lane count
+        impl $name<$n> {
+            /// Shuffles the lanes of two vectors into a new vector, selecting each output lane
+            /// with the matching entry of the const parameter `IDX`. Lanes of `self` are indexed
+            /// from 0, and lanes of `second` follow, starting at the lane count of this vector.
+            /// Indices must be in-bounds of either vector at compile time.
+            ///
+            /// Some SIMD shuffle instructions can be quite slow, so avoiding them by loading data
+            /// into the desired patterns in advance is preferred, but shuffles are still faster
+            /// than storing and reloading from memory.
+            #[inline]
+            pub fn shuffle<const IDX: [u32; $n]>(self, second: Self) -> Self {
+                unsafe { crate::intrinsics::$fn(self, second, IDX) }
+            }
+        }
+    }
+}
+
+macro_rules! impl_shuffle_2pow_lanes {
+    { $name:ident } => { // one `shuffle` impl per lane count: 2, 4, 8, 16, 32 and 64
+        impl_shuffle_lane!{ $name, simd_shuffle2, 2 }
+        impl_shuffle_lane!{ $name, simd_shuffle4, 4 }
+        impl_shuffle_lane!{ $name, simd_shuffle8, 8 }
+        impl_shuffle_lane!{ $name, simd_shuffle16, 16 }
+        impl_shuffle_lane!{ $name, simd_shuffle32, 32 }
+        impl_shuffle_lane!{ $name, simd_shuffle64, 64 }
+    }
+}
--- a/crates/core_simd/tests/permute.rs
+++ b/crates/core_simd/tests/permute.rs
@ -0,0 +1,15 @@
+use core_simd::SimdU32;
+
+#[cfg(target_arch = "wasm32")]
+use wasm_bindgen_test::*;
+
+#[cfg(target_arch = "wasm32")]
+wasm_bindgen_test_configure!(run_in_browser);
+
+#[test]
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+fn simple_shuffle() {
+    let a = SimdU32::from_array([2, 4, 1, 9]);
+    let b = a; // same values as `a`; indices 4 and 6 below pick lanes 0 and 2 of `b`
+    assert_eq!(a.shuffle::<{ [3, 1, 4, 6] }>(b).to_array(), [9, 4, 2, 1]);
+}