Add SIMD shuffles for SimdType{2,4,8,16,32,64}

This const generic implementation for certain lane sizes represents
a more limited interface than what LLVM's shufflevector instruction
can handle, as normally the length of U can be different from the
length of T, but it offers an interface whose capabilities are
expected to be expandable in the future.
This commit is contained in:
Jubilee Young 2021-02-04 14:39:15 -08:00
parent 9b6b5d7142
commit 5424140b66
5 changed files with 64 additions and 1 deletions

View file

@ -52,4 +52,12 @@ extern "platform-intrinsic" {
pub(crate) fn simd_le<T, U>(x: T, y: T) -> U;
pub(crate) fn simd_gt<T, U>(x: T, y: T) -> U;
pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
// Lane shuffles (counterpart of LLVM's `shufflevector` instruction).
// Every variant takes two input vectors of the same type `T` and an array of
// `u32` lane indices, and yields a vector of type `U`. The numeric suffix of
// each name equals the length of its index array.
pub(crate) fn simd_shuffle2<T, U>(x: T, y: T, idx: [u32; 2]) -> U;
pub(crate) fn simd_shuffle4<T, U>(x: T, y: T, idx: [u32; 4]) -> U;
pub(crate) fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U;
pub(crate) fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U;
pub(crate) fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U;
pub(crate) fn simd_shuffle64<T, U>(x: T, y: T, idx: [u32; 64]) -> U;
}

View file

@ -1,10 +1,19 @@
#![no_std]
#![feature(repr_simd, platform_intrinsics, link_llvm_intrinsics, simd_ffi)]
#![allow(incomplete_features)]
#![feature(
repr_simd,
platform_intrinsics,
link_llvm_intrinsics,
simd_ffi,
const_generics
)]
#![warn(missing_docs)]
//! Portable SIMD module.
#[macro_use]
mod macros;
#[macro_use]
mod permute;
mod fmt;
mod intrinsics;

View file

@ -148,6 +148,8 @@ macro_rules! impl_vector {
Self::splat(value)
}
}
impl_shuffle_2pow_lanes!{ $name }
}
}

View file

@ -0,0 +1,29 @@
// Implements a const-generic `shuffle` method on `$name<$n>` by forwarding to
// the shuffle intrinsic `$fn` in `crate::intrinsics`.
//
// `$n` is the lane count of the vector; it is also used as the length of the
// index array, so it must agree with the index-array length `$fn` accepts.
macro_rules! impl_shuffle_lane {
    { $name:ident, $fn:ident, $n:literal } => {
        impl $name<$n> {
            // NOTE: macro metavariables are not substituted inside doc comments, so the
            // text below describes the lane count in words instead of spelling `$n`.

            /// A const SIMD shuffle that takes 2 SIMD vectors and produces another vector, using
            /// the indices in the const parameter `IDX`.
            ///
            /// The first or "self" vector has its lanes indexed from 0, and the lanes of the
            /// second vector are indexed starting at the number of lanes in `Self` (that is,
            /// index `LANES` selects lane 0 of `second`, where `LANES` is the lane count).
            /// Indices must be in-bounds of either vector at compile time.
            ///
            /// Some SIMD shuffle instructions can be quite slow, so avoiding them by loading data
            /// into the desired patterns in advance is preferred, but shuffles are still faster
            /// than storing and reloading from memory.
            #[inline]
            #[must_use = "shuffle returns a new vector and does not modify its inputs"]
            pub fn shuffle<const IDX: [u32; $n]>(self, second: Self) -> Self {
                // SAFETY: both operands and the result are `Self`, and `IDX` has exactly as
                // many entries as `Self` has lanes, matching the intrinsic's signature.
                // NOTE(review): out-of-range indices are expected to be rejected when the
                // intrinsic is compiled rather than at run time -- confirm.
                unsafe { crate::intrinsics::$fn(self, second, IDX) }
            }
        }
    }
}
// Generates the `shuffle` method for the vector type `$vector` at each of the
// lane counts 2, 4, 8, 16, 32 and 64, pairing every lane count with the
// intrinsic whose index array has that same length.
macro_rules! impl_shuffle_2pow_lanes {
    ($vector:ident) => {
        impl_shuffle_lane!($vector, simd_shuffle2, 2);
        impl_shuffle_lane!($vector, simd_shuffle4, 4);
        impl_shuffle_lane!($vector, simd_shuffle8, 8);
        impl_shuffle_lane!($vector, simd_shuffle16, 16);
        impl_shuffle_lane!($vector, simd_shuffle32, 32);
        impl_shuffle_lane!($vector, simd_shuffle64, 64);
    };
}

View file

@ -0,0 +1,15 @@
use core_simd::SimdU32;
#[cfg(target_arch = "wasm32")]
use wasm_bindgen_test::*;
#[cfg(target_arch = "wasm32")]
wasm_bindgen_test_configure!(run_in_browser);
/// Checks that `shuffle` picks lanes by index, with indices 0..=3 addressing the
/// first vector and indices 4..=7 addressing the second one.
#[test]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn simple_shuffle() {
    let a = SimdU32::from_array([2, 4, 1, 9]);

    // Shuffling a vector with itself: indices 4 and 6 wrap onto lanes 0 and 2.
    let b = a;
    assert_eq!(a.shuffle::<{ [3, 1, 4, 6] }>(b).to_array(), [9, 4, 2, 1]);

    // Distinct operands, so that reading a lane from the wrong vector is detected:
    // indices 3 and 1 come from `a`, indices 4 and 6 are lanes 0 and 2 of `c`.
    let c = SimdU32::from_array([7, 3, 8, 5]);
    assert_eq!(a.shuffle::<{ [3, 1, 4, 6] }>(c).to_array(), [9, 4, 7, 8]);
}