rust-lang/portable-simd#443: Add armv7 neon implementation for Simd<u8, 16>::swizzle_dyn
Use arm neon intrinsics to swizzle two u8x8 blocks with a u8x8x2 lookup table.
This commit is contained in:
parent
158e2409fe
commit
7e162d19dd
1 changed files with 29 additions and 0 deletions
|
|
@ -57,6 +57,13 @@ where
|
|||
target_endian = "little"
|
||||
))]
|
||||
16 => transize(vqtbl1q_u8, self, idxs),
|
||||
#[cfg(all(
|
||||
target_arch = "arm",
|
||||
target_feature = "v7",
|
||||
target_feature = "neon",
|
||||
target_endian = "little"
|
||||
))]
|
||||
16 => transize(armv7_neon_swizzle_u8x16, self, idxs),
|
||||
#[cfg(all(target_feature = "avx2", not(target_feature = "avx512vbmi")))]
|
||||
32 => transize(avx2_pshufb, self, idxs),
|
||||
#[cfg(all(target_feature = "avx512vl", target_feature = "avx512vbmi"))]
|
||||
|
|
@ -98,6 +105,28 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
/// Dynamic byte swizzle of a `u8x16` on armv7 neon.
///
/// The 16 source bytes are split into their low and high `u8x8` halves and
/// packed into a single `u8x8x2` lookup table. Each `u8x8` half of `idxs` is
/// then looked up against that full table, and the two 8-byte results are
/// joined back into one `u8x16`.
///
/// # Safety
/// The caller must ensure armv7 neon is available on the executing CPU.
#[cfg(all(
    target_arch = "arm",
    target_feature = "v7",
    target_feature = "neon",
    target_endian = "little"
))]
unsafe fn armv7_neon_swizzle_u8x16(bytes: Simd<u8, 16>, idxs: Simd<u8, 16>) -> Simd<u8, 16> {
    use core::arch::arm::{uint8x8x2_t, vcombine_u8, vget_high_u8, vget_low_u8, vtbl2_u8};
    // SAFETY: Caller promised arm neon support
    unsafe {
        // Both halves of the source form the two-register table shared by both lookups.
        let table = uint8x8x2_t(vget_low_u8(bytes.into()), vget_high_u8(bytes.into()));
        // Split the index vector the same way; each half drives one table lookup.
        let idx_lo = vget_low_u8(idxs.into());
        let idx_hi = vget_high_u8(idxs.into());
        vcombine_u8(vtbl2_u8(table, idx_lo), vtbl2_u8(table, idx_hi)).into()
    }
}
|
||||
|
||||
/// "vpshufb like it was meant to be" on AVX2
|
||||
///
|
||||
/// # Safety
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue