add swap_bytes/to_le/to_be (#517)

* add large shuffle intrinsics

* add swap_bytes/to_le

* add to_be

* more tests

* improve swap_bytes tests
This commit is contained in:
TheIronBorn 2018-07-10 01:20:52 -07:00 committed by gnzlbg
parent e0752318f7
commit 83e5d232ac
6 changed files with 281 additions and 3 deletions

View file

@ -9,7 +9,7 @@ macro_rules! impl_mask_reductions {
pub fn all(self) -> bool {
unsafe { super::codegen::masks_reductions::All::all(self) }
}
/// Is `any` vector lanes `true`?
/// Is `any` vector lane `true`?
#[inline]
pub fn any(self) -> bool {
unsafe { super::codegen::masks_reductions::Any::any(self) }

View file

@ -72,6 +72,8 @@ mod masks_select;
mod scalar_shifts;
#[macro_use]
mod shifts;
#[macro_use]
mod swap_bytes;
/// Sealed trait used for constraining select implementations.
pub trait Lanes<A> {}
@ -143,7 +145,8 @@ macro_rules! simd_i_ty {
[impl_eq, $id],
[impl_partial_eq, $id],
[impl_default, $id, $elem_ty],
[impl_int_minmax_ops, $id]
[impl_int_minmax_ops, $id],
[impl_swap_bytes, $id]
);
$test_macro!(
@ -197,7 +200,8 @@ macro_rules! simd_u_ty {
[impl_eq, $id],
[impl_partial_eq, $id],
[impl_default, $id, $elem_ty],
[impl_int_minmax_ops, $id]
[impl_int_minmax_ops, $id],
[impl_swap_bytes, $id]
);
$test_macro!(
@ -221,6 +225,7 @@ macro_rules! simd_u_ty {
test_default!($id, $elem_ty);
test_mask_select!($mask_ty, $id, $elem_ty);
test_int_minmax_ops!($id, $elem_ty);
test_swap_bytes!($id, $elem_ty);
}
);
}

View file

@ -0,0 +1,130 @@
//! Horizontal swap bytes.
macro_rules! impl_swap_bytes {
    ($id:ident) => {
        impl $id {
            /// Returns the vector with the order of its bytes reversed.
            ///
            /// The reversal is applied to the bytes of the vector as a
            /// whole, not to each lane independently.
            #[inline]
            pub fn swap_bytes(self) -> Self {
                // Delegates to the codegen-level `SwapBytes` implementation
                // for this vector type.
                unsafe { super::codegen::swap_bytes::SwapBytes::swap_bytes(self) }
            }

            /// Converts `self` from the target's endianness to little endian.
            ///
            /// This is the identity on little-endian targets; on any other
            /// target the bytes are swapped.
            #[inline]
            pub fn to_le(self) -> Self {
                if cfg!(target_endian = "little") {
                    self
                } else {
                    self.swap_bytes()
                }
            }

            /// Converts `self` from the target's endianness to big endian.
            ///
            /// This is the identity on big-endian targets; on any other
            /// target the bytes are swapped.
            #[inline]
            pub fn to_be(self) -> Self {
                if cfg!(target_endian = "big") {
                    self
                } else {
                    self.swap_bytes()
                }
            }
        }
    };
}
#[cfg(test)]
macro_rules! test_swap_bytes {
    ($id:ident, $elem_ty:ty) => {
        use coresimd::simd::$id;
        use std::{mem, slice};

        // Reference pattern: the byte at index `i` has the value `i`, so any
        // reordering of the bytes is directly visible in the result.
        const BYTES: [u8; 64] = [
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
            19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
            35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
            51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
        ];

        // Loads a vector from the start of a copy of `BYTES`, applies
        // `$func` to it, stores the result back in place and evaluates to
        // the whole 64-byte buffer.
        macro_rules! swap {
            ($func: ident) => {{
                // catch possible future >512-bit vectors
                assert!(mem::size_of::<$id>() <= 64);

                // A plain `[u8; 64]` is only guaranteed 1-byte alignment,
                // but the buffer is viewed as a slice of the lane type
                // below, and `slice::from_raw_parts_mut` requires the
                // pointer to be aligned for that type. Over-align the
                // storage so the cast is sound for every lane type.
                #[repr(align(64))]
                struct Aligned([u8; 64]);

                let mut actual = Aligned(BYTES);
                {
                    // SAFETY: the pointer is non-null and 64-byte aligned,
                    // and the size assertion above keeps the vector within
                    // the 64-byte buffer (assuming `lanes()` elements span
                    // exactly `size_of::<$id>()` bytes).
                    let elems: &mut [$elem_ty] = unsafe {
                        slice::from_raw_parts_mut(
                            actual.0.as_mut_ptr() as *mut $elem_ty,
                            $id::lanes(),
                        )
                    };

                    let vec = $id::load_unaligned(elems);
                    vec.$func().store_unaligned(elems);
                }

                actual.0
            }};
        }

        // Asserts that `$func` reverses the bytes covered by the vector.
        macro_rules! test_swap {
            ($func: ident) => {{
                let actual = swap!($func);
                // The first `size_of::<$id>()` bytes of `BYTES`, reversed.
                let expected =
                    BYTES.iter().rev().skip(64 - mem::size_of::<$id>());

                assert!(actual.iter().zip(expected).all(|(x, y)| x == y));
            }};
        }

        // Asserts that `$func` leaves the bytes covered by the vector
        // untouched.
        macro_rules! test_no_swap {
            ($func: ident) => {{
                let actual = swap!($func);
                let expected = BYTES.iter().take(mem::size_of::<$id>());

                assert!(actual.iter().zip(expected).all(|(x, y)| x == y));
            }};
        }

        #[test]
        fn swap_bytes() {
            test_swap!(swap_bytes);
        }

        #[test]
        fn to_le() {
            // `to_le` is the identity on little-endian targets only.
            #[cfg(target_endian = "little")]
            {
                test_no_swap!(to_le);
            }
            #[cfg(not(target_endian = "little"))]
            {
                test_swap!(to_le);
            }
        }

        #[test]
        fn to_be() {
            // `to_be` is the identity on big-endian targets only.
            #[cfg(target_endian = "big")]
            {
                test_no_swap!(to_be);
            }
            #[cfg(not(target_endian = "big"))]
            {
                test_swap!(to_be);
            }
        }
    };
}

View file

@ -4,6 +4,7 @@
pub mod wrapping;
pub mod masks_reductions;
pub mod swap_bytes;
pub mod abs;
pub mod cos;

View file

@ -0,0 +1,140 @@
//! Byte-swap (byte order reversal) implementations for vector types.
#![allow(unused)]
use coresimd::simd::*;
/// Byte order reversal for vector types.
///
/// Implemented for the concrete vector types by the `impl_swap_bytes!`
/// macro in this module.
pub trait SwapBytes {
/// Returns `self` with the order of its bytes reversed.
///
/// NOTE(review): this is declared `unsafe`, but no caller-side
/// precondition is visible in this module — confirm whether one exists.
unsafe fn swap_bytes(self) -> Self;
}
// TODO: switch to shuffle API once it lands
// TODO: investigate `llvm.bswap`
/// Implements `SwapBytes` for every listed vector type of a given total
/// width (`v16` .. `v512`).
///
/// Each arm reinterprets the vector as a vector of `u8` lanes with the same
/// total width, reverses the order of those lanes with a shuffle, and
/// reinterprets the result as the original type. The `v16` arm shuffles
/// `self` directly, since its listed types already have two 8-bit lanes.
macro_rules! impl_swap_bytes {
    (v16, $($id:ident,)+) => {$(
        impl SwapBytes for $id {
            #[inline]
            unsafe fn swap_bytes(self) -> Self {
                use coresimd::simd_llvm::simd_shuffle2;

                const REVERSED: [u32; 2] = [1, 0];

                simd_shuffle2(self, self, REVERSED)
            }
        }
    )+};
    (v32, $($id:ident,)+) => {$(
        impl SwapBytes for $id {
            #[inline]
            unsafe fn swap_bytes(self) -> Self {
                use coresimd::simd_llvm::simd_shuffle4;

                const REVERSED: [u32; 4] = [3, 2, 1, 0];

                let bytes = u8x4::from_bits(self);
                let swapped: u8x4 = simd_shuffle4(bytes, bytes, REVERSED);
                $id::from_bits(swapped)
            }
        }
    )+};
    (v64, $($id:ident,)+) => {$(
        impl SwapBytes for $id {
            #[inline]
            unsafe fn swap_bytes(self) -> Self {
                use coresimd::simd_llvm::simd_shuffle8;

                const REVERSED: [u32; 8] = [7, 6, 5, 4, 3, 2, 1, 0];

                let bytes = u8x8::from_bits(self);
                let swapped: u8x8 = simd_shuffle8(bytes, bytes, REVERSED);
                $id::from_bits(swapped)
            }
        }
    )+};
    (v128, $($id:ident,)+) => {$(
        impl SwapBytes for $id {
            #[inline]
            unsafe fn swap_bytes(self) -> Self {
                use coresimd::simd_llvm::simd_shuffle16;

                const REVERSED: [u32; 16] = [
                    15, 14, 13, 12, 11, 10, 9, 8,
                    7, 6, 5, 4, 3, 2, 1, 0,
                ];

                let bytes = u8x16::from_bits(self);
                let swapped: u8x16 = simd_shuffle16(bytes, bytes, REVERSED);
                $id::from_bits(swapped)
            }
        }
    )+};
    (v256, $($id:ident,)+) => {$(
        impl SwapBytes for $id {
            #[inline]
            unsafe fn swap_bytes(self) -> Self {
                use coresimd::simd_llvm::simd_shuffle32;

                const REVERSED: [u32; 32] = [
                    31, 30, 29, 28, 27, 26, 25, 24,
                    23, 22, 21, 20, 19, 18, 17, 16,
                    15, 14, 13, 12, 11, 10, 9, 8,
                    7, 6, 5, 4, 3, 2, 1, 0,
                ];

                let bytes = u8x32::from_bits(self);
                let swapped: u8x32 = simd_shuffle32(bytes, bytes, REVERSED);
                $id::from_bits(swapped)
            }
        }
    )+};
    (v512, $($id:ident,)+) => {$(
        impl SwapBytes for $id {
            #[inline]
            unsafe fn swap_bytes(self) -> Self {
                use coresimd::simd_llvm::simd_shuffle64;

                const REVERSED: [u32; 64] = [
                    63, 62, 61, 60, 59, 58, 57, 56,
                    55, 54, 53, 52, 51, 50, 49, 48,
                    47, 46, 45, 44, 43, 42, 41, 40,
                    39, 38, 37, 36, 35, 34, 33, 32,
                    31, 30, 29, 28, 27, 26, 25, 24,
                    23, 22, 21, 20, 19, 18, 17, 16,
                    15, 14, 13, 12, 11, 10, 9, 8,
                    7, 6, 5, 4, 3, 2, 1, 0,
                ];

                let bytes = u8x64::from_bits(self);
                let swapped: u8x64 = simd_shuffle64(bytes, bytes, REVERSED);
                $id::from_bits(swapped)
            }
        }
    )+};
}
// Instantiate `SwapBytes` for every vector type, grouped by total width.
//
// Every type name must be followed by a comma, including the last one in
// each list: the `impl_swap_bytes!` arms match `$($id:ident,)+`.
vector_impl!(
    [impl_swap_bytes, v16, u8x2, i8x2,],
    [impl_swap_bytes, v32, u8x4, i8x4, u16x2, i16x2,],
    [impl_swap_bytes, v64, u8x8, i8x8, u16x4, i16x4, u32x2, i32x2,],
    [
        impl_swap_bytes,
        v128,
        u8x16,
        i8x16,
        u16x8,
        i16x8,
        u32x4,
        i32x4,
        u64x2,
        i64x2,
    ],
    [
        impl_swap_bytes,
        v256,
        u8x32,
        i8x32,
        u16x16,
        i16x16,
        u32x8,
        i32x8,
        u64x4,
        i64x4,
    ],
    [
        impl_swap_bytes,
        v512,
        u8x64,
        i8x64,
        u16x32,
        i16x32,
        u32x16,
        i32x16,
        u64x8,
        i64x8,
    ]
);

View file

@ -15,6 +15,8 @@ extern "platform-intrinsic" {
pub fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U;
pub fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U;
pub fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U;
pub fn simd_shuffle64<T, U>(x: T, y: T, idx: [u32; 64]) -> U;
pub fn simd_shuffle128<T, U>(x: T, y: T, idx: [u32; 128]) -> U;
pub fn simd_insert<T, U>(x: T, idx: u32, val: U) -> T;
pub fn simd_extract<T, U>(x: T, idx: u32) -> U;