add swap_bytes/to_le/to_be (#517)
* add large shuffle intrinsics * add swap_bytes/to_le * add to_be * more tests * improve swap_bytes tests
This commit is contained in:
parent
e0752318f7
commit
83e5d232ac
6 changed files with 281 additions and 3 deletions
|
|
@ -9,7 +9,7 @@ macro_rules! impl_mask_reductions {
|
|||
pub fn all(self) -> bool {
|
||||
unsafe { super::codegen::masks_reductions::All::all(self) }
|
||||
}
|
||||
/// Is `any` vector lanes `true`?
|
||||
/// Is `any` vector lane `true`?
|
||||
#[inline]
|
||||
pub fn any(self) -> bool {
|
||||
unsafe { super::codegen::masks_reductions::Any::any(self) }
|
||||
|
|
|
|||
|
|
@ -72,6 +72,8 @@ mod masks_select;
|
|||
mod scalar_shifts;
|
||||
#[macro_use]
|
||||
mod shifts;
|
||||
#[macro_use]
|
||||
mod swap_bytes;
|
||||
|
||||
/// Sealed trait used for constraining select implementations.
|
||||
pub trait Lanes<A> {}
|
||||
|
|
@ -143,7 +145,8 @@ macro_rules! simd_i_ty {
|
|||
[impl_eq, $id],
|
||||
[impl_partial_eq, $id],
|
||||
[impl_default, $id, $elem_ty],
|
||||
[impl_int_minmax_ops, $id]
|
||||
[impl_int_minmax_ops, $id],
|
||||
[impl_swap_bytes, $id]
|
||||
);
|
||||
|
||||
$test_macro!(
|
||||
|
|
@ -197,7 +200,8 @@ macro_rules! simd_u_ty {
|
|||
[impl_eq, $id],
|
||||
[impl_partial_eq, $id],
|
||||
[impl_default, $id, $elem_ty],
|
||||
[impl_int_minmax_ops, $id]
|
||||
[impl_int_minmax_ops, $id],
|
||||
[impl_swap_bytes, $id]
|
||||
);
|
||||
|
||||
$test_macro!(
|
||||
|
|
@ -221,6 +225,7 @@ macro_rules! simd_u_ty {
|
|||
test_default!($id, $elem_ty);
|
||||
test_mask_select!($mask_ty, $id, $elem_ty);
|
||||
test_int_minmax_ops!($id, $elem_ty);
|
||||
test_swap_bytes!($id, $elem_ty);
|
||||
}
|
||||
);
|
||||
}
|
||||
|
|
|
|||
130
library/stdarch/coresimd/ppsv/api/swap_bytes.rs
Normal file
130
library/stdarch/coresimd/ppsv/api/swap_bytes.rs
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
//! Byte swapping and endianness conversion (`swap_bytes`, `to_le`, `to_be`).
|
||||
|
||||
macro_rules! impl_swap_bytes {
    ($id:ident) => {
        impl $id {
            /// Returns the vector with the order of its bytes reversed.
            ///
            /// The work is delegated to the `SwapBytes` trait of the
            /// `codegen::swap_bytes` module.
            #[inline]
            pub fn swap_bytes(self) -> Self {
                // The trait method is declared `unsafe fn`, hence the block.
                // The fully-qualified path is required: method-call syntax
                // would pick this inherent method and recurse.
                // NOTE(review): no caller precondition is documented on the
                // trait method — confirm it is sound for every input.
                unsafe {
                    super::codegen::swap_bytes::SwapBytes::swap_bytes(self)
                }
            }

            /// Converts `self` from the target's endianness to little endian.
            ///
            /// Returns `self` unchanged on little-endian targets and the
            /// byte-swapped vector on every other target.
            #[inline]
            pub fn to_le(self) -> Self {
                if cfg!(target_endian = "little") {
                    self
                } else {
                    self.swap_bytes()
                }
            }

            /// Converts `self` from the target's endianness to big endian.
            ///
            /// Returns `self` unchanged on big-endian targets and the
            /// byte-swapped vector on every other target.
            #[inline]
            pub fn to_be(self) -> Self {
                if cfg!(target_endian = "big") {
                    self
                } else {
                    self.swap_bytes()
                }
            }
        }
    };
}
|
||||
|
||||
#[cfg(test)]
macro_rules! test_swap_bytes {
    ($id:ident, $elem_ty:ty) => {
        use coresimd::simd::$id;
        use std::{mem, slice};

        /// Reference pattern: byte `i` holds the value `i`, so any
        /// reordering of the bytes is directly observable.
        const BYTES: [u8; 64] = [
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
            19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
            35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
            51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
        ];

        /// Byte buffer with an explicit 64-byte alignment.
        ///
        /// A plain `[u8; 64]` only guarantees an alignment of 1, so viewing
        /// it as a slice of wider elements would create a misaligned slice,
        /// which `slice::from_raw_parts_mut` does not permit.
        #[repr(align(64))]
        struct AlignedBytes([u8; 64]);

        // Loads the first `size_of::<$id>()` bytes of `BYTES` into a vector,
        // applies `$func` to it, stores the result back and evaluates to the
        // whole 64-byte buffer.
        macro_rules! swap {
            ($func: ident) => {{
                // catch possible future >512-bit vectors
                assert!(mem::size_of::<$id>() <= 64);

                let mut actual = AlignedBytes(BYTES);
                let elems: &mut [$elem_ty] = unsafe {
                    // SAFETY: the buffer is 64 bytes long and 64-byte
                    // aligned; the assertion above guarantees that the
                    // vector (`lanes()` elements) fits inside it.
                    slice::from_raw_parts_mut(
                        actual.0.as_mut_ptr() as *mut $elem_ty,
                        $id::lanes(),
                    )
                };

                let vec = $id::load_unaligned(elems);
                vec.$func().store_unaligned(elems);

                actual.0
            }};
        }

        // Asserts that `$func` reverses the bytes of the whole vector.
        macro_rules! test_swap {
            ($func: ident) => {{
                let actual = swap!($func);
                // The first `size_of::<$id>()` reference bytes, reversed.
                let expected =
                    BYTES.iter().rev().skip(64 - mem::size_of::<$id>());

                // `zip` stops at the shorter side, so only the bytes covered
                // by the vector are compared.
                assert!(actual.iter().zip(expected).all(|(x, y)| x == y));
            }};
        }

        // Asserts that `$func` leaves the bytes of the vector untouched.
        macro_rules! test_no_swap {
            ($func: ident) => {{
                let actual = swap!($func);
                let expected = BYTES.iter().take(mem::size_of::<$id>());

                assert!(actual.iter().zip(expected).all(|(x, y)| x == y));
            }};
        }

        #[test]
        fn swap_bytes() {
            test_swap!(swap_bytes);
        }

        #[test]
        fn to_le() {
            #[cfg(target_endian = "little")]
            {
                test_no_swap!(to_le);
            }
            #[cfg(not(target_endian = "little"))]
            {
                test_swap!(to_le);
            }
        }

        #[test]
        fn to_be() {
            #[cfg(target_endian = "big")]
            {
                test_no_swap!(to_be);
            }
            #[cfg(not(target_endian = "big"))]
            {
                test_swap!(to_be);
            }
        }
    };
}
|
||||
|
|
@ -4,6 +4,7 @@
|
|||
pub mod wrapping;
|
||||
|
||||
pub mod masks_reductions;
|
||||
pub mod swap_bytes;
|
||||
|
||||
pub mod abs;
|
||||
pub mod cos;
|
||||
|
|
|
|||
140
library/stdarch/coresimd/ppsv/codegen/swap_bytes.rs
Normal file
140
library/stdarch/coresimd/ppsv/codegen/swap_bytes.rs
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
//! Code generation for reversing the byte order of vectors.
|
||||
|
||||
#![allow(unused)]
|
||||
|
||||
use coresimd::simd::*;
|
||||
|
||||
/// Code-generation hook for reversing the byte order of a vector.
///
/// Implemented for the vector types listed in the `vector_impl!` invocation
/// of this module.
pub trait SwapBytes {
|
||||
/// Returns `self` with its bytes in reverse order.
///
/// NOTE(review): declared `unsafe`, but no caller precondition is
/// documented here — confirm whether one exists.
unsafe fn swap_bytes(self) -> Self;
|
||||
}
|
||||
|
||||
// TODO: switch to shuffle API once it lands
|
||||
// TODO: investigate `llvm.bswap`
|
||||
macro_rules! impl_swap_bytes {
    // Internal rule shared by every width from 32 bits upwards: reinterpret
    // the vector as the byte vector `$bytes`, reverse its lanes with the
    // `$shuffle` intrinsic and reinterpret the result as `$id` again.
    // `$indices` is the bracketed list of `$len` descending lane indices.
    (@via_bytes $bytes:ident, $shuffle:ident, $len:expr, $indices:tt,
     $($id:ident,)+) => {$(
        impl SwapBytes for $id {
            #[inline]
            unsafe fn swap_bytes(self) -> Self {
                use coresimd::simd_llvm::$shuffle;

                const INDICES: [u32; $len] = $indices;
                let bytes = $bytes::from_bits(self);
                let reversed: $bytes = $shuffle(bytes, bytes, INDICES);
                $id::from_bits(reversed)
            }
        }
    )+};
    // 16-bit vectors: the two lanes are shuffled directly, without going
    // through a byte-vector reinterpretation.
    (v16, $($id:ident,)+) => {$(
        impl SwapBytes for $id {
            #[inline]
            unsafe fn swap_bytes(self) -> Self {
                use coresimd::simd_llvm::simd_shuffle2;

                const INDICES: [u32; 2] = [1, 0];
                simd_shuffle2(self, self, INDICES)
            }
        }
    )+};
    (v32, $($id:ident,)+) => {
        impl_swap_bytes!(
            @via_bytes u8x4, simd_shuffle4, 4,
            [3, 2, 1, 0],
            $($id,)+
        );
    };
    (v64, $($id:ident,)+) => {
        impl_swap_bytes!(
            @via_bytes u8x8, simd_shuffle8, 8,
            [7, 6, 5, 4, 3, 2, 1, 0],
            $($id,)+
        );
    };
    (v128, $($id:ident,)+) => {
        impl_swap_bytes!(
            @via_bytes u8x16, simd_shuffle16, 16,
            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0],
            $($id,)+
        );
    };
    (v256, $($id:ident,)+) => {
        impl_swap_bytes!(
            @via_bytes u8x32, simd_shuffle32, 32,
            [
                31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
            ],
            $($id,)+
        );
    };
    (v512, $($id:ident,)+) => {
        impl_swap_bytes!(
            @via_bytes u8x64, simd_shuffle64, 64,
            [
                63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48,
                47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32,
                31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
            ],
            $($id,)+
        );
    };
}
|
||||
|
||||
// Implement `SwapBytes` for the listed vector types, one entry per total
// vector width; the width tag selects the matching `impl_swap_bytes` arm.
vector_impl!(
    [impl_swap_bytes, v16, u8x2, i8x2,],
    [impl_swap_bytes, v32, u8x4, i8x4, u16x2, i16x2,],
    [impl_swap_bytes, v64, u8x8, i8x8, u16x4, i16x4, u32x2, i32x2,],
    [impl_swap_bytes, v128, u8x16, i8x16, u16x8, i16x8, u32x4, i32x4, u64x2, i64x2,],
    [impl_swap_bytes, v256, u8x32, i8x32, u16x16, i16x16, u32x8, i32x8, u64x4, i64x4,],
    [impl_swap_bytes, v512, u8x64, i8x64, u16x32, i16x32, u32x16, i32x16, u64x8, i64x8,]
);
|
||||
|
|
@ -15,6 +15,8 @@ extern "platform-intrinsic" {
|
|||
pub fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U;
|
||||
pub fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U;
|
||||
pub fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U;
|
||||
pub fn simd_shuffle64<T, U>(x: T, y: T, idx: [u32; 64]) -> U;
|
||||
pub fn simd_shuffle128<T, U>(x: T, y: T, idx: [u32; 128]) -> U;
|
||||
|
||||
pub fn simd_insert<T, U>(x: T, idx: u32, val: U) -> T;
|
||||
pub fn simd_extract<T, U>(x: T, idx: u32) -> U;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue