From 83e5d232ac1ffbeb6d3832fdf8392760a2dd7511 Mon Sep 17 00:00:00 2001 From: TheIronBorn Date: Tue, 10 Jul 2018 01:20:52 -0700 Subject: [PATCH] add swap_bytes/to_le/to_be (#517) * add large shuffle intrinsics * add swap_bytes/to_le * add to_be * more tests * improve swap_bytes tests --- .../coresimd/ppsv/api/masks_reductions.rs | 2 +- library/stdarch/coresimd/ppsv/api/mod.rs | 9 +- .../stdarch/coresimd/ppsv/api/swap_bytes.rs | 130 ++++++++++++++++ library/stdarch/coresimd/ppsv/codegen/mod.rs | 1 + .../coresimd/ppsv/codegen/swap_bytes.rs | 140 ++++++++++++++++++ library/stdarch/coresimd/simd_llvm.rs | 2 + 6 files changed, 281 insertions(+), 3 deletions(-) create mode 100644 library/stdarch/coresimd/ppsv/api/swap_bytes.rs create mode 100644 library/stdarch/coresimd/ppsv/codegen/swap_bytes.rs diff --git a/library/stdarch/coresimd/ppsv/api/masks_reductions.rs b/library/stdarch/coresimd/ppsv/api/masks_reductions.rs index bc7ac36d34fe..85ba11c4a760 100644 --- a/library/stdarch/coresimd/ppsv/api/masks_reductions.rs +++ b/library/stdarch/coresimd/ppsv/api/masks_reductions.rs @@ -9,7 +9,7 @@ macro_rules! impl_mask_reductions { pub fn all(self) -> bool { unsafe { super::codegen::masks_reductions::All::all(self) } } - /// Is `any` vector lanes `true`? + /// Is `any` vector lane `true`? #[inline] pub fn any(self) -> bool { unsafe { super::codegen::masks_reductions::Any::any(self) } diff --git a/library/stdarch/coresimd/ppsv/api/mod.rs b/library/stdarch/coresimd/ppsv/api/mod.rs index 4379e1c713e2..1c38926a654a 100644 --- a/library/stdarch/coresimd/ppsv/api/mod.rs +++ b/library/stdarch/coresimd/ppsv/api/mod.rs @@ -72,6 +72,8 @@ mod masks_select; mod scalar_shifts; #[macro_use] mod shifts; +#[macro_use] +mod swap_bytes; /// Sealed trait used for constraining select implementations. pub trait Lanes {} @@ -143,7 +145,8 @@ macro_rules! 
simd_i_ty { [impl_eq, $id], [impl_partial_eq, $id], [impl_default, $id, $elem_ty], - [impl_int_minmax_ops, $id] + [impl_int_minmax_ops, $id], + [impl_swap_bytes, $id] ); $test_macro!( @@ -197,7 +200,8 @@ macro_rules! simd_u_ty { [impl_eq, $id], [impl_partial_eq, $id], [impl_default, $id, $elem_ty], - [impl_int_minmax_ops, $id] + [impl_int_minmax_ops, $id], + [impl_swap_bytes, $id] ); $test_macro!( @@ -221,6 +225,7 @@ macro_rules! simd_u_ty { test_default!($id, $elem_ty); test_mask_select!($mask_ty, $id, $elem_ty); test_int_minmax_ops!($id, $elem_ty); + test_swap_bytes!($id, $elem_ty); } ); } diff --git a/library/stdarch/coresimd/ppsv/api/swap_bytes.rs b/library/stdarch/coresimd/ppsv/api/swap_bytes.rs new file mode 100644 index 000000000000..d94dbb592fc5 --- /dev/null +++ b/library/stdarch/coresimd/ppsv/api/swap_bytes.rs @@ -0,0 +1,130 @@ +//! Horizontal swap bytes. + +macro_rules! impl_swap_bytes { + ($id:ident) => { + impl $id { + /// Reverses the byte order of the vector. + #[inline] + pub fn swap_bytes(self) -> Self { + unsafe { + super::codegen::swap_bytes::SwapBytes::swap_bytes(self) + } + } + + /// Converts self to little endian from the target's endianness. + /// + /// On little endian this is a no-op. On big endian the bytes are + /// swapped. + #[inline] + pub fn to_le(self) -> Self { + #[cfg(target_endian = "little")] + { + self + } + #[cfg(not(target_endian = "little"))] + { + self.swap_bytes() + } + } + + /// Converts self to big endian from the target's endianness. + /// + /// On big endian this is a no-op. On little endian the bytes are + /// swapped. + #[inline] + pub fn to_be(self) -> Self { + #[cfg(target_endian = "big")] + { + self + } + #[cfg(not(target_endian = "big"))] + { + self.swap_bytes() + } + } + } + }; +} + +#[cfg(test)] +macro_rules! 
test_swap_bytes { + ($id:ident, $elem_ty:ty) => { + use coresimd::simd::$id; + use std::{mem, slice}; + + const BYTES: [u8; 64] = [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + ]; + + macro_rules! swap { + ($func: ident) => {{ + // catch possible future >512-bit vectors + assert!(mem::size_of::<$id>() <= 64); + + let mut actual = BYTES; + let elems: &mut [$elem_ty] = unsafe { + slice::from_raw_parts_mut( + actual.as_mut_ptr() as *mut $elem_ty, + $id::lanes(), + ) + }; + + let vec = $id::load_unaligned(elems); + vec.$func().store_unaligned(elems); + + actual + }}; + } + + macro_rules! test_swap { + ($func: ident) => {{ + let actual = swap!($func); + let expected = + BYTES.iter().rev().skip(64 - mem::size_of::<$id>()); + + assert!(actual.iter().zip(expected).all(|(x, y)| x == y)); + }}; + } + + macro_rules! 
test_no_swap { + ($func: ident) => {{ + let actual = swap!($func); + let expected = BYTES.iter().take(mem::size_of::<$id>()); + + assert!(actual.iter().zip(expected).all(|(x, y)| x == y)); + }}; + } + + #[test] + fn swap_bytes() { + test_swap!(swap_bytes); + } + + #[test] + fn to_le() { + #[cfg(target_endian = "little")] + { + test_no_swap!(to_le); + } + #[cfg(not(target_endian = "little"))] + { + test_swap!(to_le); + } + } + + #[test] + fn to_be() { + #[cfg(target_endian = "big")] + { + test_no_swap!(to_be); + } + #[cfg(not(target_endian = "big"))] + { + test_swap!(to_be); + } + } + }; +} diff --git a/library/stdarch/coresimd/ppsv/codegen/mod.rs b/library/stdarch/coresimd/ppsv/codegen/mod.rs index 4ca39957ff0b..6e9a73fe5ce3 100644 --- a/library/stdarch/coresimd/ppsv/codegen/mod.rs +++ b/library/stdarch/coresimd/ppsv/codegen/mod.rs @@ -4,6 +4,7 @@ pub mod wrapping; pub mod masks_reductions; +pub mod swap_bytes; pub mod abs; pub mod cos; diff --git a/library/stdarch/coresimd/ppsv/codegen/swap_bytes.rs b/library/stdarch/coresimd/ppsv/codegen/swap_bytes.rs new file mode 100644 index 000000000000..e9d291873719 --- /dev/null +++ b/library/stdarch/coresimd/ppsv/codegen/swap_bytes.rs @@ -0,0 +1,140 @@ +//! Horizontal swap bytes. + +#![allow(unused)] + +use coresimd::simd::*; + +pub trait SwapBytes { + unsafe fn swap_bytes(self) -> Self; +} + +// TODO: switch to shuffle API once it lands +// TODO: investigate `llvm.bswap` +macro_rules! 
impl_swap_bytes { + (v16, $($id:ident,)+) => {$( + impl SwapBytes for $id { + #[inline] + unsafe fn swap_bytes(self) -> Self { + use coresimd::simd_llvm::simd_shuffle2; + + const INDICES: [u32; 2] = [1, 0]; + simd_shuffle2(self, self, INDICES) + } + } + )+}; + (v32, $($id:ident,)+) => {$( + impl SwapBytes for $id { + #[inline] + unsafe fn swap_bytes(self) -> Self { + use coresimd::simd_llvm::simd_shuffle4; + + const INDICES: [u32; 4] = [3, 2, 1, 0]; + let vec8 = u8x4::from_bits(self); + let shuffled: u8x4 = simd_shuffle4(vec8, vec8, INDICES); + $id::from_bits(shuffled) + } + } + )+}; + (v64, $($id:ident,)+) => {$( + impl SwapBytes for $id { + #[inline] + unsafe fn swap_bytes(self) -> Self { + use coresimd::simd_llvm::simd_shuffle8; + + const INDICES: [u32; 8] = [7, 6, 5, 4, 3, 2, 1, 0]; + let vec8 = u8x8::from_bits(self); + let shuffled: u8x8 = simd_shuffle8(vec8, vec8, INDICES); + $id::from_bits(shuffled) + } + } + )+}; + (v128, $($id:ident,)+) => {$( + impl SwapBytes for $id { + #[inline] + unsafe fn swap_bytes(self) -> Self { + use coresimd::simd_llvm::simd_shuffle16; + + const INDICES: [u32; 16] = [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]; + let vec8 = u8x16::from_bits(self); + let shuffled: u8x16 = simd_shuffle16(vec8, vec8, INDICES); + $id::from_bits(shuffled) + } + } + )+}; + (v256, $($id:ident,)+) => {$( + impl SwapBytes for $id { + #[inline] + unsafe fn swap_bytes(self) -> Self { + use coresimd::simd_llvm::simd_shuffle32; + + const INDICES: [u32; 32] = [ + 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + ]; + let vec8 = u8x32::from_bits(self); + let shuffled: u8x32 = simd_shuffle32(vec8, vec8, INDICES); + $id::from_bits(shuffled) + } + } + )+}; + (v512, $($id:ident,)+) => {$( + impl SwapBytes for $id { + #[inline] + unsafe fn swap_bytes(self) -> Self { + use coresimd::simd_llvm::simd_shuffle64; + + const INDICES: [u32; 64] = [ + 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 
52, 51, 50, 49, 48, + 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, + 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + ]; + let vec8 = u8x64::from_bits(self); + let shuffled: u8x64 = simd_shuffle64(vec8, vec8, INDICES); + $id::from_bits(shuffled) + } + } + )+}; +} + +vector_impl!( + [impl_swap_bytes, v16, u8x2, i8x2,], + [impl_swap_bytes, v32, u8x4, i8x4, u16x2, i16x2,], + [impl_swap_bytes, v64, u8x8, i8x8, u16x4, i16x4, u32x2, i32x2,], + [ + impl_swap_bytes, + v128, + u8x16, + i8x16, + u16x8, + i16x8, + u32x4, + i32x4, + u64x2, + i64x2, + ], + [ + impl_swap_bytes, + v256, + u8x32, + i8x32, + u16x16, + i16x16, + u32x8, + i32x8, + u64x4, + i64x4, + ], + [ + impl_swap_bytes, + v512, + u8x64, + i8x64, + u16x32, + i16x32, + u32x16, + i32x16, + u64x8, + i64x8, + ] +); diff --git a/library/stdarch/coresimd/simd_llvm.rs b/library/stdarch/coresimd/simd_llvm.rs index c83c2d4b350c..2ba3944bd434 100644 --- a/library/stdarch/coresimd/simd_llvm.rs +++ b/library/stdarch/coresimd/simd_llvm.rs @@ -15,6 +15,8 @@ extern "platform-intrinsic" { pub fn simd_shuffle8(x: T, y: T, idx: [u32; 8]) -> U; pub fn simd_shuffle16(x: T, y: T, idx: [u32; 16]) -> U; pub fn simd_shuffle32(x: T, y: T, idx: [u32; 32]) -> U; + pub fn simd_shuffle64(x: T, y: T, idx: [u32; 64]) -> U; + pub fn simd_shuffle128(x: T, y: T, idx: [u32; 128]) -> U; pub fn simd_insert(x: T, idx: u32, val: U) -> T; pub fn simd_extract(x: T, idx: u32) -> U;