add swap_bytes/to_le/to_be (#517)

* add large shuffle intrinsics * add swap_bytes/to_le * add to_be * more tests * improve swap_bytes tests
2018-07-10 01:20:52 -07:00 · 2018-07-10 01:20:52 -07:00 · 83e5d232ac
commit 83e5d232ac
parent e0752318f7
6 changed files with 281 additions and 3 deletions
--- a/library/stdarch/coresimd/ppsv/api/masks_reductions.rs
+++ b/library/stdarch/coresimd/ppsv/api/masks_reductions.rs
@ -9,7 +9,7 @@ macro_rules! impl_mask_reductions {
            pub fn all(self) -> bool {
                unsafe { super::codegen::masks_reductions::All::all(self) }
            }
-            /// Is `any` vector lanes `true`?
+            /// Is `any` vector lane `true`?
            #[inline]
            pub fn any(self) -> bool {
                unsafe { super::codegen::masks_reductions::Any::any(self) }
--- a/library/stdarch/coresimd/ppsv/api/mod.rs
+++ b/library/stdarch/coresimd/ppsv/api/mod.rs
@ -72,6 +72,8 @@ mod masks_select;
 mod scalar_shifts;
 #[macro_use]
 mod shifts;
+#[macro_use]
+mod swap_bytes;

 /// Sealed trait used for constraining select implementations.
 pub trait Lanes<A> {}
@ -143,7 +145,8 @@ macro_rules! simd_i_ty {
            [impl_eq, $id],
            [impl_partial_eq, $id],
            [impl_default, $id, $elem_ty],
-            [impl_int_minmax_ops, $id]
+            [impl_int_minmax_ops, $id],
+            [impl_swap_bytes, $id]
        );

        $test_macro!(
@ -197,7 +200,8 @@ macro_rules! simd_u_ty {
            [impl_eq, $id],
            [impl_partial_eq, $id],
            [impl_default, $id, $elem_ty],
-            [impl_int_minmax_ops, $id]
+            [impl_int_minmax_ops, $id],
+            [impl_swap_bytes, $id]
        );

        $test_macro!(
@ -221,6 +225,7 @@ macro_rules! simd_u_ty {
                test_default!($id, $elem_ty);
                test_mask_select!($mask_ty, $id, $elem_ty);
                test_int_minmax_ops!($id, $elem_ty);
+                test_swap_bytes!($id, $elem_ty);
            }
        );
    }
--- a/library/stdarch/coresimd/ppsv/api/swap_bytes.rs
+++ b/library/stdarch/coresimd/ppsv/api/swap_bytes.rs
@ -0,0 +1,130 @@
+//! Byte swapping and endianness conversion (`swap_bytes`, `to_le`, `to_be`).
+
+macro_rules! impl_swap_bytes {
+    ($id:ident) => {
+        impl $id {
+            /// Reverses the order of all bytes in the vector.
+            ///
+            /// The reversal spans the whole vector, not each lane
+            /// separately.
+            #[inline]
+            pub fn swap_bytes(self) -> Self {
+                // NOTE(review): `SwapBytes::swap_bytes` is declared `unsafe`;
+                // the implementations in `codegen/swap_bytes.rs` only permute
+                // the vector's own bytes using constant indices.
+                unsafe {
+                    super::codegen::swap_bytes::SwapBytes::swap_bytes(self)
+                }
+            }
+
+            /// Converts `self` from the target's endianness to little endian.
+            ///
+            /// Returns `self` unchanged on little-endian targets and the
+            /// byte-swapped vector on every other target.
+            #[inline]
+            pub fn to_le(self) -> Self {
+                if cfg!(target_endian = "little") {
+                    self
+                } else {
+                    self.swap_bytes()
+                }
+            }
+
+            /// Converts `self` from the target's endianness to big endian.
+            ///
+            /// Returns `self` unchanged on big-endian targets and the
+            /// byte-swapped vector on every other target.
+            #[inline]
+            pub fn to_be(self) -> Self {
+                if cfg!(target_endian = "big") {
+                    self
+                } else {
+                    self.swap_bytes()
+                }
+            }
+        }
+    };
+}
+
+#[cfg(test)]
+macro_rules! test_swap_bytes {
+    // Generates the `swap_bytes`, `to_le` and `to_be` tests for the vector
+    // type `$id`. `$elem_ty` is accepted for interface compatibility with the
+    // other test macros; the tests operate on raw bytes only.
+    ($id:ident, $elem_ty:ty) => {
+        use coresimd::simd::$id;
+        use std::{mem, ptr};
+
+        // Reference pattern: byte `i` holds the value `i`, so a reversal is
+        // easy to recognise.
+        const BYTES: [u8; 64] = [
+            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+            19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
+            35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
+            51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+        ];
+
+        // Loads a `$id` from the start of `BYTES`, applies `$func` to it and
+        // writes the result back over the same bytes. Evaluates to the whole
+        // 64-byte buffer; bytes past `size_of::<$id>()` are left untouched.
+        macro_rules! swap {
+            ($func: ident) => {{
+                // catch possible future >512-bit vectors
+                assert!(mem::size_of::<$id>() <= 64);
+
+                let mut actual = BYTES;
+
+                // A `[u8; 64]` is only guaranteed to be 1-byte aligned, so
+                // it must not be reinterpreted as a `&mut [$elem_ty]`.
+                //
+                // SAFETY: the assertion above guarantees the buffer holds at
+                // least `size_of::<$id>()` bytes, and the unaligned
+                // read/write impose no alignment requirement.
+                let vec: $id = unsafe {
+                    ptr::read_unaligned(actual.as_ptr() as *const $id)
+                };
+                let swapped = vec.$func();
+                unsafe {
+                    ptr::write_unaligned(
+                        actual.as_mut_ptr() as *mut $id,
+                        swapped,
+                    );
+                }
+
+                actual
+            }};
+        }
+
+        // Asserts that `$func` reverses the bytes of the whole vector.
+        macro_rules! test_swap {
+            ($func: ident) => {{
+                let actual = swap!($func);
+                // The first `size_of::<$id>()` reference bytes, reversed.
+                let expected =
+                    BYTES.iter().rev().skip(64 - mem::size_of::<$id>());
+
+                // `zip` stops at the end of `expected`, so only the bytes
+                // covered by the vector are compared.
+                assert!(actual.iter().zip(expected).all(|(x, y)| x == y));
+            }};
+        }
+
+        // Asserts that `$func` leaves the bytes of the vector unchanged.
+        macro_rules! test_no_swap {
+            ($func: ident) => {{
+                let actual = swap!($func);
+                let expected = BYTES.iter().take(mem::size_of::<$id>());
+
+                assert!(actual.iter().zip(expected).all(|(x, y)| x == y));
+            }};
+        }
+
+        #[test]
+        fn swap_bytes() {
+            test_swap!(swap_bytes);
+        }
+
+        #[test]
+        fn to_le() {
+            #[cfg(target_endian = "little")]
+            {
+                test_no_swap!(to_le);
+            }
+            #[cfg(not(target_endian = "little"))]
+            {
+                test_swap!(to_le);
+            }
+        }
+
+        #[test]
+        fn to_be() {
+            #[cfg(target_endian = "big")]
+            {
+                test_no_swap!(to_be);
+            }
+            #[cfg(not(target_endian = "big"))]
+            {
+                test_swap!(to_be);
+            }
+        }
+    };
+}
--- a/library/stdarch/coresimd/ppsv/codegen/mod.rs
+++ b/library/stdarch/coresimd/ppsv/codegen/mod.rs
@ -4,6 +4,7 @@
 pub mod wrapping;

 pub mod masks_reductions;
+pub mod swap_bytes;

 pub mod abs;
 pub mod cos;
--- a/library/stdarch/coresimd/ppsv/codegen/swap_bytes.rs
+++ b/library/stdarch/coresimd/ppsv/codegen/swap_bytes.rs
@ -0,0 +1,140 @@
+//! Byte swapping implemented with byte shuffles.
+
+#![allow(unused)]
+
+use coresimd::simd::*;
+
+/// Byte-order reversal for a vector type.
+///
+/// Implemented in this file, per vector width, by `impl_swap_bytes!`.
+pub trait SwapBytes {
+    /// Returns `self` with its bytes in reverse order.
+    unsafe fn swap_bytes(self) -> Self;
+}
+
+// TODO: switch to shuffle API once it lands
+// TODO: investigate `llvm.bswap`
+macro_rules! impl_swap_bytes {
+    // Each arm is selected by the total vector width and implements
+    // `SwapBytes` for every listed type by reversing all of its bytes with a
+    // single shuffle.
+
+    // 16-bit vectors: the listed types already have two byte-sized lanes, so
+    // they are shuffled directly.
+    (v16, $($id:ident,)+) => {$(
+        impl SwapBytes for $id {
+            #[inline]
+            unsafe fn swap_bytes(self) -> Self {
+                use coresimd::simd_llvm::simd_shuffle2;
+
+                const REVERSED: [u32; 2] = [1, 0];
+                simd_shuffle2(self, self, REVERSED)
+            }
+        }
+    )+};
+    // 32-bit vectors: reinterpret as four bytes, reverse, convert back.
+    (v32, $($id:ident,)+) => {$(
+        impl SwapBytes for $id {
+            #[inline]
+            unsafe fn swap_bytes(self) -> Self {
+                use coresimd::simd_llvm::simd_shuffle4;
+
+                const REVERSED: [u32; 4] = [3, 2, 1, 0];
+                let bytes = u8x4::from_bits(self);
+                let reversed: u8x4 = simd_shuffle4(bytes, bytes, REVERSED);
+                $id::from_bits(reversed)
+            }
+        }
+    )+};
+    // 64-bit vectors: reinterpret as eight bytes, reverse, convert back.
+    (v64, $($id:ident,)+) => {$(
+        impl SwapBytes for $id {
+            #[inline]
+            unsafe fn swap_bytes(self) -> Self {
+                use coresimd::simd_llvm::simd_shuffle8;
+
+                const REVERSED: [u32; 8] = [7, 6, 5, 4, 3, 2, 1, 0];
+                let bytes = u8x8::from_bits(self);
+                let reversed: u8x8 = simd_shuffle8(bytes, bytes, REVERSED);
+                $id::from_bits(reversed)
+            }
+        }
+    )+};
+    // 128-bit vectors: reinterpret as sixteen bytes, reverse, convert back.
+    (v128, $($id:ident,)+) => {$(
+        impl SwapBytes for $id {
+            #[inline]
+            unsafe fn swap_bytes(self) -> Self {
+                use coresimd::simd_llvm::simd_shuffle16;
+
+                const REVERSED: [u32; 16] = [
+                    15, 14, 13, 12, 11, 10, 9,  8,  7,  6,  5,  4,  3,  2,  1,  0,
+                ];
+                let bytes = u8x16::from_bits(self);
+                let reversed: u8x16 = simd_shuffle16(bytes, bytes, REVERSED);
+                $id::from_bits(reversed)
+            }
+        }
+    )+};
+    // 256-bit vectors: reinterpret as 32 bytes, reverse, convert back.
+    (v256, $($id:ident,)+) => {$(
+        impl SwapBytes for $id {
+            #[inline]
+            unsafe fn swap_bytes(self) -> Self {
+                use coresimd::simd_llvm::simd_shuffle32;
+
+                const REVERSED: [u32; 32] = [
+                    31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
+                    15, 14, 13, 12, 11, 10, 9,  8,  7,  6,  5,  4,  3,  2,  1,  0,
+                ];
+                let bytes = u8x32::from_bits(self);
+                let reversed: u8x32 = simd_shuffle32(bytes, bytes, REVERSED);
+                $id::from_bits(reversed)
+            }
+        }
+    )+};
+    // 512-bit vectors: reinterpret as 64 bytes, reverse, convert back.
+    (v512, $($id:ident,)+) => {$(
+        impl SwapBytes for $id {
+            #[inline]
+            unsafe fn swap_bytes(self) -> Self {
+                use coresimd::simd_llvm::simd_shuffle64;
+
+                const REVERSED: [u32; 64] = [
+                    63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48,
+                    47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32,
+                    31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
+                    15, 14, 13, 12, 11, 10, 9,  8,  7,  6,  5,  4,  3,  2,  1,  0,
+                ];
+                let bytes = u8x64::from_bits(self);
+                let reversed: u8x64 = simd_shuffle64(bytes, bytes, REVERSED);
+                $id::from_bits(reversed)
+            }
+        }
+    )+};
+}
+
+// Instantiate `SwapBytes` for the integer vector types listed below, grouped
+// by total vector width (`v16` .. `v512`); the width tag selects the matching
+// `impl_swap_bytes!` arm.
+vector_impl!(
+    [impl_swap_bytes, v16, u8x2, i8x2,],
+    [impl_swap_bytes, v32, u8x4, i8x4, u16x2, i16x2,],
+    [impl_swap_bytes, v64, u8x8, i8x8, u16x4, i16x4, u32x2, i32x2,],
+    [
+        impl_swap_bytes,
+        v128,
+        u8x16,
+        i8x16,
+        u16x8,
+        i16x8,
+        u32x4,
+        i32x4,
+        u64x2,
+        i64x2,
+    ],
+    [
+        impl_swap_bytes,
+        v256,
+        u8x32,
+        i8x32,
+        u16x16,
+        i16x16,
+        u32x8,
+        i32x8,
+        u64x4,
+        i64x4,
+    ],
+    [
+        impl_swap_bytes,
+        v512,
+        u8x64,
+        i8x64,
+        u16x32,
+        i16x32,
+        u32x16,
+        i32x16,
+        u64x8,
+        i64x8,
+    ]
+);
--- a/library/stdarch/coresimd/simd_llvm.rs
+++ b/library/stdarch/coresimd/simd_llvm.rs
@ -15,6 +15,8 @@ extern "platform-intrinsic" {
    pub fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U;
    pub fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U;
    pub fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U;
+    pub fn simd_shuffle64<T, U>(x: T, y: T, idx: [u32; 64]) -> U;
+    pub fn simd_shuffle128<T, U>(x: T, y: T, idx: [u32; 128]) -> U;

    pub fn simd_insert<T, U>(x: T, idx: u32, val: U) -> T;
    pub fn simd_extract<T, U>(x: T, idx: u32) -> U;