From ac11d6941dd8d7790ec285d880f3e536a429a55d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Oliveira?= Date: Mon, 6 Nov 2017 15:09:14 +0000 Subject: [PATCH] Add _mm_cvtepu8_epi{16, 32, 64} --- library/stdarch/src/lib.rs | 14 +++++++-- library/stdarch/src/v64.rs | 1 + library/stdarch/src/x86/sse41.rs | 50 ++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 2 deletions(-) diff --git a/library/stdarch/src/lib.rs b/library/stdarch/src/lib.rs index 6e41a7026f46..2ea9d96867a2 100644 --- a/library/stdarch/src/lib.rs +++ b/library/stdarch/src/lib.rs @@ -176,7 +176,11 @@ mod v32 { define_ty! { u8x4, u8, u8, u8, u8 } define_impl! { u8x4, u8, 4, i8x4, x0, x1, x2, x3 } - define_casts!((i8x4, i32x4, as_i32x4), (i16x2, i64x2, as_i64x2)); + define_casts!( + (i8x4, i32x4, as_i32x4), + (u8x4, i32x4, as_i32x4), + (i16x2, i64x2, as_i64x2) + ); } /// 16-bit wide vector tpyes @@ -186,7 +190,13 @@ mod v16 { define_ty! { i8x2, i8, i8 } define_impl! { i8x2, i8, 2, i8x2, x0, x1 } - define_casts!((i8x2, i64x2, as_i64x2)); + define_ty! { u8x2, u8, u8 } + define_impl! 
{ u8x2, u8, 2, i8x2, x0, x1 } + + define_casts!( + (i8x2, i64x2, as_i64x2), + (u8x2, i64x2, as_i64x2) + ); } #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] diff --git a/library/stdarch/src/v64.rs b/library/stdarch/src/v64.rs index 0df2e878d685..9b4670bae970 100644 --- a/library/stdarch/src/v64.rs +++ b/library/stdarch/src/v64.rs @@ -60,6 +60,7 @@ define_casts!( (u8x8, i8x8, as_i8x8), (i8x8, u8x8, as_u8x8), (i8x8, i16x8, as_i16x8), + (u8x8, i16x8, as_i16x8), (i16x4, i32x4, as_i32x4), (i32x2, i64x2, as_i64x2), (u8x8, u16x8, as_u16x8), diff --git a/library/stdarch/src/x86/sse41.rs b/library/stdarch/src/x86/sse41.rs index 6f5dbe078098..23d25819994a 100644 --- a/library/stdarch/src/x86/sse41.rs +++ b/library/stdarch/src/x86/sse41.rs @@ -346,6 +346,30 @@ pub unsafe fn _mm_cvtepi32_epi64(a: i32x4) -> i64x2 { simd_shuffle2::<_, ::v64::i32x2>(a, a, [0, 1]).as_i64x2() } +/// Zero extend packed unsigned 8-bit integers in `a` to packed 16-bit integers +#[inline(always)] +#[target_feature = "+sse4.1"] +#[cfg_attr(test, assert_instr(pmovzxbw))] +pub unsafe fn _mm_cvtepu8_epi16(a: u8x16) -> i16x8 { + simd_shuffle8::<_, ::v64::u8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]).as_i16x8() +} + +/// Zero extend packed unsigned 8-bit integers in `a` to packed 32-bit integers +#[inline(always)] +#[target_feature = "+sse4.1"] +#[cfg_attr(test, assert_instr(pmovzxbd))] +pub unsafe fn _mm_cvtepu8_epi32(a: u8x16) -> i32x4 { + simd_shuffle4::<_, ::v32::u8x4>(a, a, [0, 1, 2, 3]).as_i32x4() +} + +/// Zero extend packed unsigned 8-bit integers in `a` to packed 64-bit integers +#[inline(always)] +#[target_feature = "+sse4.1"] +#[cfg_attr(test, assert_instr(pmovzxbq))] +pub unsafe fn _mm_cvtepu8_epi64(a: u8x16) -> i64x2 { + simd_shuffle2::<_, ::v16::u8x2>(a, a, [0, 1]).as_i64x2() +} + /// Returns the dot product of two f64x2 vectors. /// /// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask. 
@@ -1041,6 +1065,32 @@ mod tests { assert_eq!(r, e); } + #[simd_test = "sse4.1"] + unsafe fn _mm_cvtepu8_epi16() { + let a = u8x16::splat(10); + let r = sse41::_mm_cvtepu8_epi16(a); + let e = i16x8::splat(10); + assert_eq!(r, e); + } + + #[simd_test = "sse4.1"] + unsafe fn _mm_cvtepu8_epi32() { + let a = u8x16::splat(10); + let r = sse41::_mm_cvtepu8_epi32(a); + let e = i32x4::splat(10); + assert_eq!(r, e); + } + + #[simd_test = "sse4.1"] + unsafe fn _mm_cvtepu8_epi64() { + let a = u8x16::splat(10); + let r = sse41::_mm_cvtepu8_epi64(a); + let e = i64x2::splat(10); + assert_eq!(r, e); + } + + + #[simd_test = "sse4.1"] unsafe fn _mm_dp_pd() { let a = f64x2::new(2.0, 3.0);