Add _mm_cvtepu8_epi{16, 32, 64}

2017-11-06 15:09:14 +00:00 · 2017-11-06 15:09:14 +00:00 · ac11d6941d
commit ac11d6941d
parent 48027e994b
3 changed files with 63 additions and 2 deletions
--- a/library/stdarch/src/lib.rs
+++ b/library/stdarch/src/lib.rs
@ -176,7 +176,11 @@ mod v32 {
    define_ty! { u8x4, u8, u8, u8, u8 }
    define_impl! { u8x4, u8, 4, i8x4, x0, x1, x2, x3 }

-    define_casts!((i8x4, i32x4, as_i32x4), (i16x2, i64x2, as_i64x2));
+    define_casts!(
+        (i8x4, i32x4, as_i32x4),
+        (u8x4, i32x4, as_i32x4),
+        (i16x2, i64x2, as_i64x2)
+    );
 }

 /// 16-bit wide vector types
@ -186,7 +190,13 @@ mod v16 {
    define_ty! { i8x2, i8, i8 }
    define_impl! { i8x2, i8, 2, i8x2, x0, x1 }

-    define_casts!((i8x2, i64x2, as_i64x2));
+    define_ty! { u8x2, u8, u8 }
+    define_impl! { u8x2, u8, 2, i8x2, x0, x1 }
+
+    define_casts!(
+        (i8x2, i64x2, as_i64x2),
+        (u8x2, i64x2, as_i64x2)
+    );
 }

 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
--- a/library/stdarch/src/v64.rs
+++ b/library/stdarch/src/v64.rs
@ -60,6 +60,7 @@ define_casts!(
    (u8x8, i8x8, as_i8x8),
    (i8x8, u8x8, as_u8x8),
    (i8x8, i16x8, as_i16x8),
+    (u8x8, i16x8, as_i16x8),
    (i16x4, i32x4, as_i32x4),
    (i32x2, i64x2, as_i64x2),
    (u8x8, u16x8, as_u16x8),
--- a/library/stdarch/src/x86/sse41.rs
+++ b/library/stdarch/src/x86/sse41.rs
@ -346,6 +346,30 @@ pub unsafe fn _mm_cvtepi32_epi64(a: i32x4) -> i64x2 {
    simd_shuffle2::<_, ::v64::i32x2>(a, a, [0, 1]).as_i64x2()
 }

+/// Zero extend the lower 8 unsigned 8-bit integers in `a` to packed 16-bit integers
+#[inline(always)]
+#[target_feature = "+sse4.1"]
+#[cfg_attr(test, assert_instr(pmovzxbw))]
+pub unsafe fn _mm_cvtepu8_epi16(a: u8x16) -> i16x8 {
+    simd_shuffle8::<_, ::v64::u8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]).as_i16x8()
+}
+
+/// Zero extend the lower 4 unsigned 8-bit integers in `a` to packed 32-bit integers
+#[inline(always)]
+#[target_feature = "+sse4.1"]
+#[cfg_attr(test, assert_instr(pmovzxbd))]
+pub unsafe fn _mm_cvtepu8_epi32(a: u8x16) -> i32x4 {
+    simd_shuffle4::<_, ::v32::u8x4>(a, a, [0, 1, 2, 3]).as_i32x4()
+}
+
+/// Zero extend the lower 2 unsigned 8-bit integers in `a` to packed 64-bit integers
+#[inline(always)]
+#[target_feature = "+sse4.1"]
+#[cfg_attr(test, assert_instr(pmovzxbq))]
+pub unsafe fn _mm_cvtepu8_epi64(a: u8x16) -> i64x2 {
+    simd_shuffle2::<_, ::v16::u8x2>(a, a, [0, 1]).as_i64x2()
+}
+
 /// Returns the dot product of two f64x2 vectors.
 ///
 /// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
@ -1041,6 +1065,32 @@ mod tests {
        assert_eq!(r, e);
    }

+    #[simd_test = "sse4.1"]
+    unsafe fn _mm_cvtepu8_epi16() {
+        // 200 has its top bit set: a sign-extending conversion would yield
+        // -56, so this input tells zero extension apart from sign extension.
+        let a = u8x16::splat(200);
+        let r = sse41::_mm_cvtepu8_epi16(a);
+        let e = i16x8::splat(200);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "sse4.1"]
+    unsafe fn _mm_cvtepu8_epi32() {
+        // 200 has its top bit set: a sign-extending conversion would yield
+        // -56, so this input tells zero extension apart from sign extension.
+        let a = u8x16::splat(200);
+        let r = sse41::_mm_cvtepu8_epi32(a);
+        let e = i32x4::splat(200);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "sse4.1"]
+    unsafe fn _mm_cvtepu8_epi64() {
+        // 200 has its top bit set: a sign-extending conversion would yield
+        // -56, so this input tells zero extension apart from sign extension.
+        let a = u8x16::splat(200);
+        let r = sse41::_mm_cvtepu8_epi64(a);
+        let e = i64x2::splat(200);
+        assert_eq!(r, e);
+    }
+
+
+
    #[simd_test = "sse4.1"]
    unsafe fn _mm_dp_pd() {
        let a = f64x2::new(2.0, 3.0);