Add _mm_cvtepi8_epi32

- This might be wrong since the cast and the shuffle needed to be inverted
2017-11-03 11:06:44 +00:00 · 2017-11-03 11:06:44 +00:00 · 37396f3471
commit 37396f3471
parent f9caf376b2
1 changed files with 22 additions and 2 deletions
--- a/library/stdarch/src/x86/sse41.rs
+++ b/library/stdarch/src/x86/sse41.rs
@ -4,7 +4,7 @@ use std::mem;

 #[cfg(test)]
 use stdsimd_test::assert_instr;
-use simd_llvm::{simd_cast, simd_shuffle8};
+use simd_llvm::{simd_cast, simd_shuffle4, simd_shuffle8};

 use v128::*;

@ -260,13 +260,21 @@ pub unsafe fn _mm_cmpeq_epi64(a: i64x2, b: i64x2) -> i64x2 {
    a.eq(b)
 }

-/// Sign extend packed 8-bit integers in a to packed 16-bit integers
+/// Sign extend packed 8-bit integers in `a` to packed 16-bit integers
 #[target_feature = "+sse4.1"]
 #[cfg_attr(test, assert_instr(pmovsxbw))]
 pub unsafe fn _mm_cvtepi8_epi16(a: i8x16) -> i16x8 {
    simd_cast::<::v64::i8x8, _>(simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]))
 }

+/// Sign extend packed 8-bit integers in `a` to packed 32-bit integers
+#[target_feature = "+sse4.1"]
+#[cfg_attr(test, assert_instr(pmovsxbd))]
+pub unsafe fn _mm_cvtepi8_epi32(a: i8x16) -> i32x4 {
+    let cast = simd_cast::<_, ::v512::i32x16>(a);
+    simd_shuffle4(cast, cast, [0, 1, 2, 3])
+}
+
 /// Returns the dot product of two f64x2 vectors.
 ///
 /// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
@ -779,6 +787,18 @@ mod tests {
        assert_eq!(r, e);
    }

+    #[simd_test = "sse4.1"]
+    unsafe fn _mm_cvtepi8_epi32() {
+        let a = i8x16::splat(10);
+        let r = sse41::_mm_cvtepi8_epi32(a);
+        let e = i32x4::splat(10);
+        assert_eq!(r, e);
+        let a = i8x16::splat(-10);
+        let r = sse41::_mm_cvtepi8_epi32(a);
+        let e = i32x4::splat(-10);
+        assert_eq!(r, e);
+    }
+
    #[simd_test = "sse4.1"]
    unsafe fn _mm_dp_pd() {
        let a = f64x2::new(2.0, 3.0);