diff --git a/library/stdarch/src/lib.rs b/library/stdarch/src/lib.rs index 7d6cc0916af3..a981e4107577 100644 --- a/library/stdarch/src/lib.rs +++ b/library/stdarch/src/lib.rs @@ -163,6 +163,32 @@ mod v256; mod v512; mod v64; +mod v32 { + use simd_llvm::*; + + define_ty! { i16x2, i16, i16 } + define_impl! { i16x2, i16, 2, i16x2, x0, x1 } + + define_ty! { i8x4, i8, i8, i8, i8 } + define_impl! { i8x4, i8, 4, i8x4, x0, x1, x2, x3 } + + define_casts!( + (i8x4, i32x4, as_i32x4), + (i16x2, i64x2, as_i64x2) + ); +} + +mod v16 { + use simd_llvm::*; + + define_ty! { i8x2, i8, i8 } + define_impl! { i8x2, i8, 2, i8x2, x0, x1 } + + define_casts!( + (i8x2, i64x2, as_i64x2) + ); +} + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[macro_use] mod x86; diff --git a/library/stdarch/src/x86/sse41.rs b/library/stdarch/src/x86/sse41.rs index 7703754eb4c5..b12a11f3ad89 100644 --- a/library/stdarch/src/x86/sse41.rs +++ b/library/stdarch/src/x86/sse41.rs @@ -4,7 +4,7 @@ use std::mem; #[cfg(test)] use stdsimd_test::assert_instr; -use simd_llvm::{simd_cast, simd_shuffle2, simd_shuffle4, simd_shuffle8}; +use simd_llvm::{simd_shuffle2, simd_shuffle4, simd_shuffle8}; use v128::*; @@ -273,37 +273,32 @@ pub unsafe fn _mm_cvtepi8_epi16(a: i8x16) -> i16x8 { #[target_feature = "+sse4.1"] #[cfg_attr(test, assert_instr(pmovsxbd))] pub unsafe fn _mm_cvtepi8_epi32(a: i8x16) -> i32x4 { - let cast = simd_cast::<_, ::v512::i32x16>(a); - simd_shuffle4(cast, cast, [0, 1, 2, 3]) + simd_shuffle4::<_, ::v32::i8x4>(a, a, [0, 1, 2, 3]).as_i32x4() } /// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed 64-bit integers -/* #[inline(always)] #[target_feature = "+sse4.1"] #[cfg_attr(test, assert_instr(pmovsxbq))] pub unsafe fn _mm_cvtepi8_epi64(a: i8x16) -> i64x2 { - simd_cast::<::v16::i8x2, _>(simd_shuffle2(a, a, [0, 1])) + simd_shuffle2::<_, ::v16::i8x2>(a, a, [0, 1]).as_i64x2() } -*/ /// Sign extend packed 16-bit integers in `a` to packed 32-bit integers #[inline(always)] 
#[target_feature = "+sse4.1"] #[cfg_attr(test, assert_instr(pmovsxwd))] pub unsafe fn _mm_cvtepi16_epi32(a: i16x8) -> i32x4 { - simd_cast::<::v64::i16x4, _>(simd_shuffle4(a, a, [0, 1, 2, 3])) + simd_shuffle4::<_, ::v64::i16x4>(a, a, [0, 1, 2, 3]).as_i32x4() } /// Sign extend packed 16-bit integers in a to packed 64-bit integers -/* #[inline(always)] #[target_feature = "+sse4.1"] #[cfg_attr(test, assert_instr(pmovsxwq))] pub unsafe fn _mm_cvtepi16_epi64(a: i16x8) -> i64x2 { - simd_cast::<::v32::i16x2, _>(simd_shuffle2(a, a, [0, 1])) + simd_shuffle2::<_, ::v32::i16x2>(a, a, [0, 1]).as_i64x2() } -*/ /// Returns the dot product of two f64x2 vectors. /// @@ -829,7 +824,6 @@ mod tests { assert_eq!(r, e); } - /* #[simd_test = "sse4.1"] unsafe fn _mm_cvtepi8_epi64() { let a = i8x16::splat(10); @@ -841,7 +835,6 @@ mod tests { let e = i64x2::splat(-10); assert_eq!(r, e); } - */ #[simd_test = "sse4.1"] unsafe fn _mm_cvtepi16_epi32() { @@ -855,7 +848,6 @@ mod tests { assert_eq!(r, e); } -/* #[simd_test = "sse4.1"] unsafe fn _mm_cvtepi16_epi64() { let a = i16x8::splat(10); @@ -867,7 +859,7 @@ mod tests { let e = i64x2::splat(-10); assert_eq!(r, e); } -*/ + #[simd_test = "sse4.1"] unsafe fn _mm_dp_pd() { let a = f64x2::new(2.0, 3.0);