diff --git a/library/stdarch/src/x86/sse41.rs b/library/stdarch/src/x86/sse41.rs index 6433cd35f4a1..885f83a8baf3 100644 --- a/library/stdarch/src/x86/sse41.rs +++ b/library/stdarch/src/x86/sse41.rs @@ -287,6 +287,24 @@ pub unsafe fn _mm_cvtepi8_epi64(a: i8x16) -> i64x2 { } */ +/// Sign extend the packed 16-bit integers in lanes 0-3 of `a` to packed 32-bit integers. +#[inline(always)] +#[target_feature = "+sse4.1"] +#[cfg_attr(test, assert_instr(pmovsxwd))] +pub unsafe fn _mm_cvtepi16_epi32(a: i16x8) -> i32x4 { + simd_cast::<::v64::i16x4, _>(simd_shuffle4(a, a, [0, 1, 2, 3])) +} + +/// Sign extend the packed 16-bit integers in lanes 0-1 of `a` to packed 64-bit integers. +/* +#[inline(always)] +#[target_feature = "+sse4.1"] +#[cfg_attr(test, assert_instr(pmovsxwq))] +pub unsafe fn _mm_cvtepi16_epi64(a: i16x8) -> i64x2 { + simd_cast::<::v32::i16x2, _>(simd_shuffle2(a, a, [0, 1])) +} +*/ + /// Returns the dot product of two f64x2 vectors. /// /// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask. @@ -825,6 +843,31 @@ mod tests { } */ + #[simd_test = "sse4.1"] + unsafe fn _mm_cvtepi16_epi32() { + let a = i16x8::splat(10); + let r = sse41::_mm_cvtepi16_epi32(a); + let e = i32x4::splat(10); + assert_eq!(r, e); + let a = i16x8::splat(-10); + let r = sse41::_mm_cvtepi16_epi32(a); + let e = i32x4::splat(-10); + assert_eq!(r, e); + } + +/* + #[simd_test = "sse4.1"] + unsafe fn _mm_cvtepi16_epi64() { + let a = i16x8::splat(10); + let r = sse41::_mm_cvtepi16_epi64(a); + let e = i64x2::splat(10); + assert_eq!(r, e); + let a = i16x8::splat(-10); + let r = sse41::_mm_cvtepi16_epi64(a); + let e = i64x2::splat(-10); + assert_eq!(r, e); + } +*/ #[simd_test = "sse4.1"] unsafe fn _mm_dp_pd() { let a = f64x2::new(2.0, 3.0);