diff --git a/library/stdarch/src/lib.rs b/library/stdarch/src/lib.rs
index 2ea9d96867a2..509935e42e46 100644
--- a/library/stdarch/src/lib.rs
+++ b/library/stdarch/src/lib.rs
@@ -169,17 +169,19 @@ mod v32 {
     define_ty! { i16x2, i16, i16 }
     define_impl! { i16x2, i16, 2, i16x2, x0, x1 }
+    define_ty! { u16x2, u16, u16 }
+    define_impl! { u16x2, u16, 2, i16x2, x0, x1 }
     define_ty! { i8x4, i8, i8, i8, i8 }
     define_impl! { i8x4, i8, 4, i8x4, x0, x1, x2, x3 }
-
     define_ty! { u8x4, u8, u8, u8, u8 }
     define_impl! { u8x4, u8, 4, i8x4, x0, x1, x2, x3 }
 
     define_casts!(
+        (i16x2, i64x2, as_i64x2),
+        (u16x2, i64x2, as_i64x2),
         (i8x4, i32x4, as_i32x4),
-        (u8x4, i32x4, as_i32x4),
-        (i16x2, i64x2, as_i64x2)
+        (u8x4, i32x4, as_i32x4)
     );
 }
 
@@ -189,7 +191,6 @@ mod v16 {
     define_ty! { i8x2, i8, i8 }
     define_impl! { i8x2, i8, 2, i8x2, x0, x1 }
-
     define_ty! { u8x2, u8, u8 }
     define_impl! { u8x2, u8, 2, i8x2, x0, x1 }
 
diff --git a/library/stdarch/src/v64.rs b/library/stdarch/src/v64.rs
index 9b4670bae970..c1e346d1b23a 100644
--- a/library/stdarch/src/v64.rs
+++ b/library/stdarch/src/v64.rs
@@ -65,7 +65,9 @@ define_casts!(
     (i32x2, i64x2, as_i64x2),
     (u8x8, u16x8, as_u16x8),
     (u16x4, u32x4, as_u32x4),
-    (u32x2, u64x2, as_u64x2)
+    (u16x4, i32x4, as_i32x4),
+    (u32x2, u64x2, as_u64x2),
+    (u32x2, i64x2, as_i64x2)
 );
 
 #[cfg(test)]
diff --git a/library/stdarch/src/x86/sse41.rs b/library/stdarch/src/x86/sse41.rs
index 23d25819994a..06493a7526b4 100644
--- a/library/stdarch/src/x86/sse41.rs
+++ b/library/stdarch/src/x86/sse41.rs
@@ -370,6 +370,30 @@ pub unsafe fn _mm_cvtepu8_epi64(a: u8x16) -> i64x2 {
     simd_shuffle2::<_, ::v16::u8x2>(a, a, [0, 1]).as_i64x2()
 }
 
+/// Zero extend packed unsigned 16-bit integers in `a` to packed 32-bit integers
+#[inline(always)]
+#[target_feature = "+sse4.1"]
+#[cfg_attr(test, assert_instr(pmovzxwd))]
+pub unsafe fn _mm_cvtepu16_epi32(a: u16x8) -> i32x4 {
+    simd_shuffle4::<_, ::v64::u16x4>(a, a, [0, 1, 2, 3]).as_i32x4()
+}
+
+/// Zero extend packed unsigned 16-bit integers in `a` to packed 64-bit integers
+#[inline(always)]
+#[target_feature = "+sse4.1"]
+#[cfg_attr(test, assert_instr(pmovzxwq))]
+pub unsafe fn _mm_cvtepu16_epi64(a: u16x8) -> i64x2 {
+    simd_shuffle2::<_, ::v32::u16x2>(a, a, [0, 1]).as_i64x2()
+}
+
+/// Zero extend packed unsigned 32-bit integers in `a` to packed 64-bit integers
+#[inline(always)]
+#[target_feature = "+sse4.1"]
+#[cfg_attr(test, assert_instr(pmovzxdq))]
+pub unsafe fn _mm_cvtepu32_epi64(a: u32x4) -> i64x2 {
+    simd_shuffle2::<_, ::v64::u32x2>(a, a, [0, 1]).as_i64x2()
+}
+
 /// Returns the dot product of two f64x2 vectors.
 ///
 /// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
@@ -1081,7 +1105,7 @@ mod tests {
         assert_eq!(r, e);
     }
 
-    #[simd_test = "sse4.1"]
+    #[simd_test = "sse4.1"]
     unsafe fn _mm_cvtepu8_epi64() {
         let a = u8x16::splat(10);
         let r = sse41::_mm_cvtepu8_epi64(a);
@@ -1088,6 +1112,28 @@ mod tests {
         let e = i64x2::splat(10);
         assert_eq!(r, e);
     }
+    #[simd_test = "sse4.1"]
+    unsafe fn _mm_cvtepu16_epi32() {
+        let a = u16x8::splat(10);
+        let r = sse41::_mm_cvtepu16_epi32(a);
+        let e = i32x4::splat(10);
+        assert_eq!(r, e);
+    }
+    #[simd_test = "sse4.1"]
+    unsafe fn _mm_cvtepu16_epi64() {
+        let a = u16x8::splat(10);
+        let r = sse41::_mm_cvtepu16_epi64(a);
+        let e = i64x2::splat(10);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "sse4.1"]
+    unsafe fn _mm_cvtepu32_epi64() {
+        let a = u32x4::splat(10);
+        let r = sse41::_mm_cvtepu32_epi64(a);
+        let e = i64x2::splat(10);
+        assert_eq!(r, e);
+    }
 
     #[simd_test = "sse4.1"]
     unsafe fn _mm_dp_pd() {