Add remaining _mm_cvtep* intrinsics

This commit is contained in:
André Oliveira 2017-11-06 15:45:33 +00:00 committed by gnzlbg
parent ac11d6941d
commit 613cacb317
3 changed files with 55 additions and 6 deletions

View file

@ -169,17 +169,19 @@ mod v32 {
// 32-bit wide vector types: two 16-bit lanes and four 8-bit lanes,
// each in a signed and an unsigned flavor.
// NOTE(review): `define_impl!` arguments appear to be: type, element
// type, lane count, the signed counterpart type (used by the unsigned
// variants too), then one accessor name per lane — TODO confirm against
// the macro definition, which is not visible in this chunk.
define_ty! { i16x2, i16, i16 }
define_impl! { i16x2, i16, 2, i16x2, x0, x1 }
define_ty! { u16x2, u16, u16 }
define_impl! { u16x2, u16, 2, i16x2, x0, x1 }
define_ty! { i8x4, i8, i8, i8, i8 }
define_impl! { i8x4, i8, 4, i8x4, x0, x1, x2, x3 }
define_ty! { u8x4, u8, u8, u8, u8 }
define_impl! { u8x4, u8, 4, i8x4, x0, x1, x2, x3 }
// Widening casts from the 32-bit-wide vector types, used by the
// `_mm_cvtep*` intrinsics below.
//
// Fix: the stripped diff left the two pre-change entries
// `(i16x2, i64x2, as_i64x2)` and `(u8x4, i32x4, as_i32x4)` in place
// after their replacements, duplicating existing entries and — with the
// missing separating comma — breaking the macro invocation. Keep each
// cast exactly once.
define_casts!(
    (i16x2, i64x2, as_i64x2),
    (u16x2, i64x2, as_i64x2),
    (i8x4, i32x4, as_i32x4),
    (u8x4, i32x4, as_i32x4)
);
}
@ -189,7 +191,6 @@ mod v16 {
// 16-bit wide vector types: two 8-bit lanes, signed and unsigned.
// NOTE(review): `u8x2` passes `i8x2` as its fourth `define_impl!`
// argument, matching the pattern of the other unsigned types above —
// presumably the signed counterpart; the macro itself is not visible
// here, so confirm before relying on that reading.
define_ty! { i8x2, i8, i8 }
define_impl! { i8x2, i8, 2, i8x2, x0, x1 }
define_ty! { u8x2, u8, u8 }
define_impl! { u8x2, u8, 2, i8x2, x0, x1 }

View file

@ -65,7 +65,9 @@ define_casts!(
(i32x2, i64x2, as_i64x2),
(u8x8, u16x8, as_u16x8),
(u16x4, u32x4, as_u32x4),
(u32x2, u64x2, as_u64x2)
(u16x4, i32x4, as_i32x4),
(u32x2, u64x2, as_u64x2),
(u32x2, i64x2, as_i64x2)
);
#[cfg(test)]

View file

@ -370,6 +370,30 @@ pub unsafe fn _mm_cvtepu8_epi64(a: u8x16) -> i64x2 {
simd_shuffle2::<_, ::v16::u8x2>(a, a, [0, 1]).as_i64x2()
}
/// Zero extend packed unsigned 16-bit integers in `a` to packed 32-bit integers
#[inline(always)]
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(pmovzxwd))]
pub unsafe fn _mm_cvtepu16_epi32(a: u16x8) -> i32x4 {
    // Select the four low u16 lanes, then zero-extend them into i32
    // lanes via the `as_i32x4` cast defined in `define_casts!`.
    let low_lanes: ::v64::u16x4 = simd_shuffle4(a, a, [0, 1, 2, 3]);
    low_lanes.as_i32x4()
}
/// Zero extend packed unsigned 16-bit integers in `a` to packed 64-bit integers
#[inline(always)]
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(pmovzxwq))]
pub unsafe fn _mm_cvtepu16_epi64(a: u16x8) -> i64x2 {
    // Select the two low u16 lanes, then zero-extend them into i64
    // lanes via the `as_i64x2` cast defined in `define_casts!`.
    let low_lanes: ::v32::u16x2 = simd_shuffle2(a, a, [0, 1]);
    low_lanes.as_i64x2()
}
/// Zero extend packed unsigned 32-bit integers in `a` to packed 64-bit integers
#[inline(always)]
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(pmovzxdq))]
pub unsafe fn _mm_cvtepu32_epi64(a: u32x4) -> i64x2 {
    // Select the two low u32 lanes, then zero-extend them into i64
    // lanes via the `as_i64x2` cast defined in `define_casts!`.
    let low_lanes: ::v64::u32x2 = simd_shuffle2(a, a, [0, 1]);
    low_lanes.as_i64x2()
}
/// Returns the dot product of two f64x2 vectors.
///
/// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
@ -1081,7 +1105,7 @@ mod tests {
assert_eq!(r, e);
}
// Fix: the stripped diff left the `#[simd_test = "sse4.1"]` attribute
// duplicated on this test; a single occurrence is correct.
#[simd_test = "sse4.1"]
unsafe fn _mm_cvtepu8_epi64() {
let a = u8x16::splat(10);
let r = sse41::_mm_cvtepu8_epi64(a);
@ -1089,7 +1113,29 @@ mod tests {
assert_eq!(r, e);
}
#[simd_test = "sse4.1"]
unsafe fn _mm_cvtepu16_epi32() {
    // Every u16 lane holds 10; zero-extension must preserve the value
    // in each widened i32 lane.
    let input = u16x8::splat(10);
    let expected = i32x4::splat(10);
    assert_eq!(sse41::_mm_cvtepu16_epi32(input), expected);
}
#[simd_test = "sse4.1"]
unsafe fn _mm_cvtepu16_epi64() {
    // Every u16 lane holds 10; zero-extension must preserve the value
    // in each widened i64 lane.
    let input = u16x8::splat(10);
    let expected = i64x2::splat(10);
    assert_eq!(sse41::_mm_cvtepu16_epi64(input), expected);
}
#[simd_test = "sse4.1"]
unsafe fn _mm_cvtepu32_epi64() {
    // Every u32 lane holds 10; zero-extension must preserve the value
    // in each widened i64 lane.
    let input = u32x4::splat(10);
    let expected = i64x2::splat(10);
    assert_eq!(sse41::_mm_cvtepu32_epi64(input), expected);
}
#[simd_test = "sse4.1"]
unsafe fn _mm_dp_pd() {