Add more mixed arithmetic instructions (#541)
* Add Vector Multiply Add * Add Vector Negative Multiply Subtract * Add Vector Sum Across Partial (1/2) Saturated * Add Vector Sum Across Partial (1/4) Saturated
This commit is contained in:
parent
4db62cdf71
commit
03a4a3ae30
1 changed files with 262 additions and 0 deletions
|
|
@ -81,12 +81,99 @@ extern "C" {
|
|||
#[link_name = "llvm.ppc.altivec.vmsumshm"]
|
||||
fn vmsumshm(
|
||||
a: vector_signed_short, b: vector_signed_short,c: vector_signed_int) -> vector_signed_int;
|
||||
#[link_name = "llvm.ppc.altivec.vmaddfp"]
|
||||
fn vmaddfp(
|
||||
a: vector_float, b: vector_float, c: vector_float) -> vector_float;
|
||||
#[link_name = "llvm.ppc.altivec.vnmsubfp"]
|
||||
fn vnmsubfp(
|
||||
a: vector_float, b: vector_float, c: vector_float) -> vector_float;
|
||||
#[link_name = "llvm.ppc.altivec.vsum2sws"]
|
||||
fn vsum2sws(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int;
|
||||
#[link_name = "llvm.ppc.altivec.vsum4ubs"]
|
||||
fn vsum4ubs(a: vector_unsigned_char, b: vector_unsigned_int) -> vector_unsigned_int;
|
||||
#[link_name = "llvm.ppc.altivec.vsum4sbs"]
|
||||
fn vsum4sbs(a: vector_signed_char, b: vector_signed_int) -> vector_signed_int;
|
||||
#[link_name = "llvm.ppc.altivec.vsum4shs"]
|
||||
fn vsum4shs(a: vector_signed_short, b: vector_signed_int) -> vector_signed_int;
|
||||
}
|
||||
|
||||
mod sealed {
|
||||
|
||||
use super::*;
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
#[cfg_attr(test, assert_instr(vsum4ubs))]
|
||||
unsafe fn vec_vsum4ubs(a: vector_unsigned_char, b: vector_unsigned_int) -> vector_unsigned_int {
|
||||
vsum4ubs(a, b)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
#[cfg_attr(test, assert_instr(vsum4sbs))]
|
||||
unsafe fn vec_vsum4sbs(a: vector_signed_char, b: vector_signed_int) -> vector_signed_int {
|
||||
vsum4sbs(a, b)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
#[cfg_attr(test, assert_instr(vsum4shs))]
|
||||
unsafe fn vec_vsum4shs(a: vector_signed_short, b: vector_signed_int) -> vector_signed_int {
|
||||
vsum4shs(a, b)
|
||||
}
|
||||
|
||||
|
||||
/// Dispatch trait behind the public `vec_sum4s` function.
///
/// `Self` is the vector whose elements are summed; `Other` is both the type
/// of the second operand and the type of the result.
pub trait VectorSum4s<Other> {
    /// Combines `self` with `b` and returns a vector of the same type as `b`.
    unsafe fn vec_sum4s(self, b: Other) -> Other;
}
|
||||
|
||||
impl VectorSum4s<vector_unsigned_int> for vector_unsigned_char {
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
unsafe fn vec_sum4s(self, b: vector_unsigned_int) -> vector_unsigned_int {
|
||||
vsum4ubs(self, b)
|
||||
}
|
||||
}
|
||||
|
||||
impl VectorSum4s<vector_signed_int> for vector_signed_char {
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
unsafe fn vec_sum4s(self, b: vector_signed_int) -> vector_signed_int {
|
||||
vsum4sbs(self, b)
|
||||
}
|
||||
}
|
||||
|
||||
impl VectorSum4s<vector_signed_int> for vector_signed_short {
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
unsafe fn vec_sum4s(self, b: vector_signed_int) -> vector_signed_int {
|
||||
vsum4shs(self, b)
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
#[cfg_attr(test, assert_instr(vsum2sws))]
|
||||
unsafe fn vec_vsum2sws(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
|
||||
vsum2sws(a, b)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
#[cfg_attr(test, assert_instr(vnmsubfp))]
|
||||
unsafe fn vec_vnmsubfp(
|
||||
a: vector_float, b: vector_float, c: vector_float) -> vector_float {
|
||||
vnmsubfp(a, b, c)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
#[cfg_attr(test, assert_instr(vmaddfp))]
|
||||
unsafe fn vec_vmaddfp(
|
||||
a: vector_float, b: vector_float, c: vector_float) -> vector_float {
|
||||
vmaddfp(a, b, c)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
#[cfg_attr(test, assert_instr(vmsumubm))]
|
||||
|
|
@ -582,6 +669,20 @@ mod endian {
|
|||
|
||||
b.vec_vperm(a, c)
|
||||
}
|
||||
|
||||
/// Vector Sum Across Partial (1/2) Saturated
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
pub unsafe fn vec_sum2s(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
|
||||
// vsum2sws has big-endian bias
|
||||
//
|
||||
// swap the even b elements with the odd ones
|
||||
let flip = ::mem::transmute(u8x16::new(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11));
|
||||
let b = vec_perm(b, b, flip);
|
||||
let c = vsum2sws(a, b);
|
||||
|
||||
vec_perm(c, c, flip)
|
||||
}
|
||||
}
|
||||
|
||||
/// Vector Multiply Add Saturated
|
||||
|
|
@ -629,6 +730,29 @@ pub unsafe fn vec_msums<T, U>(a: T, b: T, c: U) -> U
|
|||
a.vec_msums(b, c)
|
||||
}
|
||||
|
||||
/// Vector Multiply Add
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
pub unsafe fn vec_madd(a: vector_float, b: vector_float, c: vector_float) -> vector_float {
|
||||
vmaddfp(a, b, c)
|
||||
}
|
||||
|
||||
/// Vector Negative Multiply Subtract
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
pub unsafe fn vec_nmsub(a: vector_float, b: vector_float, c: vector_float) -> vector_float {
|
||||
vnmsubfp(a, b, c)
|
||||
}
|
||||
|
||||
/// Vector Sum Across Partial (1/4) Saturated
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
pub unsafe fn vec_sum4s<T, U>(a: T, b: U) -> U
|
||||
where
|
||||
T: sealed::VectorSum4s<U> {
|
||||
a.vec_sum4s(b)
|
||||
}
|
||||
|
||||
#[cfg(target_endian = "big")]
|
||||
mod endian {
|
||||
use super::*;
|
||||
|
|
@ -641,6 +765,13 @@ mod endian {
|
|||
{
|
||||
a.vec_vperm(b, c)
|
||||
}
|
||||
|
||||
/// Vector Sum Across Partial (1/2) Saturated
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
pub unsafe fn vec_sum2s(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
|
||||
vsum2sws(a, b)
|
||||
}
|
||||
}
|
||||
|
||||
pub use self::endian::*;
|
||||
|
|
@ -768,6 +899,34 @@ mod tests {
|
|||
assert_eq!(d, ::mem::transmute(vec_madds(a, b, c)));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "altivec")]
|
||||
unsafe fn test_vec_madd_float() {
|
||||
let a: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
|
||||
let b: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
|
||||
let c: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
|
||||
let d = f32x4::new(
|
||||
0.1 * 0.1 + 0.1,
|
||||
0.2 * 0.2 + 0.2,
|
||||
0.3 * 0.3 + 0.3,
|
||||
0.4 * 0.4 + 0.4);
|
||||
|
||||
assert_eq!(d, ::mem::transmute(vec_madd(a, b, c)));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "altivec")]
|
||||
unsafe fn test_vec_nmsub_float() {
|
||||
let a: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
|
||||
let b: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
|
||||
let c: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
|
||||
let d = f32x4::new(
|
||||
-(0.1 * 0.1 - 0.1),
|
||||
-(0.2 * 0.2 - 0.2),
|
||||
-(0.3 * 0.3 - 0.3),
|
||||
-(0.4 * 0.4 - 0.4),
|
||||
);
|
||||
assert_eq!(d, ::mem::transmute(vec_nmsub(a, b, c)));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "altivec")]
|
||||
unsafe fn test_vec_mradds() {
|
||||
let a: vector_signed_short = ::mem::transmute(i16x8::new(
|
||||
|
|
@ -991,6 +1150,109 @@ mod tests {
|
|||
assert_eq!(d, ::mem::transmute(vec_msums(a, b, c)));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "altivec")]
|
||||
unsafe fn test_vec_sum2s() {
|
||||
let a: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
|
||||
let b: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
|
||||
let d = i32x4::new(
|
||||
0,
|
||||
0 + 1 + 1,
|
||||
0,
|
||||
2 + 3 + 3);
|
||||
|
||||
assert_eq!(d, ::mem::transmute(vec_sum2s(a, b)));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "altivec")]
|
||||
unsafe fn test_vec_sum4s_unsigned_char() {
|
||||
let a: vector_unsigned_char = ::mem::transmute(u8x16::new(
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
));
|
||||
let b: vector_unsigned_int = ::mem::transmute(u32x4::new(0, 1, 2, 3));
|
||||
let d = u32x4::new(
|
||||
0 + 1 + 2 + 3 + 0,
|
||||
4 + 5 + 6 + 7 + 1,
|
||||
0 + 1 + 2 + 3 + 2,
|
||||
4 + 5 + 6 + 7 + 3,
|
||||
);
|
||||
|
||||
assert_eq!(d, ::mem::transmute(vec_sum4s(a, b)));
|
||||
}
|
||||
#[simd_test(enable = "altivec")]
|
||||
unsafe fn test_vec_sum4s_signed_char() {
|
||||
let a: vector_signed_char = ::mem::transmute(i8x16::new(
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
));
|
||||
let b: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
|
||||
let d = i32x4::new(
|
||||
0 + 1 + 2 + 3 + 0,
|
||||
4 + 5 + 6 + 7 + 1,
|
||||
0 + 1 + 2 + 3 + 2,
|
||||
4 + 5 + 6 + 7 + 3,
|
||||
);
|
||||
|
||||
assert_eq!(d, ::mem::transmute(vec_sum4s(a, b)));
|
||||
}
|
||||
#[simd_test(enable = "altivec")]
|
||||
unsafe fn test_vec_sum4s_signed_short() {
|
||||
let a: vector_signed_short = ::mem::transmute(i16x8::new(
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
));
|
||||
let b: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
|
||||
let d = i32x4::new(
|
||||
0 + 1 + 0,
|
||||
2 + 3 + 1,
|
||||
4 + 5 + 2,
|
||||
6 + 7 + 3,
|
||||
);
|
||||
|
||||
assert_eq!(d, ::mem::transmute(vec_sum4s(a, b)));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "altivec")]
|
||||
unsafe fn vec_add_i32x4_i32x4() {
|
||||
let x = i32x4::new(1, 2, 3, 4);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue