diff --git a/library/stdarch/coresimd/powerpc/altivec.rs b/library/stdarch/coresimd/powerpc/altivec.rs index c9c40f6c4a0c..7a1826aa2652 100644 --- a/library/stdarch/coresimd/powerpc/altivec.rs +++ b/library/stdarch/coresimd/powerpc/altivec.rs @@ -81,12 +81,99 @@ extern "C" { #[link_name = "llvm.ppc.altivec.vmsumshm"] fn vmsumshm( a: vector_signed_short, b: vector_signed_short,c: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.ppc.altivec.vmaddfp"] + fn vmaddfp( + a: vector_float, b: vector_float, c: vector_float) -> vector_float; + #[link_name = "llvm.ppc.altivec.vnmsubfp"] + fn vnmsubfp( + a: vector_float, b: vector_float, c: vector_float) -> vector_float; + #[link_name = "llvm.ppc.altivec.vsum2sws"] + fn vsum2sws(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.ppc.altivec.vsum4ubs"] + fn vsum4ubs(a: vector_unsigned_char, b: vector_unsigned_int) -> vector_unsigned_int; + #[link_name = "llvm.ppc.altivec.vsum4sbs"] + fn vsum4sbs(a: vector_signed_char, b: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.ppc.altivec.vsum4shs"] + fn vsum4shs(a: vector_signed_short, b: vector_signed_int) -> vector_signed_int; } mod sealed { use super::*; + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vsum4ubs))] + unsafe fn vec_vsum4ubs(a: vector_unsigned_char, b: vector_unsigned_int) -> vector_unsigned_int { + vsum4ubs(a, b) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vsum4sbs))] + unsafe fn vec_vsum4sbs(a: vector_signed_char, b: vector_signed_int) -> vector_signed_int { + vsum4sbs(a, b) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vsum4shs))] + unsafe fn vec_vsum4shs(a: vector_signed_short, b: vector_signed_int) -> vector_signed_int { + vsum4shs(a, b) + } + + + pub trait VectorSum4s<Other> { + unsafe fn vec_sum4s(self, b: Other) -> Other; + } + + impl VectorSum4s<vector_unsigned_int> for vector_unsigned_char { + 
#[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_sum4s(self, b: vector_unsigned_int) -> vector_unsigned_int { + vsum4ubs(self, b) + } + } + + impl VectorSum4s<vector_signed_int> for vector_signed_char { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_sum4s(self, b: vector_signed_int) -> vector_signed_int { + vsum4sbs(self, b) + } + } + + impl VectorSum4s<vector_signed_int> for vector_signed_short { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_sum4s(self, b: vector_signed_int) -> vector_signed_int { + vsum4shs(self, b) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vsum2sws))] + unsafe fn vec_vsum2sws(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { + vsum2sws(a, b) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vnmsubfp))] + unsafe fn vec_vnmsubfp( + a: vector_float, b: vector_float, c: vector_float) -> vector_float { + vnmsubfp(a, b, c) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vmaddfp))] + unsafe fn vec_vmaddfp( + a: vector_float, b: vector_float, c: vector_float) -> vector_float { + vmaddfp(a, b, c) + } + #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vmsumubm))] @@ -582,6 +669,20 @@ mod endian { b.vec_vperm(a, c) } + + /// Vector Sum Across Partial (1/2) Saturated + #[inline] + #[target_feature(enable = "altivec")] + pub unsafe fn vec_sum2s(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { + // vsum2sws has big-endian bias + // + // swap the even b elements with the odd ones + let flip = ::mem::transmute(u8x16::new(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11)); + let b = vec_perm(b, b, flip); + let c = vsum2sws(a, b); + + vec_perm(c, c, flip) + } } /// Vector Multiply Add Saturated @@ -629,6 +730,29 @@ pub unsafe fn vec_msums<T, U>(a: T, b: T, c: U) -> U a.vec_msums(b, c) } +/// Vector Multiply Add +#[inline] +#[target_feature(enable = 
"altivec")] +pub unsafe fn vec_madd(a: vector_float, b: vector_float, c: vector_float) -> vector_float { + vmaddfp(a, b, c) +} + +/// Vector Negative Multiply Subtract +#[inline] +#[target_feature(enable = "altivec")] +pub unsafe fn vec_nmsub(a: vector_float, b: vector_float, c: vector_float) -> vector_float { + vnmsubfp(a, b, c) +} + +/// Vector Sum Across Partial (1/4) Saturated +#[inline] +#[target_feature(enable = "altivec")] +pub unsafe fn vec_sum4s(a: T, b: U) -> U +where + T: sealed::VectorSum4s { + a.vec_sum4s(b) +} + #[cfg(target_endian = "big")] mod endian { use super::*; @@ -641,6 +765,13 @@ mod endian { { a.vec_vperm(b, c) } + + /// Vector Sum Across Partial (1/2) Saturated + #[inline] + #[target_feature(enable = "altivec")] + pub unsafe fn vec_sum2s(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { + vsum2sws(a, b) + } } pub use self::endian::*; @@ -768,6 +899,34 @@ mod tests { assert_eq!(d, ::mem::transmute(vec_madds(a, b, c))); } + #[simd_test(enable = "altivec")] + unsafe fn test_vec_madd_float() { + let a: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4)); + let b: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4)); + let c: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4)); + let d = f32x4::new( + 0.1 * 0.1 + 0.1, + 0.2 * 0.2 + 0.2, + 0.3 * 0.3 + 0.3, + 0.4 * 0.4 + 0.4); + + assert_eq!(d, ::mem::transmute(vec_madd(a, b, c))); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_nmsub_float() { + let a: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4)); + let b: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4)); + let c: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4)); + let d = f32x4::new( + -(0.1 * 0.1 - 0.1), + -(0.2 * 0.2 - 0.2), + -(0.3 * 0.3 - 0.3), + -(0.4 * 0.4 - 0.4), + ); + assert_eq!(d, ::mem::transmute(vec_nmsub(a, b, c))); + } + #[simd_test(enable = "altivec")] unsafe fn test_vec_mradds() { let a: vector_signed_short = 
::mem::transmute(i16x8::new( @@ -991,6 +1150,109 @@ mod tests { assert_eq!(d, ::mem::transmute(vec_msums(a, b, c))); } + #[simd_test(enable = "altivec")] + unsafe fn test_vec_sum2s() { + let a: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3)); + let b: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3)); + let d = i32x4::new( + 0, + 0 + 1 + 1, + 0, + 2 + 3 + 3); + + assert_eq!(d, ::mem::transmute(vec_sum2s(a, b))); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_sum4s_unsigned_char() { + let a: vector_unsigned_char = ::mem::transmute(u8x16::new( + 0, + 1, + 2, + 3, + + 4, + 5, + 6, + 7, + + 0, + 1, + 2, + 3, + + 4, + 5, + 6, + 7, + )); + let b: vector_unsigned_int = ::mem::transmute(u32x4::new(0, 1, 2, 3)); + let d = u32x4::new( + 0 + 1 + 2 + 3 + 0, + 4 + 5 + 6 + 7 + 1, + 0 + 1 + 2 + 3 + 2, + 4 + 5 + 6 + 7 + 3, + ); + + assert_eq!(d, ::mem::transmute(vec_sum4s(a, b))); + } + #[simd_test(enable = "altivec")] + unsafe fn test_vec_sum4s_signed_char() { + let a: vector_signed_char = ::mem::transmute(i8x16::new( + 0, + 1, + 2, + 3, + + 4, + 5, + 6, + 7, + + 0, + 1, + 2, + 3, + + 4, + 5, + 6, + 7, + )); + let b: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3)); + let d = i32x4::new( + 0 + 1 + 2 + 3 + 0, + 4 + 5 + 6 + 7 + 1, + 0 + 1 + 2 + 3 + 2, + 4 + 5 + 6 + 7 + 3, + ); + + assert_eq!(d, ::mem::transmute(vec_sum4s(a, b))); + } + #[simd_test(enable = "altivec")] + unsafe fn test_vec_sum4s_signed_short() { + let a: vector_signed_short = ::mem::transmute(i16x8::new( + 0, + 1, + 2, + 3, + + 4, + 5, + 6, + 7, + )); + let b: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3)); + let d = i32x4::new( + 0 + 1 + 0, + 2 + 3 + 1, + 4 + 5 + 2, + 6 + 7 + 3, + ); + + assert_eq!(d, ::mem::transmute(vec_sum4s(a, b))); + } + #[simd_test(enable = "altivec")] unsafe fn vec_add_i32x4_i32x4() { let x = i32x4::new(1, 2, 3, 4);