diff --git a/library/stdarch/examples/play.rs b/library/stdarch/examples/play.rs index 21e18aadcdd4..2107c22b7d42 100644 --- a/library/stdarch/examples/play.rs +++ b/library/stdarch/examples/play.rs @@ -8,17 +8,17 @@ fn main() { let arg1: u8 = env::args().nth(1).unwrap().parse().unwrap(); let arg2: u8 = env::args().nth(2).unwrap().parse().unwrap(); let arg3: u8 = env::args().nth(3).unwrap().parse().unwrap(); - let arg4: u8 = env::args().nth(4).unwrap().parse().unwrap(); + // let arg4: u8 = env::args().nth(4).unwrap().parse().unwrap(); unsafe { s::_mm_lfence(); s::_mm_pause(); let a = s::u8x16::new( arg1, arg1, arg1, arg1, arg1, arg1, arg1, arg1, arg2, arg2, arg2, arg2, arg2, arg2, arg2, arg2); - let b = s::u8x16::new( - arg3, arg3, arg3, arg3, arg3, arg3, arg3, arg3, - arg4, arg4, arg4, arg4, arg4, arg4, arg4, arg4); - let r = s::_mm_sad_epu8(a.as_m128i(), b.as_m128i()); - println!("{:?}", s::u64x2::from(r)); + // let b = s::u8x16::new( + // arg3, arg3, arg3, arg3, arg3, arg3, arg3, arg3, + // arg4, arg4, arg4, arg4, arg4, arg4, arg4, arg4); + let r = s::_mm_slli_si128(a.as_m128i(), arg3 as i32); + println!("{:?}", s::u8x16::from(r)); } } diff --git a/library/stdarch/src/lib.rs b/library/stdarch/src/lib.rs index 7897f847e006..80c14cd16b0f 100644 --- a/library/stdarch/src/lib.rs +++ b/library/stdarch/src/lib.rs @@ -1,5 +1,7 @@ #![allow(dead_code)] -#![feature(link_llvm_intrinsics, platform_intrinsics, repr_simd, simd_ffi)] +#![feature( + const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd, simd_ffi, +)] // pub use v128::{__m128, __m128d, __m128i}; pub use v128::*; diff --git a/library/stdarch/src/x86/sse2.rs b/library/stdarch/src/x86/sse2.rs index e7b96b27153d..d2d50a3ed1d9 100644 --- a/library/stdarch/src/x86/sse2.rs +++ b/library/stdarch/src/x86/sse2.rs @@ -207,15 +207,107 @@ pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i { psadbw(u8x16::from(a), u8x16::from(b)).as_m128i() } +/// Subtract packed 8-bit integers in `b` from packed 8-bit 
integers in `a`, +/// and return the results. +#[inline] +pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i { + simd_sub(u8x16::from(a), u8x16::from(b)).as_m128i() +} +/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`, +/// and return the results. +#[inline] +pub unsafe fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i { + simd_sub(u16x8::from(a), u16x8::from(b)).as_m128i() +} +/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`, +/// and return the results. +#[inline] +pub unsafe fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i { + simd_sub(u32x4::from(a), u32x4::from(b)).as_m128i() +} +/// Subtract 64-bit integer `b` from 64-bit integer `a`, and return the result. +#[inline] +unsafe fn _mm_sub_si64(_a: __m64, _b: __m64) -> __m64 { + unimplemented!() +} +/// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`, +/// and return the results. +#[inline] +pub unsafe fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i { + simd_sub(u64x2::from(a), u64x2::from(b)).as_m128i() +} +/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a` +/// using saturation, and return the results. +#[inline] +pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i { + psubsb(i8x16::from(a), i8x16::from(b)).as_m128i() +} +/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a` +/// using saturation, and return the results. +#[inline] +pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i { + psubsw(i16x8::from(a), i16x8::from(b)).as_m128i() +} +/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit +/// integers in `a` using saturation, and return the results. 
+#[inline] +pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i { + psubusb(u8x16::from(a), u8x16::from(b)).as_m128i() +} +/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned 16-bit +/// integers in `a` using saturation, and return the results. +#[inline] +pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i { + psubusw(u16x8::from(a), u16x8::from(b)).as_m128i() +} +/// Shift `a` left by `imm8` bytes while shifting in zeros, and return the +/// results. +#[inline] +pub unsafe fn _mm_slli_si128(a: __m128i, imm8: i32) -> __m128i { + let (a, zero, imm8) = (u8x16::from(a), u8x16::splat(0), imm8 as u32); + const fn sub(a: u32, b: u32) -> u32 { a - b } + macro_rules! shuffle { + ($shift:expr) => { + simd_shuffle16::<u8x16, u8x16>(zero, a, [ + sub(16, $shift), sub(17, $shift), + sub(18, $shift), sub(19, $shift), + sub(20, $shift), sub(21, $shift), + sub(22, $shift), sub(23, $shift), + sub(24, $shift), sub(25, $shift), + sub(26, $shift), sub(27, $shift), + sub(28, $shift), sub(29, $shift), + sub(30, $shift), sub(31, $shift), + ]) + } + } + match imm8 { + 0 => shuffle!(0), 1 => shuffle!(1), + 2 => shuffle!(2), 3 => shuffle!(3), + 4 => shuffle!(4), 5 => shuffle!(5), + 6 => shuffle!(6), 7 => shuffle!(7), + 8 => shuffle!(8), 9 => shuffle!(9), + 10 => shuffle!(10), 11 => shuffle!(11), + 12 => shuffle!(12), 13 => shuffle!(13), + 14 => shuffle!(14), 15 => shuffle!(15), + _ => shuffle!(16), + }.as_m128i() +} + +/// Shift `a` left by `imm8` bytes while shifting in zeros, and return the +/// results. 
+#[inline] +pub unsafe fn _mm_bslli_si128(a: __m128i, imm8: i32) -> __m128i { + _mm_slli_si128(a, imm8) +} @@ -281,6 +373,14 @@ extern { pub fn pmuludq(a: u32x4, b: u32x4) -> u64x2; #[link_name = "llvm.x86.sse2.psad.bw"] pub fn psadbw(a: u8x16, b: u8x16) -> u64x2; + #[link_name = "llvm.x86.sse2.psubs.b"] + pub fn psubsb(a: i8x16, b: i8x16) -> i8x16; + #[link_name = "llvm.x86.sse2.psubs.w"] + pub fn psubsw(a: i16x8, b: i16x8) -> i16x8; + #[link_name = "llvm.x86.sse2.psubus.b"] + pub fn psubusb(a: u8x16, b: u8x16) -> u8x16; + #[link_name = "llvm.x86.sse2.psubus.w"] + pub fn psubusw(a: u16x8, b: u16x8) -> u16x8; } #[cfg(test)] @@ -288,7 +388,6 @@ mod tests { use std::os::raw::c_void; use v128::*; - use v64::*; use x86::sse2 as sse2; #[test] @@ -350,15 +449,6 @@ mod tests { assert_eq!(u32x4::from(r), e); } - #[test] - #[ignore] - fn _mm_add_si64() { - let (a, b) = (u64x1::new(1), u64x1::new(2)); - let r = unsafe { sse2::_mm_add_si64(a.as_m64(), b.as_m64()) }; - let e = u64x1::new(3); - assert_eq!(u64x1::from(r), e); - } - #[test] fn _mm_add_epi64() { let a = u64x2::new(0, 1); @@ -534,16 +624,6 @@ mod tests { assert_eq!(i16x8::from(r), i16x8::splat(-17960)); } - #[test] - #[ignore] - fn _mm_mul_su32() { - let a = u32x2::new(1_000_000_000, 3); - let b = u32x2::new(1_000_000_000, 4); - let r = unsafe { sse2::_mm_mul_su32(a.as_m64(), b.as_m64()) }; - let e = u64x1::new(1_000_000_000 * 1_000_000_000); - assert_eq!(u64x1::from(r), e); - } - #[test] fn _mm_mul_epu32() { let a = u64x2::new(1_000_000_000, 1 << 34); @@ -565,4 +645,168 @@ mod tests { let e = u64x2::new(1020, 614); assert_eq!(u64x2::from(r), e); } + + #[test] + fn _mm_sub_epi8() { + let (a, b) = (u8x16::splat(5), u8x16::splat(2)); + let r = unsafe { sse2::_mm_sub_epi8(a.as_m128i(), b.as_m128i()) }; + assert_eq!(u8x16::from(r), u8x16::splat(3)); + } + + #[test] + fn _mm_sub_epi8_underflow() { + let (a, b) = (u8x16::splat(5), u8x16::splat(6)); + let r = unsafe { sse2::_mm_sub_epi8(a.as_m128i(), b.as_m128i()) }; + 
assert_eq!(u8x16::from(r), u8x16::splat(0xFF)); + } + + #[test] + fn _mm_sub_epi16() { + let (a, b) = (u16x8::splat(5), u16x8::splat(2)); + let r = unsafe { sse2::_mm_sub_epi16(a.as_m128i(), b.as_m128i()) }; + assert_eq!(u16x8::from(r), u16x8::splat(3)); + } + + #[test] + fn _mm_sub_epi16_underflow() { + let (a, b) = (u16x8::splat(5), u16x8::splat(6)); + let r = unsafe { sse2::_mm_sub_epi16(a.as_m128i(), b.as_m128i()) }; + assert_eq!(u16x8::from(r), u16x8::splat(0xFFFF)); + } + + #[test] + fn _mm_sub_epi32() { + let (a, b) = (u32x4::splat(5), u32x4::splat(2)); + let r = unsafe { sse2::_mm_sub_epi32(a.as_m128i(), b.as_m128i()) }; + assert_eq!(u32x4::from(r), u32x4::splat(3)); + } + + #[test] + fn _mm_sub_epi32_underflow() { + let (a, b) = (u32x4::splat(5), u32x4::splat(6)); + let r = unsafe { sse2::_mm_sub_epi32(a.as_m128i(), b.as_m128i()) }; + assert_eq!(u32x4::from(r), u32x4::splat(0xFFFFFFFF)); + } + + #[test] + fn _mm_sub_epi64() { + let (a, b) = (u64x2::splat(5), u64x2::splat(2)); + let r = unsafe { sse2::_mm_sub_epi64(a.as_m128i(), b.as_m128i()) }; + assert_eq!(u64x2::from(r), u64x2::splat(3)); + } + + #[test] + fn _mm_sub_epi64_underflow() { + let (a, b) = (u64x2::splat(5), u64x2::splat(6)); + let r = unsafe { sse2::_mm_sub_epi64(a.as_m128i(), b.as_m128i()) }; + assert_eq!(u64x2::from(r), u64x2::splat(0xFFFFFFFFFFFFFFFF)); + } + + #[test] + fn _mm_subs_epi8() { + let (a, b) = (i8x16::splat(5), i8x16::splat(2)); + let r = unsafe { sse2::_mm_subs_epi8(a.as_m128i(), b.as_m128i()) }; + assert_eq!(i8x16::from(r), i8x16::splat(3)); + } + + #[test] + fn _mm_subs_epi8_saturate_positive() { + let a = i8x16::splat(0x7F); + let b = i8x16::splat(-1); + let r = unsafe { sse2::_mm_subs_epi8(a.as_m128i(), b.as_m128i()) }; + assert_eq!(i8x16::from(r), a); + } + + #[test] + fn _mm_subs_epi8_saturate_negative() { + let a = i8x16::splat(-0x80); + let b = i8x16::splat(1); + let r = unsafe { sse2::_mm_subs_epi8(a.as_m128i(), b.as_m128i()) }; + assert_eq!(i8x16::from(r), a); + } + 
+ #[test] + fn _mm_subs_epi16() { + let (a, b) = (i16x8::splat(5), i16x8::splat(2)); + let r = unsafe { sse2::_mm_subs_epi16(a.as_m128i(), b.as_m128i()) }; + assert_eq!(i16x8::from(r), i16x8::splat(3)); + } + + #[test] + fn _mm_subs_epi16_saturate_positive() { + let a = i16x8::splat(0x7FFF); + let b = i16x8::splat(-1); + let r = unsafe { sse2::_mm_subs_epi16(a.as_m128i(), b.as_m128i()) }; + assert_eq!(i16x8::from(r), a); + } + + #[test] + fn _mm_subs_epi16_saturate_negative() { + let a = i16x8::splat(-0x8000); + let b = i16x8::splat(1); + let r = unsafe { sse2::_mm_subs_epi16(a.as_m128i(), b.as_m128i()) }; + assert_eq!(i16x8::from(r), a); + } + + #[test] + fn _mm_subs_epu8() { + let (a, b) = (u8x16::splat(5), u8x16::splat(2)); + let r = unsafe { sse2::_mm_subs_epu8(a.as_m128i(), b.as_m128i()) }; + assert_eq!(u8x16::from(r), u8x16::splat(3)); + } + + #[test] + fn _mm_subs_epu8_saturate() { + let a = u8x16::splat(0); + let b = u8x16::splat(1); + let r = unsafe { sse2::_mm_subs_epu8(a.as_m128i(), b.as_m128i()) }; + assert_eq!(u8x16::from(r), a); + } + + #[test] + fn _mm_subs_epu16() { + let (a, b) = (u16x8::splat(5), u16x8::splat(2)); + let r = unsafe { sse2::_mm_subs_epu16(a.as_m128i(), b.as_m128i()) }; + assert_eq!(u16x8::from(r), u16x8::splat(3)); + } + + #[test] + fn _mm_subs_epu16_saturate() { + let a = u16x8::splat(0); + let b = u16x8::splat(1); + let r = unsafe { sse2::_mm_subs_epu16(a.as_m128i(), b.as_m128i()) }; + assert_eq!(u16x8::from(r), a); + } + + #[test] + fn _mm_slli_si128() { + let a = u8x16::new( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = unsafe { sse2::_mm_slli_si128(a.as_m128i(), 1) }; + let e = u8x16::new( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq!(u8x16::from(r), e); + + let a = u8x16::new( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = unsafe { sse2::_mm_slli_si128(a.as_m128i(), 15) }; + let e = u8x16::new( + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + 
assert_eq!(u8x16::from(r), e); + + let a = u8x16::new( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = unsafe { sse2::_mm_slli_si128(a.as_m128i(), 16) }; + assert_eq!(u8x16::from(r), u8x16::splat(0)); + + let a = u8x16::new( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = unsafe { sse2::_mm_slli_si128(a.as_m128i(), -1) }; + assert_eq!(u8x16::from(r), u8x16::splat(0)); + + let a = u8x16::new( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = unsafe { sse2::_mm_slli_si128(a.as_m128i(), -0x80000000) }; + assert_eq!(u8x16::from(r), u8x16::splat(0)); + } }