From 217f89bc4fa654b5ac8e19206adc7e0e8d99b276 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 27 Feb 2018 08:41:07 -0600 Subject: [PATCH] Reorganize the x86/x86_64 intrinsic folders (#334) The public API isn't changing in this commit but the internal organization is being rejiggered. Instead of `x86/$subtarget/$feature.rs` the folders are changed to `coresimd/x86/$feature.rs` and `coresimd/x86_64/$feature.rs`. The `arch::x86_64` then reexports both the contents of the `x86` module and the `x86_64` module. --- library/stdarch/coresimd/mod.rs | 3 + .../stdarch/coresimd/x86/{i586 => }/abm.rs | 10 +- .../stdarch/coresimd/x86/{i686 => }/aes.rs | 0 .../stdarch/coresimd/x86/{i586 => }/avx.rs | 0 .../stdarch/coresimd/x86/{i586 => }/avx2.rs | 0 .../stdarch/coresimd/x86/{i586 => }/bmi.rs | 46 +- .../stdarch/coresimd/x86/{i586 => }/bmi2.rs | 22 +- .../stdarch/coresimd/x86/{i386 => }/bswap.rs | 0 .../stdarch/coresimd/x86/{i586 => }/cpuid.rs | 5 +- .../stdarch/coresimd/x86/{i386 => }/eflags.rs | 2 +- .../stdarch/coresimd/x86/{i386 => }/fxsr.rs | 0 library/stdarch/coresimd/x86/i386/mod.rs | 15 - library/stdarch/coresimd/x86/i586/mod.rs | 39 - library/stdarch/coresimd/x86/i686/mod.rs | 33 - library/stdarch/coresimd/x86/i686/sse.rs | 710 ------------------ library/stdarch/coresimd/x86/i686/sse2.rs | 225 ------ library/stdarch/coresimd/x86/i686/sse41.rs | 240 ------ library/stdarch/coresimd/x86/i686/sse42.rs | 35 - library/stdarch/coresimd/x86/i686/ssse3.rs | 361 --------- .../stdarch/coresimd/x86/{i686 => }/mmx.rs | 0 library/stdarch/coresimd/x86/mod.rs | 82 +- .../coresimd/x86/{i686 => }/pclmulqdq.rs | 0 .../stdarch/coresimd/x86/{i686 => }/rdrand.rs | 0 .../stdarch/coresimd/x86/{i386 => }/rdtsc.rs | 2 +- .../stdarch/coresimd/x86/{i586 => }/sse.rs | 694 +++++++++++++++++ .../stdarch/coresimd/x86/{i586 => }/sse2.rs | 206 ++++- .../stdarch/coresimd/x86/{i586 => }/sse3.rs | 0 .../stdarch/coresimd/x86/{i586 => }/sse41.rs | 223 ++++++ .../stdarch/coresimd/x86/{i586 => 
}/sse42.rs | 21 + .../stdarch/coresimd/x86/{i686 => }/sse4a.rs | 0 .../stdarch/coresimd/x86/{i586 => }/ssse3.rs | 345 +++++++++ .../stdarch/coresimd/x86/{i586 => }/tbm.rs | 118 +-- library/stdarch/coresimd/x86/test.rs | 7 +- .../stdarch/coresimd/x86/{i586 => }/xsave.rs | 34 +- .../stdarch/coresimd/{x86 => }/x86_64/abm.rs | 2 +- .../stdarch/coresimd/{x86 => }/x86_64/avx.rs | 0 .../stdarch/coresimd/{x86 => }/x86_64/avx2.rs | 2 +- .../stdarch/coresimd/{x86 => }/x86_64/bmi.rs | 1 + .../stdarch/coresimd/{x86 => }/x86_64/bmi2.rs | 2 +- .../coresimd/{x86 => }/x86_64/bswap.rs | 0 .../stdarch/coresimd/{x86 => }/x86_64/fxsr.rs | 0 .../stdarch/coresimd/{x86 => }/x86_64/mod.rs | 0 .../coresimd/{x86 => }/x86_64/rdrand.rs | 0 .../stdarch/coresimd/{x86 => }/x86_64/sse.rs | 2 +- .../stdarch/coresimd/{x86 => }/x86_64/sse2.rs | 2 +- .../coresimd/{x86 => }/x86_64/sse41.rs | 2 +- .../coresimd/{x86 => }/x86_64/sse42.rs | 2 +- .../coresimd/{x86 => }/x86_64/xsave.rs | 0 .../stdarch/crates/stdsimd-verify/src/lib.rs | 4 +- 49 files changed, 1690 insertions(+), 1807 deletions(-) rename library/stdarch/coresimd/x86/{i586 => }/abm.rs (88%) rename library/stdarch/coresimd/x86/{i686 => }/aes.rs (100%) rename library/stdarch/coresimd/x86/{i586 => }/avx.rs (100%) rename library/stdarch/coresimd/x86/{i586 => }/avx2.rs (100%) rename library/stdarch/coresimd/x86/{i586 => }/bmi.rs (76%) rename library/stdarch/coresimd/x86/{i586 => }/bmi2.rs (88%) rename library/stdarch/coresimd/x86/{i386 => }/bswap.rs (100%) rename library/stdarch/coresimd/x86/{i586 => }/cpuid.rs (96%) rename library/stdarch/coresimd/x86/{i386 => }/eflags.rs (97%) rename library/stdarch/coresimd/x86/{i386 => }/fxsr.rs (100%) delete mode 100644 library/stdarch/coresimd/x86/i386/mod.rs delete mode 100644 library/stdarch/coresimd/x86/i586/mod.rs delete mode 100644 library/stdarch/coresimd/x86/i686/mod.rs delete mode 100644 library/stdarch/coresimd/x86/i686/sse.rs delete mode 100644 library/stdarch/coresimd/x86/i686/sse2.rs delete mode 
100644 library/stdarch/coresimd/x86/i686/sse41.rs delete mode 100644 library/stdarch/coresimd/x86/i686/sse42.rs delete mode 100644 library/stdarch/coresimd/x86/i686/ssse3.rs rename library/stdarch/coresimd/x86/{i686 => }/mmx.rs (100%) rename library/stdarch/coresimd/x86/{i686 => }/pclmulqdq.rs (100%) rename library/stdarch/coresimd/x86/{i686 => }/rdrand.rs (100%) rename library/stdarch/coresimd/x86/{i386 => }/rdtsc.rs (98%) rename library/stdarch/coresimd/x86/{i586 => }/sse.rs (81%) rename library/stdarch/coresimd/x86/{i586 => }/sse2.rs (95%) rename library/stdarch/coresimd/x86/{i586 => }/sse3.rs (100%) rename library/stdarch/coresimd/x86/{i586 => }/sse41.rs (88%) rename library/stdarch/coresimd/x86/{i586 => }/sse42.rs (97%) rename library/stdarch/coresimd/x86/{i686 => }/sse4a.rs (100%) rename library/stdarch/coresimd/x86/{i586 => }/ssse3.rs (58%) rename library/stdarch/coresimd/x86/{i586 => }/tbm.rs (79%) rename library/stdarch/coresimd/x86/{i586 => }/xsave.rs (92%) rename library/stdarch/coresimd/{x86 => }/x86_64/abm.rs (97%) rename library/stdarch/coresimd/{x86 => }/x86_64/avx.rs (100%) rename library/stdarch/coresimd/{x86 => }/x86_64/avx2.rs (97%) rename library/stdarch/coresimd/{x86 => }/x86_64/bmi.rs (99%) rename library/stdarch/coresimd/{x86 => }/x86_64/bmi2.rs (99%) rename library/stdarch/coresimd/{x86 => }/x86_64/bswap.rs (100%) rename library/stdarch/coresimd/{x86 => }/x86_64/fxsr.rs (100%) rename library/stdarch/coresimd/{x86 => }/x86_64/mod.rs (100%) rename library/stdarch/coresimd/{x86 => }/x86_64/rdrand.rs (100%) rename library/stdarch/coresimd/{x86 => }/x86_64/sse.rs (99%) rename library/stdarch/coresimd/{x86 => }/x86_64/sse2.rs (99%) rename library/stdarch/coresimd/{x86 => }/x86_64/sse41.rs (97%) rename library/stdarch/coresimd/{x86 => }/x86_64/sse42.rs (95%) rename library/stdarch/coresimd/{x86 => }/x86_64/xsave.rs (100%) diff --git a/library/stdarch/coresimd/mod.rs b/library/stdarch/coresimd/mod.rs index 1bc644ac0c80..2204bce4c96c 100644 --- 
a/library/stdarch/coresimd/mod.rs +++ b/library/stdarch/coresimd/mod.rs @@ -48,6 +48,7 @@ pub mod arch { #[cfg(target_arch = "x86_64")] pub mod x86_64 { pub use coresimd::x86::*; + pub use coresimd::x86_64::*; } /// Platform-specific intrinsics for the `arm` platform. @@ -116,6 +117,8 @@ mod v16 { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] mod x86; +#[cfg(target_arch = "x86_64")] +mod x86_64; #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] mod arm; diff --git a/library/stdarch/coresimd/x86/i586/abm.rs b/library/stdarch/coresimd/x86/abm.rs similarity index 88% rename from library/stdarch/coresimd/x86/i586/abm.rs rename to library/stdarch/coresimd/x86/abm.rs index cbe7dfaee9d2..1576c0a750ba 100644 --- a/library/stdarch/coresimd/x86/i586/abm.rs +++ b/library/stdarch/coresimd/x86/abm.rs @@ -42,15 +42,15 @@ pub unsafe fn _popcnt32(x: i32) -> i32 { mod tests { use stdsimd_test::simd_test; - use coresimd::x86::i586::abm; + use coresimd::x86::*; #[simd_test = "lzcnt"] - unsafe fn _lzcnt_u32() { - assert_eq!(abm::_lzcnt_u32(0b0101_1010), 25); + unsafe fn test_lzcnt_u32() { + assert_eq!(_lzcnt_u32(0b0101_1010), 25); } #[simd_test = "popcnt"] - unsafe fn _popcnt32() { - assert_eq!(abm::_popcnt32(0b0101_1010), 4); + unsafe fn test_popcnt32() { + assert_eq!(_popcnt32(0b0101_1010), 4); } } diff --git a/library/stdarch/coresimd/x86/i686/aes.rs b/library/stdarch/coresimd/x86/aes.rs similarity index 100% rename from library/stdarch/coresimd/x86/i686/aes.rs rename to library/stdarch/coresimd/x86/aes.rs diff --git a/library/stdarch/coresimd/x86/i586/avx.rs b/library/stdarch/coresimd/x86/avx.rs similarity index 100% rename from library/stdarch/coresimd/x86/i586/avx.rs rename to library/stdarch/coresimd/x86/avx.rs diff --git a/library/stdarch/coresimd/x86/i586/avx2.rs b/library/stdarch/coresimd/x86/avx2.rs similarity index 100% rename from library/stdarch/coresimd/x86/i586/avx2.rs rename to library/stdarch/coresimd/x86/avx2.rs diff --git 
a/library/stdarch/coresimd/x86/i586/bmi.rs b/library/stdarch/coresimd/x86/bmi.rs similarity index 76% rename from library/stdarch/coresimd/x86/i586/bmi.rs rename to library/stdarch/coresimd/x86/bmi.rs index f3005de9cf39..83a50af093ce 100644 --- a/library/stdarch/coresimd/x86/i586/bmi.rs +++ b/library/stdarch/coresimd/x86/bmi.rs @@ -96,59 +96,59 @@ extern "C" { mod tests { use stdsimd_test::simd_test; - use coresimd::x86::i586::bmi; + use coresimd::x86::*; #[simd_test = "bmi"] - unsafe fn _bextr_u32() { - let r = bmi::_bextr_u32(0b0101_0000u32, 4, 4); + unsafe fn test_bextr_u32() { + let r = _bextr_u32(0b0101_0000u32, 4, 4); assert_eq!(r, 0b0000_0101u32); } #[simd_test = "bmi"] - unsafe fn _andn_u32() { - assert_eq!(bmi::_andn_u32(0, 0), 0); - assert_eq!(bmi::_andn_u32(0, 1), 1); - assert_eq!(bmi::_andn_u32(1, 0), 0); - assert_eq!(bmi::_andn_u32(1, 1), 0); + unsafe fn test_andn_u32() { + assert_eq!(_andn_u32(0, 0), 0); + assert_eq!(_andn_u32(0, 1), 1); + assert_eq!(_andn_u32(1, 0), 0); + assert_eq!(_andn_u32(1, 1), 0); - let r = bmi::_andn_u32(0b0000_0000u32, 0b0000_0000u32); + let r = _andn_u32(0b0000_0000u32, 0b0000_0000u32); assert_eq!(r, 0b0000_0000u32); - let r = bmi::_andn_u32(0b0000_0000u32, 0b1111_1111u32); + let r = _andn_u32(0b0000_0000u32, 0b1111_1111u32); assert_eq!(r, 0b1111_1111u32); - let r = bmi::_andn_u32(0b1111_1111u32, 0b0000_0000u32); + let r = _andn_u32(0b1111_1111u32, 0b0000_0000u32); assert_eq!(r, 0b0000_0000u32); - let r = bmi::_andn_u32(0b1111_1111u32, 0b1111_1111u32); + let r = _andn_u32(0b1111_1111u32, 0b1111_1111u32); assert_eq!(r, 0b0000_0000u32); - let r = bmi::_andn_u32(0b0100_0000u32, 0b0101_1101u32); + let r = _andn_u32(0b0100_0000u32, 0b0101_1101u32); assert_eq!(r, 0b0001_1101u32); } #[simd_test = "bmi"] - unsafe fn _blsi_u32() { - assert_eq!(bmi::_blsi_u32(0b1101_0000u32), 0b0001_0000u32); + unsafe fn test_blsi_u32() { + assert_eq!(_blsi_u32(0b1101_0000u32), 0b0001_0000u32); } #[simd_test = "bmi"] - unsafe fn _blsmsk_u32() { - let 
r = bmi::_blsmsk_u32(0b0011_0000u32); + unsafe fn test_blsmsk_u32() { + let r = _blsmsk_u32(0b0011_0000u32); assert_eq!(r, 0b0001_1111u32); } #[simd_test = "bmi"] - unsafe fn _blsr_u32() { + unsafe fn test_blsr_u32() { // TODO: test the behavior when the input is 0 - let r = bmi::_blsr_u32(0b0011_0000u32); + let r = _blsr_u32(0b0011_0000u32); assert_eq!(r, 0b0010_0000u32); } #[simd_test = "bmi"] - unsafe fn _tzcnt_u32() { - assert_eq!(bmi::_tzcnt_u32(0b0000_0001u32), 0u32); - assert_eq!(bmi::_tzcnt_u32(0b0000_0000u32), 32u32); - assert_eq!(bmi::_tzcnt_u32(0b1001_0000u32), 4u32); + unsafe fn test_tzcnt_u32() { + assert_eq!(_tzcnt_u32(0b0000_0001u32), 0u32); + assert_eq!(_tzcnt_u32(0b0000_0000u32), 32u32); + assert_eq!(_tzcnt_u32(0b1001_0000u32), 4u32); } } diff --git a/library/stdarch/coresimd/x86/i586/bmi2.rs b/library/stdarch/coresimd/x86/bmi2.rs similarity index 88% rename from library/stdarch/coresimd/x86/i586/bmi2.rs rename to library/stdarch/coresimd/x86/bmi2.rs index f7739c316e43..686576ce4246 100644 --- a/library/stdarch/coresimd/x86/i586/bmi2.rs +++ b/library/stdarch/coresimd/x86/bmi2.rs @@ -67,10 +67,10 @@ extern "C" { mod tests { use stdsimd_test::simd_test; - use coresimd::x86::i586::bmi2; + use coresimd::x86::*; #[simd_test = "bmi2"] - unsafe fn _pext_u32() { + unsafe fn test_pext_u32() { let n = 0b1011_1110_1001_0011u32; let m0 = 0b0110_0011_1000_0101u32; @@ -79,12 +79,12 @@ mod tests { let m1 = 0b1110_1011_1110_1111u32; let s1 = 0b0001_0111_0100_0011u32; - assert_eq!(bmi2::_pext_u32(n, m0), s0); - assert_eq!(bmi2::_pext_u32(n, m1), s1); + assert_eq!(_pext_u32(n, m0), s0); + assert_eq!(_pext_u32(n, m1), s1); } #[simd_test = "bmi2"] - unsafe fn _pdep_u32() { + unsafe fn test_pdep_u32() { let n = 0b1011_1110_1001_0011u32; let m0 = 0b0110_0011_1000_0101u32; @@ -93,23 +93,23 @@ mod tests { let m1 = 0b1110_1011_1110_1111u32; let s1 = 0b1110_1001_0010_0011u32; - assert_eq!(bmi2::_pdep_u32(n, m0), s0); - assert_eq!(bmi2::_pdep_u32(n, m1), s1); + 
assert_eq!(_pdep_u32(n, m0), s0); + assert_eq!(_pdep_u32(n, m1), s1); } #[simd_test = "bmi2"] - unsafe fn _bzhi_u32() { + unsafe fn test_bzhi_u32() { let n = 0b1111_0010u32; let s = 0b0001_0010u32; - assert_eq!(bmi2::_bzhi_u32(n, 5), s); + assert_eq!(_bzhi_u32(n, 5), s); } #[simd_test = "bmi2"] - unsafe fn _mulx_u32() { + unsafe fn test_mulx_u32() { let a: u32 = 4_294_967_200; let b: u32 = 2; let mut hi = 0; - let lo = bmi2::_mulx_u32(a, b, &mut hi); + let lo = _mulx_u32(a, b, &mut hi); /* result = 8589934400 = 0b0001_1111_1111_1111_1111_1111_1111_0100_0000u64 diff --git a/library/stdarch/coresimd/x86/i386/bswap.rs b/library/stdarch/coresimd/x86/bswap.rs similarity index 100% rename from library/stdarch/coresimd/x86/i386/bswap.rs rename to library/stdarch/coresimd/x86/bswap.rs diff --git a/library/stdarch/coresimd/x86/i586/cpuid.rs b/library/stdarch/coresimd/x86/cpuid.rs similarity index 96% rename from library/stdarch/coresimd/x86/i586/cpuid.rs rename to library/stdarch/coresimd/x86/cpuid.rs index 58fdb3874f43..9f72e6b53ecf 100644 --- a/library/stdarch/coresimd/x86/i586/cpuid.rs +++ b/library/stdarch/coresimd/x86/cpuid.rs @@ -79,7 +79,7 @@ pub fn has_cpuid() -> bool { } #[cfg(target_arch = "x86")] { - use coresimd::x86::i386::{__readeflags, __writeeflags}; + use coresimd::x86::{__readeflags, __writeeflags}; // On `x86` the `cpuid` instruction is not always available. 
// This follows the approach indicated in: @@ -121,7 +121,7 @@ pub unsafe fn __get_cpuid_max(leaf: u32) -> (u32, u32) { #[cfg(test)] mod tests { - use coresimd::x86::i586::cpuid; + use coresimd::x86::*; #[test] fn test_always_has_cpuid() { @@ -133,7 +133,6 @@ mod tests { #[cfg(target_arch = "x86")] #[test] fn test_has_cpuid() { - use coresimd::x86::i386::__readeflags; unsafe { let before = __readeflags(); diff --git a/library/stdarch/coresimd/x86/i386/eflags.rs b/library/stdarch/coresimd/x86/eflags.rs similarity index 97% rename from library/stdarch/coresimd/x86/i386/eflags.rs rename to library/stdarch/coresimd/x86/eflags.rs index e98918c7f45c..8d925ecdd087 100644 --- a/library/stdarch/coresimd/x86/i386/eflags.rs +++ b/library/stdarch/coresimd/x86/eflags.rs @@ -34,7 +34,7 @@ pub unsafe fn __writeeflags(eflags: u64) { #[cfg(test)] mod tests { - use coresimd::x86::i386::*; + use coresimd::x86::*; #[test] fn test_eflags() { diff --git a/library/stdarch/coresimd/x86/i386/fxsr.rs b/library/stdarch/coresimd/x86/fxsr.rs similarity index 100% rename from library/stdarch/coresimd/x86/i386/fxsr.rs rename to library/stdarch/coresimd/x86/fxsr.rs diff --git a/library/stdarch/coresimd/x86/i386/mod.rs b/library/stdarch/coresimd/x86/i386/mod.rs deleted file mode 100644 index 78be55c71262..000000000000 --- a/library/stdarch/coresimd/x86/i386/mod.rs +++ /dev/null @@ -1,15 +0,0 @@ -//! `i386` intrinsics - -mod eflags; -pub use self::eflags::*; - -#[cfg(dont_compile_me)] // TODO: need to upstream `fxsr` target feature -mod fxsr; -#[cfg(dont_compile_me)] // TODO: need to upstream `fxsr` target feature -pub use self::fxsr::*; - -mod bswap; -pub use self::bswap::*; - -mod rdtsc; -pub use self::rdtsc::*; diff --git a/library/stdarch/coresimd/x86/i586/mod.rs b/library/stdarch/coresimd/x86/i586/mod.rs deleted file mode 100644 index bc0ec2531d99..000000000000 --- a/library/stdarch/coresimd/x86/i586/mod.rs +++ /dev/null @@ -1,39 +0,0 @@ -//! 
`i586` intrinsics - -pub use self::cpuid::*; -pub use self::xsave::*; - -pub use self::sse::*; -pub use self::sse2::*; -pub use self::sse3::*; -pub use self::ssse3::*; -pub use self::sse41::*; -pub use self::sse42::*; -pub use self::avx::*; -pub use self::avx2::*; - -pub use self::abm::*; -pub use self::bmi::*; -pub use self::bmi2::*; - -#[cfg(not(feature = "intel_sde"))] -pub use self::tbm::*; - -mod cpuid; -mod xsave; - -mod sse; -mod sse2; -mod sse3; -mod ssse3; -mod sse41; -mod sse42; -mod avx; -mod avx2; - -mod abm; -mod bmi; -mod bmi2; - -#[cfg(not(feature = "intel_sde"))] -mod tbm; diff --git a/library/stdarch/coresimd/x86/i686/mod.rs b/library/stdarch/coresimd/x86/i686/mod.rs deleted file mode 100644 index fd2533d21944..000000000000 --- a/library/stdarch/coresimd/x86/i686/mod.rs +++ /dev/null @@ -1,33 +0,0 @@ -//! `i686` intrinsics - -mod aes; -pub use self::aes::*; - -mod rdrand; -pub use self::rdrand::*; - -mod mmx; -pub use self::mmx::*; - -mod pclmulqdq; -pub use self::pclmulqdq::*; - -mod sse; -pub use self::sse::*; - -mod sse2; -pub use self::sse2::*; - -mod ssse3; -pub use self::ssse3::*; - -mod sse41; -pub use self::sse41::*; - -mod sse42; -pub use self::sse42::*; - -#[cfg(not(feature = "intel_sde"))] -mod sse4a; -#[cfg(not(feature = "intel_sde"))] -pub use self::sse4a::*; diff --git a/library/stdarch/coresimd/x86/i686/sse.rs b/library/stdarch/coresimd/x86/i686/sse.rs deleted file mode 100644 index 19f3bbdb1d9e..000000000000 --- a/library/stdarch/coresimd/x86/i686/sse.rs +++ /dev/null @@ -1,710 +0,0 @@ -//! 
`i686` Streaming SIMD Extensions (SSE) - -use coresimd::x86::*; - -#[cfg(test)] -use stdsimd_test::assert_instr; - -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.x86.sse.cvtpi2ps"] - fn cvtpi2ps(a: __m128, b: __m64) -> __m128; - #[link_name = "llvm.x86.mmx.maskmovq"] - fn maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8); - #[link_name = "llvm.x86.mmx.pextr.w"] - fn pextrw(a: __m64, imm8: i32) -> i32; - #[link_name = "llvm.x86.mmx.pinsr.w"] - fn pinsrw(a: __m64, d: i32, imm8: i32) -> __m64; - #[link_name = "llvm.x86.mmx.pmovmskb"] - fn pmovmskb(a: __m64) -> i32; - #[link_name = "llvm.x86.sse.pshuf.w"] - fn pshufw(a: __m64, imm8: i8) -> __m64; - #[link_name = "llvm.x86.mmx.pmaxs.w"] - fn pmaxsw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.pmaxu.b"] - fn pmaxub(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.pmins.w"] - fn pminsw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.pminu.b"] - fn pminub(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.pmulhu.w"] - fn pmulhuw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.pavg.b"] - fn pavgb(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.pavg.w"] - fn pavgw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.psad.bw"] - fn psadbw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.sse.cvtps2pi"] - fn cvtps2pi(a: __m128) -> __m64; - #[link_name = "llvm.x86.sse.cvttps2pi"] - fn cvttps2pi(a: __m128) -> __m64; -} - -/// Compares the packed 16-bit signed integers of `a` and `b` writing the -/// greatest value into the result. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pmaxsw))] -pub unsafe fn _mm_max_pi16(a: __m64, b: __m64) -> __m64 { - pmaxsw(a, b) -} - -/// Compares the packed 16-bit signed integers of `a` and `b` writing the -/// greatest value into the result. 
-#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pmaxsw))] -pub unsafe fn _m_pmaxsw(a: __m64, b: __m64) -> __m64 { - _mm_max_pi16(a, b) -} - -/// Compares the packed 8-bit signed integers of `a` and `b` writing the -/// greatest value into the result. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pmaxub))] -pub unsafe fn _mm_max_pu8(a: __m64, b: __m64) -> __m64 { - pmaxub(a, b) -} - -/// Compares the packed 8-bit signed integers of `a` and `b` writing the -/// greatest value into the result. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pmaxub))] -pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 { - _mm_max_pu8(a, b) -} - -/// Compares the packed 16-bit signed integers of `a` and `b` writing the -/// smallest value into the result. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pminsw))] -pub unsafe fn _mm_min_pi16(a: __m64, b: __m64) -> __m64 { - pminsw(a, b) -} - -/// Compares the packed 16-bit signed integers of `a` and `b` writing the -/// smallest value into the result. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pminsw))] -pub unsafe fn _m_pminsw(a: __m64, b: __m64) -> __m64 { - _mm_min_pi16(a, b) -} - -/// Compares the packed 8-bit signed integers of `a` and `b` writing the -/// smallest value into the result. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pminub))] -pub unsafe fn _mm_min_pu8(a: __m64, b: __m64) -> __m64 { - pminub(a, b) -} - -/// Compares the packed 8-bit signed integers of `a` and `b` writing the -/// smallest value into the result. 
-#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pminub))] -pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 { - _mm_min_pu8(a, b) -} - -/// Multiplies packed 16-bit unsigned integer values and writes the -/// high-order 16 bits of each 32-bit product to the corresponding bits in -/// the destination. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pmulhuw))] -pub unsafe fn _mm_mulhi_pu16(a: __m64, b: __m64) -> __m64 { - pmulhuw(a, b) -} - -/// Multiplies packed 16-bit unsigned integer values and writes the -/// high-order 16 bits of each 32-bit product to the corresponding bits in -/// the destination. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pmulhuw))] -pub unsafe fn _m_pmulhuw(a: __m64, b: __m64) -> __m64 { - _mm_mulhi_pu16(a, b) -} - -/// Computes the rounded averages of the packed unsigned 8-bit integer -/// values and writes the averages to the corresponding bits in the -/// destination. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pavgb))] -pub unsafe fn _mm_avg_pu8(a: __m64, b: __m64) -> __m64 { - pavgb(a, b) -} - -/// Computes the rounded averages of the packed unsigned 8-bit integer -/// values and writes the averages to the corresponding bits in the -/// destination. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pavgb))] -pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 { - _mm_avg_pu8(a, b) -} - -/// Computes the rounded averages of the packed unsigned 16-bit integer -/// values and writes the averages to the corresponding bits in the -/// destination. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pavgw))] -pub unsafe fn _mm_avg_pu16(a: __m64, b: __m64) -> __m64 { - pavgw(a, b) -} - -/// Computes the rounded averages of the packed unsigned 16-bit integer -/// values and writes the averages to the corresponding bits in the -/// destination. 
-#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pavgw))] -pub unsafe fn _m_pavgw(a: __m64, b: __m64) -> __m64 { - _mm_avg_pu16(a, b) -} - -/// Subtracts the corresponding 8-bit unsigned integer values of the two -/// 64-bit vector operands and computes the absolute value for each of the -/// difference. Then sum of the 8 absolute differences is written to the -/// bits [15:0] of the destination; the remaining bits [63:16] are cleared. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(psadbw))] -pub unsafe fn _mm_sad_pu8(a: __m64, b: __m64) -> __m64 { - psadbw(a, b) -} - -/// Subtracts the corresponding 8-bit unsigned integer values of the two -/// 64-bit vector operands and computes the absolute value for each of the -/// difference. Then sum of the 8 absolute differences is written to the -/// bits [15:0] of the destination; the remaining bits [63:16] are cleared. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(psadbw))] -pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 { - _mm_sad_pu8(a, b) -} - -/// Converts two elements of a 64-bit vector of [2 x i32] into two -/// floating point values and writes them to the lower 64-bits of the -/// destination. The remaining higher order elements of the destination are -/// copied from the corresponding elements in the first operand. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtpi2ps))] -pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: __m64) -> __m128 { - cvtpi2ps(a, b) -} - -/// Converts two elements of a 64-bit vector of [2 x i32] into two -/// floating point values and writes them to the lower 64-bits of the -/// destination. The remaining higher order elements of the destination are -/// copied from the corresponding elements in the first operand. 
-#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtpi2ps))] -pub unsafe fn _mm_cvt_pi2ps(a: __m128, b: __m64) -> __m128 { - _mm_cvtpi32_ps(a, b) -} - -/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtpi2ps))] -pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> __m128 { - let b = _mm_setzero_si64(); - let b = _mm_cmpgt_pi8(b, a); - let b = _mm_unpacklo_pi8(a, b); - _mm_cvtpi16_ps(b) -} - -/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtpi2ps))] -pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> __m128 { - let b = _mm_setzero_si64(); - let b = _mm_unpacklo_pi8(a, b); - _mm_cvtpi16_ps(b) -} - -/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtpi2ps))] -pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> __m128 { - let b = _mm_setzero_si64(); - let b = _mm_cmpgt_pi16(b, a); - let c = _mm_unpackhi_pi16(a, b); - let r = _mm_setzero_ps(); - let r = cvtpi2ps(r, c); - let r = _mm_movelh_ps(r, r); - let c = _mm_unpacklo_pi16(a, b); - cvtpi2ps(r, c) -} - -/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtpi2ps))] -pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> __m128 { - let b = _mm_setzero_si64(); - let c = _mm_unpackhi_pi16(a, b); - let r = _mm_setzero_ps(); - let r = cvtpi2ps(r, c); - let r = _mm_movelh_ps(r, r); - let c = _mm_unpacklo_pi16(a, b); - cvtpi2ps(r, c) -} - -/// Converts the two 32-bit signed integer values from each 64-bit vector -/// operand of [2 x i32] into a 128-bit vector of [4 x float]. 
-#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtpi2ps))] -pub unsafe fn _mm_cvtpi32x2_ps(a: __m64, b: __m64) -> __m128 { - let c = i586::_mm_setzero_ps(); - let c = _mm_cvtpi32_ps(c, b); - let c = i586::_mm_movelh_ps(c, c); - _mm_cvtpi32_ps(c, a) -} - -/// Conditionally copies the values from each 8-bit element in the first -/// 64-bit integer vector operand to the specified memory location, as -/// specified by the most significant bit in the corresponding element in the -/// second 64-bit integer vector operand. -/// -/// To minimize caching, the data is flagged as non-temporal -/// (unlikely to be used again soon). -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(maskmovq))] -pub unsafe fn _mm_maskmove_si64(a: __m64, mask: __m64, mem_addr: *mut i8) { - maskmovq(a, mask, mem_addr) -} - -/// Conditionally copies the values from each 8-bit element in the first -/// 64-bit integer vector operand to the specified memory location, as -/// specified by the most significant bit in the corresponding element in the -/// second 64-bit integer vector operand. -/// -/// To minimize caching, the data is flagged as non-temporal -/// (unlikely to be used again soon). -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(maskmovq))] -pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) { - _mm_maskmove_si64(a, mask, mem_addr) -} - -/// Extracts 16-bit element from a 64-bit vector of [4 x i16] and -/// returns it, as specified by the immediate integer operand. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i32 { - macro_rules! 
call { - ($imm2:expr) => { pextrw(a, $imm2) as i32 } - } - constify_imm2!(imm2, call) -} - -/// Extracts 16-bit element from a 64-bit vector of [4 x i16] and -/// returns it, as specified by the immediate integer operand. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i32 { - macro_rules! call { - ($imm2:expr) => { pextrw(a, $imm2) as i32 } - } - constify_imm2!(imm2, call) -} - -/// Copies data from the 64-bit vector of [4 x i16] to the destination, -/// and inserts the lower 16-bits of an integer operand at the 16-bit offset -/// specified by the immediate operand `n`. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 { - macro_rules! call { - ($imm2:expr) => { pinsrw(a, d, $imm2) } - } - constify_imm2!(imm2, call) -} - -/// Copies data from the 64-bit vector of [4 x i16] to the destination, -/// and inserts the lower 16-bits of an integer operand at the 16-bit offset -/// specified by the immediate operand `n`. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))] -#[rustc_args_required_const(2)] -pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 { - macro_rules! call { - ($imm2:expr) => { pinsrw(a, d, $imm2) } - } - constify_imm2!(imm2, call) -} - -/// Takes the most significant bit from each 8-bit element in a 64-bit -/// integer vector to create a 16-bit mask value. Zero-extends the value to -/// 32-bit integer and writes it to the destination. 
-#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pmovmskb))] -pub unsafe fn _mm_movemask_pi8(a: __m64) -> i32 { - pmovmskb(a) -} - -/// Takes the most significant bit from each 8-bit element in a 64-bit -/// integer vector to create a 16-bit mask value. Zero-extends the value to -/// 32-bit integer and writes it to the destination. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pmovmskb))] -pub unsafe fn _m_pmovmskb(a: __m64) -> i32 { - _mm_movemask_pi8(a) -} - -/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the -/// destination, as specified by the immediate value operand. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 { - macro_rules! call { - ($imm8:expr) => { pshufw(a, $imm8) } - } - constify_imm8!(imm8, call) -} - -/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the -/// destination, as specified by the immediate value operand. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 { - macro_rules! call { - ($imm8:expr) => { pshufw(a, $imm8) } - } - constify_imm8!(imm8, call) -} - -/// Convert the two lower packed single-precision (32-bit) floating-point -/// elements in `a` to packed 32-bit integers with truncation. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvttps2pi))] -pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 { - cvttps2pi(a) -} - -/// Convert the two lower packed single-precision (32-bit) floating-point -/// elements in `a` to packed 32-bit integers with truncation. 
-#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvttps2pi))] -pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> __m64 { - _mm_cvttps_pi32(a) -} - -/// Convert the two lower packed single-precision (32-bit) floating-point -/// elements in `a` to packed 32-bit integers. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtps2pi))] -pub unsafe fn _mm_cvtps_pi32(a: __m128) -> __m64 { - cvtps2pi(a) -} - -/// Convert the two lower packed single-precision (32-bit) floating-point -/// elements in `a` to packed 32-bit integers. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtps2pi))] -pub unsafe fn _mm_cvt_ps2pi(a: __m128) -> __m64 { - _mm_cvtps_pi32(a) -} - -/// Convert packed single-precision (32-bit) floating-point elements in `a` to -/// packed 16-bit integers. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtps2pi))] -pub unsafe fn _mm_cvtps_pi16(a: __m128) -> __m64 { - let b = _mm_cvtps_pi32(a); - let a = _mm_movehl_ps(a, a); - let c = _mm_cvtps_pi32(a); - _mm_packs_pi32(b, c) -} - -/// Convert packed single-precision (32-bit) floating-point elements in `a` to -/// packed 8-bit integers, and returns theem in the lower 4 elements of the -/// result. 
-#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtps2pi))] -pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 { - let b = _mm_cvtps_pi16(a); - let c = _mm_setzero_si64(); - _mm_packs_pi16(b, c) -} - -#[cfg(test)] -mod tests { - use coresimd::x86::*; - use stdsimd_test::simd_test; - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_max_pi16() { - let a = _mm_setr_pi16(-1, 6, -3, 8); - let b = _mm_setr_pi16(5, -2, 7, -4); - let r = _mm_setr_pi16(5, 6, 7, 8); - - assert_eq_m64(r, _mm_max_pi16(a, b)); - assert_eq_m64(r, _m_pmaxsw(a, b)); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_max_pu8() { - let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8); - let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4); - let r = _mm_setr_pi8(5, 6, 7, 8, 5, 6, 7, 8); - - assert_eq_m64(r, _mm_max_pu8(a, b)); - assert_eq_m64(r, _m_pmaxub(a, b)); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_min_pi16() { - let a = _mm_setr_pi16(-1, 6, -3, 8); - let b = _mm_setr_pi16(5, -2, 7, -4); - let r = _mm_setr_pi16(-1, -2, -3, -4); - - assert_eq_m64(r, _mm_min_pi16(a, b)); - assert_eq_m64(r, _m_pminsw(a, b)); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_min_pu8() { - let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8); - let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4); - let r = _mm_setr_pi8(2, 2, 3, 4, 2, 2, 3, 4); - - assert_eq_m64(r, _mm_min_pu8(a, b)); - assert_eq_m64(r, _m_pminub(a, b)); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_mulhi_pu16() { - let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001)); - let r = _mm_mulhi_pu16(a, b); - assert_eq_m64(r, _mm_set1_pi16(15)); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_m_pmulhuw() { - let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001)); - let r = _m_pmulhuw(a, b); - assert_eq_m64(r, _mm_set1_pi16(15)); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_avg_pu8() { - let (a, b) = (_mm_set1_pi8(3), _mm_set1_pi8(9)); - let r = _mm_avg_pu8(a, b); - assert_eq_m64(r, _mm_set1_pi8(6)); - - let r = 
_m_pavgb(a, b); - assert_eq_m64(r, _mm_set1_pi8(6)); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_avg_pu16() { - let (a, b) = (_mm_set1_pi16(3), _mm_set1_pi16(9)); - let r = _mm_avg_pu16(a, b); - assert_eq_m64(r, _mm_set1_pi16(6)); - - let r = _m_pavgw(a, b); - assert_eq_m64(r, _mm_set1_pi16(6)); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_sad_pu8() { - #[cfg_attr(rustfmt, rustfmt_skip)] - let a = _mm_setr_pi8( - 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, - 1, 2, 3, 4, - ); - let b = _mm_setr_pi8(0, 0, 0, 0, 2, 1, 2, 1); - let r = _mm_sad_pu8(a, b); - assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0)); - - let r = _m_psadbw(a, b); - assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0)); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_cvtpi32_ps() { - let a = _mm_setr_ps(0., 0., 3., 4.); - let b = _mm_setr_pi32(1, 2); - let expected = _mm_setr_ps(1., 2., 3., 4.); - let r = _mm_cvtpi32_ps(a, b); - assert_eq_m128(r, expected); - - let r = _mm_cvt_pi2ps(a, b); - assert_eq_m128(r, expected); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_cvtpi16_ps() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let expected = _mm_setr_ps(1., 2., 3., 4.); - let r = _mm_cvtpi16_ps(a); - assert_eq_m128(r, expected); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_cvtpu16_ps() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let expected = _mm_setr_ps(1., 2., 3., 4.); - let r = _mm_cvtpu16_ps(a); - assert_eq_m128(r, expected); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_cvtpi8_ps() { - let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); - let expected = _mm_setr_ps(1., 2., 3., 4.); - let r = _mm_cvtpi8_ps(a); - assert_eq_m128(r, expected); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_cvtpu8_ps() { - let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); - let expected = _mm_setr_ps(1., 2., 3., 4.); - let r = _mm_cvtpu8_ps(a); - assert_eq_m128(r, expected); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_cvtpi32x2_ps() { - let a = _mm_setr_pi32(1, 2); - let b = 
_mm_setr_pi32(3, 4); - let expected = _mm_setr_ps(1., 2., 3., 4.); - let r = _mm_cvtpi32x2_ps(a, b); - assert_eq_m128(r, expected); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_maskmove_si64() { - let a = _mm_set1_pi8(9); - let mask = _mm_setr_pi8(0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0); - let mut r = _mm_set1_pi8(0); - _mm_maskmove_si64(a, mask, &mut r as *mut _ as *mut i8); - let e = _mm_setr_pi8(0, 0, 9, 0, 0, 0, 0, 0); - assert_eq_m64(r, e); - - let mut r = _mm_set1_pi8(0); - _m_maskmovq(a, mask, &mut r as *mut _ as *mut i8); - assert_eq_m64(r, e); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_extract_pi16() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let r = _mm_extract_pi16(a, 0); - assert_eq!(r, 1); - let r = _mm_extract_pi16(a, 1); - assert_eq!(r, 2); - - let r = _m_pextrw(a, 1); - assert_eq!(r, 2); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_insert_pi16() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let r = _mm_insert_pi16(a, 0, 0b0); - let expected = _mm_setr_pi16(0, 2, 3, 4); - assert_eq_m64(r, expected); - let r = _mm_insert_pi16(a, 0, 0b10); - let expected = _mm_setr_pi16(1, 2, 0, 4); - assert_eq_m64(r, expected); - - let r = _m_pinsrw(a, 0, 0b10); - assert_eq_m64(r, expected); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_movemask_pi8() { - let a = - _mm_setr_pi16(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000); - let r = _mm_movemask_pi8(a); - assert_eq!(r, 0b10001); - - let r = _m_pmovmskb(a); - assert_eq!(r, 0b10001); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_shuffle_pi16() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let r = _mm_shuffle_pi16(a, 0b00_01_01_11); - let expected = _mm_setr_pi16(4, 2, 2, 1); - assert_eq_m64(r, expected); - - let r = _m_pshufw(a, 0b00_01_01_11); - assert_eq_m64(r, expected); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_cvtps_pi32() { - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - let r = _mm_setr_pi32(1, 2); - - assert_eq_m64(r, _mm_cvtps_pi32(a)); - assert_eq_m64(r, 
_mm_cvt_ps2pi(a)); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_cvttps_pi32() { - let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0); - let r = _mm_setr_pi32(7, 2); - - assert_eq_m64(r, _mm_cvttps_pi32(a)); - assert_eq_m64(r, _mm_cvtt_ps2pi(a)); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_cvtps_pi16() { - let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0); - let r = _mm_setr_pi16(7, 2, 3, 4); - assert_eq_m64(r, _mm_cvtps_pi16(a)); - } - - #[simd_test = "sse,mmx"] - unsafe fn test_mm_cvtps_pi8() { - let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0); - let r = _mm_setr_pi8(7, 2, 3, 4, 0, 0, 0, 0); - assert_eq_m64(r, _mm_cvtps_pi8(a)); - } -} diff --git a/library/stdarch/coresimd/x86/i686/sse2.rs b/library/stdarch/coresimd/x86/i686/sse2.rs deleted file mode 100644 index 0132a5983ac1..000000000000 --- a/library/stdarch/coresimd/x86/i686/sse2.rs +++ /dev/null @@ -1,225 +0,0 @@ -//! `i686`'s Streaming SIMD Extensions 2 (SSE2) - -use coresimd::simd_llvm::simd_extract; -use coresimd::x86::*; -use mem; - -#[cfg(test)] -use stdsimd_test::assert_instr; - -/// Adds two signed or unsigned 64-bit integer values, returning the -/// lower 64 bits of the sum. -#[inline] -#[target_feature(enable = "sse2,mmx")] -#[cfg_attr(test, assert_instr(paddq))] -pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 { - paddq(a, b) -} - -/// Multiplies 32-bit unsigned integer values contained in the lower bits -/// of the two 64-bit integer vectors and returns the 64-bit unsigned -/// product. -#[inline] -#[target_feature(enable = "sse2,mmx")] -#[cfg_attr(test, assert_instr(pmuludq))] -pub unsafe fn _mm_mul_su32(a: __m64, b: __m64) -> __m64 { - pmuludq(a, b) -} - -/// Subtracts signed or unsigned 64-bit integer values and writes the -/// difference to the corresponding bits in the destination. 
-#[inline] -#[target_feature(enable = "sse2,mmx")] -#[cfg_attr(test, assert_instr(psubq))] -pub unsafe fn _mm_sub_si64(a: __m64, b: __m64) -> __m64 { - psubq(a, b) -} - -/// Converts the two signed 32-bit integer elements of a 64-bit vector of -/// [2 x i32] into two double-precision floating-point values, returned in a -/// 128-bit vector of [2 x double]. -#[inline] -#[target_feature(enable = "sse2,mmx")] -#[cfg_attr(test, assert_instr(cvtpi2pd))] -pub unsafe fn _mm_cvtpi32_pd(a: __m64) -> __m128d { - cvtpi2pd(a) -} - -/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with -/// the specified 64-bit integer values. -#[inline] -#[target_feature(enable = "sse2,mmx")] -// no particular instruction to test -pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> __m128i { - _mm_set_epi64x(mem::transmute(e1), mem::transmute(e0)) -} - -/// Initializes both values in a 128-bit vector of [2 x i64] with the -/// specified 64-bit value. -#[inline] -#[target_feature(enable = "sse2,mmx")] -// no particular instruction to test -pub unsafe fn _mm_set1_epi64(a: __m64) -> __m128i { - _mm_set_epi64x(mem::transmute(a), mem::transmute(a)) -} - -/// Constructs a 128-bit integer vector, initialized in reverse order -/// with the specified 64-bit integral values. -#[inline] -#[target_feature(enable = "sse2,mmx")] -// no particular instruction to test -pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> __m128i { - _mm_set_epi64x(mem::transmute(e0), mem::transmute(e1)) -} - -/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit -/// integer. -#[inline] -#[target_feature(enable = "sse2,mmx")] -// #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong -// instr? -pub unsafe fn _mm_movepi64_pi64(a: __m128i) -> __m64 { - mem::transmute(simd_extract::<_, i64>(a.as_i64x2(), 0)) -} - -/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the -/// upper bits. 
-#[inline] -#[target_feature(enable = "sse2,mmx")] -// #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong -// instr? -pub unsafe fn _mm_movpi64_epi64(a: __m64) -> __m128i { - _mm_set_epi64x(0, mem::transmute(a)) -} - -/// Converts the two double-precision floating-point elements of a -/// 128-bit vector of [2 x double] into two signed 32-bit integer values, -/// returned in a 64-bit vector of [2 x i32]. -#[inline] -#[target_feature(enable = "sse2,mmx")] -#[cfg_attr(test, assert_instr(cvtpd2pi))] -pub unsafe fn _mm_cvtpd_pi32(a: __m128d) -> __m64 { - cvtpd2pi(a) -} - -/// Converts the two double-precision floating-point elements of a -/// 128-bit vector of [2 x double] into two signed 32-bit integer values, -/// returned in a 64-bit vector of [2 x i32]. -/// If the result of either conversion is inexact, the result is truncated -/// (rounded towards zero) regardless of the current MXCSR setting. -#[inline] -#[target_feature(enable = "sse2,mmx")] -#[cfg_attr(test, assert_instr(cvttpd2pi))] -pub unsafe fn _mm_cvttpd_pi32(a: __m128d) -> __m64 { - cvttpd2pi(a) -} - -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.x86.mmx.padd.q"] - fn paddq(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.pmulu.dq"] - fn pmuludq(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.psub.q"] - fn psubq(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.sse.cvtpi2pd"] - fn cvtpi2pd(a: __m64) -> __m128d; - #[link_name = "llvm.x86.sse.cvtpd2pi"] - fn cvtpd2pi(a: __m128d) -> __m64; - #[link_name = "llvm.x86.sse.cvttpd2pi"] - fn cvttpd2pi(a: __m128d) -> __m64; -} - -#[cfg(test)] -mod tests { - use std::mem; - - use stdsimd_test::simd_test; - - use coresimd::x86::*; - - #[simd_test = "sse2,mmx"] - unsafe fn test_mm_add_si64() { - let a = 1i64; - let b = 2i64; - let expected = 3i64; - let r = _mm_add_si64(mem::transmute(a), mem::transmute(b)); - assert_eq!(mem::transmute::<__m64, i64>(r), expected); - } - - #[simd_test = "sse2,mmx"] - 
unsafe fn test_mm_mul_su32() { - let a = _mm_setr_pi32(1, 2); - let b = _mm_setr_pi32(3, 4); - let expected = 3u64; - let r = _mm_mul_su32(a, b); - assert_eq_m64(r, mem::transmute(expected)); - } - - #[simd_test = "sse2,mmx"] - unsafe fn test_mm_sub_si64() { - let a = 1i64; - let b = 2i64; - let expected = -1i64; - let r = _mm_sub_si64(mem::transmute(a), mem::transmute(b)); - assert_eq!(mem::transmute::<__m64, i64>(r), expected); - } - - #[simd_test = "sse2,mmx"] - unsafe fn test_mm_cvtpi32_pd() { - let a = _mm_setr_pi32(1, 2); - let expected = _mm_setr_pd(1., 2.); - let r = _mm_cvtpi32_pd(a); - assert_eq_m128d(r, expected); - } - - #[simd_test = "sse2,mmx"] - unsafe fn test_mm_set_epi64() { - let r = _mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64)); - assert_eq_m128i(r, _mm_setr_epi64x(2, 1)); - } - - #[simd_test = "sse2,mmx"] - unsafe fn test_mm_set1_epi64() { - let r = _mm_set1_epi64(mem::transmute(1i64)); - assert_eq_m128i(r, _mm_setr_epi64x(1, 1)); - } - - #[simd_test = "sse2,mmx"] - unsafe fn test_mm_setr_epi64() { - let r = _mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64)); - assert_eq_m128i(r, _mm_setr_epi64x(1, 2)); - } - - #[simd_test = "sse2,mmx"] - unsafe fn test_mm_movepi64_pi64() { - let r = _mm_movepi64_pi64(_mm_setr_epi64x(5, 0)); - assert_eq_m64(r, _mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0)); - } - - #[simd_test = "sse2,mmx"] - unsafe fn test_mm_movpi64_epi64() { - let r = _mm_movpi64_epi64(_mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0)); - assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); - } - - #[simd_test = "sse2,mmx"] - unsafe fn test_mm_cvtpd_pi32() { - let a = _mm_setr_pd(5., 0.); - let r = _mm_cvtpd_pi32(a); - assert_eq_m64(r, _mm_setr_pi32(5, 0)); - } - - #[simd_test = "sse2,mmx"] - unsafe fn test_mm_cvttpd_pi32() { - use std::{f64, i32}; - - let a = _mm_setr_pd(5., 0.); - let r = _mm_cvttpd_pi32(a); - assert_eq_m64(r, _mm_setr_pi32(5, 0)); - - let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); - let r = _mm_cvttpd_pi32(a); - assert_eq_m64(r, 
_mm_setr_pi32(i32::MIN, i32::MIN)); - } -} diff --git a/library/stdarch/coresimd/x86/i686/sse41.rs b/library/stdarch/coresimd/x86/i686/sse41.rs deleted file mode 100644 index 5b3ff1eb53ae..000000000000 --- a/library/stdarch/coresimd/x86/i686/sse41.rs +++ /dev/null @@ -1,240 +0,0 @@ -//! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1) - -use coresimd::v128::*; -use coresimd::x86::*; - -#[cfg(test)] -use stdsimd_test::assert_instr; - -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.x86.sse41.ptestz"] - fn ptestz(a: i64x2, mask: i64x2) -> i32; - #[link_name = "llvm.x86.sse41.ptestc"] - fn ptestc(a: i64x2, mask: i64x2) -> i32; - #[link_name = "llvm.x86.sse41.ptestnzc"] - fn ptestnzc(a: i64x2, mask: i64x2) -> i32; -} - -/// Tests whether the specified bits in a 128-bit integer vector are all -/// zeros. -/// -/// Arguments: -/// -/// * `a` - A 128-bit integer vector containing the bits to be tested. -/// * `mask` - A 128-bit integer vector selecting which bits to test in -/// operand `a`. -/// -/// Returns: -/// -/// * `1` - if the specified bits are all zeros, -/// * `0` - otherwise. -#[inline] -#[target_feature(enable = "sse4.1")] -#[cfg_attr(test, assert_instr(ptest))] -pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 { - ptestz(a.as_i64x2(), mask.as_i64x2()) -} - -/// Tests whether the specified bits in a 128-bit integer vector are all -/// ones. -/// -/// Arguments: -/// -/// * `a` - A 128-bit integer vector containing the bits to be tested. -/// * `mask` - A 128-bit integer vector selecting which bits to test in -/// operand `a`. -/// -/// Returns: -/// -/// * `1` - if the specified bits are all ones, -/// * `0` - otherwise. -#[inline] -#[target_feature(enable = "sse4.1")] -#[cfg_attr(test, assert_instr(ptest))] -pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 { - ptestc(a.as_i64x2(), mask.as_i64x2()) -} - -/// Tests whether the specified bits in a 128-bit integer vector are -/// neither all zeros nor all ones. 
-/// -/// Arguments: -/// -/// * `a` - A 128-bit integer vector containing the bits to be tested. -/// * `mask` - A 128-bit integer vector selecting which bits to test in -/// operand `a`. -/// -/// Returns: -/// -/// * `1` - if the specified bits are neither all zeros nor all ones, -/// * `0` - otherwise. -#[inline] -#[target_feature(enable = "sse4.1")] -#[cfg_attr(test, assert_instr(ptest))] -pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 { - ptestnzc(a.as_i64x2(), mask.as_i64x2()) -} - -/// Tests whether the specified bits in a 128-bit integer vector are all -/// zeros. -/// -/// Arguments: -/// -/// * `a` - A 128-bit integer vector containing the bits to be tested. -/// * `mask` - A 128-bit integer vector selecting which bits to test in -/// operand `a`. -/// -/// Returns: -/// -/// * `1` - if the specified bits are all zeros, -/// * `0` - otherwise. -#[inline] -#[target_feature(enable = "sse4.1")] -#[cfg_attr(test, assert_instr(ptest))] -pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 { - _mm_testz_si128(a, mask) -} - -/// Tests whether the specified bits in `a` 128-bit integer vector are all -/// ones. -/// -/// Argument: -/// -/// * `a` - A 128-bit integer vector containing the bits to be tested. -/// -/// Returns: -/// -/// * `1` - if the bits specified in the operand are all set to 1, -/// * `0` - otherwise. -#[inline] -#[target_feature(enable = "sse4.1")] -#[cfg_attr(test, assert_instr(pcmpeqd))] -#[cfg_attr(test, assert_instr(ptest))] -pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 { - _mm_testc_si128(a, _mm_cmpeq_epi32(a, a)) -} - -/// Tests whether the specified bits in a 128-bit integer vector are -/// neither all zeros nor all ones. -/// -/// Arguments: -/// -/// * `a` - A 128-bit integer vector containing the bits to be tested. -/// * `mask` - A 128-bit integer vector selecting which bits to test in -/// operand `a`. 
-/// -/// Returns: -/// -/// * `1` - if the specified bits are neither all zeros nor all ones, -/// * `0` - otherwise. -#[inline] -#[target_feature(enable = "sse4.1")] -#[cfg_attr(test, assert_instr(ptest))] -pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 { - _mm_testnzc_si128(a, mask) -} - -#[cfg(test)] -mod tests { - use stdsimd_test::simd_test; - use coresimd::x86::*; - - #[simd_test = "sse4.1"] - unsafe fn test_mm_testz_si128() { - let a = _mm_set1_epi8(1); - let mask = _mm_set1_epi8(0); - let r = _mm_testz_si128(a, mask); - assert_eq!(r, 1); - let a = _mm_set1_epi8(0b101); - let mask = _mm_set1_epi8(0b110); - let r = _mm_testz_si128(a, mask); - assert_eq!(r, 0); - let a = _mm_set1_epi8(0b011); - let mask = _mm_set1_epi8(0b100); - let r = _mm_testz_si128(a, mask); - assert_eq!(r, 1); - } - - #[simd_test = "sse4.1"] - unsafe fn test_mm_testc_si128() { - let a = _mm_set1_epi8(-1); - let mask = _mm_set1_epi8(0); - let r = _mm_testc_si128(a, mask); - assert_eq!(r, 1); - let a = _mm_set1_epi8(0b101); - let mask = _mm_set1_epi8(0b110); - let r = _mm_testc_si128(a, mask); - assert_eq!(r, 0); - let a = _mm_set1_epi8(0b101); - let mask = _mm_set1_epi8(0b100); - let r = _mm_testc_si128(a, mask); - assert_eq!(r, 1); - } - - #[simd_test = "sse4.1"] - unsafe fn test_mm_testnzc_si128() { - let a = _mm_set1_epi8(0); - let mask = _mm_set1_epi8(1); - let r = _mm_testnzc_si128(a, mask); - assert_eq!(r, 0); - let a = _mm_set1_epi8(-1); - let mask = _mm_set1_epi8(0); - let r = _mm_testnzc_si128(a, mask); - assert_eq!(r, 0); - let a = _mm_set1_epi8(0b101); - let mask = _mm_set1_epi8(0b110); - let r = _mm_testnzc_si128(a, mask); - assert_eq!(r, 1); - let a = _mm_set1_epi8(0b101); - let mask = _mm_set1_epi8(0b101); - let r = _mm_testnzc_si128(a, mask); - assert_eq!(r, 0); - } - - #[simd_test = "sse4.1"] - unsafe fn test_mm_test_all_zeros() { - let a = _mm_set1_epi8(1); - let mask = _mm_set1_epi8(0); - let r = _mm_test_all_zeros(a, mask); - assert_eq!(r, 1); - 
let a = _mm_set1_epi8(0b101); - let mask = _mm_set1_epi8(0b110); - let r = _mm_test_all_zeros(a, mask); - assert_eq!(r, 0); - let a = _mm_set1_epi8(0b011); - let mask = _mm_set1_epi8(0b100); - let r = _mm_test_all_zeros(a, mask); - assert_eq!(r, 1); - } - - #[simd_test = "sse4.1"] - unsafe fn test_mm_test_all_ones() { - let a = _mm_set1_epi8(-1); - let r = _mm_test_all_ones(a); - assert_eq!(r, 1); - let a = _mm_set1_epi8(0b101); - let r = _mm_test_all_ones(a); - assert_eq!(r, 0); - } - - #[simd_test = "sse4.1"] - unsafe fn test_mm_test_mix_ones_zeros() { - let a = _mm_set1_epi8(0); - let mask = _mm_set1_epi8(1); - let r = _mm_test_mix_ones_zeros(a, mask); - assert_eq!(r, 0); - let a = _mm_set1_epi8(-1); - let mask = _mm_set1_epi8(0); - let r = _mm_test_mix_ones_zeros(a, mask); - assert_eq!(r, 0); - let a = _mm_set1_epi8(0b101); - let mask = _mm_set1_epi8(0b110); - let r = _mm_test_mix_ones_zeros(a, mask); - assert_eq!(r, 1); - let a = _mm_set1_epi8(0b101); - let mask = _mm_set1_epi8(0b101); - let r = _mm_test_mix_ones_zeros(a, mask); - assert_eq!(r, 0); - } -} diff --git a/library/stdarch/coresimd/x86/i686/sse42.rs b/library/stdarch/coresimd/x86/i686/sse42.rs deleted file mode 100644 index bd2db2827c87..000000000000 --- a/library/stdarch/coresimd/x86/i686/sse42.rs +++ /dev/null @@ -1,35 +0,0 @@ -//! `i686`'s Streaming SIMD Extensions 4.2 (SSE4.2) - -use coresimd::simd_llvm::*; -use coresimd::v128::*; -use coresimd::x86::*; - -#[cfg(test)] -use stdsimd_test::assert_instr; - -/// Compare packed 64-bit integers in `a` and `b` for greater-than, -/// return the results. 
-#[inline] -#[target_feature(enable = "sse4.2")] -#[cfg_attr(test, assert_instr(pcmpgtq))] -pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i { - mem::transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) -} - -#[cfg(test)] -mod tests { - use coresimd::x86::*; - - use stdsimd_test::simd_test; - - #[simd_test = "sse4.2"] - unsafe fn test_mm_cmpgt_epi64() { - let a = _mm_setr_epi64x(0, 0x2a); - let b = _mm_set1_epi64x(0x00); - let i = _mm_cmpgt_epi64(a, b); - assert_eq_m128i( - i, - _mm_setr_epi64x(0x00, 0xffffffffffffffffu64 as i64), - ); - } -} diff --git a/library/stdarch/coresimd/x86/i686/ssse3.rs b/library/stdarch/coresimd/x86/i686/ssse3.rs deleted file mode 100644 index 71feb07024ad..000000000000 --- a/library/stdarch/coresimd/x86/i686/ssse3.rs +++ /dev/null @@ -1,361 +0,0 @@ -//! Supplemental Streaming SIMD Extensions 3 (SSSE3) - -#[cfg(test)] -use stdsimd_test::assert_instr; - -use coresimd::x86::*; - -/// Compute the absolute value of packed 8-bit integers in `a` and -/// return the unsigned results. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(pabsb))] -pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 { - pabsb(a) -} - -/// Compute the absolute value of packed 8-bit integers in `a`, and return the -/// unsigned results. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(pabsw))] -pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 { - pabsw(a) -} - -/// Compute the absolute value of packed 32-bit integers in `a`, and return the -/// unsigned results. 
-#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(pabsd))] -pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 { - pabsd(a) -} - -/// Shuffle packed 8-bit integers in `a` according to shuffle control mask in -/// the corresponding 8-bit element of `b`, and return the results -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(pshufb))] -pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 { - pshufb(a, b) -} - -/// Concatenates the two 64-bit integer vector operands, and right-shifts -/// the result by the number of bytes specified in the immediate operand. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(palignr, n = 15))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 { - macro_rules! call { - ($imm8:expr) => { - palignrb(a, b, $imm8) - } - } - constify_imm8!(n, call) -} - -/// Horizontally add the adjacent pairs of values contained in 2 packed -/// 64-bit vectors of [4 x i16]. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(phaddw))] -pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 { - phaddw(a, b) -} - -/// Horizontally add the adjacent pairs of values contained in 2 packed -/// 64-bit vectors of [2 x i32]. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(phaddd))] -pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 { - phaddd(a, b) -} - -/// Horizontally add the adjacent pairs of values contained in 2 packed -/// 64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are -/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h. 
-#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(phaddsw))] -pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 { - phaddsw(a, b) -} - -/// Horizontally subtracts the adjacent pairs of values contained in 2 -/// packed 64-bit vectors of [4 x i16]. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(phsubw))] -pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 { - phsubw(a, b) -} - -/// Horizontally subtracts the adjacent pairs of values contained in 2 -/// packed 64-bit vectors of [2 x i32]. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(phsubd))] -pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 { - phsubd(a, b) -} - -/// Horizontally subtracts the adjacent pairs of values contained in 2 -/// packed 64-bit vectors of [4 x i16]. Positive differences greater than -/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are -/// saturated to 8000h. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(phsubsw))] -pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 { - phsubsw(a, b) -} - -/// Multiplies corresponding pairs of packed 8-bit unsigned integer -/// values contained in the first source operand and packed 8-bit signed -/// integer values contained in the second source operand, adds pairs of -/// contiguous products with signed saturation, and writes the 16-bit sums to -/// the corresponding bits in the destination. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(pmaddubsw))] -pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 { - pmaddubsw(a, b) -} - -/// Multiplies packed 16-bit signed integer values, truncates the 32-bit -/// products to the 18 most significant bits by right-shifting, rounds the -/// truncated value by adding 1, and writes bits [16:1] to the destination. 
-#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(pmulhrsw))] -pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 { - pmulhrsw(a, b) -} - -/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit -/// integer in `b` is negative, and return the results. -/// Element in result are zeroed out when the corresponding element in `b` is -/// zero. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(psignb))] -pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 { - psignb(a, b) -} - -/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit -/// integer in `b` is negative, and return the results. -/// Element in result are zeroed out when the corresponding element in `b` is -/// zero. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(psignw))] -pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 { - psignw(a, b) -} - -/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit -/// integer in `b` is negative, and return the results. -/// Element in result are zeroed out when the corresponding element in `b` is -/// zero. 
-#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(psignd))] -pub unsafe fn _mm_sign_pi32(a: __m64, b: __m64) -> __m64 { - psignd(a, b) -} - -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.x86.ssse3.pabs.b"] - fn pabsb(a: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.pabs.w"] - fn pabsw(a: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.pabs.d"] - fn pabsd(a: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.pshuf.b"] - fn pshufb(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.mmx.palignr.b"] - fn palignrb(a: __m64, b: __m64, n: u8) -> __m64; - - #[link_name = "llvm.x86.ssse3.phadd.w"] - fn phaddw(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.phadd.d"] - fn phaddd(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.phadd.sw"] - fn phaddsw(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.phsub.w"] - fn phsubw(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.phsub.d"] - fn phsubd(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.phsub.sw"] - fn phsubsw(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.pmadd.ub.sw"] - fn pmaddubsw(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.pmul.hr.sw"] - fn pmulhrsw(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.psign.b"] - fn psignb(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.psign.w"] - fn psignw(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.psign.d"] - fn psignd(a: __m64, b: __m64) -> __m64; -} - -#[cfg(test)] -mod tests { - use stdsimd_test::simd_test; - - use coresimd::x86::*; - - #[simd_test = "ssse3,mmx"] - unsafe fn test_mm_abs_pi8() { - let r = _mm_abs_pi8(_mm_set1_pi8(-5)); - assert_eq_m64(r, _mm_set1_pi8(5)); - } - - #[simd_test = "ssse3,mmx"] - unsafe fn test_mm_abs_pi16() { - let r = _mm_abs_pi16(_mm_set1_pi16(-5)); - assert_eq_m64(r, _mm_set1_pi16(5)); - } - - #[simd_test = "ssse3,mmx"] - unsafe 
fn test_mm_abs_pi32() { - let r = _mm_abs_pi32(_mm_set1_pi32(-5)); - assert_eq_m64(r, _mm_set1_pi32(5)); - } - - #[simd_test = "ssse3,mmx"] - unsafe fn test_mm_shuffle_pi8() { - let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); - let b = _mm_setr_pi8(4, 128u8 as i8, 4, 3, 24, 12, 6, 19); - let expected = _mm_setr_pi8(5, 0, 5, 4, 1, 5, 7, 4); - let r = _mm_shuffle_pi8(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test = "ssse3,mmx"] - unsafe fn test_mm_alignr_pi8() { - let a = _mm_setr_pi32(0x89ABCDEF_u32 as i32, 0x01234567_u32 as i32); - let b = _mm_setr_pi32(0xBBAA9988_u32 as i32, 0xFFDDEECC_u32 as i32); - let r = _mm_alignr_pi8(a, b, 4); - assert_eq_m64(r, ::std::mem::transmute(0x89abcdefffddeecc_u64)); - } - - #[simd_test = "ssse3,mmx"] - unsafe fn test_mm_hadd_pi16() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let b = _mm_setr_pi16(4, 128, 4, 3); - let expected = _mm_setr_pi16(3, 7, 132, 7); - let r = _mm_hadd_pi16(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test = "ssse3,mmx"] - unsafe fn test_mm_hadd_pi32() { - let a = _mm_setr_pi32(1, 2); - let b = _mm_setr_pi32(4, 128); - let expected = _mm_setr_pi32(3, 132); - let r = _mm_hadd_pi32(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test = "ssse3,mmx"] - unsafe fn test_mm_hadds_pi16() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let b = _mm_setr_pi16(32767, 1, -32768, -1); - let expected = _mm_setr_pi16(3, 7, 32767, -32768); - let r = _mm_hadds_pi16(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test = "ssse3,mmx"] - unsafe fn test_mm_hsub_pi16() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let b = _mm_setr_pi16(4, 128, 4, 3); - let expected = _mm_setr_pi16(-1, -1, -124, 1); - let r = _mm_hsub_pi16(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test = "ssse3,mmx"] - unsafe fn test_mm_hsub_pi32() { - let a = _mm_setr_pi32(1, 2); - let b = _mm_setr_pi32(4, 128); - let expected = _mm_setr_pi32(-1, -124); - let r = _mm_hsub_pi32(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test = "ssse3,mmx"] - 
unsafe fn test_mm_hsubs_pi16() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let b = _mm_setr_pi16(4, 128, 4, 3); - let expected = _mm_setr_pi16(-1, -1, -124, 1); - let r = _mm_hsubs_pi16(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test = "ssse3,mmx"] - unsafe fn test_mm_maddubs_pi16() { - let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); - let b = _mm_setr_pi8(4, 63, 4, 3, 24, 12, 6, 19); - let expected = _mm_setr_pi16(130, 24, 192, 194); - let r = _mm_maddubs_pi16(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test = "ssse3,mmx"] - unsafe fn test_mm_mulhrs_pi16() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let b = _mm_setr_pi16(4, 32767, -1, -32768); - let expected = _mm_setr_pi16(0, 2, 0, -4); - let r = _mm_mulhrs_pi16(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test = "ssse3,mmx"] - unsafe fn test_mm_sign_pi8() { - let a = _mm_setr_pi8(1, 2, 3, 4, -5, -6, 7, 8); - let b = _mm_setr_pi8(4, 64, 0, 3, 1, -1, -2, 1); - let expected = _mm_setr_pi8(1, 2, 0, 4, -5, 6, -7, 8); - let r = _mm_sign_pi8(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test = "ssse3,mmx"] - unsafe fn test_mm_sign_pi16() { - let a = _mm_setr_pi16(-1, 2, 3, 4); - let b = _mm_setr_pi16(1, -1, 1, 0); - let expected = _mm_setr_pi16(-1, -2, 3, 0); - let r = _mm_sign_pi16(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test = "ssse3,mmx"] - unsafe fn test_mm_sign_pi32() { - let a = _mm_setr_pi32(-1, 2); - let b = _mm_setr_pi32(1, 0); - let expected = _mm_setr_pi32(-1, 0); - let r = _mm_sign_pi32(a, b); - assert_eq_m64(r, expected); - } -} diff --git a/library/stdarch/coresimd/x86/i686/mmx.rs b/library/stdarch/coresimd/x86/mmx.rs similarity index 100% rename from library/stdarch/coresimd/x86/i686/mmx.rs rename to library/stdarch/coresimd/x86/mmx.rs diff --git a/library/stdarch/coresimd/x86/mod.rs b/library/stdarch/coresimd/x86/mod.rs index 3704903bd816..82c2483c2370 100644 --- a/library/stdarch/coresimd/x86/mod.rs +++ b/library/stdarch/coresimd/x86/mod.rs @@ -327,7 +327,7 @@ pub use 
self::test::*; #[doc(hidden)] #[allow(non_camel_case_types)] -trait m128iExt: Sized { +pub(crate) trait m128iExt: Sized { fn as_m128i(self) -> __m128i; #[inline] @@ -380,7 +380,7 @@ impl m128iExt for __m128i { #[doc(hidden)] #[allow(non_camel_case_types)] -trait m256iExt: Sized { +pub(crate) trait m256iExt: Sized { fn as_m256i(self) -> __m256i; #[inline] @@ -431,21 +431,69 @@ impl m256iExt for __m256i { } } -mod i386; -pub use self::i386::*; -// x86 w/o sse2 -mod i586; -pub use self::i586::*; +mod eflags; +pub use self::eflags::*; -// `i686` is `i586 + sse2`. -// -// This module is not available for `i586` targets, -// but available for all `i686` targets by default -mod i686; -pub use self::i686::*; +#[cfg(dont_compile_me)] // TODO: need to upstream `fxsr` target feature +mod fxsr; +#[cfg(dont_compile_me)] // TODO: need to upstream `fxsr` target feature +pub use self::fxsr::*; -#[cfg(target_arch = "x86_64")] -mod x86_64; -#[cfg(target_arch = "x86_64")] -pub use self::x86_64::*; +mod bswap; +pub use self::bswap::*; + +mod rdtsc; +pub use self::rdtsc::*; + +mod cpuid; +pub use self::cpuid::*; +mod xsave; +pub use self::xsave::*; + +mod sse; +pub use self::sse::*; +mod sse2; +pub use self::sse2::*; +mod sse3; +pub use self::sse3::*; +mod ssse3; +pub use self::ssse3::*; +mod sse41; +pub use self::sse41::*; +mod sse42; +pub use self::sse42::*; +mod avx; +pub use self::avx::*; +mod avx2; +pub use self::avx2::*; + +mod abm; +pub use self::abm::*; +mod bmi; +pub use self::bmi::*; + +mod bmi2; +pub use self::bmi2::*; + +#[cfg(not(feature = "intel_sde"))] +mod sse4a; +#[cfg(not(feature = "intel_sde"))] +pub use self::sse4a::*; + +#[cfg(not(feature = "intel_sde"))] +mod tbm; +#[cfg(not(feature = "intel_sde"))] +pub use self::tbm::*; + +mod mmx; +pub use self::mmx::*; + +mod pclmulqdq; +pub use self::pclmulqdq::*; + +mod aes; +pub use self::aes::*; + +mod rdrand; +pub use self::rdrand::*; diff --git a/library/stdarch/coresimd/x86/i686/pclmulqdq.rs 
b/library/stdarch/coresimd/x86/pclmulqdq.rs similarity index 100% rename from library/stdarch/coresimd/x86/i686/pclmulqdq.rs rename to library/stdarch/coresimd/x86/pclmulqdq.rs diff --git a/library/stdarch/coresimd/x86/i686/rdrand.rs b/library/stdarch/coresimd/x86/rdrand.rs similarity index 100% rename from library/stdarch/coresimd/x86/i686/rdrand.rs rename to library/stdarch/coresimd/x86/rdrand.rs diff --git a/library/stdarch/coresimd/x86/i386/rdtsc.rs b/library/stdarch/coresimd/x86/rdtsc.rs similarity index 98% rename from library/stdarch/coresimd/x86/i386/rdtsc.rs rename to library/stdarch/coresimd/x86/rdtsc.rs index 513885b6fe8f..de064e2dfe77 100644 --- a/library/stdarch/coresimd/x86/i386/rdtsc.rs +++ b/library/stdarch/coresimd/x86/rdtsc.rs @@ -54,7 +54,7 @@ extern "C" { #[cfg(test)] mod tests { use stdsimd_test::simd_test; - use coresimd::x86::i386::rdtsc; + use coresimd::x86::rdtsc; #[simd_test = "sse2"] unsafe fn _rdtsc() { diff --git a/library/stdarch/coresimd/x86/i586/sse.rs b/library/stdarch/coresimd/x86/sse.rs similarity index 81% rename from library/stdarch/coresimd/x86/i586/sse.rs rename to library/stdarch/coresimd/x86/sse.rs index 19b4e216062d..5b41d39f381b 100644 --- a/library/stdarch/coresimd/x86/i586/sse.rs +++ b/library/stdarch/coresimd/x86/sse.rs @@ -1680,6 +1680,38 @@ extern "C" { fn cmpss(a: __m128, b: __m128, imm8: i8) -> __m128; #[link_name = "llvm.x86.mmx.movnt.dq"] fn movntdq(a: *mut __m64, b: __m64); + #[link_name = "llvm.x86.sse.cvtpi2ps"] + fn cvtpi2ps(a: __m128, b: __m64) -> __m128; + #[link_name = "llvm.x86.mmx.maskmovq"] + fn maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8); + #[link_name = "llvm.x86.mmx.pextr.w"] + fn pextrw(a: __m64, imm8: i32) -> i32; + #[link_name = "llvm.x86.mmx.pinsr.w"] + fn pinsrw(a: __m64, d: i32, imm8: i32) -> __m64; + #[link_name = "llvm.x86.mmx.pmovmskb"] + fn pmovmskb(a: __m64) -> i32; + #[link_name = "llvm.x86.sse.pshuf.w"] + fn pshufw(a: __m64, imm8: i8) -> __m64; + #[link_name = 
"llvm.x86.mmx.pmaxs.w"] + fn pmaxsw(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.pmaxu.b"] + fn pmaxub(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.pmins.w"] + fn pminsw(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.pminu.b"] + fn pminub(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.pmulhu.w"] + fn pmulhuw(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.pavg.b"] + fn pavgb(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.pavg.w"] + fn pavgw(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.psad.bw"] + fn psadbw(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.sse.cvtps2pi"] + fn cvtps2pi(a: __m128) -> __m64; + #[link_name = "llvm.x86.sse.cvttps2pi"] + fn cvttps2pi(a: __m128) -> __m64; } /// Stores `a` into the memory at `mem_addr` using a non-temporal memory hint. @@ -1702,6 +1734,432 @@ pub unsafe fn _mm_stream_pi(mem_addr: *mut __m64, a: __m64) { movntdq(mem_addr, a) } +/// Compares the packed 16-bit signed integers of `a` and `b` writing the +/// greatest value into the result. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pmaxsw))] +pub unsafe fn _mm_max_pi16(a: __m64, b: __m64) -> __m64 { + pmaxsw(a, b) +} + +/// Compares the packed 16-bit signed integers of `a` and `b` writing the +/// greatest value into the result. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pmaxsw))] +pub unsafe fn _m_pmaxsw(a: __m64, b: __m64) -> __m64 { + _mm_max_pi16(a, b) +} + +/// Compares the packed 8-bit signed integers of `a` and `b` writing the +/// greatest value into the result. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pmaxub))] +pub unsafe fn _mm_max_pu8(a: __m64, b: __m64) -> __m64 { + pmaxub(a, b) +} + +/// Compares the packed 8-bit signed integers of `a` and `b` writing the +/// greatest value into the result. 
+#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pmaxub))] +pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 { + _mm_max_pu8(a, b) +} + +/// Compares the packed 16-bit signed integers of `a` and `b` writing the +/// smallest value into the result. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pminsw))] +pub unsafe fn _mm_min_pi16(a: __m64, b: __m64) -> __m64 { + pminsw(a, b) +} + +/// Compares the packed 16-bit signed integers of `a` and `b` writing the +/// smallest value into the result. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pminsw))] +pub unsafe fn _m_pminsw(a: __m64, b: __m64) -> __m64 { + _mm_min_pi16(a, b) +} + +/// Compares the packed 8-bit unsigned integers of `a` and `b` writing the +/// smallest value into the result. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pminub))] +pub unsafe fn _mm_min_pu8(a: __m64, b: __m64) -> __m64 { + pminub(a, b) +} + +/// Compares the packed 8-bit unsigned integers of `a` and `b` writing the +/// smallest value into the result. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pminub))] +pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 { + _mm_min_pu8(a, b) +} + +/// Multiplies packed 16-bit unsigned integer values and writes the +/// high-order 16 bits of each 32-bit product to the corresponding bits in +/// the destination. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pmulhuw))] +pub unsafe fn _mm_mulhi_pu16(a: __m64, b: __m64) -> __m64 { + pmulhuw(a, b) +} + +/// Multiplies packed 16-bit unsigned integer values and writes the +/// high-order 16 bits of each 32-bit product to the corresponding bits in +/// the destination. 
+#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pmulhuw))] +pub unsafe fn _m_pmulhuw(a: __m64, b: __m64) -> __m64 { + _mm_mulhi_pu16(a, b) +} + +/// Computes the rounded averages of the packed unsigned 8-bit integer +/// values and writes the averages to the corresponding bits in the +/// destination. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pavgb))] +pub unsafe fn _mm_avg_pu8(a: __m64, b: __m64) -> __m64 { + pavgb(a, b) +} + +/// Computes the rounded averages of the packed unsigned 8-bit integer +/// values and writes the averages to the corresponding bits in the +/// destination. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pavgb))] +pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 { + _mm_avg_pu8(a, b) +} + +/// Computes the rounded averages of the packed unsigned 16-bit integer +/// values and writes the averages to the corresponding bits in the +/// destination. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pavgw))] +pub unsafe fn _mm_avg_pu16(a: __m64, b: __m64) -> __m64 { + pavgw(a, b) +} + +/// Computes the rounded averages of the packed unsigned 16-bit integer +/// values and writes the averages to the corresponding bits in the +/// destination. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pavgw))] +pub unsafe fn _m_pavgw(a: __m64, b: __m64) -> __m64 { + _mm_avg_pu16(a, b) +} + +/// Subtracts the corresponding 8-bit unsigned integer values of the two +/// 64-bit vector operands and computes the absolute value for each of the +/// difference. Then sum of the 8 absolute differences is written to the +/// bits [15:0] of the destination; the remaining bits [63:16] are cleared. 
+#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(psadbw))] +pub unsafe fn _mm_sad_pu8(a: __m64, b: __m64) -> __m64 { + psadbw(a, b) +} + +/// Subtracts the corresponding 8-bit unsigned integer values of the two +/// 64-bit vector operands and computes the absolute value for each of the +/// difference. Then sum of the 8 absolute differences is written to the +/// bits [15:0] of the destination; the remaining bits [63:16] are cleared. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(psadbw))] +pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 { + _mm_sad_pu8(a, b) +} + +/// Converts two elements of a 64-bit vector of [2 x i32] into two +/// floating point values and writes them to the lower 64-bits of the +/// destination. The remaining higher order elements of the destination are +/// copied from the corresponding elements in the first operand. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(cvtpi2ps))] +pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: __m64) -> __m128 { + cvtpi2ps(a, b) +} + +/// Converts two elements of a 64-bit vector of [2 x i32] into two +/// floating point values and writes them to the lower 64-bits of the +/// destination. The remaining higher order elements of the destination are +/// copied from the corresponding elements in the first operand. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(cvtpi2ps))] +pub unsafe fn _mm_cvt_pi2ps(a: __m128, b: __m64) -> __m128 { + _mm_cvtpi32_ps(a, b) +} + +/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(cvtpi2ps))] +pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> __m128 { + let b = _mm_setzero_si64(); + let b = _mm_cmpgt_pi8(b, a); + let b = _mm_unpacklo_pi8(a, b); + _mm_cvtpi16_ps(b) +} + +/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s. 
+#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(cvtpi2ps))] +pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> __m128 { + let b = _mm_setzero_si64(); + let b = _mm_unpacklo_pi8(a, b); + _mm_cvtpi16_ps(b) +} + +/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(cvtpi2ps))] +pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> __m128 { + let b = _mm_setzero_si64(); + let b = _mm_cmpgt_pi16(b, a); + let c = _mm_unpackhi_pi16(a, b); + let r = _mm_setzero_ps(); + let r = cvtpi2ps(r, c); + let r = _mm_movelh_ps(r, r); + let c = _mm_unpacklo_pi16(a, b); + cvtpi2ps(r, c) +} + +/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(cvtpi2ps))] +pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> __m128 { + let b = _mm_setzero_si64(); + let c = _mm_unpackhi_pi16(a, b); + let r = _mm_setzero_ps(); + let r = cvtpi2ps(r, c); + let r = _mm_movelh_ps(r, r); + let c = _mm_unpacklo_pi16(a, b); + cvtpi2ps(r, c) +} + +/// Converts the two 32-bit signed integer values from each 64-bit vector +/// operand of [2 x i32] into a 128-bit vector of [4 x float]. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(cvtpi2ps))] +pub unsafe fn _mm_cvtpi32x2_ps(a: __m64, b: __m64) -> __m128 { + let c = _mm_setzero_ps(); + let c = _mm_cvtpi32_ps(c, b); + let c = _mm_movelh_ps(c, c); + _mm_cvtpi32_ps(c, a) +} + +/// Conditionally copies the values from each 8-bit element in the first +/// 64-bit integer vector operand to the specified memory location, as +/// specified by the most significant bit in the corresponding element in the +/// second 64-bit integer vector operand. +/// +/// To minimize caching, the data is flagged as non-temporal +/// (unlikely to be used again soon). 
+#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(maskmovq))] +pub unsafe fn _mm_maskmove_si64(a: __m64, mask: __m64, mem_addr: *mut i8) { + maskmovq(a, mask, mem_addr) +} + +/// Conditionally copies the values from each 8-bit element in the first +/// 64-bit integer vector operand to the specified memory location, as +/// specified by the most significant bit in the corresponding element in the +/// second 64-bit integer vector operand. +/// +/// To minimize caching, the data is flagged as non-temporal +/// (unlikely to be used again soon). +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(maskmovq))] +pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) { + _mm_maskmove_si64(a, mask, mem_addr) +} + +/// Extracts 16-bit element from a 64-bit vector of [4 x i16] and +/// returns it, as specified by the immediate integer operand. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i32 { + macro_rules! call { + ($imm2:expr) => { pextrw(a, $imm2) as i32 } + } + constify_imm2!(imm2, call) +} + +/// Extracts 16-bit element from a 64-bit vector of [4 x i16] and +/// returns it, as specified by the immediate integer operand. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))] +#[rustc_args_required_const(1)] +pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i32 { + macro_rules! call { + ($imm2:expr) => { pextrw(a, $imm2) as i32 } + } + constify_imm2!(imm2, call) +} + +/// Copies data from the 64-bit vector of [4 x i16] to the destination, +/// and inserts the lower 16-bits of an integer operand at the 16-bit offset +/// specified by the immediate operand `n`. 
+#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 { + macro_rules! call { + ($imm2:expr) => { pinsrw(a, d, $imm2) } + } + constify_imm2!(imm2, call) +} + +/// Copies data from the 64-bit vector of [4 x i16] to the destination, +/// and inserts the lower 16-bits of an integer operand at the 16-bit offset +/// specified by the immediate operand `n`. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))] +#[rustc_args_required_const(2)] +pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 { + macro_rules! call { + ($imm2:expr) => { pinsrw(a, d, $imm2) } + } + constify_imm2!(imm2, call) +} + +/// Takes the most significant bit from each 8-bit element in a 64-bit +/// integer vector to create a 16-bit mask value. Zero-extends the value to +/// 32-bit integer and writes it to the destination. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pmovmskb))] +pub unsafe fn _mm_movemask_pi8(a: __m64) -> i32 { + pmovmskb(a) +} + +/// Takes the most significant bit from each 8-bit element in a 64-bit +/// integer vector to create a 16-bit mask value. Zero-extends the value to +/// 32-bit integer and writes it to the destination. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pmovmskb))] +pub unsafe fn _m_pmovmskb(a: __m64) -> i32 { + _mm_movemask_pi8(a) +} + +/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the +/// destination, as specified by the immediate value operand. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 { + macro_rules! 
call { + ($imm8:expr) => { pshufw(a, $imm8) } + } + constify_imm8!(imm8, call) +} + +/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the +/// destination, as specified by the immediate value operand. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))] +#[rustc_args_required_const(1)] +pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 { + macro_rules! call { + ($imm8:expr) => { pshufw(a, $imm8) } + } + constify_imm8!(imm8, call) +} + +/// Convert the two lower packed single-precision (32-bit) floating-point +/// elements in `a` to packed 32-bit integers with truncation. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(cvttps2pi))] +pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 { + cvttps2pi(a) +} + +/// Convert the two lower packed single-precision (32-bit) floating-point +/// elements in `a` to packed 32-bit integers with truncation. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(cvttps2pi))] +pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> __m64 { + _mm_cvttps_pi32(a) +} + +/// Convert the two lower packed single-precision (32-bit) floating-point +/// elements in `a` to packed 32-bit integers. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(cvtps2pi))] +pub unsafe fn _mm_cvtps_pi32(a: __m128) -> __m64 { + cvtps2pi(a) +} + +/// Convert the two lower packed single-precision (32-bit) floating-point +/// elements in `a` to packed 32-bit integers. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(cvtps2pi))] +pub unsafe fn _mm_cvt_ps2pi(a: __m128) -> __m64 { + _mm_cvtps_pi32(a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in `a` to +/// packed 16-bit integers. 
+#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(cvtps2pi))] +pub unsafe fn _mm_cvtps_pi16(a: __m128) -> __m64 { + let b = _mm_cvtps_pi32(a); + let a = _mm_movehl_ps(a, a); + let c = _mm_cvtps_pi32(a); + _mm_packs_pi32(b, c) +} + +/// Convert packed single-precision (32-bit) floating-point elements in `a` to +/// packed 8-bit integers, and returns them in the lower 4 elements of the +/// result. +#[inline] +#[target_feature(enable = "sse,mmx")] +#[cfg_attr(test, assert_instr(cvtps2pi))] +pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 { + let b = _mm_cvtps_pi16(a); + let c = _mm_setzero_si64(); + _mm_packs_pi16(b, c) +} + #[cfg(test)] mod tests { use std::mem::transmute; @@ -3121,4 +3579,240 @@ mod tests { _mm_stream_pi(&mut *mem as *mut _ as *mut _, a); assert_eq_m64(a, *mem); } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_max_pi16() { + let a = _mm_setr_pi16(-1, 6, -3, 8); + let b = _mm_setr_pi16(5, -2, 7, -4); + let r = _mm_setr_pi16(5, 6, 7, 8); + + assert_eq_m64(r, _mm_max_pi16(a, b)); + assert_eq_m64(r, _m_pmaxsw(a, b)); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_max_pu8() { + let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8); + let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4); + let r = _mm_setr_pi8(5, 6, 7, 8, 5, 6, 7, 8); + + assert_eq_m64(r, _mm_max_pu8(a, b)); + assert_eq_m64(r, _m_pmaxub(a, b)); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_min_pi16() { + let a = _mm_setr_pi16(-1, 6, -3, 8); + let b = _mm_setr_pi16(5, -2, 7, -4); + let r = _mm_setr_pi16(-1, -2, -3, -4); + + assert_eq_m64(r, _mm_min_pi16(a, b)); + assert_eq_m64(r, _m_pminsw(a, b)); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_min_pu8() { + let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8); + let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4); + let r = _mm_setr_pi8(2, 2, 3, 4, 2, 2, 3, 4); + + assert_eq_m64(r, _mm_min_pu8(a, b)); + assert_eq_m64(r, _m_pminub(a, b)); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_mulhi_pu16() { + let (a, b) 
= (_mm_set1_pi16(1000), _mm_set1_pi16(1001)); + let r = _mm_mulhi_pu16(a, b); + assert_eq_m64(r, _mm_set1_pi16(15)); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_m_pmulhuw() { + let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001)); + let r = _m_pmulhuw(a, b); + assert_eq_m64(r, _mm_set1_pi16(15)); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_avg_pu8() { + let (a, b) = (_mm_set1_pi8(3), _mm_set1_pi8(9)); + let r = _mm_avg_pu8(a, b); + assert_eq_m64(r, _mm_set1_pi8(6)); + + let r = _m_pavgb(a, b); + assert_eq_m64(r, _mm_set1_pi8(6)); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_avg_pu16() { + let (a, b) = (_mm_set1_pi16(3), _mm_set1_pi16(9)); + let r = _mm_avg_pu16(a, b); + assert_eq_m64(r, _mm_set1_pi16(6)); + + let r = _m_pavgw(a, b); + assert_eq_m64(r, _mm_set1_pi16(6)); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_sad_pu8() { + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm_setr_pi8( + 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, + 1, 2, 3, 4, + ); + let b = _mm_setr_pi8(0, 0, 0, 0, 2, 1, 2, 1); + let r = _mm_sad_pu8(a, b); + assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0)); + + let r = _m_psadbw(a, b); + assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0)); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_cvtpi32_ps() { + let a = _mm_setr_ps(0., 0., 3., 4.); + let b = _mm_setr_pi32(1, 2); + let expected = _mm_setr_ps(1., 2., 3., 4.); + let r = _mm_cvtpi32_ps(a, b); + assert_eq_m128(r, expected); + + let r = _mm_cvt_pi2ps(a, b); + assert_eq_m128(r, expected); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_cvtpi16_ps() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let expected = _mm_setr_ps(1., 2., 3., 4.); + let r = _mm_cvtpi16_ps(a); + assert_eq_m128(r, expected); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_cvtpu16_ps() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let expected = _mm_setr_ps(1., 2., 3., 4.); + let r = _mm_cvtpu16_ps(a); + assert_eq_m128(r, expected); + } + + #[simd_test = "sse,mmx"] + unsafe fn 
test_mm_cvtpi8_ps() { + let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); + let expected = _mm_setr_ps(1., 2., 3., 4.); + let r = _mm_cvtpi8_ps(a); + assert_eq_m128(r, expected); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_cvtpu8_ps() { + let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); + let expected = _mm_setr_ps(1., 2., 3., 4.); + let r = _mm_cvtpu8_ps(a); + assert_eq_m128(r, expected); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_cvtpi32x2_ps() { + let a = _mm_setr_pi32(1, 2); + let b = _mm_setr_pi32(3, 4); + let expected = _mm_setr_ps(1., 2., 3., 4.); + let r = _mm_cvtpi32x2_ps(a, b); + assert_eq_m128(r, expected); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_maskmove_si64() { + let a = _mm_set1_pi8(9); + let mask = _mm_setr_pi8(0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0); + let mut r = _mm_set1_pi8(0); + _mm_maskmove_si64(a, mask, &mut r as *mut _ as *mut i8); + let e = _mm_setr_pi8(0, 0, 9, 0, 0, 0, 0, 0); + assert_eq_m64(r, e); + + let mut r = _mm_set1_pi8(0); + _m_maskmovq(a, mask, &mut r as *mut _ as *mut i8); + assert_eq_m64(r, e); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_extract_pi16() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let r = _mm_extract_pi16(a, 0); + assert_eq!(r, 1); + let r = _mm_extract_pi16(a, 1); + assert_eq!(r, 2); + + let r = _m_pextrw(a, 1); + assert_eq!(r, 2); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_insert_pi16() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let r = _mm_insert_pi16(a, 0, 0b0); + let expected = _mm_setr_pi16(0, 2, 3, 4); + assert_eq_m64(r, expected); + let r = _mm_insert_pi16(a, 0, 0b10); + let expected = _mm_setr_pi16(1, 2, 0, 4); + assert_eq_m64(r, expected); + + let r = _m_pinsrw(a, 0, 0b10); + assert_eq_m64(r, expected); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_movemask_pi8() { + let a = + _mm_setr_pi16(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000); + let r = _mm_movemask_pi8(a); + assert_eq!(r, 0b10001); + + let r = _m_pmovmskb(a); + assert_eq!(r, 0b10001); + } + + 
#[simd_test = "sse,mmx"] + unsafe fn test_mm_shuffle_pi16() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let r = _mm_shuffle_pi16(a, 0b00_01_01_11); + let expected = _mm_setr_pi16(4, 2, 2, 1); + assert_eq_m64(r, expected); + + let r = _m_pshufw(a, 0b00_01_01_11); + assert_eq_m64(r, expected); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_cvtps_pi32() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let r = _mm_setr_pi32(1, 2); + + assert_eq_m64(r, _mm_cvtps_pi32(a)); + assert_eq_m64(r, _mm_cvt_ps2pi(a)); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_cvttps_pi32() { + let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0); + let r = _mm_setr_pi32(7, 2); + + assert_eq_m64(r, _mm_cvttps_pi32(a)); + assert_eq_m64(r, _mm_cvtt_ps2pi(a)); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_cvtps_pi16() { + let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0); + let r = _mm_setr_pi16(7, 2, 3, 4); + assert_eq_m64(r, _mm_cvtps_pi16(a)); + } + + #[simd_test = "sse,mmx"] + unsafe fn test_mm_cvtps_pi8() { + let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0); + let r = _mm_setr_pi8(7, 2, 3, 4, 0, 0, 0, 0); + assert_eq_m64(r, _mm_cvtps_pi8(a)); + } } diff --git a/library/stdarch/coresimd/x86/i586/sse2.rs b/library/stdarch/coresimd/x86/sse2.rs similarity index 95% rename from library/stdarch/coresimd/x86/i586/sse2.rs rename to library/stdarch/coresimd/x86/sse2.rs index be08771c841f..dcfb149ee0a5 100644 --- a/library/stdarch/coresimd/x86/i586/sse2.rs +++ b/library/stdarch/coresimd/x86/sse2.rs @@ -2213,6 +2213,113 @@ pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d { simd_shuffle2(a, b, [0, 2]) } +/// Adds two signed or unsigned 64-bit integer values, returning the +/// lower 64 bits of the sum. 
+#[inline] +#[target_feature(enable = "sse2,mmx")] +#[cfg_attr(test, assert_instr(paddq))] +pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 { + paddq(a, b) +} + +/// Multiplies 32-bit unsigned integer values contained in the lower bits +/// of the two 64-bit integer vectors and returns the 64-bit unsigned +/// product. +#[inline] +#[target_feature(enable = "sse2,mmx")] +#[cfg_attr(test, assert_instr(pmuludq))] +pub unsafe fn _mm_mul_su32(a: __m64, b: __m64) -> __m64 { + pmuludq2(a, b) +} + +/// Subtracts signed or unsigned 64-bit integer values and writes the +/// difference to the corresponding bits in the destination. +#[inline] +#[target_feature(enable = "sse2,mmx")] +#[cfg_attr(test, assert_instr(psubq))] +pub unsafe fn _mm_sub_si64(a: __m64, b: __m64) -> __m64 { + psubq(a, b) +} + +/// Converts the two signed 32-bit integer elements of a 64-bit vector of +/// [2 x i32] into two double-precision floating-point values, returned in a +/// 128-bit vector of [2 x double]. +#[inline] +#[target_feature(enable = "sse2,mmx")] +#[cfg_attr(test, assert_instr(cvtpi2pd))] +pub unsafe fn _mm_cvtpi32_pd(a: __m64) -> __m128d { + cvtpi2pd(a) +} + +/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with +/// the specified 64-bit integer values. +#[inline] +#[target_feature(enable = "sse2,mmx")] +// no particular instruction to test +pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> __m128i { + _mm_set_epi64x(mem::transmute(e1), mem::transmute(e0)) +} + +/// Initializes both values in a 128-bit vector of [2 x i64] with the +/// specified 64-bit value. +#[inline] +#[target_feature(enable = "sse2,mmx")] +// no particular instruction to test +pub unsafe fn _mm_set1_epi64(a: __m64) -> __m128i { + _mm_set_epi64x(mem::transmute(a), mem::transmute(a)) +} + +/// Constructs a 128-bit integer vector, initialized in reverse order +/// with the specified 64-bit integral values. 
+#[inline] +#[target_feature(enable = "sse2,mmx")] +// no particular instruction to test +pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> __m128i { + _mm_set_epi64x(mem::transmute(e0), mem::transmute(e1)) +} + +/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit +/// integer. +#[inline] +#[target_feature(enable = "sse2,mmx")] +// #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong +// instr? +pub unsafe fn _mm_movepi64_pi64(a: __m128i) -> __m64 { + mem::transmute(simd_extract::<_, i64>(a.as_i64x2(), 0)) +} + +/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the +/// upper bits. +#[inline] +#[target_feature(enable = "sse2,mmx")] +// #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong +// instr? +pub unsafe fn _mm_movpi64_epi64(a: __m64) -> __m128i { + _mm_set_epi64x(0, mem::transmute(a)) +} + +/// Converts the two double-precision floating-point elements of a +/// 128-bit vector of [2 x double] into two signed 32-bit integer values, +/// returned in a 64-bit vector of [2 x i32]. +#[inline] +#[target_feature(enable = "sse2,mmx")] +#[cfg_attr(test, assert_instr(cvtpd2pi))] +pub unsafe fn _mm_cvtpd_pi32(a: __m128d) -> __m64 { + cvtpd2pi(a) +} + +/// Converts the two double-precision floating-point elements of a +/// 128-bit vector of [2 x double] into two signed 32-bit integer values, +/// returned in a 64-bit vector of [2 x i32]. +/// If the result of either conversion is inexact, the result is truncated +/// (rounded towards zero) regardless of the current MXCSR setting. 
+#[inline] +#[target_feature(enable = "sse2,mmx")] +#[cfg_attr(test, assert_instr(cvttpd2pi))] +pub unsafe fn _mm_cvttpd_pi32(a: __m128d) -> __m64 { + cvttpd2pi(a) +} + #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.sse2.pause"] @@ -2371,11 +2478,23 @@ extern "C" { fn storeudq(mem_addr: *mut i8, a: __m128i); #[link_name = "llvm.x86.sse2.storeu.pd"] fn storeupd(mem_addr: *mut i8, a: __m128d); + #[link_name = "llvm.x86.mmx.padd.q"] + fn paddq(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.pmulu.dq"] + fn pmuludq2(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.psub.q"] + fn psubq(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.sse.cvtpi2pd"] + fn cvtpi2pd(a: __m64) -> __m128d; + #[link_name = "llvm.x86.sse.cvtpd2pi"] + fn cvtpd2pi(a: __m128d) -> __m64; + #[link_name = "llvm.x86.sse.cvttpd2pi"] + fn cvttpd2pi(a: __m128d) -> __m64; } #[cfg(test)] mod tests { - use std::mem::transmute; + use std::mem::{self, transmute}; use std::f64::{self, NAN}; use std::f32; use std::i32; @@ -4452,4 +4571,89 @@ mod tests { let r = _mm_castsi128_ps(a); assert_eq_m128(r, expected); } + + #[simd_test = "sse2,mmx"] + unsafe fn test_mm_add_si64() { + let a = 1i64; + let b = 2i64; + let expected = 3i64; + let r = _mm_add_si64(mem::transmute(a), mem::transmute(b)); + assert_eq!(mem::transmute::<__m64, i64>(r), expected); + } + + #[simd_test = "sse2,mmx"] + unsafe fn test_mm_mul_su32() { + let a = _mm_setr_pi32(1, 2); + let b = _mm_setr_pi32(3, 4); + let expected = 3u64; + let r = _mm_mul_su32(a, b); + assert_eq_m64(r, mem::transmute(expected)); + } + + #[simd_test = "sse2,mmx"] + unsafe fn test_mm_sub_si64() { + let a = 1i64; + let b = 2i64; + let expected = -1i64; + let r = _mm_sub_si64(mem::transmute(a), mem::transmute(b)); + assert_eq!(mem::transmute::<__m64, i64>(r), expected); + } + + #[simd_test = "sse2,mmx"] + unsafe fn test_mm_cvtpi32_pd() { + let a = _mm_setr_pi32(1, 2); + let expected = _mm_setr_pd(1., 2.); + let r = 
_mm_cvtpi32_pd(a); + assert_eq_m128d(r, expected); + } + + #[simd_test = "sse2,mmx"] + unsafe fn test_mm_set_epi64() { + let r = _mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64)); + assert_eq_m128i(r, _mm_setr_epi64x(2, 1)); + } + + #[simd_test = "sse2,mmx"] + unsafe fn test_mm_set1_epi64() { + let r = _mm_set1_epi64(mem::transmute(1i64)); + assert_eq_m128i(r, _mm_setr_epi64x(1, 1)); + } + + #[simd_test = "sse2,mmx"] + unsafe fn test_mm_setr_epi64() { + let r = _mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64)); + assert_eq_m128i(r, _mm_setr_epi64x(1, 2)); + } + + #[simd_test = "sse2,mmx"] + unsafe fn test_mm_movepi64_pi64() { + let r = _mm_movepi64_pi64(_mm_setr_epi64x(5, 0)); + assert_eq_m64(r, _mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0)); + } + + #[simd_test = "sse2,mmx"] + unsafe fn test_mm_movpi64_epi64() { + let r = _mm_movpi64_epi64(_mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0)); + assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); + } + + #[simd_test = "sse2,mmx"] + unsafe fn test_mm_cvtpd_pi32() { + let a = _mm_setr_pd(5., 0.); + let r = _mm_cvtpd_pi32(a); + assert_eq_m64(r, _mm_setr_pi32(5, 0)); + } + + #[simd_test = "sse2,mmx"] + unsafe fn test_mm_cvttpd_pi32() { + use std::{f64, i32}; + + let a = _mm_setr_pd(5., 0.); + let r = _mm_cvttpd_pi32(a); + assert_eq_m64(r, _mm_setr_pi32(5, 0)); + + let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); + let r = _mm_cvttpd_pi32(a); + assert_eq_m64(r, _mm_setr_pi32(i32::MIN, i32::MIN)); + } } diff --git a/library/stdarch/coresimd/x86/i586/sse3.rs b/library/stdarch/coresimd/x86/sse3.rs similarity index 100% rename from library/stdarch/coresimd/x86/i586/sse3.rs rename to library/stdarch/coresimd/x86/sse3.rs diff --git a/library/stdarch/coresimd/x86/i586/sse41.rs b/library/stdarch/coresimd/x86/sse41.rs similarity index 88% rename from library/stdarch/coresimd/x86/i586/sse41.rs rename to library/stdarch/coresimd/x86/sse41.rs index d19581522a9e..80ac6555c6cb 100644 --- a/library/stdarch/coresimd/x86/i586/sse41.rs +++ 
b/library/stdarch/coresimd/x86/sse41.rs @@ -797,6 +797,125 @@ pub unsafe fn _mm_mpsadbw_epu8(a: __m128i, b: __m128i, imm8: i32) -> __m128i { mem::transmute(constify_imm3!(imm8, call)) } +/// Tests whether the specified bits in a 128-bit integer vector are all +/// zeros. +/// +/// Arguments: +/// +/// * `a` - A 128-bit integer vector containing the bits to be tested. +/// * `mask` - A 128-bit integer vector selecting which bits to test in +/// operand `a`. +/// +/// Returns: +/// +/// * `1` - if the specified bits are all zeros, +/// * `0` - otherwise. +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(ptest))] +pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 { + ptestz(a.as_i64x2(), mask.as_i64x2()) +} + +/// Tests whether the specified bits in a 128-bit integer vector are all +/// ones. +/// +/// Arguments: +/// +/// * `a` - A 128-bit integer vector containing the bits to be tested. +/// * `mask` - A 128-bit integer vector selecting which bits to test in +/// operand `a`. +/// +/// Returns: +/// +/// * `1` - if the specified bits are all ones, +/// * `0` - otherwise. +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(ptest))] +pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 { + ptestc(a.as_i64x2(), mask.as_i64x2()) +} + +/// Tests whether the specified bits in a 128-bit integer vector are +/// neither all zeros nor all ones. +/// +/// Arguments: +/// +/// * `a` - A 128-bit integer vector containing the bits to be tested. +/// * `mask` - A 128-bit integer vector selecting which bits to test in +/// operand `a`. +/// +/// Returns: +/// +/// * `1` - if the specified bits are neither all zeros nor all ones, +/// * `0` - otherwise. 
+#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(ptest))] +pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 { + ptestnzc(a.as_i64x2(), mask.as_i64x2()) +} + +/// Tests whether the specified bits in a 128-bit integer vector are all +/// zeros. +/// +/// Arguments: +/// +/// * `a` - A 128-bit integer vector containing the bits to be tested. +/// * `mask` - A 128-bit integer vector selecting which bits to test in +/// operand `a`. +/// +/// Returns: +/// +/// * `1` - if the specified bits are all zeros, +/// * `0` - otherwise. +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(ptest))] +pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 { + _mm_testz_si128(a, mask) +} + +/// Tests whether the specified bits in `a` 128-bit integer vector are all +/// ones. +/// +/// Argument: +/// +/// * `a` - A 128-bit integer vector containing the bits to be tested. +/// +/// Returns: +/// +/// * `1` - if the bits specified in the operand are all set to 1, +/// * `0` - otherwise. +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pcmpeqd))] +#[cfg_attr(test, assert_instr(ptest))] +pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 { + _mm_testc_si128(a, _mm_cmpeq_epi32(a, a)) +} + +/// Tests whether the specified bits in a 128-bit integer vector are +/// neither all zeros nor all ones. +/// +/// Arguments: +/// +/// * `a` - A 128-bit integer vector containing the bits to be tested. +/// * `mask` - A 128-bit integer vector selecting which bits to test in +/// operand `a`. +/// +/// Returns: +/// +/// * `1` - if the specified bits are neither all zeros nor all ones, +/// * `0` - otherwise. 
+#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(ptest))] +pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 { + _mm_testnzc_si128(a, mask) +} + #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.sse41.pblendvb"] @@ -849,6 +968,12 @@ extern "C" { fn pmuldq(a: i32x4, b: i32x4) -> i64x2; #[link_name = "llvm.x86.sse41.mpsadbw"] fn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8; + #[link_name = "llvm.x86.sse41.ptestz"] + fn ptestz(a: i64x2, mask: i64x2) -> i32; + #[link_name = "llvm.x86.sse41.ptestc"] + fn ptestc(a: i64x2, mask: i64x2) -> i32; + #[link_name = "llvm.x86.sse41.ptestnzc"] + fn ptestnzc(a: i64x2, mask: i64x2) -> i32; } #[cfg(test)] @@ -1476,4 +1601,102 @@ mod tests { let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4); assert_eq_m128i(r, e); } + + #[simd_test = "sse4.1"] + unsafe fn test_mm_testz_si128() { + let a = _mm_set1_epi8(1); + let mask = _mm_set1_epi8(0); + let r = _mm_testz_si128(a, mask); + assert_eq!(r, 1); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b110); + let r = _mm_testz_si128(a, mask); + assert_eq!(r, 0); + let a = _mm_set1_epi8(0b011); + let mask = _mm_set1_epi8(0b100); + let r = _mm_testz_si128(a, mask); + assert_eq!(r, 1); + } + + #[simd_test = "sse4.1"] + unsafe fn test_mm_testc_si128() { + let a = _mm_set1_epi8(-1); + let mask = _mm_set1_epi8(0); + let r = _mm_testc_si128(a, mask); + assert_eq!(r, 1); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b110); + let r = _mm_testc_si128(a, mask); + assert_eq!(r, 0); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b100); + let r = _mm_testc_si128(a, mask); + assert_eq!(r, 1); + } + + #[simd_test = "sse4.1"] + unsafe fn test_mm_testnzc_si128() { + let a = _mm_set1_epi8(0); + let mask = _mm_set1_epi8(1); + let r = _mm_testnzc_si128(a, mask); + assert_eq!(r, 0); + let a = _mm_set1_epi8(-1); + let mask = _mm_set1_epi8(0); + let r = _mm_testnzc_si128(a, mask); + assert_eq!(r, 0); + let a = 
_mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b110); + let r = _mm_testnzc_si128(a, mask); + assert_eq!(r, 1); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b101); + let r = _mm_testnzc_si128(a, mask); + assert_eq!(r, 0); + } + + #[simd_test = "sse4.1"] + unsafe fn test_mm_test_all_zeros() { + let a = _mm_set1_epi8(1); + let mask = _mm_set1_epi8(0); + let r = _mm_test_all_zeros(a, mask); + assert_eq!(r, 1); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b110); + let r = _mm_test_all_zeros(a, mask); + assert_eq!(r, 0); + let a = _mm_set1_epi8(0b011); + let mask = _mm_set1_epi8(0b100); + let r = _mm_test_all_zeros(a, mask); + assert_eq!(r, 1); + } + + #[simd_test = "sse4.1"] + unsafe fn test_mm_test_all_ones() { + let a = _mm_set1_epi8(-1); + let r = _mm_test_all_ones(a); + assert_eq!(r, 1); + let a = _mm_set1_epi8(0b101); + let r = _mm_test_all_ones(a); + assert_eq!(r, 0); + } + + #[simd_test = "sse4.1"] + unsafe fn test_mm_test_mix_ones_zeros() { + let a = _mm_set1_epi8(0); + let mask = _mm_set1_epi8(1); + let r = _mm_test_mix_ones_zeros(a, mask); + assert_eq!(r, 0); + let a = _mm_set1_epi8(-1); + let mask = _mm_set1_epi8(0); + let r = _mm_test_mix_ones_zeros(a, mask); + assert_eq!(r, 0); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b110); + let r = _mm_test_mix_ones_zeros(a, mask); + assert_eq!(r, 1); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b101); + let r = _mm_test_mix_ones_zeros(a, mask); + assert_eq!(r, 0); + } } diff --git a/library/stdarch/coresimd/x86/i586/sse42.rs b/library/stdarch/coresimd/x86/sse42.rs similarity index 97% rename from library/stdarch/coresimd/x86/i586/sse42.rs rename to library/stdarch/coresimd/x86/sse42.rs index dcb9fd849823..4504642a2a77 100644 --- a/library/stdarch/coresimd/x86/i586/sse42.rs +++ b/library/stdarch/coresimd/x86/sse42.rs @@ -5,6 +5,7 @@ #[cfg(test)] use stdsimd_test::assert_instr; +use coresimd::simd_llvm::*; use coresimd::v128::*; use coresimd::x86::*; @@ 
-601,6 +602,15 @@ pub unsafe fn _mm_crc32_u32(crc: u32, v: u32) -> u32 { crc32_32_32(crc, v) } +/// Compare packed 64-bit integers in `a` and `b` for greater-than, +/// return the results. +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(pcmpgtq))] +pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) +} + #[allow(improper_ctypes)] extern "C" { // SSE 4.2 string and text comparison ops @@ -826,4 +836,15 @@ mod tests { let i = _mm_crc32_u32(crc, v); assert_eq!(i, 0xffae2ed1); } + + #[simd_test = "sse4.2"] + unsafe fn test_mm_cmpgt_epi64() { + let a = _mm_setr_epi64x(0, 0x2a); + let b = _mm_set1_epi64x(0x00); + let i = _mm_cmpgt_epi64(a, b); + assert_eq_m128i( + i, + _mm_setr_epi64x(0x00, 0xffffffffffffffffu64 as i64), + ); + } } diff --git a/library/stdarch/coresimd/x86/i686/sse4a.rs b/library/stdarch/coresimd/x86/sse4a.rs similarity index 100% rename from library/stdarch/coresimd/x86/i686/sse4a.rs rename to library/stdarch/coresimd/x86/sse4a.rs diff --git a/library/stdarch/coresimd/x86/i586/ssse3.rs b/library/stdarch/coresimd/x86/ssse3.rs similarity index 58% rename from library/stdarch/coresimd/x86/i586/ssse3.rs rename to library/stdarch/coresimd/x86/ssse3.rs index 308cdffa2f52..7b97443fb596 100644 --- a/library/stdarch/coresimd/x86/i586/ssse3.rs +++ b/library/stdarch/coresimd/x86/ssse3.rs @@ -239,6 +239,169 @@ pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(psignd128(a.as_i32x4(), b.as_i32x4())) } +/// Compute the absolute value of packed 8-bit integers in `a` and +/// return the unsigned results. +#[inline] +#[target_feature(enable = "ssse3,mmx")] +#[cfg_attr(test, assert_instr(pabsb))] +pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 { + pabsb(a) +} + +/// Compute the absolute value of packed 8-bit integers in `a`, and return the +/// unsigned results. 
+#[inline] +#[target_feature(enable = "ssse3,mmx")] +#[cfg_attr(test, assert_instr(pabsw))] +pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 { + pabsw(a) +} + +/// Compute the absolute value of packed 32-bit integers in `a`, and return the +/// unsigned results. +#[inline] +#[target_feature(enable = "ssse3,mmx")] +#[cfg_attr(test, assert_instr(pabsd))] +pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 { + pabsd(a) +} + +/// Shuffle packed 8-bit integers in `a` according to shuffle control mask in +/// the corresponding 8-bit element of `b`, and return the results +#[inline] +#[target_feature(enable = "ssse3,mmx")] +#[cfg_attr(test, assert_instr(pshufb))] +pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 { + pshufb(a, b) +} + +/// Concatenates the two 64-bit integer vector operands, and right-shifts +/// the result by the number of bytes specified in the immediate operand. +#[inline] +#[target_feature(enable = "ssse3,mmx")] +#[cfg_attr(test, assert_instr(palignr, n = 15))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 { + macro_rules! call { + ($imm8:expr) => { + palignrb(a, b, $imm8) + } + } + constify_imm8!(n, call) +} + +/// Horizontally add the adjacent pairs of values contained in 2 packed +/// 64-bit vectors of [4 x i16]. +#[inline] +#[target_feature(enable = "ssse3,mmx")] +#[cfg_attr(test, assert_instr(phaddw))] +pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 { + phaddw(a, b) +} + +/// Horizontally add the adjacent pairs of values contained in 2 packed +/// 64-bit vectors of [2 x i32]. +#[inline] +#[target_feature(enable = "ssse3,mmx")] +#[cfg_attr(test, assert_instr(phaddd))] +pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 { + phaddd(a, b) +} + +/// Horizontally add the adjacent pairs of values contained in 2 packed +/// 64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are +/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h. 
+#[inline] +#[target_feature(enable = "ssse3,mmx")] +#[cfg_attr(test, assert_instr(phaddsw))] +pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 { + phaddsw(a, b) +} + +/// Horizontally subtracts the adjacent pairs of values contained in 2 +/// packed 64-bit vectors of [4 x i16]. +#[inline] +#[target_feature(enable = "ssse3,mmx")] +#[cfg_attr(test, assert_instr(phsubw))] +pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 { + phsubw(a, b) +} + +/// Horizontally subtracts the adjacent pairs of values contained in 2 +/// packed 64-bit vectors of [2 x i32]. +#[inline] +#[target_feature(enable = "ssse3,mmx")] +#[cfg_attr(test, assert_instr(phsubd))] +pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 { + phsubd(a, b) +} + +/// Horizontally subtracts the adjacent pairs of values contained in 2 +/// packed 64-bit vectors of [4 x i16]. Positive differences greater than +/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are +/// saturated to 8000h. +#[inline] +#[target_feature(enable = "ssse3,mmx")] +#[cfg_attr(test, assert_instr(phsubsw))] +pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 { + phsubsw(a, b) +} + +/// Multiplies corresponding pairs of packed 8-bit unsigned integer +/// values contained in the first source operand and packed 8-bit signed +/// integer values contained in the second source operand, adds pairs of +/// contiguous products with signed saturation, and writes the 16-bit sums to +/// the corresponding bits in the destination. +#[inline] +#[target_feature(enable = "ssse3,mmx")] +#[cfg_attr(test, assert_instr(pmaddubsw))] +pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 { + pmaddubsw(a, b) +} + +/// Multiplies packed 16-bit signed integer values, truncates the 32-bit +/// products to the 18 most significant bits by right-shifting, rounds the +/// truncated value by adding 1, and writes bits [16:1] to the destination. 
+#[inline] +#[target_feature(enable = "ssse3,mmx")] +#[cfg_attr(test, assert_instr(pmulhrsw))] +pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 { + pmulhrsw(a, b) +} + +/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit +/// integer in `b` is negative, and return the results. +/// Element in result are zeroed out when the corresponding element in `b` is +/// zero. +#[inline] +#[target_feature(enable = "ssse3,mmx")] +#[cfg_attr(test, assert_instr(psignb))] +pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 { + psignb(a, b) +} + +/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit +/// integer in `b` is negative, and return the results. +/// Element in result are zeroed out when the corresponding element in `b` is +/// zero. +#[inline] +#[target_feature(enable = "ssse3,mmx")] +#[cfg_attr(test, assert_instr(psignw))] +pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 { + psignw(a, b) +} + +/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit +/// integer in `b` is negative, and return the results. +/// Element in result are zeroed out when the corresponding element in `b` is +/// zero. 
+#[inline] +#[target_feature(enable = "ssse3,mmx")] +#[cfg_attr(test, assert_instr(psignd))] +pub unsafe fn _mm_sign_pi32(a: __m64, b: __m64) -> __m64 { + psignd(a, b) +} + #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.ssse3.pabs.b.128"] @@ -285,6 +448,54 @@ extern "C" { #[link_name = "llvm.x86.ssse3.psign.d.128"] fn psignd128(a: i32x4, b: i32x4) -> i32x4; + + #[link_name = "llvm.x86.ssse3.pabs.b"] + fn pabsb(a: __m64) -> __m64; + + #[link_name = "llvm.x86.ssse3.pabs.w"] + fn pabsw(a: __m64) -> __m64; + + #[link_name = "llvm.x86.ssse3.pabs.d"] + fn pabsd(a: __m64) -> __m64; + + #[link_name = "llvm.x86.ssse3.pshuf.b"] + fn pshufb(a: __m64, b: __m64) -> __m64; + + #[link_name = "llvm.x86.mmx.palignr.b"] + fn palignrb(a: __m64, b: __m64, n: u8) -> __m64; + + #[link_name = "llvm.x86.ssse3.phadd.w"] + fn phaddw(a: __m64, b: __m64) -> __m64; + + #[link_name = "llvm.x86.ssse3.phadd.d"] + fn phaddd(a: __m64, b: __m64) -> __m64; + + #[link_name = "llvm.x86.ssse3.phadd.sw"] + fn phaddsw(a: __m64, b: __m64) -> __m64; + + #[link_name = "llvm.x86.ssse3.phsub.w"] + fn phsubw(a: __m64, b: __m64) -> __m64; + + #[link_name = "llvm.x86.ssse3.phsub.d"] + fn phsubd(a: __m64, b: __m64) -> __m64; + + #[link_name = "llvm.x86.ssse3.phsub.sw"] + fn phsubsw(a: __m64, b: __m64) -> __m64; + + #[link_name = "llvm.x86.ssse3.pmadd.ub.sw"] + fn pmaddubsw(a: __m64, b: __m64) -> __m64; + + #[link_name = "llvm.x86.ssse3.pmul.hr.sw"] + fn pmulhrsw(a: __m64, b: __m64) -> __m64; + + #[link_name = "llvm.x86.ssse3.psign.b"] + fn psignb(a: __m64, b: __m64) -> __m64; + + #[link_name = "llvm.x86.ssse3.psign.w"] + fn psignw(a: __m64, b: __m64) -> __m64; + + #[link_name = "llvm.x86.ssse3.psign.d"] + fn psignd(a: __m64, b: __m64) -> __m64; } #[cfg(test)] @@ -491,4 +702,138 @@ mod tests { let r = _mm_sign_epi32(a, b); assert_eq_m128i(r, expected); } + + #[simd_test = "ssse3,mmx"] + unsafe fn test_mm_abs_pi8() { + let r = _mm_abs_pi8(_mm_set1_pi8(-5)); + assert_eq_m64(r, _mm_set1_pi8(5)); + } + 
+ #[simd_test = "ssse3,mmx"] + unsafe fn test_mm_abs_pi16() { + let r = _mm_abs_pi16(_mm_set1_pi16(-5)); + assert_eq_m64(r, _mm_set1_pi16(5)); + } + + #[simd_test = "ssse3,mmx"] + unsafe fn test_mm_abs_pi32() { + let r = _mm_abs_pi32(_mm_set1_pi32(-5)); + assert_eq_m64(r, _mm_set1_pi32(5)); + } + + #[simd_test = "ssse3,mmx"] + unsafe fn test_mm_shuffle_pi8() { + let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_setr_pi8(4, 128u8 as i8, 4, 3, 24, 12, 6, 19); + let expected = _mm_setr_pi8(5, 0, 5, 4, 1, 5, 7, 4); + let r = _mm_shuffle_pi8(a, b); + assert_eq_m64(r, expected); + } + + #[simd_test = "ssse3,mmx"] + unsafe fn test_mm_alignr_pi8() { + let a = _mm_setr_pi32(0x89ABCDEF_u32 as i32, 0x01234567_u32 as i32); + let b = _mm_setr_pi32(0xBBAA9988_u32 as i32, 0xFFDDEECC_u32 as i32); + let r = _mm_alignr_pi8(a, b, 4); + assert_eq_m64(r, ::std::mem::transmute(0x89abcdefffddeecc_u64)); + } + + #[simd_test = "ssse3,mmx"] + unsafe fn test_mm_hadd_pi16() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let b = _mm_setr_pi16(4, 128, 4, 3); + let expected = _mm_setr_pi16(3, 7, 132, 7); + let r = _mm_hadd_pi16(a, b); + assert_eq_m64(r, expected); + } + + #[simd_test = "ssse3,mmx"] + unsafe fn test_mm_hadd_pi32() { + let a = _mm_setr_pi32(1, 2); + let b = _mm_setr_pi32(4, 128); + let expected = _mm_setr_pi32(3, 132); + let r = _mm_hadd_pi32(a, b); + assert_eq_m64(r, expected); + } + + #[simd_test = "ssse3,mmx"] + unsafe fn test_mm_hadds_pi16() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let b = _mm_setr_pi16(32767, 1, -32768, -1); + let expected = _mm_setr_pi16(3, 7, 32767, -32768); + let r = _mm_hadds_pi16(a, b); + assert_eq_m64(r, expected); + } + + #[simd_test = "ssse3,mmx"] + unsafe fn test_mm_hsub_pi16() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let b = _mm_setr_pi16(4, 128, 4, 3); + let expected = _mm_setr_pi16(-1, -1, -124, 1); + let r = _mm_hsub_pi16(a, b); + assert_eq_m64(r, expected); + } + + #[simd_test = "ssse3,mmx"] + unsafe fn test_mm_hsub_pi32() { + let a = 
_mm_setr_pi32(1, 2); + let b = _mm_setr_pi32(4, 128); + let expected = _mm_setr_pi32(-1, -124); + let r = _mm_hsub_pi32(a, b); + assert_eq_m64(r, expected); + } + + #[simd_test = "ssse3,mmx"] + unsafe fn test_mm_hsubs_pi16() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let b = _mm_setr_pi16(4, 128, 4, 3); + let expected = _mm_setr_pi16(-1, -1, -124, 1); + let r = _mm_hsubs_pi16(a, b); + assert_eq_m64(r, expected); + } + + #[simd_test = "ssse3,mmx"] + unsafe fn test_mm_maddubs_pi16() { + let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_setr_pi8(4, 63, 4, 3, 24, 12, 6, 19); + let expected = _mm_setr_pi16(130, 24, 192, 194); + let r = _mm_maddubs_pi16(a, b); + assert_eq_m64(r, expected); + } + + #[simd_test = "ssse3,mmx"] + unsafe fn test_mm_mulhrs_pi16() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let b = _mm_setr_pi16(4, 32767, -1, -32768); + let expected = _mm_setr_pi16(0, 2, 0, -4); + let r = _mm_mulhrs_pi16(a, b); + assert_eq_m64(r, expected); + } + + #[simd_test = "ssse3,mmx"] + unsafe fn test_mm_sign_pi8() { + let a = _mm_setr_pi8(1, 2, 3, 4, -5, -6, 7, 8); + let b = _mm_setr_pi8(4, 64, 0, 3, 1, -1, -2, 1); + let expected = _mm_setr_pi8(1, 2, 0, 4, -5, 6, -7, 8); + let r = _mm_sign_pi8(a, b); + assert_eq_m64(r, expected); + } + + #[simd_test = "ssse3,mmx"] + unsafe fn test_mm_sign_pi16() { + let a = _mm_setr_pi16(-1, 2, 3, 4); + let b = _mm_setr_pi16(1, -1, 1, 0); + let expected = _mm_setr_pi16(-1, -2, 3, 0); + let r = _mm_sign_pi16(a, b); + assert_eq_m64(r, expected); + } + + #[simd_test = "ssse3,mmx"] + unsafe fn test_mm_sign_pi32() { + let a = _mm_setr_pi32(-1, 2); + let b = _mm_setr_pi32(1, 0); + let expected = _mm_setr_pi32(-1, 0); + let r = _mm_sign_pi32(a, b); + assert_eq_m64(r, expected); + } } diff --git a/library/stdarch/coresimd/x86/i586/tbm.rs b/library/stdarch/coresimd/x86/tbm.rs similarity index 79% rename from library/stdarch/coresimd/x86/i586/tbm.rs rename to library/stdarch/coresimd/x86/tbm.rs index 01186a099cd3..fa5b08d197be 100644 --- 
a/library/stdarch/coresimd/x86/i586/tbm.rs +++ b/library/stdarch/coresimd/x86/tbm.rs @@ -263,41 +263,41 @@ pub unsafe fn _tzmsk_u64(x: u64) -> u64 { mod tests { use stdsimd_test::simd_test; - use coresimd::x86::i586::tbm; + use coresimd::x86::*; /* #[simd_test = "tbm"] - unsafe fn _bextr_u32() { - assert_eq!(tbm::_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32); + unsafe fn test_bextr_u32() { + assert_eq!(_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32); } #[simd_test = "tbm"] - unsafe fn _bextr_u64() { - assert_eq!(tbm::_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64); + unsafe fn test_bextr_u64() { + assert_eq!(_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64); } */ #[simd_test = "tbm"] - unsafe fn _blcfill_u32() { - assert_eq!(tbm::_blcfill_u32(0b0101_0111u32), 0b0101_0000u32); - assert_eq!(tbm::_blcfill_u32(0b1111_1111u32), 0u32); + unsafe fn test_blcfill_u32() { + assert_eq!(_blcfill_u32(0b0101_0111u32), 0b0101_0000u32); + assert_eq!(_blcfill_u32(0b1111_1111u32), 0u32); } #[simd_test = "tbm"] #[cfg(not(target_arch = "x86"))] - unsafe fn _blcfill_u64() { - assert_eq!(tbm::_blcfill_u64(0b0101_0111u64), 0b0101_0000u64); - assert_eq!(tbm::_blcfill_u64(0b1111_1111u64), 0u64); + unsafe fn test_blcfill_u64() { + assert_eq!(_blcfill_u64(0b0101_0111u64), 0b0101_0000u64); + assert_eq!(_blcfill_u64(0b1111_1111u64), 0u64); } #[simd_test = "tbm"] - unsafe fn _blci_u32() { + unsafe fn test_blci_u32() { assert_eq!( - tbm::_blci_u32(0b0101_0000u32), + _blci_u32(0b0101_0000u32), 0b1111_1111_1111_1111_1111_1111_1111_1110u32 ); assert_eq!( - tbm::_blci_u32(0b1111_1111u32), + _blci_u32(0b1111_1111u32), 0b1111_1111_1111_1111_1111_1110_1111_1111u32 ); } @@ -305,61 +305,61 @@ mod tests { #[simd_test = "tbm"] #[cfg(not(target_arch = "x86"))] #[cfg_attr(rustfmt, rustfmt_skip)] - unsafe fn _blci_u64() { + unsafe fn test_blci_u64() { assert_eq!( - tbm::_blci_u64(0b0101_0000u64), + _blci_u64(0b0101_0000u64), 
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110u64 ); assert_eq!( - tbm::_blci_u64(0b1111_1111u64), + _blci_u64(0b1111_1111u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110_1111_1111u64 ); } #[simd_test = "tbm"] - unsafe fn _blcic_u32() { - assert_eq!(tbm::_blcic_u32(0b0101_0001u32), 0b0000_0010u32); - assert_eq!(tbm::_blcic_u32(0b1111_1111u32), 0b1_0000_0000u32); + unsafe fn test_blcic_u32() { + assert_eq!(_blcic_u32(0b0101_0001u32), 0b0000_0010u32); + assert_eq!(_blcic_u32(0b1111_1111u32), 0b1_0000_0000u32); } #[simd_test = "tbm"] #[cfg(not(target_arch = "x86"))] - unsafe fn _blcic_u64() { - assert_eq!(tbm::_blcic_u64(0b0101_0001u64), 0b0000_0010u64); - assert_eq!(tbm::_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64); + unsafe fn test_blcic_u64() { + assert_eq!(_blcic_u64(0b0101_0001u64), 0b0000_0010u64); + assert_eq!(_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64); } #[simd_test = "tbm"] - unsafe fn _blcmsk_u32() { - assert_eq!(tbm::_blcmsk_u32(0b0101_0001u32), 0b0000_0011u32); - assert_eq!(tbm::_blcmsk_u32(0b1111_1111u32), 0b1_1111_1111u32); + unsafe fn test_blcmsk_u32() { + assert_eq!(_blcmsk_u32(0b0101_0001u32), 0b0000_0011u32); + assert_eq!(_blcmsk_u32(0b1111_1111u32), 0b1_1111_1111u32); } #[simd_test = "tbm"] #[cfg(not(target_arch = "x86"))] - unsafe fn _blcmsk_u64() { - assert_eq!(tbm::_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64); - assert_eq!(tbm::_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64); + unsafe fn test_blcmsk_u64() { + assert_eq!(_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64); + assert_eq!(_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64); } #[simd_test = "tbm"] - unsafe fn _blcs_u32() { - assert_eq!(tbm::_blcs_u32(0b0101_0001u32), 0b0101_0011u32); - assert_eq!(tbm::_blcs_u32(0b1111_1111u32), 0b1_1111_1111u32); + unsafe fn test_blcs_u32() { + assert_eq!(_blcs_u32(0b0101_0001u32), 0b0101_0011u32); + assert_eq!(_blcs_u32(0b1111_1111u32), 0b1_1111_1111u32); } #[simd_test = "tbm"] 
#[cfg(not(target_arch = "x86"))] - unsafe fn _blcs_u64() { - assert_eq!(tbm::_blcs_u64(0b0101_0001u64), 0b0101_0011u64); - assert_eq!(tbm::_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64); + unsafe fn test_blcs_u64() { + assert_eq!(_blcs_u64(0b0101_0001u64), 0b0101_0011u64); + assert_eq!(_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64); } #[simd_test = "tbm"] - unsafe fn _blsfill_u32() { - assert_eq!(tbm::_blsfill_u32(0b0101_0100u32), 0b0101_0111u32); + unsafe fn test_blsfill_u32() { + assert_eq!(_blsfill_u32(0b0101_0100u32), 0b0101_0111u32); assert_eq!( - tbm::_blsfill_u32(0u32), + _blsfill_u32(0u32), 0b1111_1111_1111_1111_1111_1111_1111_1111u32 ); } @@ -367,22 +367,22 @@ mod tests { #[simd_test = "tbm"] #[cfg(not(target_arch = "x86"))] #[cfg_attr(rustfmt, rustfmt_skip)] - unsafe fn _blsfill_u64() { - assert_eq!(tbm::_blsfill_u64(0b0101_0100u64), 0b0101_0111u64); + unsafe fn test_blsfill_u64() { + assert_eq!(_blsfill_u64(0b0101_0100u64), 0b0101_0111u64); assert_eq!( - tbm::_blsfill_u64(0u64), + _blsfill_u64(0u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64 ); } #[simd_test = "tbm"] - unsafe fn _blsic_u32() { + unsafe fn test_blsic_u32() { assert_eq!( - tbm::_blsic_u32(0b0101_0100u32), + _blsic_u32(0b0101_0100u32), 0b1111_1111_1111_1111_1111_1111_1111_1011u32 ); assert_eq!( - tbm::_blsic_u32(0u32), + _blsic_u32(0u32), 0b1111_1111_1111_1111_1111_1111_1111_1111u32 ); } @@ -390,25 +390,25 @@ mod tests { #[simd_test = "tbm"] #[cfg(not(target_arch = "x86"))] #[cfg_attr(rustfmt, rustfmt_skip)] - unsafe fn _blsic_u64() { + unsafe fn test_blsic_u64() { assert_eq!( - tbm::_blsic_u64(0b0101_0100u64), + _blsic_u64(0b0101_0100u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1011u64 ); assert_eq!( - tbm::_blsic_u64(0u64), + _blsic_u64(0u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64 ); } #[simd_test = "tbm"] - unsafe fn _t1mskc_u32() { + unsafe fn test_t1mskc_u32() { 
assert_eq!( - tbm::_t1mskc_u32(0b0101_0111u32), + _t1mskc_u32(0b0101_0111u32), 0b1111_1111_1111_1111_1111_1111_1111_1000u32 ); assert_eq!( - tbm::_t1mskc_u32(0u32), + _t1mskc_u32(0u32), 0b1111_1111_1111_1111_1111_1111_1111_1111u32 ); } @@ -416,27 +416,27 @@ mod tests { #[simd_test = "tbm"] #[cfg(not(target_arch = "x86"))] #[cfg_attr(rustfmt, rustfmt_skip)] - unsafe fn _t1mksc_u64() { + unsafe fn test_t1mskc_u64() { assert_eq!( - tbm::_t1mskc_u64(0b0101_0111u64), + _t1mskc_u64(0b0101_0111u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1000u64 ); assert_eq!( - tbm::_t1mskc_u64(0u64), + _t1mskc_u64(0u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64 ); } #[simd_test = "tbm"] - unsafe fn _tzmsk_u32() { - assert_eq!(tbm::_tzmsk_u32(0b0101_1000u32), 0b0000_0111u32); - assert_eq!(tbm::_tzmsk_u32(0b0101_1001u32), 0b0000_0000u32); + unsafe fn test_tzmsk_u32() { + assert_eq!(_tzmsk_u32(0b0101_1000u32), 0b0000_0111u32); + assert_eq!(_tzmsk_u32(0b0101_1001u32), 0b0000_0000u32); } #[simd_test = "tbm"] #[cfg(not(target_arch = "x86"))] - unsafe fn _tzmsk_u64() { - assert_eq!(tbm::_tzmsk_u64(0b0101_1000u64), 0b0000_0111u64); - assert_eq!(tbm::_tzmsk_u64(0b0101_1001u64), 0b0000_0000u64); + unsafe fn test_tzmsk_u64() { + assert_eq!(_tzmsk_u64(0b0101_1000u64), 0b0000_0111u64); + assert_eq!(_tzmsk_u64(0b0101_1001u64), 0b0000_0000u64); } } diff --git a/library/stdarch/coresimd/x86/test.rs b/library/stdarch/coresimd/x86/test.rs index 48be66cb58a1..e03f3c413c8d 100644 --- a/library/stdarch/coresimd/x86/test.rs +++ b/library/stdarch/coresimd/x86/test.rs @@ -103,7 +103,7 @@ pub unsafe fn get_m256(a: __m256, idx: usize) -> f32 { A { a }.b[idx] } -// These intrinsics doesn't exist on x86 b/c it requires a 64-bit registe,r +// These intrinsics don't exist on x86 b/c they require a 64-bit register, // which doesn't exist on x86! 
#[cfg(target_arch = "x86")] mod x86_polyfill { @@ -132,5 +132,8 @@ mod x86_polyfill { a.a } } -#[cfg(target_arch = "x86")] +#[cfg(target_arch = "x86_64")] +mod x86_polyfill { + pub use coresimd::x86_64::{_mm_insert_epi64, _mm256_insert_epi64}; +} pub use self::x86_polyfill::*; diff --git a/library/stdarch/coresimd/x86/i586/xsave.rs b/library/stdarch/coresimd/x86/xsave.rs similarity index 92% rename from library/stdarch/coresimd/x86/i586/xsave.rs rename to library/stdarch/coresimd/x86/xsave.rs index 27ee1aa60201..8cda7a58516c 100644 --- a/library/stdarch/coresimd/x86/i586/xsave.rs +++ b/library/stdarch/coresimd/x86/xsave.rs @@ -54,7 +54,7 @@ pub unsafe fn _xrstor(mem_addr: *const u8, rs_mask: u64) { /// `XFEATURE_ENABLED_MASK` for `XCR` /// /// This intrinsic maps to `XSETBV` instruction. -const _XCR_XFEATURE_ENABLED_MASK: u32 = 0; +pub const _XCR_XFEATURE_ENABLED_MASK: u32 = 0; /// Copy 64-bits from `val` to the extended control register (`XCR`) specified /// by `a`. @@ -141,7 +141,7 @@ mod tests { use std::fmt; use std::prelude::v1::*; - use coresimd::x86::i586::xsave; + use coresimd::x86::*; use stdsimd_test::simd_test; #[repr(align(64))] @@ -194,23 +194,23 @@ mod tests { let mut a = XsaveArea::new(); let mut b = XsaveArea::new(); - xsave::_xsave(a.ptr(), m); - xsave::_xrstor(a.ptr(), m); - xsave::_xsave(b.ptr(), m); + _xsave(a.ptr(), m); + _xrstor(a.ptr(), m); + _xsave(b.ptr(), m); assert_eq!(a, b); } */ #[simd_test = "xsave"] unsafe fn xgetbv_xsetbv() { - let xcr_n: u32 = xsave::_XCR_XFEATURE_ENABLED_MASK; + let xcr_n: u32 = _XCR_XFEATURE_ENABLED_MASK; - let xcr: u64 = xsave::_xgetbv(xcr_n); + let xcr: u64 = _xgetbv(xcr_n); // FIXME: XSETBV is a privileged instruction we should only test this // when running in privileged mode: // // _xsetbv(xcr_n, xcr); - let xcr_cpy: u64 = xsave::_xgetbv(xcr_n); + let xcr_cpy: u64 = _xgetbv(xcr_n); assert_eq!(xcr, xcr_cpy); } @@ -222,9 +222,9 @@ mod tests { let mut a = XsaveArea::new(); let mut b = XsaveArea::new(); - 
xsave::_xsaveopt(a.ptr(), m); - xsave::_xrstor(a.ptr(), m); - xsave::_xsaveopt(b.ptr(), m); + _xsaveopt(a.ptr(), m); + _xrstor(a.ptr(), m); + _xsaveopt(b.ptr(), m); assert_eq!(a, b); } */ @@ -237,9 +237,9 @@ mod tests { let mut a = XsaveArea::new(); let mut b = XsaveArea::new(); - xsave::_xsavec(a.ptr(), m); - xsave::_xrstor(a.ptr(), m); - xsave::_xsavec(b.ptr(), m); + _xsavec(a.ptr(), m); + _xrstor(a.ptr(), m); + _xsavec(b.ptr(), m); assert_eq!(a, b); } @@ -251,9 +251,9 @@ mod tests { let mut a = XsaveArea::new(); let mut b = XsaveArea::new(); - xsave::_xsaves(a.ptr(), m); - xsave::_xrstors(a.ptr(), m); - xsave::_xsaves(b.ptr(), m); + _xsaves(a.ptr(), m); + _xrstors(a.ptr(), m); + _xsaves(b.ptr(), m); assert_eq!(a, b); } */ diff --git a/library/stdarch/coresimd/x86/x86_64/abm.rs b/library/stdarch/coresimd/x86_64/abm.rs similarity index 97% rename from library/stdarch/coresimd/x86/x86_64/abm.rs rename to library/stdarch/coresimd/x86_64/abm.rs index 2bd6ccbf879c..43fbee28ef6d 100644 --- a/library/stdarch/coresimd/x86/x86_64/abm.rs +++ b/library/stdarch/coresimd/x86_64/abm.rs @@ -42,7 +42,7 @@ pub unsafe fn _popcnt64(x: i64) -> i32 { mod tests { use stdsimd_test::simd_test; - use coresimd::x86::*; + use coresimd::arch::x86_64::*; #[simd_test = "lzcnt"] unsafe fn test_lzcnt_u64() { diff --git a/library/stdarch/coresimd/x86/x86_64/avx.rs b/library/stdarch/coresimd/x86_64/avx.rs similarity index 100% rename from library/stdarch/coresimd/x86/x86_64/avx.rs rename to library/stdarch/coresimd/x86_64/avx.rs diff --git a/library/stdarch/coresimd/x86/x86_64/avx2.rs b/library/stdarch/coresimd/x86_64/avx2.rs similarity index 97% rename from library/stdarch/coresimd/x86/x86_64/avx2.rs rename to library/stdarch/coresimd/x86_64/avx2.rs index 00e5f09ca3d0..86d2863739a1 100644 --- a/library/stdarch/coresimd/x86/x86_64/avx2.rs +++ b/library/stdarch/coresimd/x86_64/avx2.rs @@ -35,7 +35,7 @@ pub unsafe fn _mm256_extract_epi64(a: __m256i, imm8: i32) -> i64 { mod tests { use 
stdsimd_test::simd_test; - use coresimd::x86::*; + use coresimd::arch::x86_64::*; #[simd_test = "avx2"] unsafe fn test_mm256_extract_epi64() { diff --git a/library/stdarch/coresimd/x86/x86_64/bmi.rs b/library/stdarch/coresimd/x86_64/bmi.rs similarity index 99% rename from library/stdarch/coresimd/x86/x86_64/bmi.rs rename to library/stdarch/coresimd/x86_64/bmi.rs index 7c1962ef6a7c..130b1ff06e90 100644 --- a/library/stdarch/coresimd/x86/x86_64/bmi.rs +++ b/library/stdarch/coresimd/x86_64/bmi.rs @@ -102,6 +102,7 @@ mod tests { use stdsimd_test::simd_test; use coresimd::x86::*; + use coresimd::x86_64::*; #[simd_test = "bmi"] unsafe fn test_bextr_u64() { diff --git a/library/stdarch/coresimd/x86/x86_64/bmi2.rs b/library/stdarch/coresimd/x86_64/bmi2.rs similarity index 99% rename from library/stdarch/coresimd/x86/x86_64/bmi2.rs rename to library/stdarch/coresimd/x86_64/bmi2.rs index 77a19ce805b7..d97d371b3eeb 100644 --- a/library/stdarch/coresimd/x86/x86_64/bmi2.rs +++ b/library/stdarch/coresimd/x86_64/bmi2.rs @@ -69,7 +69,7 @@ extern "C" { mod tests { use stdsimd_test::simd_test; - use coresimd::x86::*; + use coresimd::x86_64::*; #[simd_test = "bmi2"] unsafe fn test_pext_u64() { diff --git a/library/stdarch/coresimd/x86/x86_64/bswap.rs b/library/stdarch/coresimd/x86_64/bswap.rs similarity index 100% rename from library/stdarch/coresimd/x86/x86_64/bswap.rs rename to library/stdarch/coresimd/x86_64/bswap.rs diff --git a/library/stdarch/coresimd/x86/x86_64/fxsr.rs b/library/stdarch/coresimd/x86_64/fxsr.rs similarity index 100% rename from library/stdarch/coresimd/x86/x86_64/fxsr.rs rename to library/stdarch/coresimd/x86_64/fxsr.rs diff --git a/library/stdarch/coresimd/x86/x86_64/mod.rs b/library/stdarch/coresimd/x86_64/mod.rs similarity index 100% rename from library/stdarch/coresimd/x86/x86_64/mod.rs rename to library/stdarch/coresimd/x86_64/mod.rs diff --git a/library/stdarch/coresimd/x86/x86_64/rdrand.rs b/library/stdarch/coresimd/x86_64/rdrand.rs similarity index 100% 
rename from library/stdarch/coresimd/x86/x86_64/rdrand.rs rename to library/stdarch/coresimd/x86_64/rdrand.rs diff --git a/library/stdarch/coresimd/x86/x86_64/sse.rs b/library/stdarch/coresimd/x86_64/sse.rs similarity index 99% rename from library/stdarch/coresimd/x86/x86_64/sse.rs rename to library/stdarch/coresimd/x86_64/sse.rs index be81536cad33..808470c17fbb 100644 --- a/library/stdarch/coresimd/x86/x86_64/sse.rs +++ b/library/stdarch/coresimd/x86_64/sse.rs @@ -66,7 +66,7 @@ mod tests { use stdsimd_test::simd_test; - use coresimd::x86::*; + use coresimd::arch::x86_64::*; #[simd_test = "sse"] unsafe fn test_mm_cvtss_si64() { diff --git a/library/stdarch/coresimd/x86/x86_64/sse2.rs b/library/stdarch/coresimd/x86_64/sse2.rs similarity index 99% rename from library/stdarch/coresimd/x86/x86_64/sse2.rs rename to library/stdarch/coresimd/x86_64/sse2.rs index 69bbad4d4b63..fa25a1fe80c9 100644 --- a/library/stdarch/coresimd/x86/x86_64/sse2.rs +++ b/library/stdarch/coresimd/x86_64/sse2.rs @@ -117,7 +117,7 @@ mod tests { use stdsimd_test::simd_test; - use coresimd::x86::*; + use coresimd::arch::x86_64::*; #[simd_test = "sse2"] unsafe fn test_mm_cvtsd_si64() { diff --git a/library/stdarch/coresimd/x86/x86_64/sse41.rs b/library/stdarch/coresimd/x86_64/sse41.rs similarity index 97% rename from library/stdarch/coresimd/x86/x86_64/sse41.rs rename to library/stdarch/coresimd/x86_64/sse41.rs index a0b38d52d22d..eb831033537a 100644 --- a/library/stdarch/coresimd/x86/x86_64/sse41.rs +++ b/library/stdarch/coresimd/x86_64/sse41.rs @@ -31,7 +31,7 @@ pub unsafe fn _mm_insert_epi64(a: __m128i, i: i64, imm8: i32) -> __m128i { #[cfg(test)] mod tests { use stdsimd_test::simd_test; - use coresimd::x86::*; + use coresimd::arch::x86_64::*; #[simd_test = "sse4.1"] unsafe fn test_mm_extract_epi64() { diff --git a/library/stdarch/coresimd/x86/x86_64/sse42.rs b/library/stdarch/coresimd/x86_64/sse42.rs similarity index 95% rename from library/stdarch/coresimd/x86/x86_64/sse42.rs rename to 
library/stdarch/coresimd/x86_64/sse42.rs index f1720a1a9ca2..1cbd04d6a8f0 100644 --- a/library/stdarch/coresimd/x86/x86_64/sse42.rs +++ b/library/stdarch/coresimd/x86_64/sse42.rs @@ -20,7 +20,7 @@ pub unsafe fn _mm_crc32_u64(crc: u64, v: u64) -> u64 { #[cfg(test)] mod tests { - use coresimd::x86::*; + use coresimd::arch::x86_64::*; use stdsimd_test::simd_test; diff --git a/library/stdarch/coresimd/x86/x86_64/xsave.rs b/library/stdarch/coresimd/x86_64/xsave.rs similarity index 100% rename from library/stdarch/coresimd/x86/x86_64/xsave.rs rename to library/stdarch/coresimd/x86_64/xsave.rs diff --git a/library/stdarch/crates/stdsimd-verify/src/lib.rs b/library/stdarch/crates/stdsimd-verify/src/lib.rs index c04a03bba1ee..ddfa7adab93e 100644 --- a/library/stdarch/crates/stdsimd-verify/src/lib.rs +++ b/library/stdarch/crates/stdsimd-verify/src/lib.rs @@ -22,10 +22,10 @@ macro_rules! my_quote { pub fn x86_functions(input: TokenStream) -> TokenStream { let dir = Path::new(env!("CARGO_MANIFEST_DIR")); let root = dir.parent().unwrap(); - let root = root.join("../coresimd/x86"); let mut files = Vec::new(); - walk(&root, &mut files); + walk(&root.join("../coresimd/x86"), &mut files); + walk(&root.join("../coresimd/x86_64"), &mut files); assert!(files.len() > 0); let mut functions = Vec::new();