add mmx module, mmx run-time detection, intrinsics (#220)
* [sse] _mm_cvtps_pi32, _mm_cvt_ps2pi
* [mmx] run-time detection support
* [x86] add mmx module
* [x86] make __m64 public
* [sse] add _mm_cvtps_pi{8,16}, _mm_cvttps_pi32, _mm_cvtt_ps2pi
* move new intrinsics from i586 to i686 module
* mmx requires i686
This commit is contained in:
parent
ef847ac83b
commit
288a30a93e
6 changed files with 205 additions and 16 deletions
|
|
@ -29,6 +29,9 @@ use super::bit;
|
|||
#[macro_export]
|
||||
#[doc(hidden)]
|
||||
macro_rules! __unstable_detect_feature {
|
||||
("mmx") => {
|
||||
$crate::vendor::__unstable_detect_feature(
|
||||
$crate::vendor::__Feature::mmx{}) };
|
||||
("sse") => {
|
||||
$crate::vendor::__unstable_detect_feature(
|
||||
$crate::vendor::__Feature::sse{}) };
|
||||
|
|
@ -165,6 +168,8 @@ macro_rules! __unstable_detect_feature {
|
|||
#[allow(non_camel_case_types)]
|
||||
#[repr(u8)]
|
||||
pub enum __Feature {
|
||||
/// MMX
|
||||
mmx,
|
||||
/// SSE (Streaming SIMD Extensions)
|
||||
sse,
|
||||
/// SSE2 (Streaming SIMD Extensions 2)
|
||||
|
|
@ -332,6 +337,7 @@ pub fn detect_features() -> usize {
|
|||
enable(proc_info_ecx, 20, __Feature::sse4_2);
|
||||
enable(proc_info_ecx, 23, __Feature::popcnt);
|
||||
enable(proc_info_edx, 24, __Feature::fxsr);
|
||||
enable(proc_info_edx, 23, __Feature::mmx);
|
||||
enable(proc_info_edx, 25, __Feature::sse);
|
||||
enable(proc_info_edx, 26, __Feature::sse2);
|
||||
|
||||
|
|
|
|||
|
|
@ -626,10 +626,6 @@ pub unsafe fn _mm_cvt_ss2si(a: f32x4) -> i32 {
|
|||
_mm_cvtss_si32(a)
|
||||
}
|
||||
|
||||
// Blocked by https://github.com/rust-lang-nursery/stdsimd/issues/74
|
||||
// pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> i32x2
|
||||
// pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> i32x2 { _mm_cvtps_pi32(a) }
|
||||
|
||||
/// Convert the lowest 32 bit float in the input vector to a 32 bit integer
|
||||
/// with
|
||||
/// truncation.
|
||||
|
|
@ -655,10 +651,6 @@ pub unsafe fn _mm_cvtt_ss2si(a: f32x4) -> i32 {
|
|||
_mm_cvttss_si32(a)
|
||||
}
|
||||
|
||||
// Blocked by https://github.com/rust-lang-nursery/stdsimd/issues/74
|
||||
// pub unsafe fn _mm_cvttps_pi32(a: f32x4) -> i32x2;
|
||||
// pub unsafe fn _mm_cvtt_ps2pi(a: f32x4) -> i32x2 { _mm_cvttps_pi32(a) }
|
||||
|
||||
/// Extract the lowest 32 bit float from the input vector.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
|
|
|
|||
88
library/stdarch/coresimd/src/x86/i686/mmx.rs
Normal file
88
library/stdarch/coresimd/src/x86/i686/mmx.rs
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
//! `i686` MMX instruction set.
|
||||
//!
|
||||
//! The intrinsics here roughly correspond to those in the `mmintrin.h` C
|
||||
//! header.
|
||||
//!
|
||||
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
|
||||
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
|
||||
//!
|
||||
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
|
||||
|
||||
use v64::{i16x4, i32x2, i8x8};
|
||||
use x86::__m64;
|
||||
use core::mem;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
/// Constructs a 64-bit integer vector initialized to zero.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx,+sse"]
|
||||
// FIXME: this produces a movl instead of xorps on x86
|
||||
// FIXME: this produces a xor instruction instead of xorps on x86_64
|
||||
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))]
|
||||
pub unsafe fn _mm_setzero_si64() -> __m64 {
|
||||
mem::transmute(0_i64)
|
||||
}
|
||||
|
||||
/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
|
||||
/// using signed saturation.
|
||||
///
|
||||
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
|
||||
/// less than 0x80 are saturated to 0x80.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx,+sse"]
|
||||
#[cfg_attr(test, assert_instr(packsswb))]
|
||||
pub unsafe fn _mm_packs_pi16(a: i16x4, b: i16x4) -> i8x8 {
|
||||
mem::transmute(packsswb(mem::transmute(a), mem::transmute(b)))
|
||||
}
|
||||
|
||||
/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
|
||||
/// using signed saturation.
|
||||
///
|
||||
/// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
|
||||
/// values less than 0x8000 are saturated to 0x8000.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx,+sse"]
|
||||
#[cfg_attr(test, assert_instr(packssdw))]
|
||||
pub unsafe fn _mm_packs_pi32(a: i32x2, b: i32x2) -> i16x4 {
|
||||
mem::transmute(packssdw(mem::transmute(a), mem::transmute(b)))
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.x86.mmx.packsswb"]
|
||||
fn packsswb(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.packssdw"]
|
||||
fn packssdw(a: __m64, b: __m64) -> __m64;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use v64::{i16x4, i32x2, i8x8};
|
||||
use x86::i686::mmx;
|
||||
use x86::__m64;
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
#[simd_test = "sse"] // FIXME: should be mmx
|
||||
unsafe fn _mm_setzero_si64() {
|
||||
let r: __m64 = ::std::mem::transmute(0_i64);
|
||||
assert_eq!(r, mmx::_mm_setzero_si64());
|
||||
}
|
||||
|
||||
#[simd_test = "sse"] // FIXME: should be mmx
|
||||
unsafe fn _mm_packs_pi16() {
|
||||
let a = i16x4::new(-1, 2, -3, 4);
|
||||
let b = i16x4::new(-5, 6, -7, 8);
|
||||
let r = i8x8::new(-1, 2, -3, 4, -5, 6, -7, 8);
|
||||
assert_eq!(r, mmx::_mm_packs_pi16(a, b));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"] // FIXME: should be mmx
|
||||
unsafe fn _mm_packs_pi32() {
|
||||
let a = i32x2::new(-1, 2);
|
||||
let b = i32x2::new(-5, 6);
|
||||
let r = i16x4::new(-1, 2, -5, 6);
|
||||
assert_eq!(r, mmx::_mm_packs_pi32(a, b));
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,8 @@
|
|||
//! `i686` intrinsics
|
||||
|
||||
mod mmx;
|
||||
pub use self::mmx::*;
|
||||
|
||||
mod sse;
|
||||
pub use self::sse::*;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,17 +1,15 @@
|
|||
//! `i686` Streaming SIMD Extensions (SSE)
|
||||
|
||||
use v64::{i16x4, u8x8};
|
||||
use v128::f32x4;
|
||||
use v64::{i16x4, i32x2, i8x8, u8x8};
|
||||
use x86::__m64;
|
||||
use core::mem;
|
||||
use x86::i586;
|
||||
use x86::i686::mmx;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
/// This type is only required for mapping vector types to llvm's `x86_mmx`
|
||||
/// type.
|
||||
#[allow(non_camel_case_types)]
|
||||
#[repr(simd)]
|
||||
struct __m64(i64);
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.x86.mmx.pmaxs.w"]
|
||||
|
|
@ -22,6 +20,10 @@ extern "C" {
|
|||
fn pminsw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.pminu.b"]
|
||||
fn pminub(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.sse.cvtps2pi"]
|
||||
fn cvtps2pi(a: f32x4) -> __m64;
|
||||
#[link_name = "llvm.x86.sse.cvttps2pi"]
|
||||
fn cvttps2pi(a: f32x4) -> __m64;
|
||||
}
|
||||
|
||||
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
|
||||
|
|
@ -96,9 +98,70 @@ pub unsafe fn _m_pminub(a: u8x8, b: u8x8) -> u8x8 {
|
|||
_mm_min_pu8(a, b)
|
||||
}
|
||||
|
||||
/// Convert the two lower packed single-precision (32-bit) floating-point
|
||||
/// elements in `a` to packed 32-bit integers with truncation.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(cvttps2pi))]
|
||||
pub unsafe fn _mm_cvttps_pi32(a: f32x4) -> i32x2 {
|
||||
mem::transmute(cvttps2pi(a))
|
||||
}
|
||||
|
||||
/// Convert the two lower packed single-precision (32-bit) floating-point
|
||||
/// elements in `a` to packed 32-bit integers with truncation.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(cvttps2pi))]
|
||||
pub unsafe fn _mm_cvtt_ps2pi(a: f32x4) -> i32x2 {
|
||||
_mm_cvttps_pi32(a)
|
||||
}
|
||||
|
||||
/// Convert the two lower packed single-precision (32-bit) floating-point
|
||||
/// elements in `a` to packed 32-bit integers.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(cvtps2pi))]
|
||||
pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> i32x2 {
|
||||
mem::transmute(cvtps2pi(a))
|
||||
}
|
||||
|
||||
/// Convert the two lower packed single-precision (32-bit) floating-point
|
||||
/// elements in `a` to packed 32-bit integers.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(cvtps2pi))]
|
||||
pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> i32x2 {
|
||||
_mm_cvtps_pi32(a)
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in `a` to
|
||||
/// packed 16-bit integers.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(cvtps2pi))]
|
||||
pub unsafe fn _mm_cvtps_pi16(a: f32x4) -> i16x4 {
|
||||
let b = _mm_cvtps_pi32(a);
|
||||
let a = i586::_mm_movehl_ps(a, a);
|
||||
let c = _mm_cvtps_pi32(a);
|
||||
mmx::_mm_packs_pi32(b, c)
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in `a` to
|
||||
/// packed 8-bit integers, and return them in the lower 4 elements of the
|
||||
/// result.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(cvtps2pi))]
|
||||
pub unsafe fn _mm_cvtps_pi8(a: f32x4) -> i8x8 {
|
||||
let b = _mm_cvtps_pi16(a);
|
||||
let c = mmx::_mm_setzero_si64();
|
||||
mmx::_mm_packs_pi16(b, mem::transmute(c))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use v64::{i16x4, u8x8};
|
||||
use v128::f32x4;
|
||||
use v64::{i16x4, i32x2, i8x8, u8x8};
|
||||
use x86::i686::sse;
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
|
|
@ -141,4 +204,36 @@ mod tests {
|
|||
assert_eq!(r, sse::_mm_min_pu8(a, b));
|
||||
assert_eq!(r, sse::_m_pminub(a, b));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn _mm_cvtps_pi32() {
|
||||
let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
|
||||
let r = i32x2::new(1, 2);
|
||||
|
||||
assert_eq!(r, sse::_mm_cvtps_pi32(a));
|
||||
assert_eq!(r, sse::_mm_cvt_ps2pi(a));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn _mm_cvttps_pi32() {
|
||||
let a = f32x4::new(7.0, 2.0, 3.0, 4.0);
|
||||
let r = i32x2::new(7, 2);
|
||||
|
||||
assert_eq!(r, sse::_mm_cvttps_pi32(a));
|
||||
assert_eq!(r, sse::_mm_cvtt_ps2pi(a));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn _mm_cvtps_pi16() {
|
||||
let a = f32x4::new(7.0, 2.0, 3.0, 4.0);
|
||||
let r = i16x4::new(7, 2, 3, 4);
|
||||
assert_eq!(r, sse::_mm_cvtps_pi16(a));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn _mm_cvtps_pi8() {
|
||||
let a = f32x4::new(7.0, 2.0, 3.0, 4.0);
|
||||
let r = i8x8::new(7, 2, 3, 4, 0, 0, 0, 0);
|
||||
assert_eq!(r, sse::_mm_cvtps_pi8(a));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,6 +26,11 @@ mod x86_64;
|
|||
#[cfg(target_arch = "x86_64")]
|
||||
pub use self::x86_64::*;
|
||||
|
||||
/// 64-bit wide integer vector type.
|
||||
#[allow(non_camel_case_types)]
|
||||
#[repr(simd)]
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct __m64(i64); // corresponds to llvm's `x86_mmx` type
|
||||
/// 128-bit wide signed integer vector type
|
||||
#[allow(non_camel_case_types)]
|
||||
pub type __m128i = ::v128::i8x16;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue