add mmx module, mmx run-time detection, intrinsics (#220)

* [sse] _mm_cvtps_pi32, _mm_cvt_ps2pi

* [mmx] run-time detection support

* [x86] add mmx module

* [x86] make __m64 public

* [sse] add _mm_cvtps_pi{8,16}, _mm_cvttps_pi32, _mm_cvtt_ps2pi

* move new intrinsics from i586 to i686 module

* mmx requires i686
This commit is contained in:
gnzlbg 2017-11-28 16:45:41 +01:00 committed by Alex Crichton
parent ef847ac83b
commit 288a30a93e
6 changed files with 205 additions and 16 deletions

View file

@ -29,6 +29,9 @@ use super::bit;
#[macro_export]
#[doc(hidden)]
macro_rules! __unstable_detect_feature {
("mmx") => {
$crate::vendor::__unstable_detect_feature(
$crate::vendor::__Feature::mmx{}) };
("sse") => {
$crate::vendor::__unstable_detect_feature(
$crate::vendor::__Feature::sse{}) };
@ -165,6 +168,8 @@ macro_rules! __unstable_detect_feature {
#[allow(non_camel_case_types)]
#[repr(u8)]
pub enum __Feature {
/// MMX
mmx,
/// SSE (Streaming SIMD Extensions)
sse,
/// SSE2 (Streaming SIMD Extensions 2)
@ -332,6 +337,7 @@ pub fn detect_features() -> usize {
enable(proc_info_ecx, 20, __Feature::sse4_2);
enable(proc_info_ecx, 23, __Feature::popcnt);
enable(proc_info_edx, 24, __Feature::fxsr);
enable(proc_info_edx, 23, __Feature::mmx);
enable(proc_info_edx, 25, __Feature::sse);
enable(proc_info_edx, 26, __Feature::sse2);

View file

@ -626,10 +626,6 @@ pub unsafe fn _mm_cvt_ss2si(a: f32x4) -> i32 {
_mm_cvtss_si32(a)
}
// Blocked by https://github.com/rust-lang-nursery/stdsimd/issues/74
// pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> i32x2
// pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> i32x2 { _mm_cvtps_pi32(a) }
/// Convert the lowest 32 bit float in the input vector to a 32 bit integer
/// with
/// truncation.
@ -655,10 +651,6 @@ pub unsafe fn _mm_cvtt_ss2si(a: f32x4) -> i32 {
_mm_cvttss_si32(a)
}
// Blocked by https://github.com/rust-lang-nursery/stdsimd/issues/74
// pub unsafe fn _mm_cvttps_pi32(a: f32x4) -> i32x2;
// pub unsafe fn _mm_cvtt_ps2pi(a: f32x4) -> i32x2 { _mm_cvttps_pi32(a) }
/// Extract the lowest 32 bit float from the input vector.
#[inline(always)]
#[target_feature = "+sse"]

View file

@ -0,0 +1,88 @@
//! `i686` MMX instruction set.
//!
//! The intrinsics here roughly correspond to those in the `mmintrin.h` C
//! header.
//!
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
use v64::{i16x4, i32x2, i8x8};
use x86::__m64;
use core::mem;
#[cfg(test)]
use stdsimd_test::assert_instr;
/// Returns a 64-bit integer vector with every bit set to zero.
#[inline(always)]
#[target_feature = "+mmx,+sse"]
// FIXME: this produces a movl instead of xorps on x86
// FIXME: this produces a xor intrinsic instead of xorps on x86_64
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))]
pub unsafe fn _mm_setzero_si64() -> __m64 {
    // Reinterpret the bits of an all-zero `i64` as the vector type.
    let zero_bits: i64 = 0;
    mem::transmute(zero_bits)
}
/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation.
///
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
/// less than 0x80 are saturated to 0x80.
#[inline(always)]
#[target_feature = "+mmx,+sse"]
#[cfg_attr(test, assert_instr(packsswb))]
pub unsafe fn _mm_packs_pi16(a: i16x4, b: i16x4) -> i8x8 {
mem::transmute(packsswb(mem::transmute(a), mem::transmute(b)))
}
/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using signed saturation.
///
/// Values greater than 0x7FFF (32767) are saturated to 0x7FFF. Values less
/// than -32768 are saturated to -32768 (bit pattern 0x8000).
///
/// The converted lanes of `a` come first in the result, followed by those of
/// `b`.
#[inline(always)]
#[target_feature = "+mmx,+sse"]
#[cfg_attr(test, assert_instr(packssdw))]
pub unsafe fn _mm_packs_pi32(a: i32x2, b: i32x2) -> i16x4 {
    // The LLVM intrinsic works on `__m64`, hence the transmutes on the way in
    // and on the way out.
    mem::transmute(packssdw(mem::transmute(a), mem::transmute(b)))
}
// Declarations bound through `link_name` to LLVM's `llvm.x86.mmx.*` pack
// intrinsics; every operand and result uses the `__m64` type.
// NOTE(review): `improper_ctypes` is presumably silenced because `__m64` is a
// `#[repr(simd)]` struct rather than a C-compatible type — confirm.
#[allow(improper_ctypes)]
extern "C" {
// Called by `_mm_packs_pi16`.
#[link_name = "llvm.x86.mmx.packsswb"]
fn packsswb(a: __m64, b: __m64) -> __m64;
// Called by `_mm_packs_pi32`.
#[link_name = "llvm.x86.mmx.packssdw"]
fn packssdw(a: __m64, b: __m64) -> __m64;
}
#[cfg(test)]
mod tests {
    use v64::{i16x4, i32x2, i8x8};
    use x86::i686::mmx;
    use x86::__m64;
    use stdsimd_test::simd_test;

    /// The zero vector must compare equal to a transmuted zero `i64`.
    #[simd_test = "sse"] // FIXME: should be mmx
    unsafe fn _mm_setzero_si64() {
        let r: __m64 = ::std::mem::transmute(0_i64);
        assert_eq!(r, mmx::_mm_setzero_si64());
    }

    /// In-range lanes pass through unchanged; out-of-range lanes saturate.
    #[simd_test = "sse"] // FIXME: should be mmx
    unsafe fn _mm_packs_pi16() {
        let a = i16x4::new(-1, 2, -3, 4);
        let b = i16x4::new(-5, 6, -7, 8);
        let r = i8x8::new(-1, 2, -3, 4, -5, 6, -7, 8);
        assert_eq!(r, mmx::_mm_packs_pi16(a, b));

        // Lanes outside the signed 8-bit range must clamp to 127 / -128.
        let a = i16x4::new(128, -129, 300, -300);
        let b = i16x4::new(127, -128, 32767, -32768);
        let r = i8x8::new(127, -128, 127, -128, 127, -128, 127, -128);
        assert_eq!(r, mmx::_mm_packs_pi16(a, b));
    }

    /// In-range lanes pass through unchanged; out-of-range lanes saturate.
    #[simd_test = "sse"] // FIXME: should be mmx
    unsafe fn _mm_packs_pi32() {
        let a = i32x2::new(-1, 2);
        let b = i32x2::new(-5, 6);
        let r = i16x4::new(-1, 2, -5, 6);
        assert_eq!(r, mmx::_mm_packs_pi32(a, b));

        // Lanes outside the signed 16-bit range must clamp to 32767 / -32768.
        let a = i32x2::new(32768, -32769);
        let b = i32x2::new(32767, -32768);
        let r = i16x4::new(32767, -32768, 32767, -32768);
        assert_eq!(r, mmx::_mm_packs_pi32(a, b));
    }
}

View file

@ -1,5 +1,8 @@
//! `i686` intrinsics
mod mmx;
pub use self::mmx::*;
mod sse;
pub use self::sse::*;

View file

@ -1,17 +1,15 @@
//! `i686` Streaming SIMD Extensions (SSE)
use v64::{i16x4, u8x8};
use v128::f32x4;
use v64::{i16x4, i32x2, i8x8, u8x8};
use x86::__m64;
use core::mem;
use x86::i586;
use x86::i686::mmx;
#[cfg(test)]
use stdsimd_test::assert_instr;
/// Module-private wrapper around a single `i64`.
///
/// This type is only required for mapping vector types to llvm's `x86_mmx`
/// type.
#[allow(non_camel_case_types)]
#[repr(simd)]
struct __m64(i64);
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.mmx.pmaxs.w"]
@ -22,6 +20,10 @@ extern "C" {
fn pminsw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pminu.b"]
fn pminub(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.sse.cvtps2pi"]
fn cvtps2pi(a: f32x4) -> __m64;
#[link_name = "llvm.x86.sse.cvttps2pi"]
fn cvttps2pi(a: f32x4) -> __m64;
}
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
@ -96,9 +98,70 @@ pub unsafe fn _m_pminub(a: u8x8, b: u8x8) -> u8x8 {
_mm_min_pu8(a, b)
}
/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers with truncation.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvttps2pi))]
pub unsafe fn _mm_cvttps_pi32(a: f32x4) -> i32x2 {
mem::transmute(cvttps2pi(a))
}
/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers with truncation.
///
/// This function simply forwards to `_mm_cvttps_pi32`.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvttps2pi))]
pub unsafe fn _mm_cvtt_ps2pi(a: f32x4) -> i32x2 {
_mm_cvttps_pi32(a)
}
/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> i32x2 {
mem::transmute(cvtps2pi(a))
}
/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers.
///
/// This function simply forwards to `_mm_cvtps_pi32`.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> i32x2 {
_mm_cvtps_pi32(a)
}
/// Converts the four single-precision (32-bit) floating-point lanes of `a`
/// into packed 16-bit integers.
///
/// The lower and upper lane pairs are first converted to 32-bit integers and
/// then narrowed together with `_mm_packs_pi32`.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi16(a: f32x4) -> i16x4 {
    // `_mm_cvtps_pi32` only looks at the two lowest lanes, so convert those
    // first, then move the upper pair down and convert it as well.
    let low_pair = _mm_cvtps_pi32(a);
    let high_pair = _mm_cvtps_pi32(i586::_mm_movehl_ps(a, a));
    mmx::_mm_packs_pi32(low_pair, high_pair)
}
/// Convert packed single-precision (32-bit) floating-point elements in `a` to
/// packed 8-bit integers, and returns theem in the lower 4 elements of the
/// result.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi8(a: f32x4) -> i8x8 {
let b = _mm_cvtps_pi16(a);
let c = mmx::_mm_setzero_si64();
mmx::_mm_packs_pi16(b, mem::transmute(c))
}
#[cfg(test)]
mod tests {
use v64::{i16x4, u8x8};
use v128::f32x4;
use v64::{i16x4, i32x2, i8x8, u8x8};
use x86::i686::sse;
use stdsimd_test::simd_test;
@ -141,4 +204,36 @@ mod tests {
assert_eq!(r, sse::_mm_min_pu8(a, b));
assert_eq!(r, sse::_m_pminub(a, b));
}
#[simd_test = "sse"]
unsafe fn _mm_cvtps_pi32() {
    // Only the two lowest lanes of the input show up in the result.
    let input = f32x4::new(1.0, 2.0, 3.0, 4.0);
    let expected = i32x2::new(1, 2);
    // The primary intrinsic and its alias must agree.
    assert_eq!(expected, sse::_mm_cvtps_pi32(input));
    assert_eq!(expected, sse::_mm_cvt_ps2pi(input));
}
#[simd_test = "sse"]
unsafe fn _mm_cvttps_pi32() {
    // Fractional inputs make the truncation observable: rounding to nearest
    // would yield (8, -3) instead of (7, -2).
    let a = f32x4::new(7.9, -2.9, 3.0, 4.0);
    let r = i32x2::new(7, -2);
    // The primary intrinsic and its alias must agree.
    assert_eq!(r, sse::_mm_cvttps_pi32(a));
    assert_eq!(r, sse::_mm_cvtt_ps2pi(a));
}
#[simd_test = "sse"]
unsafe fn _mm_cvtps_pi16() {
    // All four lanes are converted, each to a 16-bit integer.
    let input = f32x4::new(7.0, 2.0, 3.0, 4.0);
    let expected = i16x4::new(7, 2, 3, 4);
    assert_eq!(expected, sse::_mm_cvtps_pi16(input));
}
#[simd_test = "sse"]
unsafe fn _mm_cvtps_pi8() {
    // The four converted lanes land in the lower half; the upper half is zero.
    let input = f32x4::new(7.0, 2.0, 3.0, 4.0);
    let expected = i8x8::new(7, 2, 3, 4, 0, 0, 0, 0);
    assert_eq!(expected, sse::_mm_cvtps_pi8(input));
}
}

View file

@ -26,6 +26,11 @@ mod x86_64;
#[cfg(target_arch = "x86_64")]
pub use self::x86_64::*;
/// 64-bit wide integer vector type.
///
/// Wraps a single `i64` and corresponds to llvm's `x86_mmx` type.
#[allow(non_camel_case_types)]
#[repr(simd)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct __m64(i64);
/// 128-bit wide signed integer vector type.
///
/// This is an alias for `::v128::i8x16`.
#[allow(non_camel_case_types)]
pub type __m128i = ::v128::i8x16;