add arm/aarch64 table lookup and vector combine intrinsics (#546)

This commit is contained in:
gnzlbg 2018-08-01 15:59:31 +02:00 committed by Alex Crichton
parent 82274aadc1
commit e6370ca22a
4 changed files with 2507 additions and 3 deletions

File diff suppressed because it is too large Load diff

View file

@ -65,6 +65,46 @@ types! {
pub struct uint64x2_t(u64, u64);
}
/// ARM-specific aggregate of two `int8x8_t` vectors.
///
/// Both members are public and accessed positionally as `.0` and `.1`.
#[derive(Clone, Copy)]
pub struct int8x8x2_t(
    pub int8x8_t,
    pub int8x8_t,
);
/// ARM-specific aggregate of three `int8x8_t` vectors.
///
/// All members are public and accessed positionally as `.0` through `.2`.
#[derive(Clone, Copy)]
pub struct int8x8x3_t(
    pub int8x8_t,
    pub int8x8_t,
    pub int8x8_t,
);
/// ARM-specific aggregate of four `int8x8_t` vectors.
///
/// All members are public and accessed positionally as `.0` through `.3`.
#[derive(Clone, Copy)]
pub struct int8x8x4_t(
    pub int8x8_t,
    pub int8x8_t,
    pub int8x8_t,
    pub int8x8_t,
);
/// ARM-specific aggregate of two `uint8x8_t` vectors.
///
/// Both members are public and accessed positionally as `.0` and `.1`.
#[derive(Clone, Copy)]
pub struct uint8x8x2_t(
    pub uint8x8_t,
    pub uint8x8_t,
);
/// ARM-specific aggregate of three `uint8x8_t` vectors.
///
/// All members are public and accessed positionally as `.0` through `.2`.
#[derive(Clone, Copy)]
pub struct uint8x8x3_t(
    pub uint8x8_t,
    pub uint8x8_t,
    pub uint8x8_t,
);
/// ARM-specific aggregate of four `uint8x8_t` vectors.
///
/// All members are public and accessed positionally as `.0` through `.3`.
#[derive(Clone, Copy)]
pub struct uint8x8x4_t(
    pub uint8x8_t,
    pub uint8x8_t,
    pub uint8x8_t,
    pub uint8x8_t,
);
/// ARM-specific aggregate of two `poly8x8_t` vectors.
///
/// Both members are public and accessed positionally as `.0` and `.1`.
#[derive(Clone, Copy)]
pub struct poly8x8x2_t(
    pub poly8x8_t,
    pub poly8x8_t,
);
/// ARM-specific aggregate of three `poly8x8_t` vectors.
///
/// All members are public and accessed positionally as `.0` through `.2`.
#[derive(Clone, Copy)]
pub struct poly8x8x3_t(
    pub poly8x8_t,
    pub poly8x8_t,
    pub poly8x8_t,
);
/// ARM-specific aggregate of four `poly8x8_t` vectors.
///
/// All members are public and accessed positionally as `.0` through `.3`.
#[derive(Clone, Copy)]
pub struct poly8x8x4_t(
    pub poly8x8_t,
    pub poly8x8_t,
    pub poly8x8_t,
    pub poly8x8_t,
);
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(
@ -194,6 +234,35 @@ extern "C" {
fn vpmaxf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
}
// Raw bindings to the LLVM `llvm.arm.neon.vtbl*` (table look-up) and
// `llvm.arm.neon.vtbx*` (extended table look-up) intrinsics, available only
// when compiling for `target_arch = "arm"`.
//
// Only the `int8x8_t` flavour is declared; the unsigned and polynomial public
// wrappers transmute their arguments to `int8x8_t` and the result back.
//
// Parameter names mirror how the public wrappers pass their arguments
// (`vtblN(a.0, .., b)` and `vtbxN(a, b.0, .., c)`), so each name inside a
// declaration is distinct instead of repeating `b`.
#[cfg(target_arch = "arm")]
// NOTE(review): presumably needed because the SIMD vector types trigger the
// `improper_ctypes` lint in an `extern "C"` block — confirm.
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.arm.neon.vtbl1"]
    fn vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t;
    #[link_name = "llvm.arm.neon.vtbl2"]
    fn vtbl2(a0: int8x8_t, a1: int8x8_t, b: int8x8_t) -> int8x8_t;
    #[link_name = "llvm.arm.neon.vtbl3"]
    fn vtbl3(
        a0: int8x8_t, a1: int8x8_t, a2: int8x8_t, b: int8x8_t,
    ) -> int8x8_t;
    #[link_name = "llvm.arm.neon.vtbl4"]
    fn vtbl4(
        a0: int8x8_t, a1: int8x8_t, a2: int8x8_t, a3: int8x8_t, b: int8x8_t,
    ) -> int8x8_t;

    #[link_name = "llvm.arm.neon.vtbx1"]
    fn vtbx1(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
    #[link_name = "llvm.arm.neon.vtbx2"]
    fn vtbx2(
        a: int8x8_t, b0: int8x8_t, b1: int8x8_t, c: int8x8_t,
    ) -> int8x8_t;
    #[link_name = "llvm.arm.neon.vtbx3"]
    fn vtbx3(
        a: int8x8_t, b0: int8x8_t, b1: int8x8_t, b2: int8x8_t, c: int8x8_t,
    ) -> int8x8_t;
    #[link_name = "llvm.arm.neon.vtbx4"]
    fn vtbx4(
        a: int8x8_t, b0: int8x8_t, b1: int8x8_t, b2: int8x8_t, b3: int8x8_t,
        c: int8x8_t,
    ) -> int8x8_t;
}
/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
@ -695,6 +764,308 @@ pub unsafe fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
vpmaxf_v2f32(a, b)
}
/// Table look-up for `int8x8_t` operands.
///
/// Passes `a` and `b` unchanged to the `llvm.arm.neon.vtbl1` binding and
/// returns its result. Callers must ensure the `neon` and `v7` target
/// features are available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
    let looked_up: int8x8_t = vtbl1(a, b);
    looked_up
}
/// Table look-up for `uint8x8_t` operands.
///
/// The underlying `vtbl1` binding is declared for `int8x8_t` only, so both
/// arguments are transmuted to `int8x8_t` and the result is transmuted back
/// to `uint8x8_t`. Callers must ensure the `neon` and `v7` target features
/// are available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
    let a_bits: int8x8_t = ::mem::transmute(a);
    let b_bits: int8x8_t = ::mem::transmute(b);
    ::mem::transmute(vtbl1(a_bits, b_bits))
}
/// Table look-up for `poly8x8_t` operands.
///
/// The underlying `vtbl1` binding is declared for `int8x8_t` only, so both
/// arguments are transmuted to `int8x8_t` and the result is transmuted back
/// to `poly8x8_t`. Callers must ensure the `neon` and `v7` target features
/// are available.
///
/// NOTE(review): `b` is typed `poly8x8_t` here, whereas `vtbl2_p8`,
/// `vtbl3_p8` and `vtbl4_p8` take `b: uint8x8_t` — confirm which is intended.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
    let a_bits: int8x8_t = ::mem::transmute(a);
    let b_bits: int8x8_t = ::mem::transmute(b);
    ::mem::transmute(vtbl1(a_bits, b_bits))
}
/// Table look-up taking an `int8x8x2_t` and an `int8x8_t`.
///
/// Unpacks the two members of `a` and forwards them, followed by `b`, to the
/// `llvm.arm.neon.vtbl2` binding. Callers must ensure the `neon` and `v7`
/// target features are available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t {
    let int8x8x2_t(first, second) = a;
    vtbl2(first, second, b)
}
/// Table look-up taking a `uint8x8x2_t` and a `uint8x8_t`.
///
/// Each member of `a`, and `b`, is transmuted to `int8x8_t` (the only type
/// the `vtbl2` binding is declared for); the result is transmuted back to
/// `uint8x8_t`. Callers must ensure the `neon` and `v7` target features are
/// available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
    let uint8x8x2_t(first, second) = a;
    let first: int8x8_t = ::mem::transmute(first);
    let second: int8x8_t = ::mem::transmute(second);
    let b_bits: int8x8_t = ::mem::transmute(b);
    ::mem::transmute(vtbl2(first, second, b_bits))
}
/// Table look-up taking a `poly8x8x2_t` and a `uint8x8_t`.
///
/// Each member of `a`, and `b`, is transmuted to `int8x8_t` (the only type
/// the `vtbl2` binding is declared for); the result is transmuted back to
/// `poly8x8_t`. Callers must ensure the `neon` and `v7` target features are
/// available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
    let poly8x8x2_t(first, second) = a;
    let first: int8x8_t = ::mem::transmute(first);
    let second: int8x8_t = ::mem::transmute(second);
    let b_bits: int8x8_t = ::mem::transmute(b);
    ::mem::transmute(vtbl2(first, second, b_bits))
}
/// Table look-up taking an `int8x8x3_t` and an `int8x8_t`.
///
/// Unpacks the three members of `a` and forwards them, followed by `b`, to
/// the `llvm.arm.neon.vtbl3` binding. Callers must ensure the `neon` and
/// `v7` target features are available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t {
    let int8x8x3_t(first, second, third) = a;
    vtbl3(first, second, third, b)
}
/// Table look-up taking a `uint8x8x3_t` and a `uint8x8_t`.
///
/// Each member of `a`, and `b`, is transmuted to `int8x8_t` (the only type
/// the `vtbl3` binding is declared for); the result is transmuted back to
/// `uint8x8_t`. Callers must ensure the `neon` and `v7` target features are
/// available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
    let uint8x8x3_t(first, second, third) = a;
    let first: int8x8_t = ::mem::transmute(first);
    let second: int8x8_t = ::mem::transmute(second);
    let third: int8x8_t = ::mem::transmute(third);
    let b_bits: int8x8_t = ::mem::transmute(b);
    ::mem::transmute(vtbl3(first, second, third, b_bits))
}
/// Table look-up taking a `poly8x8x3_t` and a `uint8x8_t`.
///
/// Each member of `a`, and `b`, is transmuted to `int8x8_t` (the only type
/// the `vtbl3` binding is declared for); the result is transmuted back to
/// `poly8x8_t`. Callers must ensure the `neon` and `v7` target features are
/// available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
    let poly8x8x3_t(first, second, third) = a;
    let first: int8x8_t = ::mem::transmute(first);
    let second: int8x8_t = ::mem::transmute(second);
    let third: int8x8_t = ::mem::transmute(third);
    let b_bits: int8x8_t = ::mem::transmute(b);
    ::mem::transmute(vtbl3(first, second, third, b_bits))
}
/// Table look-up taking an `int8x8x4_t` and an `int8x8_t`.
///
/// Unpacks the four members of `a` and forwards them, followed by `b`, to
/// the `llvm.arm.neon.vtbl4` binding. Callers must ensure the `neon` and
/// `v7` target features are available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t {
    let int8x8x4_t(first, second, third, fourth) = a;
    vtbl4(first, second, third, fourth, b)
}
/// Table look-up taking a `uint8x8x4_t` and a `uint8x8_t`.
///
/// Each member of `a`, and `b`, is transmuted to `int8x8_t` (the only type
/// the `vtbl4` binding is declared for); the result is transmuted back to
/// `uint8x8_t`. Callers must ensure the `neon` and `v7` target features are
/// available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
    let uint8x8x4_t(first, second, third, fourth) = a;
    let first: int8x8_t = ::mem::transmute(first);
    let second: int8x8_t = ::mem::transmute(second);
    let third: int8x8_t = ::mem::transmute(third);
    let fourth: int8x8_t = ::mem::transmute(fourth);
    let b_bits: int8x8_t = ::mem::transmute(b);
    ::mem::transmute(vtbl4(first, second, third, fourth, b_bits))
}
/// Table look-up taking a `poly8x8x4_t` and a `uint8x8_t`.
///
/// Each member of `a`, and `b`, is transmuted to `int8x8_t` (the only type
/// the `vtbl4` binding is declared for); the result is transmuted back to
/// `poly8x8_t`. Callers must ensure the `neon` and `v7` target features are
/// available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
    let poly8x8x4_t(first, second, third, fourth) = a;
    let first: int8x8_t = ::mem::transmute(first);
    let second: int8x8_t = ::mem::transmute(second);
    let third: int8x8_t = ::mem::transmute(third);
    let fourth: int8x8_t = ::mem::transmute(fourth);
    let b_bits: int8x8_t = ::mem::transmute(b);
    ::mem::transmute(vtbl4(first, second, third, fourth, b_bits))
}
/// Extended table look-up for `int8x8_t` operands.
///
/// Passes `a`, `b` and `c` unchanged to the `llvm.arm.neon.vtbx1` binding
/// and returns its result. Callers must ensure the `neon` and `v7` target
/// features are available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
    let looked_up: int8x8_t = vtbx1(a, b, c);
    looked_up
}
/// Extended table look-up for `uint8x8_t` operands.
///
/// The underlying `vtbx1` binding is declared for `int8x8_t` only, so all
/// three arguments are transmuted to `int8x8_t` and the result is transmuted
/// back to `uint8x8_t`. Callers must ensure the `neon` and `v7` target
/// features are available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx1_u8(
    a: uint8x8_t, b: uint8x8_t, c: uint8x8_t,
) -> uint8x8_t {
    let a_bits: int8x8_t = ::mem::transmute(a);
    let b_bits: int8x8_t = ::mem::transmute(b);
    let c_bits: int8x8_t = ::mem::transmute(c);
    ::mem::transmute(vtbx1(a_bits, b_bits, c_bits))
}
/// Extended table look-up for `poly8x8_t` data with a `uint8x8_t` third
/// operand.
///
/// The underlying `vtbx1` binding is declared for `int8x8_t` only, so all
/// three arguments are transmuted to `int8x8_t` and the result is transmuted
/// back to `poly8x8_t`. Callers must ensure the `neon` and `v7` target
/// features are available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx1_p8(
    a: poly8x8_t, b: poly8x8_t, c: uint8x8_t,
) -> poly8x8_t {
    let a_bits: int8x8_t = ::mem::transmute(a);
    let b_bits: int8x8_t = ::mem::transmute(b);
    let c_bits: int8x8_t = ::mem::transmute(c);
    ::mem::transmute(vtbx1(a_bits, b_bits, c_bits))
}
/// Extended table look-up taking an `int8x8_t`, an `int8x8x2_t` and an
/// `int8x8_t`.
///
/// Forwards `a`, the two unpacked members of `b`, and `c` (in that order) to
/// the `llvm.arm.neon.vtbx2` binding. Callers must ensure the `neon` and
/// `v7` target features are available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t {
    let int8x8x2_t(first, second) = b;
    vtbx2(a, first, second, c)
}
/// Extended table look-up taking a `uint8x8_t`, a `uint8x8x2_t` and a
/// `uint8x8_t`.
///
/// `a`, each member of `b`, and `c` are transmuted to `int8x8_t` (the only
/// type the `vtbx2` binding is declared for); the result is transmuted back
/// to `uint8x8_t`. Callers must ensure the `neon` and `v7` target features
/// are available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx2_u8(
    a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t,
) -> uint8x8_t {
    let a_bits: int8x8_t = ::mem::transmute(a);
    let uint8x8x2_t(first, second) = b;
    let first: int8x8_t = ::mem::transmute(first);
    let second: int8x8_t = ::mem::transmute(second);
    let c_bits: int8x8_t = ::mem::transmute(c);
    ::mem::transmute(vtbx2(a_bits, first, second, c_bits))
}
/// Extended table look-up taking a `poly8x8_t`, a `poly8x8x2_t` and a
/// `uint8x8_t`.
///
/// `a`, each member of `b`, and `c` are transmuted to `int8x8_t` (the only
/// type the `vtbx2` binding is declared for); the result is transmuted back
/// to `poly8x8_t`. Callers must ensure the `neon` and `v7` target features
/// are available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx2_p8(
    a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t,
) -> poly8x8_t {
    let a_bits: int8x8_t = ::mem::transmute(a);
    let poly8x8x2_t(first, second) = b;
    let first: int8x8_t = ::mem::transmute(first);
    let second: int8x8_t = ::mem::transmute(second);
    let c_bits: int8x8_t = ::mem::transmute(c);
    ::mem::transmute(vtbx2(a_bits, first, second, c_bits))
}
/// Extended table look-up taking an `int8x8_t`, an `int8x8x3_t` and an
/// `int8x8_t`.
///
/// Forwards `a`, the three unpacked members of `b`, and `c` (in that order)
/// to the `llvm.arm.neon.vtbx3` binding. Callers must ensure the `neon` and
/// `v7` target features are available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t {
    let int8x8x3_t(first, second, third) = b;
    vtbx3(a, first, second, third, c)
}
/// Extended table look-up taking a `uint8x8_t`, a `uint8x8x3_t` and a
/// `uint8x8_t`.
///
/// `a`, each member of `b`, and `c` are transmuted to `int8x8_t` (the only
/// type the `vtbx3` binding is declared for); the result is transmuted back
/// to `uint8x8_t`. Callers must ensure the `neon` and `v7` target features
/// are available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx3_u8(
    a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t,
) -> uint8x8_t {
    let a_bits: int8x8_t = ::mem::transmute(a);
    let uint8x8x3_t(first, second, third) = b;
    let first: int8x8_t = ::mem::transmute(first);
    let second: int8x8_t = ::mem::transmute(second);
    let third: int8x8_t = ::mem::transmute(third);
    let c_bits: int8x8_t = ::mem::transmute(c);
    ::mem::transmute(vtbx3(a_bits, first, second, third, c_bits))
}
/// Extended table look-up taking a `poly8x8_t`, a `poly8x8x3_t` and a
/// `uint8x8_t`.
///
/// `a`, each member of `b`, and `c` are transmuted to `int8x8_t` (the only
/// type the `vtbx3` binding is declared for); the result is transmuted back
/// to `poly8x8_t`. Callers must ensure the `neon` and `v7` target features
/// are available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx3_p8(
    a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t,
) -> poly8x8_t {
    let a_bits: int8x8_t = ::mem::transmute(a);
    let poly8x8x3_t(first, second, third) = b;
    let first: int8x8_t = ::mem::transmute(first);
    let second: int8x8_t = ::mem::transmute(second);
    let third: int8x8_t = ::mem::transmute(third);
    let c_bits: int8x8_t = ::mem::transmute(c);
    ::mem::transmute(vtbx3(a_bits, first, second, third, c_bits))
}
/// Extended table look-up taking an `int8x8_t`, an `int8x8x4_t` and an
/// `int8x8_t`.
///
/// Forwards `a`, the four unpacked members of `b`, and `c` (in that order)
/// to the `llvm.arm.neon.vtbx4` binding. Callers must ensure the `neon` and
/// `v7` target features are available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
    let int8x8x4_t(first, second, third, fourth) = b;
    vtbx4(a, first, second, third, fourth, c)
}
/// Extended table look-up taking a `uint8x8_t`, a `uint8x8x4_t` and a
/// `uint8x8_t`.
///
/// `a`, each member of `b`, and `c` are transmuted to `int8x8_t` (the only
/// type the `vtbx4` binding is declared for); the result is transmuted back
/// to `uint8x8_t`. Callers must ensure the `neon` and `v7` target features
/// are available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx4_u8(
    a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t,
) -> uint8x8_t {
    let a_bits: int8x8_t = ::mem::transmute(a);
    let uint8x8x4_t(first, second, third, fourth) = b;
    let first: int8x8_t = ::mem::transmute(first);
    let second: int8x8_t = ::mem::transmute(second);
    let third: int8x8_t = ::mem::transmute(third);
    let fourth: int8x8_t = ::mem::transmute(fourth);
    let c_bits: int8x8_t = ::mem::transmute(c);
    ::mem::transmute(vtbx4(a_bits, first, second, third, fourth, c_bits))
}
/// Extended table look-up taking a `poly8x8_t`, a `poly8x8x4_t` and a
/// `uint8x8_t`.
///
/// `a`, each member of `b`, and `c` are transmuted to `int8x8_t` (the only
/// type the `vtbx4` binding is declared for); the result is transmuted back
/// to `poly8x8_t`. Callers must ensure the `neon` and `v7` target features
/// are available.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx4_p8(
    a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t,
) -> poly8x8_t {
    let a_bits: int8x8_t = ::mem::transmute(a);
    let poly8x8x4_t(first, second, third, fourth) = b;
    let first: int8x8_t = ::mem::transmute(first);
    let second: int8x8_t = ::mem::transmute(second);
    let third: int8x8_t = ::mem::transmute(third);
    let fourth: int8x8_t = ::mem::transmute(fourth);
    let c_bits: int8x8_t = ::mem::transmute(c);
    ::mem::transmute(vtbx4(a_bits, first, second, third, fourth, c_bits))
}
#[cfg(test)]
mod tests {
use coresimd::arm::*;
@ -1220,3 +1591,7 @@ mod tests {
assert_eq!(r, e);
}
}
// Test-only module whose body is loaded from `table_lookup_tests.rs` via the
// `#[path]` attribute instead of being written inline in this file.
// NOTE(review): presumably holds the tests for the `vtbl*`/`vtbx*` wrappers —
// confirm against that file.
#[cfg(test)]
#[path = "table_lookup_tests.rs"]
mod table_lookup_tests;

File diff suppressed because it is too large Load diff

View file

@ -94,8 +94,7 @@ pub fn assert_instr(
.ident
.to_string()
.starts_with("target")
})
.collect::<Vec<_>>();
}).collect::<Vec<_>>();
let attrs = Append(&attrs);
// Use an ABI on Windows that passes SIMD values in registers, like what