add neon instruction vmaxnm_f* vpmaxnm_f* vminnm_f* vpminnm_f* (#1105)

This commit is contained in:
surechen 2021-04-06 13:57:05 +08:00 committed by GitHub
parent 6201670dd2
commit e6a81b7566
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 416 additions and 1 deletions

View file

@ -2538,6 +2538,71 @@ pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
vmaxq_f64_(a, b)
}
/// Floating-point Maximum Number (vector)
///
/// Forwards `a` and `b` unchanged to the LLVM intrinsic
/// `llvm.aarch64.neon.fmaxnm.v1f64` and returns its result.
// NOTE(review): presumably follows IEEE 754 maxNum handling of quiet NaNs — confirm against the Arm FMAXNM description.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmaxnm))]
pub unsafe fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
#[allow(improper_ctypes)]
extern "C" {
// Bound through `link_name` to the intrinsic named below, not to a symbol called `vmaxnm_f64_`.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v1f64")]
fn vmaxnm_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
}
vmaxnm_f64_(a, b)
}
/// Floating-point Maximum Number (vector)
///
/// Forwards `a` and `b` unchanged to the LLVM intrinsic
/// `llvm.aarch64.neon.fmaxnm.v2f64` and returns its result.
// NOTE(review): presumably follows IEEE 754 maxNum handling of quiet NaNs — confirm against the Arm FMAXNM description.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmaxnm))]
pub unsafe fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
#[allow(improper_ctypes)]
extern "C" {
// Bound through `link_name` to the intrinsic named below, not to a symbol called `vmaxnmq_f64_`.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f64")]
fn vmaxnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
}
vmaxnmq_f64_(a, b)
}
/// Floating-point Maximum Number Pairwise (vector).
///
/// Forwards `a` and `b` unchanged to the LLVM intrinsic
/// `llvm.aarch64.neon.fmaxnmp.v2f32` and returns its result.
///
/// Per the accompanying test, adjacent lanes are reduced pairwise: the pair from `a`
/// produces lane 0 and the pair from `b` produces lane 1
/// (`[1.0, 2.0]`, `[6.0, -3.0]` gives `[2.0, 6.0]`).
// NOTE(review): presumably follows IEEE 754 maxNum handling of quiet NaNs — confirm against the Arm FMAXNMP description.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmaxnmp))]
pub unsafe fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
#[allow(improper_ctypes)]
extern "C" {
// Bound through `link_name` to the intrinsic named below, not to a symbol called `vpmaxnm_f32_`.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v2f32")]
fn vpmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
}
vpmaxnm_f32_(a, b)
}
/// Floating-point Maximum Number Pairwise (vector).
///
/// Forwards `a` and `b` unchanged to the LLVM intrinsic
/// `llvm.aarch64.neon.fmaxnmp.v2f64` and returns its result.
///
/// Per the accompanying test, adjacent lanes are reduced pairwise: the pair from `a`
/// produces lane 0 and the pair from `b` produces lane 1
/// (`[1.0, 2.0]`, `[6.0, -3.0]` gives `[2.0, 6.0]`).
// NOTE(review): presumably follows IEEE 754 maxNum handling of quiet NaNs — confirm against the Arm FMAXNMP description.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmaxnmp))]
pub unsafe fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
#[allow(improper_ctypes)]
extern "C" {
// Bound through `link_name` to the intrinsic named below, not to a symbol called `vpmaxnmq_f64_`.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v2f64")]
fn vpmaxnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
}
vpmaxnmq_f64_(a, b)
}
/// Floating-point Maximum Number Pairwise (vector).
///
/// Forwards `a` and `b` unchanged to the LLVM intrinsic
/// `llvm.aarch64.neon.fmaxnmp.v4f32` and returns its result.
///
/// Per the accompanying test, adjacent lanes are reduced pairwise: the two pairs from `a`
/// produce lanes 0 and 1, the two pairs from `b` produce lanes 2 and 3
/// (`[1.0, 2.0, 3.0, -4.0]`, `[8.0, 16.0, -1.0, 6.0]` gives `[2.0, 3.0, 16.0, 6.0]`).
// NOTE(review): presumably follows IEEE 754 maxNum handling of quiet NaNs — confirm against the Arm FMAXNMP description.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmaxnmp))]
pub unsafe fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
#[allow(improper_ctypes)]
extern "C" {
// Bound through `link_name` to the intrinsic named below, not to a symbol called `vpmaxnmq_f32_`.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v4f32")]
fn vpmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
}
vpmaxnmq_f32_(a, b)
}
/// Minimum (vector)
#[inline]
#[target_feature(enable = "neon")]
@ -2564,6 +2629,71 @@ pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
vminq_f64_(a, b)
}
/// Floating-point Minimum Number (vector)
///
/// Forwards `a` and `b` unchanged to the LLVM intrinsic
/// `llvm.aarch64.neon.fminnm.v1f64` and returns its result.
// NOTE(review): presumably follows IEEE 754 minNum handling of quiet NaNs — confirm against the Arm FMINNM description.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fminnm))]
pub unsafe fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
#[allow(improper_ctypes)]
extern "C" {
// Bound through `link_name` to the intrinsic named below, not to a symbol called `vminnm_f64_`.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v1f64")]
fn vminnm_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
}
vminnm_f64_(a, b)
}
/// Floating-point Minimum Number (vector)
///
/// Forwards `a` and `b` unchanged to the LLVM intrinsic
/// `llvm.aarch64.neon.fminnm.v2f64` and returns its result.
// NOTE(review): presumably follows IEEE 754 minNum handling of quiet NaNs — confirm against the Arm FMINNM description.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fminnm))]
pub unsafe fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
#[allow(improper_ctypes)]
extern "C" {
// Bound through `link_name` to the intrinsic named below, not to a symbol called `vminnmq_f64_`.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f64")]
fn vminnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
}
vminnmq_f64_(a, b)
}
/// Floating-point Minimum Number Pairwise (vector).
///
/// Forwards `a` and `b` unchanged to the LLVM intrinsic
/// `llvm.aarch64.neon.fminnmp.v2f32` and returns its result.
///
/// Per the accompanying test, adjacent lanes are reduced pairwise: the pair from `a`
/// produces lane 0 and the pair from `b` produces lane 1
/// (`[1.0, 2.0]`, `[6.0, -3.0]` gives `[1.0, -3.0]`).
// NOTE(review): presumably follows IEEE 754 minNum handling of quiet NaNs — confirm against the Arm FMINNMP description.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fminnmp))]
pub unsafe fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
#[allow(improper_ctypes)]
extern "C" {
// Bound through `link_name` to the intrinsic named below, not to a symbol called `vpminnm_f32_`.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v2f32")]
fn vpminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
}
vpminnm_f32_(a, b)
}
/// Floating-point Minimum Number Pairwise (vector).
///
/// Forwards `a` and `b` unchanged to the LLVM intrinsic
/// `llvm.aarch64.neon.fminnmp.v2f64` and returns its result.
///
/// Per the accompanying test, adjacent lanes are reduced pairwise: the pair from `a`
/// produces lane 0 and the pair from `b` produces lane 1
/// (`[1.0, 2.0]`, `[6.0, -3.0]` gives `[1.0, -3.0]`).
// NOTE(review): presumably follows IEEE 754 minNum handling of quiet NaNs — confirm against the Arm FMINNMP description.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fminnmp))]
pub unsafe fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
#[allow(improper_ctypes)]
extern "C" {
// Bound through `link_name` to the intrinsic named below, not to a symbol called `vpminnmq_f64_`.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v2f64")]
fn vpminnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
}
vpminnmq_f64_(a, b)
}
/// Floating-point Minimum Number Pairwise (vector).
///
/// Forwards `a` and `b` unchanged to the LLVM intrinsic
/// `llvm.aarch64.neon.fminnmp.v4f32` and returns its result.
///
/// Per the accompanying test, adjacent lanes are reduced pairwise: the two pairs from `a`
/// produce lanes 0 and 1, the two pairs from `b` produce lanes 2 and 3
/// (`[1.0, 2.0, 3.0, -4.0]`, `[8.0, 16.0, -1.0, 6.0]` gives `[1.0, -4.0, 8.0, -1.0]`).
// NOTE(review): presumably follows IEEE 754 minNum handling of quiet NaNs — confirm against the Arm FMINNMP description.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fminnmp))]
pub unsafe fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
#[allow(improper_ctypes)]
extern "C" {
// Bound through `link_name` to the intrinsic named below, not to a symbol called `vpminnmq_f32_`.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v4f32")]
fn vpminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
}
vpminnmq_f32_(a, b)
}
/// Calculates the square root of each lane.
#[inline]
#[target_feature(enable = "neon")]
@ -6347,6 +6477,51 @@ mod test {
assert_eq!(r, e);
}
// Single-lane check: the larger of the two inputs must come back.
#[simd_test(enable = "neon")]
unsafe fn test_vmaxnm_f64() {
    let lhs: f64 = 1.0;
    let rhs: f64 = 8.0;
    let expected: f64 = 8.0;
    let actual: f64 = transmute(vmaxnm_f64(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
// Lane-wise check: every lane of the result must hold the larger input lane.
#[simd_test(enable = "neon")]
unsafe fn test_vmaxnmq_f64() {
    let lhs: f64x2 = f64x2::new(1.0, 2.0);
    let rhs: f64x2 = f64x2::new(8.0, 16.0);
    let expected: f64x2 = f64x2::new(8.0, 16.0);
    let actual: f64x2 = transmute(vmaxnmq_f64(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
// Pairwise check: lane 0 is the max of the first vector's pair, lane 1 of the second's.
#[simd_test(enable = "neon")]
unsafe fn test_vpmaxnm_f32() {
    let lhs: f32x2 = f32x2::new(1.0, 2.0);
    let rhs: f32x2 = f32x2::new(6.0, -3.0);
    let expected: f32x2 = f32x2::new(2.0, 6.0);
    let actual: f32x2 = transmute(vpmaxnm_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
// Pairwise check: lane 0 is the max of the first vector's pair, lane 1 of the second's.
#[simd_test(enable = "neon")]
unsafe fn test_vpmaxnmq_f64() {
    let lhs: f64x2 = f64x2::new(1.0, 2.0);
    let rhs: f64x2 = f64x2::new(6.0, -3.0);
    let expected: f64x2 = f64x2::new(2.0, 6.0);
    let actual: f64x2 = transmute(vpmaxnmq_f64(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
// Pairwise check on four lanes: lanes 0-1 come from the first vector's pairs,
// lanes 2-3 from the second vector's pairs.
#[simd_test(enable = "neon")]
unsafe fn test_vpmaxnmq_f32() {
    let lhs: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
    let rhs: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
    let expected: f32x4 = f32x4::new(2.0, 3.0, 16.0, 6.0);
    let actual: f32x4 = transmute(vpmaxnmq_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmin_f64() {
let a: f64 = 1.0;
@ -6365,6 +6540,51 @@ mod test {
assert_eq!(r, e);
}
// Single-lane check: the smaller of the two inputs must come back.
#[simd_test(enable = "neon")]
unsafe fn test_vminnm_f64() {
    let lhs: f64 = 1.0;
    let rhs: f64 = 8.0;
    let expected: f64 = 1.0;
    let actual: f64 = transmute(vminnm_f64(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
// Lane-wise check: every lane of the result must hold the smaller input lane.
#[simd_test(enable = "neon")]
unsafe fn test_vminnmq_f64() {
    let lhs: f64x2 = f64x2::new(1.0, 2.0);
    let rhs: f64x2 = f64x2::new(8.0, 16.0);
    let expected: f64x2 = f64x2::new(1.0, 2.0);
    let actual: f64x2 = transmute(vminnmq_f64(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
// Pairwise check: lane 0 is the min of the first vector's pair, lane 1 of the second's.
#[simd_test(enable = "neon")]
unsafe fn test_vpminnm_f32() {
    let lhs: f32x2 = f32x2::new(1.0, 2.0);
    let rhs: f32x2 = f32x2::new(6.0, -3.0);
    let expected: f32x2 = f32x2::new(1.0, -3.0);
    let actual: f32x2 = transmute(vpminnm_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
// Pairwise check: lane 0 is the min of the first vector's pair, lane 1 of the second's.
#[simd_test(enable = "neon")]
unsafe fn test_vpminnmq_f64() {
    let lhs: f64x2 = f64x2::new(1.0, 2.0);
    let rhs: f64x2 = f64x2::new(6.0, -3.0);
    let expected: f64x2 = f64x2::new(1.0, -3.0);
    let actual: f64x2 = transmute(vpminnmq_f64(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
// Pairwise check on four lanes: lanes 0-1 come from the first vector's pairs,
// lanes 2-3 from the second vector's pairs.
#[simd_test(enable = "neon")]
unsafe fn test_vpminnmq_f32() {
    let lhs: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
    let rhs: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
    let expected: f32x4 = f32x4::new(1.0, -4.0, 8.0, -1.0);
    let actual: f32x4 = transmute(vpminnmq_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vsqrt_f32() {
let a: f32x2 = f32x2::new(4.0, 9.0);

View file

@ -4906,6 +4906,38 @@ pub unsafe fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
vmaxq_f32_(a, b)
}
/// Floating-point Maximum Number (vector)
///
/// Forwards `a` and `b` unchanged to an LLVM intrinsic chosen by target architecture:
/// `llvm.arm.neon.vmaxnm.v2f32` on `arm`, `llvm.aarch64.neon.fmaxnm.v2f32` on `aarch64`.
// NOTE(review): presumably follows IEEE 754 maxNum handling of quiet NaNs — confirm against the Arm VMAXNM/FMAXNM descriptions.
#[inline]
#[target_feature(enable = "neon")]
// On `arm` the `fp-armv8` and `v8` target features are enabled in addition to `neon`.
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))]
pub unsafe fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
#[allow(improper_ctypes)]
extern "C" {
// Exactly one of the two `link_name` attributes applies, depending on `target_arch`.
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f32")]
fn vmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
}
vmaxnm_f32_(a, b)
}
/// Floating-point Maximum Number (vector)
///
/// Forwards `a` and `b` unchanged to an LLVM intrinsic chosen by target architecture:
/// `llvm.arm.neon.vmaxnm.v4f32` on `arm`, `llvm.aarch64.neon.fmaxnm.v4f32` on `aarch64`.
// NOTE(review): presumably follows IEEE 754 maxNum handling of quiet NaNs — confirm against the Arm VMAXNM/FMAXNM descriptions.
#[inline]
#[target_feature(enable = "neon")]
// On `arm` the `fp-armv8` and `v8` target features are enabled in addition to `neon`.
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))]
pub unsafe fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
#[allow(improper_ctypes)]
extern "C" {
// Exactly one of the two `link_name` attributes applies, depending on `target_arch`.
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v4f32")]
fn vmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
}
vmaxnmq_f32_(a, b)
}
/// Minimum (vector)
#[inline]
#[target_feature(enable = "neon")]
@ -5130,6 +5162,38 @@ pub unsafe fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
vminq_f32_(a, b)
}
/// Floating-point Minimum Number (vector)
///
/// Forwards `a` and `b` unchanged to an LLVM intrinsic chosen by target architecture:
/// `llvm.arm.neon.vminnm.v2f32` on `arm`, `llvm.aarch64.neon.fminnm.v2f32` on `aarch64`.
// NOTE(review): presumably follows IEEE 754 minNum handling of quiet NaNs — confirm against the Arm VMINNM/FMINNM descriptions.
#[inline]
#[target_feature(enable = "neon")]
// On `arm` the `fp-armv8` and `v8` target features are enabled in addition to `neon`.
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))]
pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
#[allow(improper_ctypes)]
extern "C" {
// Exactly one of the two `link_name` attributes applies, depending on `target_arch`.
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f32")]
fn vminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
}
vminnm_f32_(a, b)
}
/// Floating-point Minimum Number (vector)
///
/// Forwards `a` and `b` unchanged to an LLVM intrinsic chosen by target architecture:
/// `llvm.arm.neon.vminnm.v4f32` on `arm`, `llvm.aarch64.neon.fminnm.v4f32` on `aarch64`.
// NOTE(review): presumably follows IEEE 754 minNum handling of quiet NaNs — confirm against the Arm VMINNM/FMINNM descriptions.
#[inline]
#[target_feature(enable = "neon")]
// On `arm` the `fp-armv8` and `v8` target features are enabled in addition to `neon`.
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))]
pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
#[allow(improper_ctypes)]
extern "C" {
// Exactly one of the two `link_name` attributes applies, depending on `target_arch`.
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v4f32")]
fn vminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
}
vminnmq_f32_(a, b)
}
/// Reciprocal square-root estimate.
#[inline]
#[target_feature(enable = "neon")]
@ -11292,6 +11356,24 @@ mod test {
assert_eq!(r, e);
}
// Lane-wise check: every lane of the result must hold the larger input lane.
#[simd_test(enable = "neon")]
unsafe fn test_vmaxnm_f32() {
    let lhs: f32x2 = f32x2::new(1.0, 2.0);
    let rhs: f32x2 = f32x2::new(8.0, 16.0);
    let expected: f32x2 = f32x2::new(8.0, 16.0);
    let actual: f32x2 = transmute(vmaxnm_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
// Lane-wise check on four lanes, mixing cases where either operand is the larger one.
#[simd_test(enable = "neon")]
unsafe fn test_vmaxnmq_f32() {
    let lhs: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
    let rhs: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
    let expected: f32x4 = f32x4::new(8.0, 16.0, 3.0, 6.0);
    let actual: f32x4 = transmute(vmaxnmq_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmin_s8() {
let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
@ -11418,6 +11500,24 @@ mod test {
assert_eq!(r, e);
}
// Lane-wise check: every lane of the result must hold the smaller input lane.
#[simd_test(enable = "neon")]
unsafe fn test_vminnm_f32() {
    let lhs: f32x2 = f32x2::new(1.0, 2.0);
    let rhs: f32x2 = f32x2::new(8.0, 16.0);
    let expected: f32x2 = f32x2::new(1.0, 2.0);
    let actual: f32x2 = transmute(vminnm_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
// Lane-wise check on four lanes, mixing cases where either operand is the smaller one.
#[simd_test(enable = "neon")]
unsafe fn test_vminnmq_f32() {
    let lhs: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
    let rhs: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
    let expected: f32x4 = f32x4::new(1.0, 2.0, -1.0, -4.0);
    let actual: f32x4 = transmute(vminnmq_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrsqrte_f32() {
let a: f32x2 = f32x2::new(1.0, 2.0);

View file

@ -1517,6 +1517,41 @@ link-arm = vmaxs._EXT_
link-aarch64 = fmax._EXT_
generate float*_t
/// Floating-point Maximum Number (vector)
name = vmaxnm
a = 1.0, 2.0, 3.0, -4.0
b = 8.0, 16.0, -1.0, 6.0
validate 8.0, 16.0, 3.0, 6.0
aarch64 = fmaxnm
link-aarch64 = fmaxnm._EXT_
generate float64x*_t
target = fp-armv8
arm = vmaxnm
aarch64 = fmaxnm
link-arm = vmaxnm._EXT_
link-aarch64 = fmaxnm._EXT_
generate float*_t
/// Floating-point Maximum Number Pairwise (vector).
name = vpmaxnm
a = 1.0, 2.0
b = 6.0, -3.0
validate 2.0, 6.0
aarch64 = fmaxnmp
link-aarch64 = fmaxnmp._EXT_
generate float32x2_t:float32x2_t:float32x2_t, float64x2_t:float64x2_t:float64x2_t
/// Floating-point Maximum Number Pairwise (vector).
name = vpmaxnm
a = 1.0, 2.0, 3.0, -4.0
b = 8.0, 16.0, -1.0, 6.0
validate 2.0, 3.0, 16.0, 6.0
aarch64 = fmaxnmp
link-aarch64 = fmaxnmp._EXT_
generate float32x4_t:float32x4_t:float32x4_t
/// Minimum (vector)
name = vmin
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
@ -1551,6 +1586,41 @@ link-arm = vmins._EXT_
link-aarch64 = fmin._EXT_
generate float*_t
/// Floating-point Minimum Number (vector)
name = vminnm
a = 1.0, 2.0, 3.0, -4.0
b = 8.0, 16.0, -1.0, 6.0
validate 1.0, 2.0, -1.0, -4.0
aarch64 = fminnm
link-aarch64 = fminnm._EXT_
generate float64x*_t
target = fp-armv8
arm = vminnm
aarch64 = fminnm
link-arm = vminnm._EXT_
link-aarch64 = fminnm._EXT_
generate float*_t
/// Floating-point Minimum Number Pairwise (vector).
name = vpminnm
a = 1.0, 2.0
b = 6.0, -3.0
validate 1.0, -3.0
aarch64 = fminnmp
link-aarch64 = fminnmp._EXT_
generate float32x2_t:float32x2_t:float32x2_t, float64x2_t:float64x2_t:float64x2_t
/// Floating-point Minimum Number Pairwise (vector).
name = vpminnm
a = 1.0, 2.0, 3.0, -4.0
b = 8.0, 16.0, -1.0, 6.0
validate 1.0, -4.0, 8.0, -1.0
aarch64 = fminnmp
link-aarch64 = fminnmp._EXT_
generate float32x4_t:float32x4_t:float32x4_t
/// Calculates the square root of each lane.
name = vsqrt
fn = simd_fsqrt

View file

@ -1,4 +1,5 @@
use self::Suffix::*;
use self::TargetFeature::*;
use std::env;
use std::fs::File;
use std::io::prelude::*;
@ -244,6 +245,12 @@ enum Suffix {
NoQDouble,
}
/// ARM target-feature set requested for a generated intrinsic.
///
/// `gen_arm` maps the variant to the string it substitutes into the ARM-only
/// `target_feature(enable = "...")` attribute of the generated function.
#[derive(Clone, Copy)]
enum TargetFeature {
/// Emitted as `"v7"`; the value `main` starts from and resets to.
ArmV7,
/// Emitted as `"fp-armv8,v8"`; selected by a `target = fp-armv8` spec line.
FPArmV8,
}
fn type_to_global_type(t: &str) -> &str {
match t {
"int8x8_t" => "i8x8",
@ -962,6 +969,7 @@ fn gen_arm(
)],
suffix: Suffix,
para_num: i32,
target: TargetFeature,
fixed: &Vec<String>,
multi_fn: &Vec<String>,
) -> (String, String) {
@ -985,6 +993,11 @@ fn gen_arm(
.clone()
.unwrap_or_else(|| current_arm.to_string());
let current_target = match target {
ArmV7 => "v7",
FPArmV8 => "fp-armv8,v8",
};
let current_fn = if let Some(current_fn) = current_fn.clone() {
if link_aarch64.is_some() || link_arm.is_some() {
panic!(
@ -1154,12 +1167,13 @@ fn gen_arm(
{}
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(target_arch = "arm", target_feature(enable = "{}"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr({}{}))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr({}{}))]{}
{}
"#,
current_comment,
current_target,
expand_intrinsic(&current_arm, in_t[1]),
const_assert,
expand_intrinsic(&current_aarch64, in_t[1]),
@ -1482,6 +1496,7 @@ fn main() -> io::Result<()> {
Vec<String>,
)> = Vec::new();
let mut multi_fn: Vec<String> = Vec::new();
let mut target: TargetFeature = ArmV7;
//
// THIS FILE IS GENERATED FORM neon.spec DO NOT CHANGE IT MANUALLY
@ -1560,6 +1575,7 @@ mod test {
fixed = Vec::new();
n = None;
multi_fn = Vec::new();
target = ArmV7;
} else if line.starts_with("//") {
} else if line.starts_with("name = ") {
current_name = Some(String::from(&line[7..]));
@ -1596,6 +1612,14 @@ mod test {
link_aarch64 = Some(String::from(&line[15..]));
} else if line.starts_with("link-arm = ") {
link_arm = Some(String::from(&line[11..]));
} else if line.starts_with("target = ") {
target = match Some(String::from(&line[9..])) {
Some(input) => match input.as_str() {
"fp-armv8" => FPArmV8,
_ => ArmV7,
},
_ => ArmV7,
}
} else if line.starts_with("generate ") {
let line = &line[9..];
let types: Vec<String> = line
@ -1652,6 +1676,7 @@ mod test {
&current_tests,
suffix,
para_num,
target,
&fixed,
&multi_fn,
);