From 24416a21515c41d0e34a85535407663e635b76f3 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Fri, 28 Aug 2015 22:43:38 -0700 Subject: [PATCH] Autogenerate most x86 platform intrinsics. --- src/etc/platform-intrinsics/x86/avx.json | 152 +++ src/etc/platform-intrinsics/x86/avx2.json | 145 +++ src/etc/platform-intrinsics/x86/info.json | 28 + src/etc/platform-intrinsics/x86/sse.json | 40 + src/etc/platform-intrinsics/x86/sse2.json | 110 +++ src/etc/platform-intrinsics/x86/sse3.json | 26 + src/etc/platform-intrinsics/x86/sse41.json | 75 ++ src/etc/platform-intrinsics/x86/sse42.json | 103 ++ src/etc/platform-intrinsics/x86/ssse3.json | 68 ++ src/librustc_platform_intrinsics/x86.rs | 1020 ++++++++++++++++---- 10 files changed, 1593 insertions(+), 174 deletions(-) create mode 100644 src/etc/platform-intrinsics/x86/avx.json create mode 100644 src/etc/platform-intrinsics/x86/avx2.json create mode 100644 src/etc/platform-intrinsics/x86/info.json create mode 100644 src/etc/platform-intrinsics/x86/sse.json create mode 100644 src/etc/platform-intrinsics/x86/sse2.json create mode 100644 src/etc/platform-intrinsics/x86/sse3.json create mode 100644 src/etc/platform-intrinsics/x86/sse41.json create mode 100644 src/etc/platform-intrinsics/x86/sse42.json create mode 100644 src/etc/platform-intrinsics/x86/ssse3.json diff --git a/src/etc/platform-intrinsics/x86/avx.json b/src/etc/platform-intrinsics/x86/avx.json new file mode 100644 index 000000000000..7d6a07f7550f --- /dev/null +++ b/src/etc/platform-intrinsics/x86/avx.json @@ -0,0 +1,152 @@ +{ + "llvm_prefix": "llvm.x86.avx.", + "intrinsics": [ + { + "intrinsic": "256_addsub_{0.data_type}", + "width": [256], + "llvm": "addsub.{0.data_type}.256", + "ret": "f(32-64)", + "args": ["0", "0"] + }, + { + "intrinsic": "256_dp_ps", + "width": [256], + "llvm": "dp.ps.256", + "ret": "f32", + "args": ["0", "0", "S32"] + }, + { + "intrinsic": "256_hadd_{0.data_type}", + "width": [256], + "llvm": "hadd.{0.data_type}.256", + "ret": "f(32-64)", + "args": ["0", "0"] + }, + { + "intrinsic": "256_hsub_{0.data_type}", + "width": [256], + "llvm": "hsub.{0.data_type}.256", + "ret": "f(32-64)", + "args": ["0", "0"] + }, + { + "intrinsic": "256_max_{0.data_type}", + "width": [256], + "llvm": "max.{0.data_type}.256", + "ret": "f(32-64)", + "args": ["0", "0"] + }, + { + "intrinsic": "256_min_{0.data_type}", + "width": [256], + "llvm": "min.{0.data_type}.256", + "ret": "f(32-64)", + "args": ["0", "0"] + }, + { + "intrinsic": "256_movemask_ps", + "width": [256], + "llvm": "movmsk.ps.256", + "ret": "S32", + "args": ["f32"] + }, + { + "intrinsic": "256_movemask_pd", + "width": [256], + "llvm": "movmsk.pd.256", + "ret": "S32", + "args": ["f64"] + }, + { + "intrinsic": "{0.width_mm}_permutevar_{0.data_type}", + "width": [128, 256], + "llvm": "vpermilvar.{0.data_type}{0.width_suffix}", + "ret": "f(32-64)", + "args": ["0", "0s"] + }, + { + "intrinsic": "256_rcp_ps", + "width": [256], + "llvm": "rcp.ps.256", + "ret": "f32", + "args": ["f32"] + }, + { + "intrinsic": "256_rsqrt_ps", + "width": [256], + "llvm": "rsqrt.ps.256", + "ret": "f32", + "args": ["f32"] + }, + { + "intrinsic": "256_sqrt_{0.data_type}", + "width": [256], + "llvm": "!llvm.sqrt.{0.llvm_name}", + "ret": "f(32-64)", + "args": ["0"] + }, + { + "intrinsic": "{1.width_mm}_testc_ps", + "width": [128, 256], + "llvm": "vtestc.ps{1.width_suffix}", + "ret": "S32", + "args": ["f32", "f32"] + }, + { + "intrinsic": "{1.width_mm}_testc_pd", + "width": [128, 256], + "llvm": "vtestc.pd{1.width_suffix}", + "ret": "S32", + "args": ["f64", "f64"] + }, + { + "intrinsic": "256_testc_si256", + "width": [256], + "llvm": "ptestc.256", + "ret": "S32", + "args": ["u64", "u64"] + }, + { + "intrinsic": "{1.width_mm}_testnzc_ps", + "width": [128, 256], + "llvm": "vtestnzc.ps{1.width_suffix}", + "ret": "S32", + "args": ["f32", "f32"] + }, + { + "intrinsic": "{1.width_mm}_testnzc_pd", + "width": [128, 256], + "llvm": "vtestnzc.pd{1.width_suffix}", + "ret": "S32", + "args": ["f64", "f64"] + }, + { + "intrinsic": "256_testnzc_si256", + "width": [256], + "llvm": "ptestnzc.256", + "ret": "S32", + "args": ["u64", "u64"] + }, + { + "intrinsic": "{1.width_mm}_testz_ps", + "width": [128, 256], + "llvm": "vtestz.ps{1.width_suffix}", + "ret": "S32", + "args": ["f32", "f32"] + }, + { + "intrinsic": "{1.width_mm}_testz_pd", + "width": [128, 256], + "llvm": "vtestz.pd{1.width_suffix}", + "ret": "S32", + "args": ["f64", "f64"] + }, + { + "intrinsic": "256_testz_si256", + "width": [256], + "llvm": "ptestz.256", + "ret": "S32", + "args": ["u64", "u64"] + } + ] +} diff --git a/src/etc/platform-intrinsics/x86/avx2.json b/src/etc/platform-intrinsics/x86/avx2.json new file mode 100644 index 000000000000..8b0e53e48cf9 --- /dev/null +++ b/src/etc/platform-intrinsics/x86/avx2.json @@ -0,0 +1,145 @@ +{ + "llvm_prefix": "llvm.x86.avx2.", + "intrinsics": [ + { + "intrinsic": "256_abs_{0.data_type}", + "width": [256], + "llvm": "avx2.pabs.{0.data_type_short}", + "ret": "s(8-32)", + "args": ["0"] + }, + { + "intrinsic": "256_adds_{0.data_type}", + "width": [256], + "llvm": "avx2.padd{0.kind_short}s.{0.data_type_short}", + "ret": "i(8-16)", + "args": ["0", "0"] + }, + { + "intrinsic": "256_avg_{0.data_type}", + "width": [256], + "llvm": "avx2.pavg.{0.data_type_short}", + "ret": "u(8-16)", + "args": ["0", "0"] + }, + { + "intrinsic": "256_hadd_{0.data_type}", + "width": [256], + "llvm": "phadd.{0.data_type_short}", + "ret": "s(16-32)", + "args": ["0", "0"] + }, + { + "intrinsic": "256_hadds_epi16", + "width": [256], + "llvm": "phadd.sw", + "ret": "s16", + "args": ["0", "0"] + }, + { + "intrinsic": "256_hsub_{0.data_type}", + "width": [256], + "llvm": "phsub.{0.data_type_short}", + "ret": "s(16-32)", + "args": ["0", "0"] + }, + { + "intrinsic": "256_hsubs_epi16", + "width": [256], + "llvm": "phsub.sw", + "ret": "s16", + "args": ["0", "0"] + }, + { + "intrinsic": "256_madd_epi16", + "width": [256], + "llvm": "pmadd.wd", + "ret": "s32", + "args": ["s16", "s16"] + }, + { + "intrinsic": "256_maddubs_epi16", + "width": [256], + "llvm": "pmadd.ub.sw", + "ret": "s16", + "args": ["s8", "s8"] + }, + { + "intrinsic": "256_max_{0.data_type}", + "width": [256], + "llvm": "pmax{0.kind}.{0.data_type_short}", + "ret": "i(8-32)", + "args": ["0", "0"] + }, + { + "intrinsic": "256_min_{0.data_type}", + "width": [256], + "llvm": "pmin{0.kind}.{0.data_type_short}", + "ret": "i(8-32)", + "args": ["0", "0"] + }, + { + "intrinsic": "256_mul_{0.data_type}", + "width": [256], + "llvm": "pmul{0.data_type_short}.dq", + "ret": "i64", + "args": ["0dn", "0dn"] + }, + { + "intrinsic": "256_mulhi_{0.data_type}", + "width": [256], + "llvm": "pmulh{0.data_type_short}.w", + "ret": "i16", + "args": ["0", "0"] + }, + { + "intrinsic": "256_mulhrs_epi16", + "width": [256], + "llvm": "pmul.hr.sw", + "ret": "s16", + "args": ["0", "0"] + }, + { + "intrinsic": "256_pack{0.kind_short}s_{1.data_type}", + "width": [256], + "llvm": "pack{0.kind}s{1.data_type_short}{0.data_type_short}", + "ret": "i(8-16)", + "args": ["0hws", "0hws"] + }, + { + "intrinsic": "256_permutevar8x32_{0.data_type}", + "width": [256], + "llvm": "perm{0.data_type_short}", + "ret": ["s32", "f32"], + "args": ["0", "0s"] + }, + { + "intrinsic": "256_sad_epu8", + "width": [256], + "llvm": "psad.bw", + "ret": "u8", + "args": ["0", "0"] + }, + { + "intrinsic": "256_shuffle_epi8", + "width": [256], + "llvm": "pshuf.b", + "ret": "s8", + "args": ["0", "0"] + }, + { + "intrinsic": "256_sign_{0.data_type}", + "width": [256], + "llvm": "psign.{0.data_type_short}", + "ret": "s(8-32)", + "args": ["0", "0"] + }, + { + "intrinsic": "256_subs_{0.data_type}", + "width": [256], + "llvm": "psub{0.kind_short}s.{0.data_type_short}", + "ret": "i(8-16)", + "args": ["0", "0"] + } + ] +} diff --git a/src/etc/platform-intrinsics/x86/info.json b/src/etc/platform-intrinsics/x86/info.json new file mode 100644 index 000000000000..d48bcd268a00 --- /dev/null +++ b/src/etc/platform-intrinsics/x86/info.json @@ -0,0 +1,28 @@ +{ + "platform": "x86", + "intrinsic_prefix": "x86_mm", + "number_info": { + "signed": { + "kind": "s", + "kind_short": "", + "data_type": { "pattern": "epi{bitwidth}" }, + "data_type_short": { "8": "b", "16": "w", "32": "d", "64": "q" } + }, + "unsigned": { + "kind": "u", + "kind_short": "u", + "data_type": { "pattern": "epu{bitwidth}" }, + "data_type_short": { "8": "b", "16": "w", "32": "d", "64": "q" } + }, + "float": { + "kind": "f", + "data_type": { "32": "ps", "64": "pd" }, + "data_type_short": { "32": "ps", "64": "pd" } + } + }, + "width_info": { + "128": { "width_mm": "", "width_suffix": "" }, + "256": { "width_mm": "256", "width_suffix": ".256" }, + "512": { "width_mm": "512", "width_suffix": ".512" } + } +} diff --git a/src/etc/platform-intrinsics/x86/sse.json b/src/etc/platform-intrinsics/x86/sse.json new file mode 100644 index 000000000000..144d326ffd0c --- /dev/null +++ b/src/etc/platform-intrinsics/x86/sse.json @@ -0,0 +1,40 @@ +{ + "llvm_prefix": "llvm.x86.sse.", + "intrinsics": [ + { + "intrinsic": "_movemask_ps", + "width": [128], + "llvm": "movmsk.ps", + "ret": "S32", + "args": ["f32"] + }, + { + "intrinsic": "_max_ps", + "width": [128], + "llvm": "max.ps", + "ret": "f32", + "args": ["0", "0"] + }, + { + "intrinsic": "_min_ps", + "width": [128], + "llvm": "min.ps", + "ret": "f32", + "args": ["0", "0"] + }, + { + "intrinsic": "_rsqrt_ps", + "width": [128], + "llvm": "rsqrt.ps", + "ret": "f32", + "args": ["0"] + }, + { + "intrinsic": "_rcp_ps", + "width": [128], + "llvm": "rcp.ps", + "ret": "f32", + "args": ["0"] + } + ] +} diff --git a/src/etc/platform-intrinsics/x86/sse2.json b/src/etc/platform-intrinsics/x86/sse2.json new file mode 100644 index 000000000000..2a3db5610979 --- /dev/null +++ b/src/etc/platform-intrinsics/x86/sse2.json @@ -0,0 +1,110 @@ +{ + "llvm_prefix": "llvm.x86.sse2.", + "intrinsics": [ + { + "intrinsic": "_adds_{0.data_type}", + "width": [128], + "llvm": "padd{0.kind_short}s.{0.data_type_short}", + "ret": "i(8-16)", + "args": ["0", "0"] + }, + { + "intrinsic": "_avg_{0.data_type}", + "width": [128], + "llvm": "pavg.{0.data_type_short}", + "ret": "u(8-16)", + "args": ["0", "0"] + }, + { + "intrinsic": "_madd_epi16", + "width": [128], + "llvm": "pmadd.wd", + "ret": "s16", + "args": ["0", "0"] + }, + { + "intrinsic": "_max_{0.data_type}", + "width": [128], + "llvm": "pmax{0.kind}.{0.data_type_short}", + "ret": ["s16", "u8"], + "args": ["0", "0"] + }, + { + "intrinsic": "_max_pd", + "width": [128], + "llvm": "max.pd", + "ret": "f64", + "args": ["0", "0"] + }, + { + "intrinsic": "_min_{0.data_type}", + "width": [128], + "llvm": "pmin{0.kind}.{0.data_type_short}", + "ret": ["s16", "u8"], + "args": ["0", "0"] + }, + { + "intrinsic": "_min_pd", + "width": [128], + "llvm": "min.pd", + "ret": "f64", + "args": ["0", "0"] + }, + { + "intrinsic": "_movemask_pd", + "width": [128], + "llvm": "movmsk.pd", + "ret": "S32", + "args": ["f64"] + }, + { + "intrinsic": "_movemask_epi8", + "width": [128], + "llvm": "pmovmskb.128", + "ret": "S32", + "args": ["s8"] + }, + { + "intrinsic": "_mul_epu32", + "width": [128], + "llvm": "pmulu.dq", + "ret": "s64", + "args": ["0dn", "0dn"] + }, + { + "intrinsic": "_mulhi_ep{0.kind}16", + "width": [128], + "llvm": "pmulh{0.kind_short}.w", + "ret": "i16", + "args": ["0", "0"] + }, + { + "intrinsic": "_packs_{1.data_type}", + "width": [128], + "llvm": "packss{1.data_type_short}{0.data_type_short}.128", + "ret": "s(8-16)", + "args": ["0hw", "0hw"] + }, + { + "intrinsic": "_packus_epi16", + "width": [128], + "llvm": "packuswb.128", + "ret": "u8", + "args": ["s16", "s16"] + }, + { + "intrinsic": "_sad_epu8", + "width": [128], + "llvm": "psad.bw", + "ret": "u64", + "args": ["u8", "u8"] + }, + { + "intrinsic": "_subs_{0.data_type}", + "width": [128], + "llvm": "psub{0.kind_short}s.{0.data_type_short}", + "ret": "i(8-16)", + "args": ["0", "0"] + } + ] +} diff --git a/src/etc/platform-intrinsics/x86/sse3.json b/src/etc/platform-intrinsics/x86/sse3.json new file mode 100644 index 000000000000..376e32fa9156 --- /dev/null +++ b/src/etc/platform-intrinsics/x86/sse3.json @@ -0,0 +1,26 @@ +{ + "llvm_prefix": "llvm.x86.sse3.", + "intrinsics": [ + { + "intrinsic": "_addsub_{0.data_type}", + "width": [128], + "llvm": "addsub.{0.data_type}", + "ret": "f(32-64)", + "args": ["0", "0"] + }, + { + "intrinsic": "_hadd_{0.data_type}", + "width": [128], + "llvm": "hadd.{0.data_type}", + "ret": "f(32-64)", + "args": ["0", "0"] + }, + { + "intrinsic": "_hsub_{0.data_type}", + "width": [128], + "llvm": "hsub.{0.data_type}", + "ret": "f(32-64)", + "args": ["0", "0"] + } + ] +} diff --git a/src/etc/platform-intrinsics/x86/sse41.json b/src/etc/platform-intrinsics/x86/sse41.json new file mode 100644 index 000000000000..8610dc83bd68 --- /dev/null +++ b/src/etc/platform-intrinsics/x86/sse41.json @@ -0,0 +1,75 @@ +{ + "llvm_prefix": "llvm.x86.sse41.", + "intrinsics": [ + { + "intrinsic": "_dp_{0.data_type}", + "width": [128], + "llvm": "dp{0.data_type}", + "ret": "f(32-64)", + "args": ["0", "0", "S32"] + }, + { + "intrinsic": "_max_{0.data_type}", + "width": [128], + "llvm": "pmax{0.kind}{0.data_type_short}", + "ret": ["s8", "u16", "i32"], + "args": ["0", "0"] + }, + { + "intrinsic": "_min_{0.data_type}", + "width": [128], + "llvm": "pmin{0.kind}{0.data_type_short}", + "ret": ["s8", "u16", "i32"], + "args": ["0", "0"] + }, + { + "intrinsic": "_minpos_epu16", + "width": [128], + "llvm": "phminposuw", + "ret": "u16", + "args": ["0"] + }, + { + "intrinsic": "_mpsadbw_epu8", + "width": [128], + "llvm": "mpsadbw", + "ret": "u16", + "args": ["u8", "u8", "S32"] + }, + { + "intrinsic": "_mul_epi32", + "width": [128], + "llvm": "muldq", + "ret": "s64", + "args": ["s32", "s32"] + }, + { + "intrinsic": "_packus_epi32", + "width": [128], + "llvm": "packusdw", + "ret": "u16", + "args": ["s32", "s32"] + }, + { + "intrinsic": "_testc_si128", + "width": [128], + "llvm": "ptestc", + "ret": "S32", + "args": ["u64", "u64"] + }, + { + "intrinsic": "_testncz_si128", + "width": [128], + "llvm": "ptest.nzc", + "ret": "S32", + "args": ["u64", "u64"] + }, + { + "intrinsic": "_testz_si128", + "width": [128], + "llvm": "ptestz", + "ret": "S32", + "args": ["u64", "u64"] + } + ] +} diff --git a/src/etc/platform-intrinsics/x86/sse42.json b/src/etc/platform-intrinsics/x86/sse42.json new file mode 100644 index 000000000000..2961405a0551 --- /dev/null +++ b/src/etc/platform-intrinsics/x86/sse42.json @@ -0,0 +1,103 @@ +{ + "llvm_prefix": "llvm.x86.sse42.", + "intrinsics": [ + { + "intrinsic": "_cmpestra", + "width": [128], + "llvm": "pcmpestria128", + "ret": "S32", + "args": ["s8", "S32", "s8", "S32", "S32"] + }, + { + "intrinsic": "_cmpestrc", + "width": [128], + "llvm": "pcmpestric128", + "ret": "S32", + "args": ["s8", "S32", "s8", "S32", "S32"] + }, + { + "intrinsic": "_cmpestri", + "width": [128], + "llvm": "pcmpestri128", + "ret": "S32", + "args": ["s8", "S32", "s8", "S32", "S32"] + }, + { + "intrinsic": "_cmpestrm", + "width": [128], + "llvm": "pcmpestrim128", + "ret": "s8", + "args": ["s8", "S32", "s8", "S32", "S32"] + }, + { + "intrinsic": "_cmpestro", + "width": [128], + "llvm": "pcmpestrio128", + "ret": "S32", + "args": ["s8", "S32", "s8", "S32", "S32"] + }, + { + "intrinsic": "_cmpestrs", + "width": [128], + "llvm": "pcmpestris128", + "ret": "S32", + "args": ["s8", "S32", "s8", "S32", "S32"] + }, + { + "intrinsic": "_cmpestrz", + "width": [128], + "llvm": "pcmpestriz128", + "ret": "S32", + "args": ["s8", "S32", "s8", "S32", "S32"] + }, + { + "intrinsic": "_cmpistra", + "width": [128], + "llvm": "pcmpistria128", + "ret": "S32", + "args": ["s8", "S32", "s8", "S32", "S32"] + }, + { + "intrinsic": "_cmpistrc", + "width": [128], + "llvm": "pcmpistric128", + "ret": "S32", + "args": ["s8", "S32", "s8", "S32", "S32"] + }, + { + "intrinsic": "_cmpistri", + "width": [128], + "llvm": "pcmpistrii128", + "ret": "S32", + "args": ["s8", "S32", "s8", "S32", "S32"] + }, + { + "intrinsic": "_cmpistrm", + "width": [128], + "llvm": "pcmpistrim128", + "ret": "s8", + "args": ["s8", "S32", "s8", "S32", "S32"] + }, + { + "intrinsic": "_cmpistro", + "width": [128], + "llvm": "pcmpistrio128", + "ret": "S32", + "args": ["s8", "S32", "s8", "S32", "S32"] + }, + { + "intrinsic": "_cmpistrs", + "width": [128], + "llvm": "pcmpistris128", + "ret": "S32", + "args": ["s8", "S32", "s8", "S32", "S32"] + }, + { + "intrinsic": "_cmpistrz", + "width": [128], + "llvm": "pcmpistriz128", + "ret": "S32", + "args": ["s8", "S32", "s8", "S32", "S32"] + } + ] +} diff --git a/src/etc/platform-intrinsics/x86/ssse3.json b/src/etc/platform-intrinsics/x86/ssse3.json new file mode 100644 index 000000000000..bbe11380ff87 --- /dev/null +++ b/src/etc/platform-intrinsics/x86/ssse3.json @@ -0,0 +1,68 @@ +{ + "llvm_prefix": "llvm.x86.ssse3.", + "intrinsics": [ + { + "intrinsic": "_abs_{0.data_type}", + "width": [128], + "llvm": "pabs.{0.data_type_short}", + "ret": "s(8-32)", + "args": ["0"] + }, + { + "intrinsic": "_hadd_{0.data_type}", + "width": [128], + "llvm": "phadd.{0.data_type_short}.128", + "ret": "s(16-32)", + "args": ["0", "0"] + }, + { + "intrinsic": "_hadds_epi16", + "width": [128], + "llvm": "phadd.sw.128", + "ret": "s16", + "args": ["0", "0"] + }, + { + "intrinsic": "_hsub_{0.data_type}", + "width": [128], + "llvm": "phsub.{0.data_type_short}.128", + "ret": "s(16-32)", + "args": ["0", "0"] + }, + { + "intrinsic": "_hsubs_epi16", + "width": [128], + "llvm": "phsub.sw.128", + "ret": "s16", + "args": ["0", "0"] + }, + { + "intrinsic": "_maddubs_epi16", + "width": [128], + "llvm": "pmadd.ub.sw.128", + "ret": "s16", + "args": ["s8", "s8"] + }, + { + "intrinsic": "_mulhrs_epi16", + "width": [128], + "llvm": "pmul.hr.sw.128", + "ret": "s16", + "args": ["s16", "s16"] + }, + { + "intrinsic": "_shuffle_epi8", + "width": [128], + "llvm": "pshuf.b.128", + "ret": "s8", + "args": ["s8", "s8"] + }, + { + "intrinsic": "_sign_{0.data_type}", + "width": [128], + "llvm": "psign.{0.data_type_short}.128", + "ret": "s(8-16)", + "args": ["0", "0"] + } + ] +} diff --git a/src/librustc_platform_intrinsics/x86.rs b/src/librustc_platform_intrinsics/x86.rs index ad5fcff8f584..f805f480ece1 100644 --- a/src/librustc_platform_intrinsics/x86.rs +++ b/src/librustc_platform_intrinsics/x86.rs @@ -8,181 +8,853 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use {Intrinsic, i, f, v}; +// DO NOT EDIT: autogenerated by etc/platform-intrinsics/generator.py +// ignore-tidy-linelength + +#![allow(unused_imports)] + +use {Intrinsic, i, u, f, v, agg}; +use IntrinsicDef::Named; use rustc::middle::ty; -macro_rules! p { - ($name: expr, ($($inputs: tt),*) -> $output: tt) => { - plain!(concat!("llvm.x86.", $name), ($($inputs),*) -> $output) - } -} - pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { - if name.starts_with("x86_mm_") { - Some(match &name["x86_mm_".len()..] { - "sqrt_ps" => plain!("llvm.sqrt.v4f32", (f32x4) -> f32x4), - "sqrt_pd" => plain!("llvm.sqrt.v2f64", (f64x2) -> f64x2), - - "movemask_ps" => p!("sse.movmsk.ps", (f32x4) -> i32), - "max_ps" => p!("sse.max.ps", (f32x4, f32x4) -> f32x4), - "min_ps" => p!("sse.min.ps", (f32x4, f32x4) -> f32x4), - "rsqrt_ps" => p!("sse.rsqrt.ps", (f32x4) -> f32x4), - "rcp_ps" => p!("sse.rcp.ps", (f32x4) -> f32x4), - - "adds_epi16" => p!("sse2.padds.w", (i16x8, i16x8) -> i16x8), - "adds_epi8" => p!("sse2.padds.b", (i8x16, i8x16) -> i8x16), - "adds_epu16" => p!("sse2.paddus.w", (i16x8, i16x8) -> i16x8), - "adds_epu8" => p!("sse2.paddus.b", (i8x16, i8x16) -> i8x16), - "avg_epu16" => p!("sse2.pavg.w", (i16x8, i16x8) -> i16x8), - "avg_epu8" => p!("sse2.pavg.b", (i8x16, i8x16) -> i8x16), - "madd_epi16" => p!("sse2.pmadd.wd", (i16x8, i16x8) -> i32x4), - "max_epi16" => p!("sse2.pmaxs.w", (i16x8, i16x8) -> i16x8), - "max_epu8" => p!("sse2.pmaxu.b", (i8x16, i8x16) -> i8x16), - "max_pd" => p!("sse2.max.pd", (f64x2, f64x2) -> f64x2), - "min_epi16" => p!("sse2.pmins.w", (i16x8, i16x8) -> i16x8), - "min_epu8" => p!("sse2.pminu.b", (i8x16, i8x16) -> i8x16), - "min_pd" => p!("sse2.min.pd", (f64x2, f64x2) -> f64x2), - "movemask_pd" => p!("sse2.movmsk.pd", (f64x2) -> i32), - "movemask_epi8" => p!("sse2.pmovmskb.128", (i8x16) -> i32), - "mul_epu32" => p!("sse2.pmulu.dq", (i32x4, i32x4) -> i64x2), - "mulhi_epi16" => p!("sse2.pmulh.w", (i8x16, i8x16) -> i8x16), - "mulhi_epu16" => p!("sse2.pmulhu.w", (i8x16, i8x16) -> i8x16), - "packs_epi16" => p!("sse2.packsswb.128", (i16x8, i16x8) -> i8x16), - "packs_epi32" => p!("sse2.packssdw.128", (i32x4, i32x4) -> i16x8), - "packus_epi16" => p!("sse2.packuswb.128", (i16x8, i16x8) -> i8x16), - "sad_epu8" => p!("sse2.psad.bw", (i8x16, i8x16) -> i64x2), - "subs_epi16" => p!("sse2.psubs.w", (i16x8, i16x8) -> i16x8), - "subs_epi8" => p!("sse2.psubs.b", (i8x16, i8x16) -> i8x16), - "subs_epu16" => p!("sse2.psubus.w", (i16x8, i16x8) -> i16x8), - "subs_epu8" => p!("sse2.psubus.b", (i8x16, i8x16) -> i8x16), - - "addsub_pd" => p!("sse3.addsub.pd", (f64x2, f64x2) -> f64x2), - "addsub_ps" => p!("sse3.addsub.ps", (f32x4, f32x4) -> f32x4), - "hadd_pd" => p!("sse3.hadd.pd", (f64x2, f64x2) -> f64x2), - "hadd_ps" => p!("sse3.hadd.ps", (f32x4, f32x4) -> f32x4), - "hsub_pd" => p!("sse3.hsub.pd", (f64x2, f64x2) -> f64x2), - "hsub_ps" => p!("sse3.hsub.ps", (f32x4, f32x4) -> f32x4), - - "abs_epi16" => p!("ssse3.pabs.w.128", (i16x8) -> i16x8), - "abs_epi32" => p!("ssse3.pabs.d.128", (i32x4) -> i32x4), - "abs_epi8" => p!("ssse3.pabs.b.128", (i8x16) -> i8x16), - "hadd_epi16" => p!("ssse3.phadd.w.128", (i16x8, i16x8) -> i16x8), - "hadd_epi32" => p!("ssse3.phadd.d.128", (i32x4, i32x4) -> i32x4), - "hadds_epi16" => p!("ssse3.phadd.sw.128", (i16x8, i16x8) -> i16x8), - "hsub_epi16" => p!("ssse3.phsub.w.128", (i16x8, i16x8) -> i16x8), - "hsub_epi32" => p!("ssse3.phsub.d.128", (i32x4, i32x4) -> i32x4), - "hsubs_epi16" => p!("ssse3.phsub.sw.128", (i16x8, i16x8) -> i16x8), - "maddubs_epi16" => p!("ssse3.pmadd.ub.sw.128", (i8x16, i8x16) -> i16x8), - "mulhrs_epi16" => p!("ssse3.pmul.hr.sw.128", (i16x8, i16x8) -> i16x8), - "shuffle_epi8" => p!("ssse3.pshuf.b.128", (i8x16, i8x16) -> i8x16), - "sign_epi16" => p!("ssse3.psign.w.128", (i16x8, i16x8) -> i16x8), - "sign_epi32" => p!("ssse3.psign.d.128", (i32x4, i32x4) -> i32x4), - "sign_epi8" => p!("ssse3.psign.b.128", (i8x16, i8x16) -> i8x16), - - "max_epi32" => p!("sse41.pmaxsd", (i32x4, i32x4) -> i32x4), - "max_epi8" => p!("sse41.pmaxsb", (i8x16, i8x16) -> i8x16), - "max_epu16" => p!("sse41.pmaxuw", (i16x8, i16x8) -> i16x8), - "max_epu32" => p!("sse41.pmaxud", (i32x4, i32x4) -> i32x4), - "min_epi32" => p!("sse41.pminsd", (i32x4, i32x4) -> i32x4), - "min_epi8" => p!("sse41.pminsb", (i8x16, i8x16) -> i8x16), - "min_epu16" => p!("sse41.pminuw", (i16x8, i16x8) -> i16x8), - "min_epu32" => p!("sse41.pminud", (i32x4, i32x4) -> i32x4), - "minpos_epu16" => p!("sse41.phminposuw", (i16x8) -> i16x8), - "mul_epi32" => p!("sse41.muldq", (i32x4, i32x4) -> i64x2), - "packus_epi32" => p!("sse41.packusdw", (i32x4, i32x4) -> i16x8), - "testc_si128" => p!("sse41.ptestc", (i64x2, i64x2) -> i32), - "testnzc_si128" => p!("sse41.ptestnzc", (i64x2, i64x2) -> i32), - "testz_si128" => p!("sse41.ptestz", (i64x2, i64x2) -> i32), - - "permutevar_pd" => p!("avx.vpermilvar.pd", (f64x2, i64x2) -> f64x2), - "permutevar_ps" => p!("avx.vpermilvar.ps", (f32x4, i32x4) -> f32x4), - "testc_pd" => p!("avx.vtestc.pd", (f64x2, f64x2) -> i32), - "testc_ps" => p!("avx.vtestc.ps", (f32x4, f32x4) -> i32), - "testnzc_pd" => p!("avx.vtestnzc.pd", (f64x2, f64x2) -> i32), - "testnzc_ps" => p!("avx.vtestnzc.ps", (f32x4, f32x4) -> i32), - "testz_pd" => p!("avx.vtestz.pd", (f64x2, f64x2) -> i32), - "testz_ps" => p!("avx.vtestz.ps", (f32x4, f32x4) -> i32), - - _ => return None - }) - } else if name.starts_with("x86_mm256_") { - Some(match &name["x86_mm256_".len()..] { - "addsub_pd" => p!("avx.addsub.pd.256", (f64x4, f64x4) -> f64x4), - "addsub_ps" => p!("avx.addsub.ps.256", (f32x8, f32x8) -> f32x8), - "hadd_pd" => p!("avx.hadd.pd.256", (f64x4, f64x4) -> f64x4), - "hadd_ps" => p!("avx.hadd.ps.256", (f32x8, f32x8) -> f32x8), - "hsub_pd" => p!("avx.hsub.pd.256", (f64x4, f64x4) -> f64x4), - "hsub_ps" => p!("avx.hsub.ps.256", (f32x8, f32x8) -> f32x8), - "max_pd" => p!("avx.max.pd.256", (f64x4, f64x4) -> f64x4), - "max_ps" => p!("avx.max.ps.256", (f32x8, f32x8) -> f32x8), - "min_pd" => p!("avx.min.pd.256", (f64x4, f64x4) -> f64x4), - "min_ps" => p!("avx.min.ps.256", (f32x8, f32x8) -> f32x8), - "permutevar_pd" => p!("avx.vpermilvar.pd.256", (f64x4, i64x4) -> f64x4), - "permutevar_ps" => p!("avx.vpermilvar.ps.256", (f32x8, i32x8) -> f32x8), - "rcp_ps" => p!("avx.rcp.ps.256", (f32x8) -> f32x8), - "rsqrt_ps" => p!("avx.rsqrt.ps.256", (f32x8) -> f32x8), - "sqrt_pd" => p!("llvm.sqrt.v4f64", (f64x4) -> f64x4), - "sqrt_ps" => p!("llvm.sqrt.v8f32", (f32x8) -> f32x8), - "testc_pd" => p!("avx.vtestc.pd.256", (f64x4, f64x4) -> i32), - "testc_ps" => p!("avx.vtestc.ps.256", (f32x8, f32x8) -> i32), - "testnzc_pd" => p!("avx.vtestnzc.pd.256", (f64x4, f64x4) -> i32), - "testnzc_ps" => p!("avx.vtestnzc.ps.256", (f32x8, f32x8) -> i32), - "testz_pd" => p!("avx.vtestz.pd.256", (f64x4, f64x4) -> i32), - "testz_ps" => p!("avx.vtestz.ps.256", (f32x8, f32x8) -> i32), - - "abs_epi16" => p!("avx2.pabs.w", (i16x16) -> i16x16), - "abs_epi32" => p!("avx2.pabs.d", (i32x8) -> i32x8), - "abs_epi8" => p!("avx2.pabs.b", (i8x32) -> i8x32), - "adds_epi16" => p!("avx2.padds.w", (i16x16, i16x16) -> i16x16), - "adds_epi8" => p!("avx2.padds.b", (i8x32, i8x32) -> i8x32), - "adds_epu16" => p!("avx2.paddus.w", (i16x16, i16x16) -> i16x16), - "adds_epu8" => p!("avx2.paddus.b", (i8x32, i8x32) -> i8x32), - "avg_epu16" => p!("avx2.pavg.w", (i16x16, i16x16) -> i16x16), - "avg_epu8" => p!("avx2.pavg.b", (i8x32, i8x32) -> i8x32), - "hadd_epi16" => p!("avx2.phadd.w", (i16x16, i16x16) -> i16x16), - "hadd_epi32" => p!("avx2.phadd.d", (i32x8, i32x8) -> i32x8), - "hadds_epi16" => p!("avx2.phadd.sw", (i16x16, i16x16) -> i16x16), - "hsub_epi16" => p!("avx2.phsub.w", (i16x16, i16x16) -> i16x16), - "hsub_epi32" => p!("avx2.phsub.d", (i32x8, i32x8) -> i32x8), - "hsubs_epi16" => p!("avx2.phsub.sw", (i16x16, i16x16) -> i16x16), - "madd_epi16" => p!("avx2.pmadd.wd", (i16x16, i16x16) -> i32x8), - "maddubs_epi16" => p!("avx2.pmadd.ub.sw", (i8x32, i8x32) -> i16x16), - "max_epi16" => p!("avx2.pmaxs.w", (i16x16, i16x16) -> i16x16), - "max_epi32" => p!("avx2.pmaxs.d", (i32x8, i32x8) -> i32x8), - "max_epi8" => p!("avx2.pmaxs.b", (i8x32, i8x32) -> i8x32), - "max_epu16" => p!("avx2.pmaxu.w", (i16x16, i16x16) -> i16x16), - "max_epu32" => p!("avx2.pmaxu.d", (i32x8, i32x8) -> i32x8), - "max_epu8" => p!("avx2.pmaxu.b", (i8x32, i8x32) -> i8x32), - "min_epi16" => p!("avx2.pmins.w", (i16x16, i16x16) -> i16x16), - "min_epi32" => p!("avx2.pmins.d", (i32x8, i32x8) -> i32x8), - "min_epi8" => p!("avx2.pmins.b", (i8x32, i8x32) -> i8x32), - "min_epu16" => p!("avx2.pminu.w", (i16x16, i16x16) -> i16x16), - "min_epu32" => p!("avx2.pminu.d", (i32x8, i32x8) -> i32x8), - "min_epu8" => p!("avx2.pminu.b", (i8x32, i8x32) -> i8x32), - "mul_epi32" => p!("avx2.mul.dq", (i32x8, i32x8) -> i64x4), - "mul_epu32" => p!("avx2.mulu.dq", (i32x8, i32x8) -> i64x4), - "mulhi_epi16" => p!("avx2.pmulh.w", (i8x32, i8x32) -> i8x32), - "mulhi_epu16" => p!("avx2.pmulhu.w", (i8x32, i8x32) -> i8x32), - "mulhrs_epi16" => p!("avx2.pmul.hr.sw", (i16x16, i16x16) -> i16x16), - "packs_epi16" => p!("avx2.packsswb", (i16x16, i16x16) -> i8x32), - "packs_epi32" => p!("avx2.packssdw", (i32x8, i32x8) -> i16x16), - "packus_epi16" => p!("avx2.packuswb", (i16x16, i16x16) -> i8x32), - "packus_epi32" => p!("avx2.packusdw", (i32x8, i32x8) -> i16x16), - "permutevar8x32_epi32" => p!("avx2.permd", (i32x8, i32x8) -> i32x8), - "permutevar8x32_ps" => p!("avx2.permps", (f32x8, i32x8) -> i32x8), - "sad_epu8" => p!("avx2.psad.bw", (i8x32, i8x32) -> i64x4), - "shuffle_epi8" => p!("avx2.pshuf.b", (i8x32, i8x32) -> i8x32), - "sign_epi16" => p!("avx2.psign.w", (i16x16, i16x16) -> i16x16), - "sign_epi32" => p!("avx2.psign.d", (i32x8, i32x8) -> i32x8), - "sign_epi8" => p!("avx2.psign.b", (i8x32, i8x32) -> i8x32), - "subs_epi16" => p!("avx2.psubs.w", (i16x16, i16x16) -> i16x16), - "subs_epi8" => p!("avx2.psubs.b", (i8x32, i8x32) -> i8x32), - "subs_epu16" => p!("avx2.psubus.w", (i16x16, i16x16) -> i16x16), - "subs_epu8" => p!("avx2.psubus.b", (i8x32, i8x32) -> i8x32), - - _ => return None, - }) - } else { - None - } + if !name.starts_with("x86_mm") { return None } + Some(match &name["x86_mm".len()..] { + "_movemask_ps" => Intrinsic { + inputs: vec![v(f(32), 4)], + output: i(32), + definition: Named("llvm.x86.sse.movmsk.ps") + }, + "_max_ps" => Intrinsic { + inputs: vec![v(f(32), 4), v(f(32), 4)], + output: v(f(32), 4), + definition: Named("llvm.x86.sse.max.ps") + }, + "_min_ps" => Intrinsic { + inputs: vec![v(f(32), 4), v(f(32), 4)], + output: v(f(32), 4), + definition: Named("llvm.x86.sse.min.ps") + }, + "_rsqrt_ps" => Intrinsic { + inputs: vec![v(f(32), 4)], + output: v(f(32), 4), + definition: Named("llvm.x86.sse.rsqrt.ps") + }, + "_rcp_ps" => Intrinsic { + inputs: vec![v(f(32), 4)], + output: v(f(32), 4), + definition: Named("llvm.x86.sse.rcp.ps") + }, + "_adds_epi8" => Intrinsic { + inputs: vec![v(i(8), 16), v(i(8), 16)], + output: v(i(8), 16), + definition: Named("llvm.x86.sse2.padds.b") + }, + "_adds_epu8" => Intrinsic { + inputs: vec![v(u(8), 16), v(u(8), 16)], + output: v(u(8), 16), + definition: Named("llvm.x86.sse2.paddus.b") + }, + "_adds_epi16" => Intrinsic { + inputs: vec![v(i(16), 8), v(i(16), 8)], + output: v(i(16), 8), + definition: Named("llvm.x86.sse2.padds.w") + }, + "_adds_epu16" => Intrinsic { + inputs: vec![v(u(16), 8), v(u(16), 8)], + output: v(u(16), 8), + definition: Named("llvm.x86.sse2.paddus.w") + }, + "_avg_epu8" => Intrinsic { + inputs: vec![v(u(8), 16), v(u(8), 16)], + output: v(u(8), 16), + definition: Named("llvm.x86.sse2.pavg.b") + }, + "_avg_epu16" => Intrinsic { + inputs: vec![v(u(16), 8), v(u(16), 8)], + output: v(u(16), 8), + definition: Named("llvm.x86.sse2.pavg.w") + }, + "_madd_epi16" => Intrinsic { + inputs: vec![v(i(16), 8), v(i(16), 8)], + output: v(i(16), 8), + definition: Named("llvm.x86.sse2.pmadd.wd") + }, + "_max_epi16" => Intrinsic { + inputs: vec![v(i(16), 8), v(i(16), 8)], + output: v(i(16), 8), + definition: Named("llvm.x86.sse2.pmaxs.w") + }, + "_max_epu8" => Intrinsic { + inputs: vec![v(u(8), 16), v(u(8), 16)], + output: v(u(8), 16), + definition: Named("llvm.x86.sse2.pmaxu.b") + }, + "_max_pd" => Intrinsic { + inputs: vec![v(f(64), 2), v(f(64), 2)], + output: v(f(64), 2), + definition: Named("llvm.x86.sse2.max.pd") + }, + "_min_epi16" => Intrinsic { + inputs: vec![v(i(16), 8), v(i(16), 8)], + output: v(i(16), 8), + definition: Named("llvm.x86.sse2.pmins.w") + }, + "_min_epu8" => Intrinsic { + inputs: vec![v(u(8), 16), v(u(8), 16)], + output: v(u(8), 16), + definition: Named("llvm.x86.sse2.pminu.b") + }, + "_min_pd" => Intrinsic { + inputs: vec![v(f(64), 2), v(f(64), 2)], + output: v(f(64), 2), + definition: Named("llvm.x86.sse2.min.pd") + }, + "_movemask_pd" => Intrinsic { + inputs: vec![v(f(64), 2)], + output: i(32), + definition: Named("llvm.x86.sse2.movmsk.pd") + }, + "_movemask_epi8" => Intrinsic { + inputs: vec![v(i(8), 16)], + output: i(32), + definition: Named("llvm.x86.sse2.pmovmskb.128") + }, + "_mul_epu32" => Intrinsic { + inputs: vec![v(i(32), 4), v(i(32), 4)], + output: v(i(64), 2), + definition: Named("llvm.x86.sse2.pmulu.dq") + }, + "_mulhi_eps16" => Intrinsic { + inputs: vec![v(i(16), 8), v(i(16), 8)], + output: v(i(16), 8), + definition: Named("llvm.x86.sse2.pmulh.w") + }, + "_mulhi_epu16" => Intrinsic { + inputs: vec![v(u(16), 8), v(u(16), 8)], + output: v(u(16), 8), + definition: Named("llvm.x86.sse2.pmulhu.w") + }, + "_packs_epi16" => Intrinsic { + inputs: vec![v(i(16), 8), v(i(16), 8)], + output: v(i(8), 16), + definition: Named("llvm.x86.sse2.packsswb.128") + }, + "_packs_epi32" => Intrinsic { + inputs: vec![v(i(32), 4), v(i(32), 4)], + output: v(i(16), 8), + definition: Named("llvm.x86.sse2.packssdw.128") + }, + "_packus_epi16" => Intrinsic { + inputs: vec![v(i(16), 8), v(i(16), 8)], + output: v(u(8), 16), + definition: Named("llvm.x86.sse2.packuswb.128") + }, + "_sad_epu8" => Intrinsic { + inputs: vec![v(u(8), 16), v(u(8), 16)], + output: v(u(64), 2), + definition: Named("llvm.x86.sse2.psad.bw") + }, + "_subs_epi8" => Intrinsic { + inputs: vec![v(i(8), 16), v(i(8), 16)], + output: v(i(8), 16), + definition: Named("llvm.x86.sse2.psubs.b") + }, + "_subs_epu8" => Intrinsic { + inputs: vec![v(u(8), 16), v(u(8), 16)], + output: v(u(8), 16), + definition: Named("llvm.x86.sse2.psubus.b") + }, + "_subs_epi16" => Intrinsic { + inputs: vec![v(i(16), 8), v(i(16), 8)], + output: v(i(16), 8), + definition: Named("llvm.x86.sse2.psubs.w") + }, + "_subs_epu16" => Intrinsic { + inputs: vec![v(u(16), 8), v(u(16), 8)], + output: v(u(16), 8), + definition: Named("llvm.x86.sse2.psubus.w") + }, + "_addsub_ps" => Intrinsic { + inputs: vec![v(f(32), 4), v(f(32), 4)], + output: v(f(32), 4), + definition: Named("llvm.x86.sse3.addsub.ps") + }, + "_addsub_pd" => Intrinsic { + inputs: vec![v(f(64), 2), v(f(64), 2)], + output: v(f(64), 2), + definition: Named("llvm.x86.sse3.addsub.pd") + }, + "_hadd_ps" => Intrinsic { + inputs: vec![v(f(32), 4), v(f(32), 4)], + output: v(f(32), 4), + definition: Named("llvm.x86.sse3.hadd.ps") + }, + "_hadd_pd" => Intrinsic { + inputs: vec![v(f(64), 2), v(f(64), 2)], + output: v(f(64), 2), + definition: Named("llvm.x86.sse3.hadd.pd") + }, + "_hsub_ps" => Intrinsic { + inputs: vec![v(f(32), 4), v(f(32), 4)], + output: v(f(32), 4), + definition: Named("llvm.x86.sse3.hsub.ps") + }, + "_hsub_pd" => Intrinsic { + inputs: vec![v(f(64), 2), v(f(64), 2)], + output: v(f(64), 2), + definition: Named("llvm.x86.sse3.hsub.pd") + }, + "_abs_epi8" => Intrinsic { + inputs: vec![v(i(8), 16)], + output: v(i(8), 16), + definition: Named("llvm.x86.ssse3.pabs.b") + }, + "_abs_epi16" => Intrinsic { + inputs: vec![v(i(16), 8)], + output: v(i(16), 8), + definition: Named("llvm.x86.ssse3.pabs.w") + }, + "_abs_epi32" => Intrinsic { + inputs: vec![v(i(32), 4)], + output: v(i(32), 4), + definition: Named("llvm.x86.ssse3.pabs.d") + }, + "_hadd_epi16" => Intrinsic { + inputs: vec![v(i(16), 8), v(i(16), 8)], + output: v(i(16), 8), + definition: Named("llvm.x86.ssse3.phadd.w.128") + }, + "_hadd_epi32" => Intrinsic { + inputs: vec![v(i(32), 4), v(i(32), 4)], + output: v(i(32), 4), + definition: Named("llvm.x86.ssse3.phadd.d.128") + }, + "_hadds_epi16" => Intrinsic { + inputs: vec![v(i(16), 8), v(i(16), 8)], + output: v(i(16), 8), + definition: Named("llvm.x86.ssse3.phadd.sw.128") + }, + "_hsub_epi16" => Intrinsic { + inputs: vec![v(i(16), 8), v(i(16), 8)], + output: v(i(16), 8), + definition: Named("llvm.x86.ssse3.phsub.w.128") + }, + "_hsub_epi32" => Intrinsic { + inputs: vec![v(i(32), 4), v(i(32), 4)], + output: v(i(32), 4), + definition: Named("llvm.x86.ssse3.phsub.d.128") + }, + "_hsubs_epi16" => Intrinsic { + inputs: vec![v(i(16), 8), v(i(16), 8)], + output: v(i(16), 8), + definition: Named("llvm.x86.ssse3.phsub.sw.128") + }, + "_maddubs_epi16" => Intrinsic { + inputs: vec![v(i(8), 16), v(i(8), 16)], + output: v(i(16), 8), + definition: Named("llvm.x86.ssse3.pmadd.ub.sw.128") + }, + "_mulhrs_epi16" => Intrinsic { + inputs: vec![v(i(16), 8), v(i(16), 8)], + output: v(i(16), 8), + definition: Named("llvm.x86.ssse3.pmul.hr.sw.128") + }, + "_shuffle_epi8" => Intrinsic { + inputs: vec![v(i(8), 16), v(i(8), 16)], + output: v(i(8), 16), + definition: Named("llvm.x86.ssse3.pshuf.b.128") + }, + "_sign_epi8" => Intrinsic { + inputs: vec![v(i(8), 16), v(i(8), 16)], + output: v(i(8), 16), + definition: Named("llvm.x86.ssse3.psign.b.128") + }, + "_sign_epi16" => Intrinsic { + inputs: vec![v(i(16), 8), v(i(16), 8)], + output: v(i(16), 8), + definition: Named("llvm.x86.ssse3.psign.w.128") + }, + "_dp_ps" => Intrinsic { + inputs: vec![v(f(32), 4), v(f(32), 4), i(32)], + output: v(f(32), 4), + definition: Named("llvm.x86.sse41.dpps") + }, + "_dp_pd" => Intrinsic { + inputs: vec![v(f(64), 2), v(f(64), 2), i(32)], + output: v(f(64), 2), + definition: Named("llvm.x86.sse41.dppd") + }, + "_max_epi8" => Intrinsic { + inputs: vec![v(i(8), 16), v(i(8), 16)], + output: v(i(8), 16), + definition: Named("llvm.x86.sse41.pmaxsb") + }, + "_max_epu16" => Intrinsic { + inputs: vec![v(u(16), 8), v(u(16), 8)], + output: v(u(16), 8), + definition: Named("llvm.x86.sse41.pmaxuw") + }, + "_max_epi32" => Intrinsic { + inputs: vec![v(i(32), 4), v(i(32), 4)], + output: v(i(32), 4), + definition: Named("llvm.x86.sse41.pmaxsd") + }, + "_max_epu32" => Intrinsic { + inputs: vec![v(u(32), 4), v(u(32), 4)], + output: v(u(32), 4), + definition: Named("llvm.x86.sse41.pmaxud") + }, + "_min_epi8" => Intrinsic { + inputs: vec![v(i(8), 16), v(i(8), 16)], + output: v(i(8), 16), + definition: Named("llvm.x86.sse41.pminsb") + }, + "_min_epu16" => Intrinsic { + inputs: vec![v(u(16), 8), v(u(16), 8)], + output: v(u(16), 8), + definition: Named("llvm.x86.sse41.pminuw") + }, + "_min_epi32" => Intrinsic { + inputs: vec![v(i(32), 4), v(i(32), 4)], + output: v(i(32), 4), + definition: Named("llvm.x86.sse41.pminsd") + }, + "_min_epu32" => Intrinsic { + inputs: vec![v(u(32), 4), v(u(32), 4)], + output: v(u(32), 4), + definition: Named("llvm.x86.sse41.pminud") + }, + "_minpos_epu16" => Intrinsic { + inputs: vec![v(u(16), 8)], + output: v(u(16), 8), + definition: Named("llvm.x86.sse41.phminposuw") + }, + "_mpsadbw_epu8" => Intrinsic { + inputs: vec![v(u(8), 16), v(u(8), 16), i(32)], + output: v(u(16), 8), + definition: Named("llvm.x86.sse41.mpsadbw") + }, + "_mul_epi32" => Intrinsic { + inputs: vec![v(i(32), 4), v(i(32), 4)], + output: v(i(64), 2), + definition: Named("llvm.x86.sse41.muldq") + }, + "_packus_epi32" => Intrinsic { + inputs: vec![v(i(32), 4), v(i(32), 4)], + output: v(u(16), 8), + definition: Named("llvm.x86.sse41.packusdw") + }, + "_testc_si128" => Intrinsic { + inputs: vec![v(u(64), 2), v(u(64), 2)], + output: i(32), + definition: Named("llvm.x86.sse41.ptestc") + }, + "_testncz_si128" => Intrinsic { + inputs: vec![v(u(64), 2), v(u(64), 2)], + output: i(32), + definition: Named("llvm.x86.sse41.ptest.nzc") + }, + "_testz_si128" => Intrinsic { + inputs: vec![v(u(64), 2), v(u(64), 2)], + output: i(32), + definition: Named("llvm.x86.sse41.ptestz") + }, + "_cmpestra" => Intrinsic { + inputs: vec![v(i(8), 16), i(32), v(i(8), 16), i(32), i(32)], + output: i(32), + definition: Named("llvm.x86.sse42.pcmpestria128") + }, + "_cmpestrc" => Intrinsic { + inputs: vec![v(i(8), 16), i(32), v(i(8), 16), i(32), i(32)], + output: i(32), + definition: Named("llvm.x86.sse42.pcmpestric128") + }, + "_cmpestri" => Intrinsic { + inputs: vec![v(i(8), 16), i(32), v(i(8), 16), i(32), i(32)], + output: i(32), + definition: Named("llvm.x86.sse42.pcmpestri128") + }, + "_cmpestrm" => Intrinsic { + inputs: vec![v(i(8), 16), i(32), v(i(8), 16), i(32), i(32)], + output: v(i(8), 16), + definition: Named("llvm.x86.sse42.pcmpestrim128") + }, + "_cmpestro" => Intrinsic { + inputs: vec![v(i(8), 16), i(32), v(i(8), 16), i(32), i(32)], + output: i(32), + definition: Named("llvm.x86.sse42.pcmpestrio128") + }, + "_cmpestrs" => Intrinsic { + inputs: vec![v(i(8), 16), i(32), v(i(8), 16), i(32), i(32)], + output: i(32), + definition: Named("llvm.x86.sse42.pcmpestris128") + }, + "_cmpestrz" => Intrinsic { + inputs: vec![v(i(8), 16), i(32), v(i(8), 16), i(32), i(32)], + output: i(32), + definition: Named("llvm.x86.sse42.pcmpestriz128") + }, + "_cmpistra" => Intrinsic { + inputs: vec![v(i(8), 16), i(32), v(i(8), 16), i(32), i(32)], + output: i(32), + definition: Named("llvm.x86.sse42.pcmpistria128") + }, + "_cmpistrc" => Intrinsic { + inputs: vec![v(i(8), 16), i(32), v(i(8), 16), i(32), i(32)], + output: i(32), + definition: Named("llvm.x86.sse42.pcmpistric128") + }, + "_cmpistri" => Intrinsic { + inputs: vec![v(i(8), 16), i(32), v(i(8), 16), i(32), i(32)], + output: i(32), + definition: Named("llvm.x86.sse42.pcmpistrii128") + }, + "_cmpistrm" => Intrinsic { + inputs: vec![v(i(8), 16), i(32), v(i(8), 16), i(32), i(32)], + output: v(i(8), 16), + definition: Named("llvm.x86.sse42.pcmpistrim128") + }, + "_cmpistro" => Intrinsic { + inputs: vec![v(i(8), 16), i(32), v(i(8), 16), i(32), i(32)], + output: i(32), + definition: Named("llvm.x86.sse42.pcmpistrio128") + }, + "_cmpistrs" => Intrinsic { + inputs: vec![v(i(8), 16), i(32), v(i(8), 16), i(32), i(32)], + output: i(32), + definition: Named("llvm.x86.sse42.pcmpistris128") + }, + "_cmpistrz" => Intrinsic { + inputs: vec![v(i(8), 16), i(32), v(i(8), 16), i(32), i(32)], + output: i(32), + definition: Named("llvm.x86.sse42.pcmpistriz128") + }, + "256_addsub_ps" => Intrinsic { + inputs: vec![v(f(32), 8), v(f(32), 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.avx.addsub.ps.256") + }, + "256_addsub_pd" => Intrinsic { + inputs: vec![v(f(64), 4), v(f(64), 4)], + output: v(f(64), 4), + definition: Named("llvm.x86.avx.addsub.pd.256") + }, + "256_dp_ps" => Intrinsic { + inputs: vec![v(f(32), 8), v(f(32), 8), i(32)], + output: v(f(32), 8), + definition: Named("llvm.x86.avx.dp.ps.256") + }, + "256_hadd_ps" => Intrinsic { + inputs: vec![v(f(32), 8), v(f(32), 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.avx.hadd.ps.256") + }, + "256_hadd_pd" => Intrinsic { + inputs: vec![v(f(64), 4), v(f(64), 4)], + output: v(f(64), 4), + definition: Named("llvm.x86.avx.hadd.pd.256") + }, + "256_hsub_ps" => Intrinsic { + inputs: vec![v(f(32), 8), v(f(32), 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.avx.hsub.ps.256") + }, + "256_hsub_pd" => Intrinsic { + inputs: vec![v(f(64), 4), v(f(64), 4)], + output: v(f(64), 4), + definition: Named("llvm.x86.avx.hsub.pd.256") + }, + "256_max_ps" => Intrinsic { + inputs: vec![v(f(32), 8), v(f(32), 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.avx.max.ps.256") + }, + "256_max_pd" => Intrinsic { + inputs: vec![v(f(64), 4), v(f(64), 4)], + output: v(f(64), 4), + definition: Named("llvm.x86.avx.max.pd.256") + }, + "256_min_ps" => Intrinsic { + inputs: vec![v(f(32), 8), v(f(32), 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.avx.min.ps.256") + }, + "256_min_pd" => Intrinsic { + inputs: vec![v(f(64), 4), v(f(64), 4)], + output: v(f(64), 4), + definition: Named("llvm.x86.avx.min.pd.256") + }, + "256_movemask_ps" => Intrinsic { + inputs: vec![v(f(32), 8)], + output: i(32), + definition: Named("llvm.x86.avx.movmsk.ps.256") + }, + "256_movemask_pd" => Intrinsic { + inputs: vec![v(f(64), 4)], + output: i(32), + definition: Named("llvm.x86.avx.movmsk.pd.256") + }, + "_permutevar_ps" => Intrinsic { + inputs: vec![v(f(32), 4), v(i(32), 4)], + output: v(f(32), 4), + definition: Named("llvm.x86.avx.vpermilvar.ps") + }, + "_permutevar_pd" => Intrinsic { + inputs: vec![v(f(64), 2), v(i(64), 2)], + output: v(f(64), 2), + definition: Named("llvm.x86.avx.vpermilvar.pd") + }, + "256_permutevar_ps" => Intrinsic { + inputs: vec![v(f(32), 8), v(i(32), 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.avx.vpermilvar.ps.256") + }, + "256_permutevar_pd" => Intrinsic { + inputs: vec![v(f(64), 4), v(i(64), 4)], + output: v(f(64), 4), + definition: Named("llvm.x86.avx.vpermilvar.pd.256") + }, + "256_rcp_ps" => Intrinsic { + inputs: vec![v(f(32), 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.avx.rcp.ps.256") + }, + "256_rsqrt_ps" => Intrinsic { + inputs: vec![v(f(32), 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.avx.rsqrt.ps.256") + }, + "256_sqrt_ps" => Intrinsic { + inputs: vec![v(f(32), 8)], + output: v(f(32), 8), + definition: Named("llvm.sqrt.v8f32") + }, + "256_sqrt_pd" => Intrinsic { + inputs: vec![v(f(64), 4)], + output: v(f(64), 4), + definition: Named("llvm.sqrt.v4f64") + }, + "_testc_ps" => Intrinsic { + inputs: vec![v(f(32), 4), v(f(32), 4)], + output: i(32), + definition: Named("llvm.x86.avx.vtestc.ps") + }, + "256_testc_ps" => Intrinsic { + inputs: vec![v(f(32), 8), v(f(32), 8)], + output: i(32), + definition: Named("llvm.x86.avx.vtestc.ps.256") + }, + "_testc_pd" => Intrinsic { + inputs: vec![v(f(64), 2), v(f(64), 2)], + output: i(32), + definition: Named("llvm.x86.avx.vtestc.pd") + }, + "256_testc_pd" => Intrinsic { + inputs: vec![v(f(64), 4), v(f(64), 4)], + output: i(32), + definition: Named("llvm.x86.avx.vtestc.pd.256") + }, + "256_testc_si256" => Intrinsic { + inputs: vec![v(u(64), 4), v(u(64), 4)], + output: i(32), + definition: Named("llvm.x86.avx.ptestc.256") + }, + "_testnzc_ps" => Intrinsic { + inputs: vec![v(f(32), 4), v(f(32), 4)], + output: i(32), + definition: Named("llvm.x86.avx.vtestnzc.ps") + }, + "256_testnzc_ps" => Intrinsic { + inputs: vec![v(f(32), 8), v(f(32), 8)], + output: i(32), + definition: Named("llvm.x86.avx.vtestnzc.ps.256") + }, + "_testnzc_pd" => Intrinsic { + inputs: vec![v(f(64), 2), v(f(64), 2)], + output: i(32), + definition: Named("llvm.x86.avx.vtestnzc.pd") + }, + "256_testnzc_pd" => Intrinsic { + inputs: vec![v(f(64), 4), v(f(64), 4)], + output: i(32), + definition: Named("llvm.x86.avx.vtestnzc.pd.256") + }, + "256_testnzc_si256" => Intrinsic { + inputs: vec![v(u(64), 4), v(u(64), 4)], + output: i(32), + definition: Named("llvm.x86.avx.ptestnzc.256") + }, + "_testz_ps" => Intrinsic { + inputs: vec![v(f(32), 4), v(f(32), 4)], + output: i(32), + definition: Named("llvm.x86.avx.vtestz.ps") + }, + "256_testz_ps" => Intrinsic { + inputs: vec![v(f(32), 8), v(f(32), 8)], + output: i(32), + definition: Named("llvm.x86.avx.vtestz.ps.256") + }, + "_testz_pd" => Intrinsic { + inputs: vec![v(f(64), 2), v(f(64), 2)], + output: i(32), + definition: Named("llvm.x86.avx.vtestz.pd") + }, + "256_testz_pd" => Intrinsic { + inputs: vec![v(f(64), 4), v(f(64), 4)], + output: i(32), + definition: Named("llvm.x86.avx.vtestz.pd.256") + }, + "256_testz_si256" => Intrinsic { + inputs: vec![v(u(64), 4), v(u(64), 4)], + output: i(32), + definition: Named("llvm.x86.avx.ptestz.256") + }, + "256_abs_epi8" => Intrinsic { + inputs: vec![v(i(8), 32)], + output: v(i(8), 32), + definition: Named("llvm.x86.avx2.avx2.pabs.b") + }, + "256_abs_epi16" => Intrinsic { + inputs: vec![v(i(16), 16)], + output: v(i(16), 16), + definition: Named("llvm.x86.avx2.avx2.pabs.w") + }, + "256_abs_epi32" => Intrinsic { + inputs: vec![v(i(32), 8)], + output: v(i(32), 8), + definition: Named("llvm.x86.avx2.avx2.pabs.d") + }, + "256_adds_epi8" => Intrinsic { + inputs: vec![v(i(8), 32), v(i(8), 32)], + output: v(i(8), 32), + definition: Named("llvm.x86.avx2.avx2.padds.b") + }, + "256_adds_epu8" => Intrinsic { + inputs: vec![v(u(8), 32), v(u(8), 32)], + output: v(u(8), 32), + definition: Named("llvm.x86.avx2.avx2.paddus.b") + }, + "256_adds_epi16" => Intrinsic { + inputs: vec![v(i(16), 16), v(i(16), 16)], + output: v(i(16), 16), + definition: Named("llvm.x86.avx2.avx2.padds.w") + }, + "256_adds_epu16" => Intrinsic { + inputs: vec![v(u(16), 16), v(u(16), 16)], + output: v(u(16), 16), + definition: Named("llvm.x86.avx2.avx2.paddus.w") + }, + "256_avg_epu8" => Intrinsic { + inputs: vec![v(u(8), 32), v(u(8), 32)], + output: v(u(8), 32), + definition: Named("llvm.x86.avx2.avx2.pavg.b") + }, + "256_avg_epu16" => Intrinsic { + inputs: vec![v(u(16), 16), v(u(16), 16)], + output: v(u(16), 16), + definition: Named("llvm.x86.avx2.avx2.pavg.w") + }, + "256_hadd_epi16" => Intrinsic { + inputs: vec![v(i(16), 16), v(i(16), 16)], + output: v(i(16), 16), + definition: Named("llvm.x86.avx2.phadd.w") + }, + "256_hadd_epi32" => Intrinsic { + inputs: vec![v(i(32), 8), v(i(32), 8)], + output: v(i(32), 8), + definition: Named("llvm.x86.avx2.phadd.d") + }, + "256_hadds_epi16" => Intrinsic { + inputs: vec![v(i(16), 16), v(i(16), 16)], + output: v(i(16), 16), + definition: Named("llvm.x86.avx2.phadd.sw") + }, + "256_hsub_epi16" => Intrinsic { + inputs: vec![v(i(16), 16), v(i(16), 16)], + output: v(i(16), 16), + definition: Named("llvm.x86.avx2.phsub.w") + }, + "256_hsub_epi32" => Intrinsic { + inputs: vec![v(i(32), 8), v(i(32), 8)], + output: v(i(32), 8), + definition: Named("llvm.x86.avx2.phsub.d") + }, + "256_hsubs_epi16" => Intrinsic { + inputs: vec![v(i(16), 16), v(i(16), 16)], + output: v(i(16), 16), + definition: Named("llvm.x86.avx2.phsub.sw") + }, + "256_madd_epi16" => Intrinsic { + inputs: vec![v(i(16), 16), v(i(16), 16)], + output: v(i(32), 8), + definition: Named("llvm.x86.avx2.pmadd.wd") + }, + "256_maddubs_epi16" => Intrinsic { + inputs: vec![v(i(8), 32), v(i(8), 32)], + output: v(i(16), 16), + definition: Named("llvm.x86.avx2.pmadd.ub.sw") + }, + "256_max_epi8" => Intrinsic { + inputs: vec![v(i(8), 32), v(i(8), 32)], + output: v(i(8), 32), + definition: Named("llvm.x86.avx2.pmaxs.b") + }, + "256_max_epu8" => Intrinsic { + inputs: vec![v(u(8), 32), v(u(8), 32)], + output: v(u(8), 32), + definition: Named("llvm.x86.avx2.pmaxu.b") + }, + "256_max_epi16" => Intrinsic { + inputs: vec![v(i(16), 16), v(i(16), 16)], + output: v(i(16), 16), + definition: Named("llvm.x86.avx2.pmaxs.w") + }, + "256_max_epu16" => Intrinsic { + inputs: vec![v(u(16), 16), v(u(16), 16)], + output: v(u(16), 16), + definition: Named("llvm.x86.avx2.pmaxu.w") + }, + "256_max_epi32" => Intrinsic { + inputs: vec![v(i(32), 8), v(i(32), 8)], + output: v(i(32), 8), + definition: Named("llvm.x86.avx2.pmaxs.d") + }, + "256_max_epu32" => Intrinsic { + inputs: vec![v(u(32), 8), v(u(32), 8)], + output: v(u(32), 8), + definition: Named("llvm.x86.avx2.pmaxu.d") + }, + "256_min_epi8" => Intrinsic { + inputs: vec![v(i(8), 32), v(i(8), 32)], + output: v(i(8), 32), + definition: Named("llvm.x86.avx2.pmins.b") + }, + "256_min_epu8" => Intrinsic { + inputs: vec![v(u(8), 32), v(u(8), 32)], + output: v(u(8), 32), + definition: Named("llvm.x86.avx2.pminu.b") + }, + "256_min_epi16" => Intrinsic { + inputs: vec![v(i(16), 16), v(i(16), 16)], + output: v(i(16), 16), + definition: Named("llvm.x86.avx2.pmins.w") + }, + "256_min_epu16" => Intrinsic { + inputs: vec![v(u(16), 16), v(u(16), 16)], + output: v(u(16), 16), + definition: Named("llvm.x86.avx2.pminu.w") + }, + "256_min_epi32" => Intrinsic { + inputs: vec![v(i(32), 8), v(i(32), 8)], + output: v(i(32), 8), + definition: Named("llvm.x86.avx2.pmins.d") + }, + "256_min_epu32" => Intrinsic { + inputs: vec![v(u(32), 8), v(u(32), 8)], + output: v(u(32), 8), + definition: Named("llvm.x86.avx2.pminu.d") + }, + "256_mul_epi64" => Intrinsic { + inputs: vec![v(i(32), 8), v(i(32), 8)], + output: v(i(64), 4), + definition: Named("llvm.x86.avx2.pmulq.dq") + }, + "256_mul_epu64" => Intrinsic { + inputs: vec![v(u(32), 8), v(u(32), 8)], + output: v(u(64), 4), + definition: Named("llvm.x86.avx2.pmulq.dq") + }, + "256_mulhi_epi16" => Intrinsic { + inputs: vec![v(i(16), 16), v(i(16), 16)], + output: v(i(16), 16), + definition: Named("llvm.x86.avx2.pmulhw.w") + }, + "256_mulhi_epu16" => Intrinsic { + inputs: vec![v(u(16), 16), v(u(16), 16)], + output: v(u(16), 16), + definition: Named("llvm.x86.avx2.pmulhw.w") + }, + "256_mulhrs_epi16" => Intrinsic { + inputs: vec![v(i(16), 16), v(i(16), 16)], + output: v(i(16), 16), + definition: Named("llvm.x86.avx2.pmul.hr.sw") + }, + "256_packs_epi16" => Intrinsic { + inputs: vec![v(i(16), 16), v(i(16), 16)], + output: v(i(8), 32), + definition: Named("llvm.x86.avx2.packsswb") + }, + "256_packus_epi16" => Intrinsic { + inputs: vec![v(i(16), 16), v(i(16), 16)], + output: v(u(8), 32), + definition: Named("llvm.x86.avx2.packuswb") + }, + "256_packs_epi32" => Intrinsic { + inputs: vec![v(i(32), 8), v(i(32), 8)], + output: v(i(16), 16), + definition: Named("llvm.x86.avx2.packssdw") + }, + "256_packus_epi32" => Intrinsic { + inputs: vec![v(i(32), 8), v(i(32), 8)], + output: v(u(16), 16), + definition: Named("llvm.x86.avx2.packusdw") + }, + "256_permutevar8x32_epi32" => Intrinsic { + inputs: vec![v(i(32), 8), v(i(32), 8)], + output: v(i(32), 8), + definition: Named("llvm.x86.avx2.permd") + }, + "256_permutevar8x32_ps" => Intrinsic { + inputs: vec![v(f(32), 8), v(i(32), 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.avx2.permps") + }, + "256_sad_epu8" => Intrinsic { + inputs: vec![v(u(8), 32), v(u(8), 32)], + output: v(u(8), 32), + definition: Named("llvm.x86.avx2.psad.bw") + }, + "256_shuffle_epi8" => Intrinsic { + inputs: vec![v(i(8), 32), v(i(8), 32)], + output: v(i(8), 32), + definition: Named("llvm.x86.avx2.pshuf.b") + }, + "256_sign_epi8" => Intrinsic { + inputs: vec![v(i(8), 32), v(i(8), 32)], + output: v(i(8), 32), + definition: Named("llvm.x86.avx2.psign.b") + }, + "256_sign_epi16" => Intrinsic { + inputs: vec![v(i(16), 16), v(i(16), 16)], + output: v(i(16), 16), + definition: Named("llvm.x86.avx2.psign.w") + }, + "256_sign_epi32" => Intrinsic { + inputs: vec![v(i(32), 8), v(i(32), 8)], + output: v(i(32), 8), + definition: Named("llvm.x86.avx2.psign.d") + }, + "256_subs_epi8" => Intrinsic { + inputs: vec![v(i(8), 32), v(i(8), 32)], + output: v(i(8), 32), + definition: Named("llvm.x86.avx2.psubs.b") + }, + "256_subs_epu8" => Intrinsic { + inputs: vec![v(u(8), 32), v(u(8), 32)], + output: v(u(8), 32), + definition: Named("llvm.x86.avx2.psubus.b") + }, + "256_subs_epi16" => Intrinsic { + inputs: vec![v(i(16), 16), v(i(16), 16)], + output: v(i(16), 16), + definition: Named("llvm.x86.avx2.psubs.w") + }, + "256_subs_epu16" => Intrinsic { + inputs: vec![v(u(16), 16), v(u(16), 16)], + output: v(u(16), 16), + definition: Named("llvm.x86.avx2.psubus.w") + }, + _ => return None, + }) }