Auto merge of #32140 - ruud-v-a:avx-intrinsics, r=alexcrichton

Add AVX broadcast and conversion intrinsics

This adds the following intrinsics:

 * `_mm256_broadcast_pd`
 * `_mm256_broadcast_ps`
 * `_mm256_cvtepi32_pd`
 * `_mm256_cvtepi32_ps`
 * `_mm256_cvtpd_epi32`
 * `_mm256_cvtpd_ps`
 * `_mm256_cvtps_epi32`
 * `_mm256_cvtps_pd`
 * `_mm256_cvttpd_epi32`
 * `_mm256_cvttps_epi32`

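The "avx" target feature must be enabled during code generation (for example, by passing `-C target-feature=+avx` to rustc) for these intrinsics to be usable.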
This commit is contained in:
bors 2016-03-12 16:18:34 -08:00
commit 531b928ea6
2 changed files with 113 additions and 0 deletions

View file

@@ -8,6 +8,69 @@
"ret": "f(32-64)",
"args": ["0", "0"]
},
{
"intrinsic": "256_broadcast_{0.data_type}",
"width": [256],
"llvm": "vbroadcastf128.{0.data_type}.256",
"ret": "f(32-64)",
"args": ["s8SPc"]
},
{
"intrinsic": "256_cvtepi32_pd",
"width": [256],
"llvm": "cvtdq2.pd.256",
"ret": "f64",
"args": ["s32h"]
},
{
"intrinsic": "256_cvtepi32_ps",
"width": [256],
"llvm": "cvtdq2.ps.256",
"ret": "f32",
"args": ["s32"]
},
{
"intrinsic": "256_cvtpd_epi32",
"width": [256],
"llvm": "cvt.pd2dq.256",
"ret": "s32h",
"args": ["f64"]
},
{
"intrinsic": "256_cvtpd_ps",
"width": [256],
"llvm": "cvt.pd2.ps.256",
"ret": "f32h",
"args": ["f64"]
},
{
"intrinsic": "256_cvtps_epi32",
"width": [256],
"llvm": "cvt.ps2dq.256",
"ret": "s32",
"args": ["f32"]
},
{
"intrinsic": "256_cvtps_pd",
"width": [256],
"llvm": "cvt.ps2.pd.256",
"ret": "f64",
"args": ["f32h"]
},
{
"intrinsic": "256_cvttpd_epi32",
"width": [256],
"llvm": "cvtt.pd2dq.256",
"ret": "s32h",
"args": ["f64"]
},
{
"intrinsic": "256_cvttps_epi32",
"width": [256],
"llvm": "cvtt.ps2dq.256",
"ret": "s32",
"args": ["f32"]
},
{
"intrinsic": "256_dp_ps",
"width": [256],

View file

@@ -498,6 +498,56 @@ pub fn find<'tcx>(_tcx: &TyCtxt<'tcx>, name: &str) -> Option<Intrinsic> {
output: v(f(64), 4),
definition: Named("llvm.x86.avx.addsub.pd.256")
},
"256_broadcast_ps" => Intrinsic {
inputs: vec![p(true, i(8), None)],
output: v(f(32), 8),
definition: Named("llvm.x86.avx.vbroadcastf128.ps.256")
},
"256_broadcast_pd" => Intrinsic {
inputs: vec![p(true, i(8), None)],
output: v(f(64), 4),
definition: Named("llvm.x86.avx.vbroadcastf128.pd.256")
},
"256_cvtepi32_pd" => Intrinsic {
inputs: vec![v(i(32), 4)],
output: v(f(64), 4),
definition: Named("llvm.x86.avx.cvtdq2.pd.256")
},
"256_cvtepi32_ps" => Intrinsic {
inputs: vec![v(i(32), 8)],
output: v(f(32), 8),
definition: Named("llvm.x86.avx.cvtdq2.ps.256")
},
"256_cvtpd_epi32" => Intrinsic {
inputs: vec![v(f(64), 4)],
output: v(i(32), 4),
definition: Named("llvm.x86.avx.cvt.pd2dq.256")
},
"256_cvtpd_ps" => Intrinsic {
inputs: vec![v(f(64), 4)],
output: v(f(32), 4),
definition: Named("llvm.x86.avx.cvt.pd2.ps.256")
},
"256_cvtps_epi32" => Intrinsic {
inputs: vec![v(f(32), 8)],
output: v(i(32), 8),
definition: Named("llvm.x86.avx.cvt.ps2dq.256")
},
"256_cvtps_pd" => Intrinsic {
inputs: vec![v(f(32), 4)],
output: v(f(64), 4),
definition: Named("llvm.x86.avx.cvt.ps2.pd.256")
},
"256_cvttpd_epi32" => Intrinsic {
inputs: vec![v(f(64), 4)],
output: v(i(32), 4),
definition: Named("llvm.x86.avx.cvtt.pd2dq.256")
},
"256_cvttps_epi32" => Intrinsic {
inputs: vec![v(f(32), 8)],
output: v(i(32), 8),
definition: Named("llvm.x86.avx.cvtt.ps2dq.256")
},
"256_dp_ps" => Intrinsic {
inputs: vec![v(f(32), 8), v(f(32), 8), i_(32, 8)],
output: v(f(32), 8),