Auto merge of #32140 - ruud-v-a:avx-intrinsics, r=alexcrichton
Add AVX broadcast and conversion intrinsics

This adds the following intrinsics:

* `_mm256_broadcast_pd`
* `_mm256_broadcast_ps`
* `_mm256_cvtepi32_pd`
* `_mm256_cvtepi32_ps`
* `_mm256_cvtpd_epi32`
* `_mm256_cvtpd_ps`
* `_mm256_cvtps_epi32`
* `_mm256_cvtps_pd`
* `_mm256_cvttpd_epi32`
* `_mm256_cvttps_epi32`

The "avx" codegen feature must be enabled to use these.
This commit is contained in:
commit
531b928ea6
2 changed files with 113 additions and 0 deletions
|
|
@@ -8,6 +8,69 @@
|
|||
"ret": "f(32-64)",
|
||||
"args": ["0", "0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "256_broadcast_{0.data_type}",
|
||||
"width": [256],
|
||||
"llvm": "vbroadcastf128.{0.data_type}.256",
|
||||
"ret": "f(32-64)",
|
||||
"args": ["s8SPc"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "256_cvtepi32_pd",
|
||||
"width": [256],
|
||||
"llvm": "cvtdq2.pd.256",
|
||||
"ret": "f64",
|
||||
"args": ["s32h"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "256_cvtepi32_ps",
|
||||
"width": [256],
|
||||
"llvm": "cvtdq2.ps.256",
|
||||
"ret": "f32",
|
||||
"args": ["s32"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "256_cvtpd_epi32",
|
||||
"width": [256],
|
||||
"llvm": "cvt.pd2dq.256",
|
||||
"ret": "s32h",
|
||||
"args": ["f64"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "256_cvtpd_ps",
|
||||
"width": [256],
|
||||
"llvm": "cvt.pd2.ps.256",
|
||||
"ret": "f32h",
|
||||
"args": ["f64"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "256_cvtps_epi32",
|
||||
"width": [256],
|
||||
"llvm": "cvt.ps2dq.256",
|
||||
"ret": "s32",
|
||||
"args": ["f32"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "256_cvtps_pd",
|
||||
"width": [256],
|
||||
"llvm": "cvt.ps2.pd.256",
|
||||
"ret": "f64",
|
||||
"args": ["f32h"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "256_cvttpd_epi32",
|
||||
"width": [256],
|
||||
"llvm": "cvtt.pd2dq.256",
|
||||
"ret": "s32h",
|
||||
"args": ["f64"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "256_cvttps_epi32",
|
||||
"width": [256],
|
||||
"llvm": "cvtt.ps2dq.256",
|
||||
"ret": "s32",
|
||||
"args": ["f32"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "256_dp_ps",
|
||||
"width": [256],
|
||||
|
|
|
|||
|
|
@@ -498,6 +498,56 @@ pub fn find<'tcx>(_tcx: &TyCtxt<'tcx>, name: &str) -> Option<Intrinsic> {
|
|||
output: v(f(64), 4),
|
||||
definition: Named("llvm.x86.avx.addsub.pd.256")
|
||||
},
|
||||
"256_broadcast_ps" => Intrinsic {
|
||||
inputs: vec![p(true, i(8), None)],
|
||||
output: v(f(32), 8),
|
||||
definition: Named("llvm.x86.avx.vbroadcastf128.ps.256")
|
||||
},
|
||||
"256_broadcast_pd" => Intrinsic {
|
||||
inputs: vec![p(true, i(8), None)],
|
||||
output: v(f(64), 4),
|
||||
definition: Named("llvm.x86.avx.vbroadcastf128.pd.256")
|
||||
},
|
||||
"256_cvtepi32_pd" => Intrinsic {
|
||||
inputs: vec![v(i(32), 4)],
|
||||
output: v(f(64), 4),
|
||||
definition: Named("llvm.x86.avx.cvtdq2.pd.256")
|
||||
},
|
||||
"256_cvtepi32_ps" => Intrinsic {
|
||||
inputs: vec![v(i(32), 8)],
|
||||
output: v(f(32), 8),
|
||||
definition: Named("llvm.x86.avx.cvtdq2.ps.256")
|
||||
},
|
||||
"256_cvtpd_epi32" => Intrinsic {
|
||||
inputs: vec![v(f(64), 4)],
|
||||
output: v(i(32), 4),
|
||||
definition: Named("llvm.x86.avx.cvt.pd2dq.256")
|
||||
},
|
||||
"256_cvtpd_ps" => Intrinsic {
|
||||
inputs: vec![v(f(64), 4)],
|
||||
output: v(f(32), 4),
|
||||
definition: Named("llvm.x86.avx.cvt.pd2.ps.256")
|
||||
},
|
||||
"256_cvtps_epi32" => Intrinsic {
|
||||
inputs: vec![v(f(32), 8)],
|
||||
output: v(i(32), 8),
|
||||
definition: Named("llvm.x86.avx.cvt.ps2dq.256")
|
||||
},
|
||||
"256_cvtps_pd" => Intrinsic {
|
||||
inputs: vec![v(f(32), 4)],
|
||||
output: v(f(64), 4),
|
||||
definition: Named("llvm.x86.avx.cvt.ps2.pd.256")
|
||||
},
|
||||
"256_cvttpd_epi32" => Intrinsic {
|
||||
inputs: vec![v(f(64), 4)],
|
||||
output: v(i(32), 4),
|
||||
definition: Named("llvm.x86.avx.cvtt.pd2dq.256")
|
||||
},
|
||||
"256_cvttps_epi32" => Intrinsic {
|
||||
inputs: vec![v(f(32), 8)],
|
||||
output: v(i(32), 8),
|
||||
definition: Named("llvm.x86.avx.cvtt.ps2dq.256")
|
||||
},
|
||||
"256_dp_ps" => Intrinsic {
|
||||
inputs: vec![v(f(32), 8), v(f(32), 8), i_(32, 8)],
|
||||
output: v(f(32), 8),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue