From 37efeae8866c3c4d9827d0ca271b8e27f731c3e1 Mon Sep 17 00:00:00 2001
From: Ruud van Asseldonk
Date: Tue, 8 Mar 2016 21:41:18 +0100
Subject: [PATCH] Define AVX broadcast intrinsics

This defines `_mm256_broadcast_ps` and `_mm256_broadcast_pd`. The `_ss`
and `_sd` variants are not supported by LLVM. In Clang these intrinsics
are implemented as inline functions in C++.

Intel reference: https://software.intel.com/en-us/node/514144.

Note: the argument type should really be "0hPc" (a pointer to a vector
of half the width), but internally the LLVM intrinsic takes a pointer to
a signed integer, and for any other type LLVM will complain. This means
that a transmute is required to call these intrinsics.

The AVX2 broadcast intrinsics `_mm256_broadcastss_ps` and
`_mm256_broadcastsd_pd` are not available as LLVM intrinsics. In Clang
they are implemented using the shufflevector builtin.
---
 src/etc/platform-intrinsics/x86/avx.json | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/etc/platform-intrinsics/x86/avx.json b/src/etc/platform-intrinsics/x86/avx.json
index 2c1492c2954c..981838536b2f 100644
--- a/src/etc/platform-intrinsics/x86/avx.json
+++ b/src/etc/platform-intrinsics/x86/avx.json
@@ -8,6 +8,13 @@
             "ret": "f(32-64)",
             "args": ["0", "0"]
         },
+        {
+            "intrinsic": "256_broadcast_{0.data_type}",
+            "width": [256],
+            "llvm": "vbroadcastf128.{0.data_type}.256",
+            "ret": "f(32-64)",
+            "args": ["s8SPc"]
+        },
         {
             "intrinsic": "256_dp_ps",
             "width": [256],