Update Intel Intrinsics Guide links
This commit is contained in:
parent
1c18225f32
commit
0301eced58
49 changed files with 4549 additions and 4549 deletions
|
|
@ -47,7 +47,7 @@ the AVX2 feature as [documented by Intel][intel-dox] so to correctly call
|
|||
this function we need to (a) guarantee we only call it on `x86`/`x86_64`
|
||||
and (b) ensure that the CPU feature is available
|
||||
|
||||
[intel-dox]: https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi64&expand=100
|
||||
[intel-dox]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi64&expand=100
|
||||
|
||||
## Static CPU Feature Detection
|
||||
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ use stdarch_test::assert_instr;
|
|||
///
|
||||
/// When the operand is zero, it returns its size in bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_lzcnt_u32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_lzcnt_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "lzcnt")]
|
||||
#[cfg_attr(test, assert_instr(lzcnt))]
|
||||
|
|
@ -35,7 +35,7 @@ pub unsafe fn _lzcnt_u32(x: u32) -> u32 {
|
|||
|
||||
/// Counts the bits that are set.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_popcnt32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_popcnt32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "popcnt")]
|
||||
#[cfg_attr(test, assert_instr(popcnt))]
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ extern "C" {
|
|||
|
||||
/// Performs one round of an AES decryption flow on data (state) in `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesdec_si128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "aes")]
|
||||
#[cfg_attr(test, assert_instr(aesdec))]
|
||||
|
|
@ -41,7 +41,7 @@ pub unsafe fn _mm_aesdec_si128(a: __m128i, round_key: __m128i) -> __m128i {
|
|||
|
||||
/// Performs the last round of an AES decryption flow on data (state) in `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesdeclast_si128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdeclast_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "aes")]
|
||||
#[cfg_attr(test, assert_instr(aesdeclast))]
|
||||
|
|
@ -52,7 +52,7 @@ pub unsafe fn _mm_aesdeclast_si128(a: __m128i, round_key: __m128i) -> __m128i {
|
|||
|
||||
/// Performs one round of an AES encryption flow on data (state) in `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenc_si128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "aes")]
|
||||
#[cfg_attr(test, assert_instr(aesenc))]
|
||||
|
|
@ -63,7 +63,7 @@ pub unsafe fn _mm_aesenc_si128(a: __m128i, round_key: __m128i) -> __m128i {
|
|||
|
||||
/// Performs the last round of an AES encryption flow on data (state) in `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenclast_si128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenclast_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "aes")]
|
||||
#[cfg_attr(test, assert_instr(aesenclast))]
|
||||
|
|
@ -74,7 +74,7 @@ pub unsafe fn _mm_aesenclast_si128(a: __m128i, round_key: __m128i) -> __m128i {
|
|||
|
||||
/// Performs the `InvMixColumns` transformation on `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesimc_si128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesimc_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "aes")]
|
||||
#[cfg_attr(test, assert_instr(aesimc))]
|
||||
|
|
@ -89,7 +89,7 @@ pub unsafe fn _mm_aesimc_si128(a: __m128i) -> __m128i {
|
|||
/// generating a round key for encryption cipher using data from `a` and an
|
||||
/// 8-bit round constant `IMM8`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aeskeygenassist_si128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aeskeygenassist_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "aes")]
|
||||
#[cfg_attr(test, assert_instr(aeskeygenassist, IMM8 = 0))]
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -58,7 +58,7 @@ extern "C" {
|
|||
|
||||
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_popcnt_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntw))]
|
||||
|
|
@ -71,7 +71,7 @@ pub unsafe fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
|
|||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_popcnt_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntw))]
|
||||
|
|
@ -85,7 +85,7 @@ pub unsafe fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_popcnt_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntw))]
|
||||
|
|
@ -99,7 +99,7 @@ pub unsafe fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -
|
|||
|
||||
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_popcnt_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntw))]
|
||||
|
|
@ -112,7 +112,7 @@ pub unsafe fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
|
|||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_popcnt_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntw))]
|
||||
|
|
@ -126,7 +126,7 @@ pub unsafe fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_popcnt_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntw))]
|
||||
|
|
@ -140,7 +140,7 @@ pub unsafe fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -
|
|||
|
||||
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntw))]
|
||||
|
|
@ -153,7 +153,7 @@ pub unsafe fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
|
|||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_popcnt_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntw))]
|
||||
|
|
@ -167,7 +167,7 @@ pub unsafe fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_popcnt_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntw))]
|
||||
|
|
@ -181,7 +181,7 @@ pub unsafe fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __
|
|||
|
||||
/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_popcnt_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntb))]
|
||||
|
|
@ -194,7 +194,7 @@ pub unsafe fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
|
|||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_popcnt_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntb))]
|
||||
|
|
@ -208,7 +208,7 @@ pub unsafe fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_popcnt_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntb))]
|
||||
|
|
@ -222,7 +222,7 @@ pub unsafe fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) ->
|
|||
|
||||
/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_popcnt_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntb))]
|
||||
|
|
@ -235,7 +235,7 @@ pub unsafe fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
|
|||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_popcnt_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntb))]
|
||||
|
|
@ -249,7 +249,7 @@ pub unsafe fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_popcnt_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntb))]
|
||||
|
|
@ -263,7 +263,7 @@ pub unsafe fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) ->
|
|||
|
||||
/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntb))]
|
||||
|
|
@ -276,7 +276,7 @@ pub unsafe fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
|
|||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_popcnt_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntb))]
|
||||
|
|
@ -290,7 +290,7 @@ pub unsafe fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_popcnt_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntb))]
|
||||
|
|
@ -306,7 +306,7 @@ pub unsafe fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __
|
|||
/// Then groups 8 8-bit values from `c` as indices into the bits of the corresponding 64-bit integer.
|
||||
/// It then selects these bits and packs them into the output.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_bitshuffle_epi64_mask)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bitshuffle_epi64_mask)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg")]
|
||||
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
|
||||
|
|
@ -321,7 +321,7 @@ pub unsafe fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64
|
|||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_bitshuffle_epi64_mask)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_bitshuffle_epi64_mask)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg")]
|
||||
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
|
||||
|
|
@ -333,7 +333,7 @@ pub unsafe fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m
|
|||
/// Then groups 8 8-bit values from `c` as indices into the bits of the corresponding 64-bit integer.
|
||||
/// It then selects these bits and packs them into the output.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bitshuffle_epi64_mask)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bitshuffle_epi64_mask)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
|
||||
|
|
@ -348,7 +348,7 @@ pub unsafe fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32
|
|||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_bitshuffle_epi64_mask)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_bitshuffle_epi64_mask)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
|
||||
|
|
@ -360,7 +360,7 @@ pub unsafe fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m
|
|||
/// Then groups 8 8-bit values from `c` as indices into the bits of the corresponding 64-bit integer.
|
||||
/// It then selects these bits and packs them into the output.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bitshuffle_epi64_mask)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bitshuffle_epi64_mask)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
|
||||
|
|
@ -375,7 +375,7 @@ pub unsafe fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
|
|||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_bitshuffle_epi64_mask)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_bitshuffle_epi64_mask)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -8,7 +8,7 @@ use stdarch_test::assert_instr;
|
|||
|
||||
/// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastmw_epi32&expand=553)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastmw_epi32&expand=553)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd")]
|
||||
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
|
||||
|
|
@ -18,7 +18,7 @@ pub unsafe fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i {
|
|||
|
||||
/// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastmw_epi32&expand=552)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastmw_epi32&expand=552)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
|
||||
|
|
@ -28,7 +28,7 @@ pub unsafe fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i {
|
|||
|
||||
/// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastmw_epi32&expand=551)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastmw_epi32&expand=551)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
|
||||
|
|
@ -38,7 +38,7 @@ pub unsafe fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i {
|
|||
|
||||
/// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastmb_epi64&expand=550)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastmb_epi64&expand=550)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd")]
|
||||
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
|
||||
|
|
@ -48,7 +48,7 @@ pub unsafe fn _mm512_broadcastmb_epi64(k: __mmask8) -> __m512i {
|
|||
|
||||
/// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastmb_epi64&expand=549)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastmb_epi64&expand=549)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
|
||||
|
|
@ -58,7 +58,7 @@ pub unsafe fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i {
|
|||
|
||||
/// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastmb_epi64&expand=548)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastmb_epi64&expand=548)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
|
||||
|
|
@ -68,7 +68,7 @@ pub unsafe fn _mm_broadcastmb_epi64(k: __mmask8) -> __m128i {
|
|||
|
||||
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conflict_epi32&expand=1248)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_conflict_epi32&expand=1248)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictd))]
|
||||
|
|
@ -78,7 +78,7 @@ pub unsafe fn _mm512_conflict_epi32(a: __m512i) -> __m512i {
|
|||
|
||||
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conflict_epi32&expand=1249)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_conflict_epi32&expand=1249)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictd))]
|
||||
|
|
@ -89,7 +89,7 @@ pub unsafe fn _mm512_mask_conflict_epi32(src: __m512i, k: __mmask16, a: __m512i)
|
|||
|
||||
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conflict_epi32&expand=1250)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_conflict_epi32&expand=1250)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictd))]
|
||||
|
|
@ -101,7 +101,7 @@ pub unsafe fn _mm512_maskz_conflict_epi32(k: __mmask16, a: __m512i) -> __m512i {
|
|||
|
||||
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conflict_epi32&expand=1245)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_conflict_epi32&expand=1245)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictd))]
|
||||
|
|
@ -111,7 +111,7 @@ pub unsafe fn _mm256_conflict_epi32(a: __m256i) -> __m256i {
|
|||
|
||||
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conflict_epi32&expand=1246)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_conflict_epi32&expand=1246)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictd))]
|
||||
|
|
@ -122,7 +122,7 @@ pub unsafe fn _mm256_mask_conflict_epi32(src: __m256i, k: __mmask8, a: __m256i)
|
|||
|
||||
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conflict_epi32&expand=1247)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_conflict_epi32&expand=1247)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictd))]
|
||||
|
|
@ -134,7 +134,7 @@ pub unsafe fn _mm256_maskz_conflict_epi32(k: __mmask8, a: __m256i) -> __m256i {
|
|||
|
||||
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conflict_epi32&expand=1242)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_conflict_epi32&expand=1242)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictd))]
|
||||
|
|
@ -144,7 +144,7 @@ pub unsafe fn _mm_conflict_epi32(a: __m128i) -> __m128i {
|
|||
|
||||
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conflict_epi32&expand=1243)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_conflict_epi32&expand=1243)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictd))]
|
||||
|
|
@ -155,7 +155,7 @@ pub unsafe fn _mm_mask_conflict_epi32(src: __m128i, k: __mmask8, a: __m128i) ->
|
|||
|
||||
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conflict_epi32&expand=1244)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_conflict_epi32&expand=1244)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictd))]
|
||||
|
|
@ -167,7 +167,7 @@ pub unsafe fn _mm_maskz_conflict_epi32(k: __mmask8, a: __m128i) -> __m128i {
|
|||
|
||||
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conflict_epi64&expand=1257)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_conflict_epi64&expand=1257)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictq))]
|
||||
|
|
@ -177,7 +177,7 @@ pub unsafe fn _mm512_conflict_epi64(a: __m512i) -> __m512i {
|
|||
|
||||
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conflict_epi64&expand=1258)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_conflict_epi64&expand=1258)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictq))]
|
||||
|
|
@ -188,7 +188,7 @@ pub unsafe fn _mm512_mask_conflict_epi64(src: __m512i, k: __mmask8, a: __m512i)
|
|||
|
||||
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conflict_epi64&expand=1259)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_conflict_epi64&expand=1259)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictq))]
|
||||
|
|
@ -200,7 +200,7 @@ pub unsafe fn _mm512_maskz_conflict_epi64(k: __mmask8, a: __m512i) -> __m512i {
|
|||
|
||||
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conflict_epi64&expand=1254)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_conflict_epi64&expand=1254)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictq))]
|
||||
|
|
@ -210,7 +210,7 @@ pub unsafe fn _mm256_conflict_epi64(a: __m256i) -> __m256i {
|
|||
|
||||
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conflict_epi64&expand=1255)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_conflict_epi64&expand=1255)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictq))]
|
||||
|
|
@ -221,7 +221,7 @@ pub unsafe fn _mm256_mask_conflict_epi64(src: __m256i, k: __mmask8, a: __m256i)
|
|||
|
||||
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conflict_epi64&expand=1256)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_conflict_epi64&expand=1256)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictq))]
|
||||
|
|
@ -233,7 +233,7 @@ pub unsafe fn _mm256_maskz_conflict_epi64(k: __mmask8, a: __m256i) -> __m256i {
|
|||
|
||||
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conflict_epi64&expand=1251)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_conflict_epi64&expand=1251)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictq))]
|
||||
|
|
@ -243,7 +243,7 @@ pub unsafe fn _mm_conflict_epi64(a: __m128i) -> __m128i {
|
|||
|
||||
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conflict_epi64&expand=1252)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_conflict_epi64&expand=1252)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictq))]
|
||||
|
|
@ -254,7 +254,7 @@ pub unsafe fn _mm_mask_conflict_epi64(src: __m128i, k: __mmask8, a: __m128i) ->
|
|||
|
||||
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conflict_epi64&expand=1253)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_conflict_epi64&expand=1253)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictq))]
|
||||
|
|
@ -266,7 +266,7 @@ pub unsafe fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i {
|
|||
|
||||
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_lzcnt_epi32&expand=3491)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_lzcnt_epi32&expand=3491)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntd))]
|
||||
|
|
@ -276,7 +276,7 @@ pub unsafe fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i {
|
|||
|
||||
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_lzcnt_epi32&expand=3492)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_lzcnt_epi32&expand=3492)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntd))]
|
||||
|
|
@ -287,7 +287,7 @@ pub unsafe fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) ->
|
|||
|
||||
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_lzcnt_epi32&expand=3493)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_lzcnt_epi32&expand=3493)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntd))]
|
||||
|
|
@ -299,7 +299,7 @@ pub unsafe fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
|
|||
|
||||
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_lzcnt_epi32&expand=3488)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_lzcnt_epi32&expand=3488)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntd))]
|
||||
|
|
@ -309,7 +309,7 @@ pub unsafe fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i {
|
|||
|
||||
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_lzcnt_epi32&expand=3489)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_lzcnt_epi32&expand=3489)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntd))]
|
||||
|
|
@ -320,7 +320,7 @@ pub unsafe fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) ->
|
|||
|
||||
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_lzcnt_epi32&expand=3490)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_lzcnt_epi32&expand=3490)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntd))]
|
||||
|
|
@ -332,7 +332,7 @@ pub unsafe fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
|
|||
|
||||
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lzcnt_epi32&expand=3485)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lzcnt_epi32&expand=3485)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntd))]
|
||||
|
|
@ -342,7 +342,7 @@ pub unsafe fn _mm_lzcnt_epi32(a: __m128i) -> __m128i {
|
|||
|
||||
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_lzcnt_epi32&expand=3486)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_lzcnt_epi32&expand=3486)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntd))]
|
||||
|
|
@ -353,7 +353,7 @@ pub unsafe fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m
|
|||
|
||||
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_lzcnt_epi32&expand=3487)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_lzcnt_epi32&expand=3487)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntd))]
|
||||
|
|
@ -365,7 +365,7 @@ pub unsafe fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
|
|||
|
||||
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_lzcnt_epi64&expand=3500)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_lzcnt_epi64&expand=3500)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntq))]
|
||||
|
|
@ -375,7 +375,7 @@ pub unsafe fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i {
|
|||
|
||||
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_lzcnt_epi64&expand=3501)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_lzcnt_epi64&expand=3501)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntq))]
|
||||
|
|
@ -386,7 +386,7 @@ pub unsafe fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) ->
|
|||
|
||||
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_lzcnt_epi64&expand=3502)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_lzcnt_epi64&expand=3502)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntq))]
|
||||
|
|
@ -398,7 +398,7 @@ pub unsafe fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
|
|||
|
||||
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_lzcnt_epi64&expand=3497)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_lzcnt_epi64&expand=3497)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntq))]
|
||||
|
|
@ -408,7 +408,7 @@ pub unsafe fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i {
|
|||
|
||||
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_lzcnt_epi64&expand=3498)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_lzcnt_epi64&expand=3498)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntq))]
|
||||
|
|
@ -419,7 +419,7 @@ pub unsafe fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) ->
|
|||
|
||||
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_lzcnt_epi64&expand=3499)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_lzcnt_epi64&expand=3499)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntq))]
|
||||
|
|
@ -431,7 +431,7 @@ pub unsafe fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
|
|||
|
||||
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lzcnt_epi64&expand=3494)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lzcnt_epi64&expand=3494)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntq))]
|
||||
|
|
@ -441,7 +441,7 @@ pub unsafe fn _mm_lzcnt_epi64(a: __m128i) -> __m128i {
|
|||
|
||||
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_lzcnt_epi64&expand=3495)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_lzcnt_epi64&expand=3495)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntq))]
|
||||
|
|
@ -452,7 +452,7 @@ pub unsafe fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m
|
|||
|
||||
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_lzcnt_epi64&expand=3496)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_lzcnt_epi64&expand=3496)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntq))]
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -37,7 +37,7 @@ pub unsafe fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m51
|
|||
/// corresponding unsigned 64-bit integer in `a`, and store the
|
||||
/// results in `dst`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL&expand=3485)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL&expand=3485)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512ifma,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52huq))]
|
||||
|
|
@ -51,7 +51,7 @@ pub unsafe fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m25
|
|||
/// corresponding unsigned 64-bit integer in `a`, and store the
|
||||
/// results in `dst`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL&expand=3494)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL&expand=3494)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512ifma,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52luq))]
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ use stdarch_test::assert_instr;
|
|||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_epi8&expand=4262)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi8&expand=4262)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
|
||||
|
|
@ -15,7 +15,7 @@ pub unsafe fn _mm512_permutex2var_epi8(a: __m512i, idx: __m512i, b: __m512i) ->
|
|||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutex2var_epi8&expand=4259)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi8&expand=4259)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[cfg_attr(test, assert_instr(vpermt2b))]
|
||||
|
|
@ -31,7 +31,7 @@ pub unsafe fn _mm512_mask_permutex2var_epi8(
|
|||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutex2var_epi8&expand=4261)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi8&expand=4261)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
|
||||
|
|
@ -48,7 +48,7 @@ pub unsafe fn _mm512_maskz_permutex2var_epi8(
|
|||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask2_permutex2var_epi8&expand=4260)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi8&expand=4260)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[cfg_attr(test, assert_instr(vpermi2b))]
|
||||
|
|
@ -64,7 +64,7 @@ pub unsafe fn _mm512_mask2_permutex2var_epi8(
|
|||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_epi8&expand=4258)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi8&expand=4258)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
|
||||
|
|
@ -74,7 +74,7 @@ pub unsafe fn _mm256_permutex2var_epi8(a: __m256i, idx: __m256i, b: __m256i) ->
|
|||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_permutex2var_epi8&expand=4255)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi8&expand=4255)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpermt2b))]
|
||||
|
|
@ -90,7 +90,7 @@ pub unsafe fn _mm256_mask_permutex2var_epi8(
|
|||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_permutex2var_epi8&expand=4257)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi8&expand=4257)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
|
||||
|
|
@ -107,7 +107,7 @@ pub unsafe fn _mm256_maskz_permutex2var_epi8(
|
|||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask2_permutex2var_epi8&expand=4256)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi8&expand=4256)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpermi2b))]
|
||||
|
|
@ -123,7 +123,7 @@ pub unsafe fn _mm256_mask2_permutex2var_epi8(
|
|||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_epi8&expand=4254)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi8&expand=4254)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
|
||||
|
|
@ -133,7 +133,7 @@ pub unsafe fn _mm_permutex2var_epi8(a: __m128i, idx: __m128i, b: __m128i) -> __m
|
|||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_permutex2var_epi8&expand=4251)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi8&expand=4251)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpermt2b))]
|
||||
|
|
@ -149,7 +149,7 @@ pub unsafe fn _mm_mask_permutex2var_epi8(
|
|||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_permutex2var_epi8&expand=4253)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi8&expand=4253)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
|
||||
|
|
@ -166,7 +166,7 @@ pub unsafe fn _mm_maskz_permutex2var_epi8(
|
|||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask2_permutex2var_epi8&expand=4252)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi8&expand=4252)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpermi2b))]
|
||||
|
|
@ -182,7 +182,7 @@ pub unsafe fn _mm_mask2_permutex2var_epi8(
|
|||
|
||||
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_epi8&expand=4316)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi8&expand=4316)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[cfg_attr(test, assert_instr(vpermb))]
|
||||
|
|
@ -192,7 +192,7 @@ pub unsafe fn _mm512_permutexvar_epi8(idx: __m512i, a: __m512i) -> __m512i {
|
|||
|
||||
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutexvar_epi8&expand=4314)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi8&expand=4314)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[cfg_attr(test, assert_instr(vpermb))]
|
||||
|
|
@ -208,7 +208,7 @@ pub unsafe fn _mm512_mask_permutexvar_epi8(
|
|||
|
||||
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutexvar_epi8&expand=4315)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi8&expand=4315)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[cfg_attr(test, assert_instr(vpermb))]
|
||||
|
|
@ -220,7 +220,7 @@ pub unsafe fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m51
|
|||
|
||||
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_epi8&expand=4313)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi8&expand=4313)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpermb))]
|
||||
|
|
@ -230,7 +230,7 @@ pub unsafe fn _mm256_permutexvar_epi8(idx: __m256i, a: __m256i) -> __m256i {
|
|||
|
||||
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_permutexvar_epi8&expand=4311)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi8&expand=4311)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpermb))]
|
||||
|
|
@ -246,7 +246,7 @@ pub unsafe fn _mm256_mask_permutexvar_epi8(
|
|||
|
||||
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_permutexvar_epi8&expand=4312)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi8&expand=4312)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpermb))]
|
||||
|
|
@ -258,7 +258,7 @@ pub unsafe fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m25
|
|||
|
||||
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_epi8&expand=4310)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi8&expand=4310)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpermb))]
|
||||
|
|
@ -268,7 +268,7 @@ pub unsafe fn _mm_permutexvar_epi8(idx: __m128i, a: __m128i) -> __m128i {
|
|||
|
||||
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_permutexvar_epi8&expand=4308)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi8&expand=4308)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpermb))]
|
||||
|
|
@ -284,7 +284,7 @@ pub unsafe fn _mm_mask_permutexvar_epi8(
|
|||
|
||||
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_permutexvar_epi8&expand=4309)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi8&expand=4309)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpermb))]
|
||||
|
|
@ -296,7 +296,7 @@ pub unsafe fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i)
|
|||
|
||||
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_multishift_epi64_epi8&expand=4026)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_multishift_epi64_epi8&expand=4026)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
|
||||
|
|
@ -306,7 +306,7 @@ pub unsafe fn _mm512_multishift_epi64_epi8(a: __m512i, b: __m512i) -> __m512i {
|
|||
|
||||
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_multishift_epi64_epi8&expand=4024)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_multishift_epi64_epi8&expand=4024)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
|
||||
|
|
@ -322,7 +322,7 @@ pub unsafe fn _mm512_mask_multishift_epi64_epi8(
|
|||
|
||||
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_multishift_epi64_epi8&expand=4025)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_multishift_epi64_epi8&expand=4025)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
|
||||
|
|
@ -334,7 +334,7 @@ pub unsafe fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __
|
|||
|
||||
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_multishift_epi64_epi8&expand=4023)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_multishift_epi64_epi8&expand=4023)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
|
||||
|
|
@ -344,7 +344,7 @@ pub unsafe fn _mm256_multishift_epi64_epi8(a: __m256i, b: __m256i) -> __m256i {
|
|||
|
||||
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_multishift_epi64_epi8&expand=4021)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_multishift_epi64_epi8&expand=4021)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
|
||||
|
|
@ -360,7 +360,7 @@ pub unsafe fn _mm256_mask_multishift_epi64_epi8(
|
|||
|
||||
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_multishift_epi64_epi8&expand=4022)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_multishift_epi64_epi8&expand=4022)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
|
||||
|
|
@ -382,7 +382,7 @@ pub unsafe fn _mm_multishift_epi64_epi8(a: __m128i, b: __m128i) -> __m128i {
|
|||
|
||||
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_multishift_epi64_epi8&expand=4018)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_multishift_epi64_epi8&expand=4018)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
|
||||
|
|
@ -398,7 +398,7 @@ pub unsafe fn _mm_mask_multishift_epi64_epi8(
|
|||
|
||||
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_multishift_epi64_epi8&expand=4019)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_multishift_epi64_epi8&expand=4019)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -8,7 +8,7 @@ use stdarch_test::assert_instr;
|
|||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_dpwssd_epi32&expand=2219)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssd_epi32&expand=2219)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssd))]
|
||||
|
|
@ -18,7 +18,7 @@ pub unsafe fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m51
|
|||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_dpwssd_epi32&expand=2220)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssd_epi32&expand=2220)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssd))]
|
||||
|
|
@ -34,7 +34,7 @@ pub unsafe fn _mm512_mask_dpwssd_epi32(
|
|||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_dpwssd_epi32&expand=2221)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssd_epi32&expand=2221)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssd))]
|
||||
|
|
@ -51,7 +51,7 @@ pub unsafe fn _mm512_maskz_dpwssd_epi32(
|
|||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpwssd_epi32&expand=2216)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_epi32&expand=2216)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssd))]
|
||||
|
|
@ -61,7 +61,7 @@ pub unsafe fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25
|
|||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_dpwssd_epi32&expand=2217)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssd_epi32&expand=2217)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssd))]
|
||||
|
|
@ -77,7 +77,7 @@ pub unsafe fn _mm256_mask_dpwssd_epi32(
|
|||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_dpwssd_epi32&expand=2218)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssd_epi32&expand=2218)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssd))]
|
||||
|
|
@ -94,7 +94,7 @@ pub unsafe fn _mm256_maskz_dpwssd_epi32(
|
|||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpwssd_epi32&expand=2213)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_epi32&expand=2213)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssd))]
|
||||
|
|
@ -104,7 +104,7 @@ pub unsafe fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_dpwssd_epi32&expand=2214)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssd_epi32&expand=2214)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssd))]
|
||||
|
|
@ -115,7 +115,7 @@ pub unsafe fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __
|
|||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_dpwssd_epi32&expand=2215)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssd_epi32&expand=2215)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssd))]
|
||||
|
|
@ -127,7 +127,7 @@ pub unsafe fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: _
|
|||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_dpwssds_epi32&expand=2228)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssds_epi32&expand=2228)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssds))]
|
||||
|
|
@ -137,7 +137,7 @@ pub unsafe fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m5
|
|||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_dpwssds_epi32&expand=2229)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssds_epi32&expand=2229)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssds))]
|
||||
|
|
@ -153,7 +153,7 @@ pub unsafe fn _mm512_mask_dpwssds_epi32(
|
|||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_dpwssds_epi32&expand=2230)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssds_epi32&expand=2230)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssds))]
|
||||
|
|
@ -170,7 +170,7 @@ pub unsafe fn _mm512_maskz_dpwssds_epi32(
|
|||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpwssds_epi32&expand=2225)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_epi32&expand=2225)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssds))]
|
||||
|
|
@ -180,7 +180,7 @@ pub unsafe fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2
|
|||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_dpwssds_epi32&expand=2226)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssds_epi32&expand=2226)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssds))]
|
||||
|
|
@ -196,7 +196,7 @@ pub unsafe fn _mm256_mask_dpwssds_epi32(
|
|||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_dpwssds_epi32&expand=2227)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssds_epi32&expand=2227)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssds))]
|
||||
|
|
@ -213,7 +213,7 @@ pub unsafe fn _mm256_maskz_dpwssds_epi32(
|
|||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpwssds_epi32&expand=2222)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_epi32&expand=2222)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssds))]
|
||||
|
|
@ -223,7 +223,7 @@ pub unsafe fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_dpwssds_epi32&expand=2223)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssds_epi32&expand=2223)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssds))]
|
||||
|
|
@ -234,7 +234,7 @@ pub unsafe fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: _
|
|||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_dpwssds_epi32&expand=2224)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssds_epi32&expand=2224)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssds))]
|
||||
|
|
@ -251,7 +251,7 @@ pub unsafe fn _mm_maskz_dpwssds_epi32(
|
|||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_dpbusd_epi32&expand=2201)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusd_epi32&expand=2201)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusd))]
|
||||
|
|
@ -261,7 +261,7 @@ pub unsafe fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m51
|
|||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_dpbusd_epi32&expand=2202)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusd_epi32&expand=2202)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusd))]
|
||||
|
|
@ -277,7 +277,7 @@ pub unsafe fn _mm512_mask_dpbusd_epi32(
|
|||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_dpbusd_epi32&expand=2203)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusd_epi32&expand=2203)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusd))]
|
||||
|
|
@ -294,7 +294,7 @@ pub unsafe fn _mm512_maskz_dpbusd_epi32(
|
|||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpbusd_epi32&expand=2198)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_epi32&expand=2198)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusd))]
|
||||
|
|
@ -304,7 +304,7 @@ pub unsafe fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25
|
|||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_dpbusd_epi32&expand=2199)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusd_epi32&expand=2199)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusd))]
|
||||
|
|
@ -320,7 +320,7 @@ pub unsafe fn _mm256_mask_dpbusd_epi32(
|
|||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_dpbusd_epi32&expand=2200)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusd_epi32&expand=2200)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusd))]
|
||||
|
|
@ -337,7 +337,7 @@ pub unsafe fn _mm256_maskz_dpbusd_epi32(
|
|||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpbusd_epi32&expand=2195)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_epi32&expand=2195)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusd))]
|
||||
|
|
@ -347,7 +347,7 @@ pub unsafe fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_dpbusd_epi32&expand=2196)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusd_epi32&expand=2196)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusd))]
|
||||
|
|
@ -358,7 +358,7 @@ pub unsafe fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __
|
|||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_dpbusd_epi32&expand=2197)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusd_epi32&expand=2197)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusd))]
|
||||
|
|
@ -370,7 +370,7 @@ pub unsafe fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: _
|
|||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_dpbusds_epi32&expand=2210)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusds_epi32&expand=2210)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusds))]
|
||||
|
|
@ -380,7 +380,7 @@ pub unsafe fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m5
|
|||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_dpbusds_epi32&expand=2211)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusds_epi32&expand=2211)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusds))]
|
||||
|
|
@ -396,7 +396,7 @@ pub unsafe fn _mm512_mask_dpbusds_epi32(
|
|||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_dpbusds_epi32&expand=2212)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusds_epi32&expand=2212)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusds))]
|
||||
|
|
@ -413,7 +413,7 @@ pub unsafe fn _mm512_maskz_dpbusds_epi32(
|
|||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpbusds_epi32&expand=2207)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_epi32&expand=2207)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusds))]
|
||||
|
|
@ -423,7 +423,7 @@ pub unsafe fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2
|
|||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_dpbusds_epi32&expand=2208)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusds_epi32&expand=2208)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusds))]
|
||||
|
|
@ -439,7 +439,7 @@ pub unsafe fn _mm256_mask_dpbusds_epi32(
|
|||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_dpbusds_epi32&expand=2209)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusds_epi32&expand=2209)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusds))]
|
||||
|
|
@ -456,7 +456,7 @@ pub unsafe fn _mm256_maskz_dpbusds_epi32(
|
|||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpbusds_epi32&expand=2204)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_epi32&expand=2204)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusds))]
|
||||
|
|
@ -466,7 +466,7 @@ pub unsafe fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_dpbusds_epi32&expand=2205)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusds_epi32&expand=2205)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusds))]
|
||||
|
|
@ -477,7 +477,7 @@ pub unsafe fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: _
|
|||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_dpbusds_epi32&expand=2206)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusds_epi32&expand=2206)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusds))]
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ extern "C" {
|
|||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_popcnt_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
|
|
@ -62,7 +62,7 @@ pub unsafe fn _mm512_popcnt_epi32(a: __m512i) -> __m512i {
|
|||
/// Uses the zeromask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_popcnt_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
|
|
@ -76,7 +76,7 @@ pub unsafe fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_popcnt_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
|
|
@ -90,7 +90,7 @@ pub unsafe fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -
|
|||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_popcnt_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
|
|
@ -103,7 +103,7 @@ pub unsafe fn _mm256_popcnt_epi32(a: __m256i) -> __m256i {
|
|||
/// Uses the zeromask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_popcnt_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
|
|
@ -117,7 +117,7 @@ pub unsafe fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_popcnt_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
|
|
@ -131,7 +131,7 @@ pub unsafe fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) ->
|
|||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
|
|
@ -144,7 +144,7 @@ pub unsafe fn _mm_popcnt_epi32(a: __m128i) -> __m128i {
|
|||
/// Uses the zeromask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_popcnt_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
|
|
@ -158,7 +158,7 @@ pub unsafe fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_popcnt_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
|
|
@ -172,7 +172,7 @@ pub unsafe fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __
|
|||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_popcnt_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
|
|
@ -185,7 +185,7 @@ pub unsafe fn _mm512_popcnt_epi64(a: __m512i) -> __m512i {
|
|||
/// Uses the zeromask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_popcnt_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
|
|
@ -199,7 +199,7 @@ pub unsafe fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_popcnt_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
|
|
@ -213,7 +213,7 @@ pub unsafe fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) ->
|
|||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_popcnt_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
|
|
@ -226,7 +226,7 @@ pub unsafe fn _mm256_popcnt_epi64(a: __m256i) -> __m256i {
|
|||
/// Uses the zeromask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_popcnt_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
|
|
@ -240,7 +240,7 @@ pub unsafe fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_popcnt_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
|
|
@ -254,7 +254,7 @@ pub unsafe fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) ->
|
|||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
|
|
@ -267,7 +267,7 @@ pub unsafe fn _mm_popcnt_epi64(a: __m128i) -> __m128i {
|
|||
/// Uses the zeromask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_popcnt_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
|
|
@ -281,7 +281,7 @@ pub unsafe fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_popcnt_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ use stdarch_test::assert_instr;
|
|||
/// Extracts bits in range [`start`, `start` + `length`) from `a` into
|
||||
/// the least significant bits of the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr_u32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bextr_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(bextr))]
|
||||
|
|
@ -30,7 +30,7 @@ pub unsafe fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
|
|||
/// Bits `[7,0]` of `control` specify the index to the first bit in the range
|
||||
/// to be extracted, and bits `[15,8]` specify the length of the range.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr2_u32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bextr2_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(bextr))]
|
||||
|
|
@ -41,7 +41,7 @@ pub unsafe fn _bextr2_u32(a: u32, control: u32) -> u32 {
|
|||
|
||||
/// Bitwise logical `AND` of inverted `a` with `b`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_andn_u32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_andn_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(andn))]
|
||||
|
|
@ -52,7 +52,7 @@ pub unsafe fn _andn_u32(a: u32, b: u32) -> u32 {
|
|||
|
||||
/// Extracts lowest set isolated bit.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsi_u32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_blsi_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(blsi))]
|
||||
|
|
@ -63,7 +63,7 @@ pub unsafe fn _blsi_u32(x: u32) -> u32 {
|
|||
|
||||
/// Gets mask up to lowest set bit.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsmsk_u32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_blsmsk_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(blsmsk))]
|
||||
|
|
@ -76,7 +76,7 @@ pub unsafe fn _blsmsk_u32(x: u32) -> u32 {
|
|||
///
|
||||
/// If `x` is `0`, sets CF.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsr_u32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_blsr_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(blsr))]
|
||||
|
|
@ -89,7 +89,7 @@ pub unsafe fn _blsr_u32(x: u32) -> u32 {
|
|||
///
|
||||
/// When the source operand is `0`, it returns its size in bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tzcnt_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(tzcnt))]
|
||||
|
|
@ -102,7 +102,7 @@ pub unsafe fn _tzcnt_u32(x: u32) -> u32 {
|
|||
///
|
||||
/// When the source operand is `0`, it returns its size in bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_tzcnt_32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_tzcnt_32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(tzcnt))]
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ use stdarch_test::assert_instr;
|
|||
/// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with
|
||||
/// the low half and the high half of the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mulx_u32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mulx_u32)
|
||||
#[inline]
|
||||
// LLVM BUG (should be mulxl): https://bugs.llvm.org/show_bug.cgi?id=34232
|
||||
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(imul))]
|
||||
|
|
@ -33,7 +33,7 @@ pub unsafe fn _mulx_u32(a: u32, b: u32, hi: &mut u32) -> u32 {
|
|||
|
||||
/// Zeroes the bits of `a` at positions >= `index`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bzhi_u32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bzhi_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi2")]
|
||||
#[cfg_attr(test, assert_instr(bzhi))]
|
||||
|
|
@ -45,7 +45,7 @@ pub unsafe fn _bzhi_u32(a: u32, index: u32) -> u32 {
|
|||
/// Scatter contiguous low order bits of `a` to the result at the positions
|
||||
/// specified by the `mask`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pdep_u32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi2")]
|
||||
#[cfg_attr(test, assert_instr(pdep))]
|
||||
|
|
@ -57,7 +57,7 @@ pub unsafe fn _pdep_u32(a: u32, mask: u32) -> u32 {
|
|||
/// Gathers the bits of `x` specified by the `mask` into the contiguous low
|
||||
/// order bit positions of the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pext_u32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi2")]
|
||||
#[cfg_attr(test, assert_instr(pext))]
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ use stdarch_test::assert_instr;
|
|||
|
||||
/// Returns an integer with the reversed byte order of `x`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bswap)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bswap)
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(bswap))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ use crate::arch::asm;
|
|||
|
||||
/// Reads EFLAGS.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__readeflags)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=__readeflags)
|
||||
#[cfg(target_arch = "x86")]
|
||||
#[inline(always)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
|
|
@ -21,7 +21,7 @@ pub unsafe fn __readeflags() -> u32 {
|
|||
|
||||
/// Reads EFLAGS.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__readeflags)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=__readeflags)
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[inline(always)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
|
|
@ -38,7 +38,7 @@ pub unsafe fn __readeflags() -> u64 {
|
|||
|
||||
/// Writes EFLAGS.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__writeeflags)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=__writeeflags)
|
||||
#[cfg(target_arch = "x86")]
|
||||
#[inline(always)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
|
|
@ -53,7 +53,7 @@ pub unsafe fn __writeeflags(eflags: u32) {
|
|||
|
||||
/// Writes EFLAGS.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__writeeflags)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=__writeeflags)
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[inline(always)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
//! [F16C intrinsics].
|
||||
//!
|
||||
//! [F16C intrinsics]: https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=fp16&expand=1769
|
||||
//! [F16C intrinsics]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=fp16&expand=1769
|
||||
|
||||
use crate::{
|
||||
core_arch::{simd::*, x86::*},
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ use stdarch_test::assert_instr;
|
|||
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
|
||||
/// and `b`, and adds the intermediate result to packed elements in `c`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmadd))]
|
||||
|
|
@ -39,7 +39,7 @@ pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
|
||||
/// and `b`, and adds the intermediate result to packed elements in `c`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmadd_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmadd))]
|
||||
|
|
@ -51,7 +51,7 @@ pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
|||
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
|
||||
/// and `b`, and add the intermediate result to packed elements in `c`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmadd))]
|
||||
|
|
@ -63,7 +63,7 @@ pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
|||
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
|
||||
/// and `b`, and add the intermediate result to packed elements in `c`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmadd_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmadd))]
|
||||
|
|
@ -77,7 +77,7 @@ pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
|||
/// Stores the result in the lower element of the returned value, and copy the
|
||||
/// upper element from `a` to the upper elements of the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_sd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmadd))]
|
||||
|
|
@ -91,7 +91,7 @@ pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
/// Stores the result in the lower element of the returned value, and copy the
|
||||
/// 3 upper elements from `a` to the upper elements of the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmadd))]
|
||||
|
|
@ -104,7 +104,7 @@ pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
|
|||
/// and `b`, and alternatively add and subtract packed elements in `c` to/from
|
||||
/// the intermediate result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmaddsub_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmaddsub))]
|
||||
|
|
@ -117,7 +117,7 @@ pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
/// and `b`, and alternatively add and subtract packed elements in `c` to/from
|
||||
/// the intermediate result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmaddsub_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmaddsub))]
|
||||
|
|
@ -130,7 +130,7 @@ pub unsafe fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d
|
|||
/// and `b`, and alternatively add and subtract packed elements in `c` to/from
|
||||
/// the intermediate result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmaddsub_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmaddsub))]
|
||||
|
|
@ -143,7 +143,7 @@ pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
|||
/// and `b`, and alternatively add and subtract packed elements in `c` to/from
|
||||
/// the intermediate result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmaddsub_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmaddsub))]
|
||||
|
|
@ -155,7 +155,7 @@ pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
|||
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
|
||||
/// and `b`, and subtract packed elements in `c` from the intermediate result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsub))]
|
||||
|
|
@ -167,7 +167,7 @@ pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
|
||||
/// and `b`, and subtract packed elements in `c` from the intermediate result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsub_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsub))]
|
||||
|
|
@ -179,7 +179,7 @@ pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
|||
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
|
||||
/// and `b`, and subtract packed elements in `c` from the intermediate result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsub213ps))]
|
||||
|
|
@ -191,7 +191,7 @@ pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
|||
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
|
||||
/// and `b`, and subtract packed elements in `c` from the intermediate result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsub_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsub213ps))]
|
||||
|
|
@ -205,7 +205,7 @@ pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
|||
/// result. Store the result in the lower element of the returned value, and
|
||||
/// copy the upper element from `a` to the upper elements of the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_sd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_sd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsub))]
|
||||
|
|
@ -219,7 +219,7 @@ pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
/// result. Store the result in the lower element of the returned value, and
|
||||
/// copy the 3 upper elements from `a` to the upper elements of the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsub))]
|
||||
|
|
@ -232,7 +232,7 @@ pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
|
|||
/// and `b`, and alternatively subtract and add packed elements in `c` from/to
|
||||
/// the intermediate result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsubadd_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsubadd))]
|
||||
|
|
@ -245,7 +245,7 @@ pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
/// and `b`, and alternatively subtract and add packed elements in `c` from/to
|
||||
/// the intermediate result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsubadd_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsubadd))]
|
||||
|
|
@ -258,7 +258,7 @@ pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d
|
|||
/// and `b`, and alternatively subtract and add packed elements in `c` from/to
|
||||
/// the intermediate result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsubadd_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsubadd))]
|
||||
|
|
@ -271,7 +271,7 @@ pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
|||
/// and `b`, and alternatively subtract and add packed elements in `c` from/to
|
||||
/// the intermediate result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsubadd_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsubadd))]
|
||||
|
|
@ -283,7 +283,7 @@ pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
|||
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
|
||||
/// and `b`, and add the negated intermediate result to packed elements in `c`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmadd))]
|
||||
|
|
@ -295,7 +295,7 @@ pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
|
||||
/// and `b`, and add the negated intermediate result to packed elements in `c`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmadd))]
|
||||
|
|
@ -307,7 +307,7 @@ pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
|||
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
|
||||
/// and `b`, and add the negated intermediate result to packed elements in `c`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmadd))]
|
||||
|
|
@ -319,7 +319,7 @@ pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
|||
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
|
||||
/// and `b`, and add the negated intermediate result to packed elements in `c`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmadd))]
|
||||
|
|
@ -333,7 +333,7 @@ pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
|||
/// in `c`. Store the result in the lower element of the returned value, and
|
||||
/// copy the upper element from `a` to the upper elements of the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_sd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_sd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmadd))]
|
||||
|
|
@ -347,7 +347,7 @@ pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
/// in `c`. Store the result in the lower element of the returned value, and
|
||||
/// copy the 3 upper elements from `a` to the upper elements of the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmadd))]
|
||||
|
|
@ -360,7 +360,7 @@ pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
|
|||
/// and `b`, and subtract packed elements in `c` from the negated intermediate
|
||||
/// result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmsub))]
|
||||
|
|
@ -373,7 +373,7 @@ pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
/// and `b`, and subtract packed elements in `c` from the negated intermediate
|
||||
/// result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmsub_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmsub_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmsub))]
|
||||
|
|
@ -386,7 +386,7 @@ pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
|||
/// and `b`, and subtract packed elements in `c` from the negated intermediate
|
||||
/// result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmsub))]
|
||||
|
|
@ -399,7 +399,7 @@ pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
|||
/// and `b`, and subtract packed elements in `c` from the negated intermediate
|
||||
/// result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmsub_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmsub_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmsub))]
|
||||
|
|
@ -414,7 +414,7 @@ pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
|||
/// value, and copy the upper element from `a` to the upper elements of the
|
||||
/// result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_sd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_sd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmsub))]
|
||||
|
|
@ -429,7 +429,7 @@ pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
/// returned value, and copy the 3 upper elements from `a` to the upper
|
||||
/// elements of the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmsub))]
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ extern "C" {
|
|||
/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
|
||||
/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxsave)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_fxsave)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fxsr")]
|
||||
#[cfg_attr(test, assert_instr(fxsave))]
|
||||
|
|
@ -46,7 +46,7 @@ pub unsafe fn _fxsave(mem_addr: *mut u8) {
|
|||
/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
|
||||
/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxrstor)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_fxrstor)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fxsr")]
|
||||
#[cfg_attr(test, assert_instr(fxrstor))]
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ extern "C" {
|
|||
/// The field is in polynomial representation with the reduction polynomial
|
||||
/// x^8 + x^4 + x^3 + x + 1.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_gf2p8mul_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_gf2p8mul_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
|
||||
|
|
@ -78,7 +78,7 @@ pub unsafe fn _mm512_gf2p8mul_epi8(a: __m512i, b: __m512i) -> __m512i {
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_gf2p8mul_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_gf2p8mul_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
|
||||
|
|
@ -102,7 +102,7 @@ pub unsafe fn _mm512_mask_gf2p8mul_epi8(
|
|||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_gf2p8mul_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_gf2p8mul_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
|
||||
|
|
@ -119,7 +119,7 @@ pub unsafe fn _mm512_maskz_gf2p8mul_epi8(k: __mmask64, a: __m512i, b: __m512i) -
|
|||
/// The field is in polynomial representation with the reduction polynomial
|
||||
/// x^8 + x^4 + x^3 + x + 1.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_gf2p8mul_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_gf2p8mul_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
|
||||
|
|
@ -134,7 +134,7 @@ pub unsafe fn _mm256_gf2p8mul_epi8(a: __m256i, b: __m256i) -> __m256i {
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_gf2p8mul_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_gf2p8mul_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
|
||||
|
|
@ -158,7 +158,7 @@ pub unsafe fn _mm256_mask_gf2p8mul_epi8(
|
|||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_gf2p8mul_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_gf2p8mul_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
|
||||
|
|
@ -175,7 +175,7 @@ pub unsafe fn _mm256_maskz_gf2p8mul_epi8(k: __mmask32, a: __m256i, b: __m256i) -
|
|||
/// The field is in polynomial representation with the reduction polynomial
|
||||
/// x^8 + x^4 + x^3 + x + 1.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_gf2p8mul_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_gf2p8mul_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni")]
|
||||
#[cfg_attr(test, assert_instr(gf2p8mulb))]
|
||||
|
|
@ -190,7 +190,7 @@ pub unsafe fn _mm_gf2p8mul_epi8(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_gf2p8mul_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_gf2p8mul_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
|
||||
|
|
@ -214,7 +214,7 @@ pub unsafe fn _mm_mask_gf2p8mul_epi8(
|
|||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_gf2p8mul_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_gf2p8mul_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
|
||||
|
|
@ -232,7 +232,7 @@ pub unsafe fn _mm_maskz_gf2p8mul_epi8(k: __mmask16, a: __m128i, b: __m128i) -> _
|
|||
/// and b being a constant 8-bit immediate value.
|
||||
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_gf2p8affine_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_gf2p8affine_epi64_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
|
||||
|
|
@ -254,7 +254,7 @@ pub unsafe fn _mm512_gf2p8affine_epi64_epi8<const B: i32>(x: __m512i, a: __m512i
|
|||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_gf2p8affine_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_gf2p8affine_epi64_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
|
||||
|
|
@ -281,7 +281,7 @@ pub unsafe fn _mm512_maskz_gf2p8affine_epi64_epi8<const B: i32>(
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_gf2p8affine_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_gf2p8affine_epi64_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
|
||||
|
|
@ -305,7 +305,7 @@ pub unsafe fn _mm512_mask_gf2p8affine_epi64_epi8<const B: i32>(
|
|||
/// and b being a constant 8-bit immediate value.
|
||||
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_gf2p8affine_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_gf2p8affine_epi64_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
|
||||
|
|
@ -327,7 +327,7 @@ pub unsafe fn _mm256_gf2p8affine_epi64_epi8<const B: i32>(x: __m256i, a: __m256i
|
|||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_gf2p8affine_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_gf2p8affine_epi64_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
|
||||
|
|
@ -354,7 +354,7 @@ pub unsafe fn _mm256_maskz_gf2p8affine_epi64_epi8<const B: i32>(
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_gf2p8affine_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_gf2p8affine_epi64_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
|
||||
|
|
@ -378,7 +378,7 @@ pub unsafe fn _mm256_mask_gf2p8affine_epi64_epi8<const B: i32>(
|
|||
/// and b being a constant 8-bit immediate value.
|
||||
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_gf2p8affine_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_gf2p8affine_epi64_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni")]
|
||||
#[cfg_attr(test, assert_instr(gf2p8affineqb, B = 0))]
|
||||
|
|
@ -400,7 +400,7 @@ pub unsafe fn _mm_gf2p8affine_epi64_epi8<const B: i32>(x: __m128i, a: __m128i) -
|
|||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_gf2p8affine_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_gf2p8affine_epi64_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
|
||||
|
|
@ -427,7 +427,7 @@ pub unsafe fn _mm_maskz_gf2p8affine_epi64_epi8<const B: i32>(
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_gf2p8affine_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_gf2p8affine_epi64_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
|
||||
|
|
@ -453,7 +453,7 @@ pub unsafe fn _mm_mask_gf2p8affine_epi64_epi8<const B: i32>(
|
|||
/// The inverse of 0 is 0.
|
||||
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_gf2p8affineinv_epi64_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_gf2p8affineinv_epi64_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
|
||||
|
|
@ -477,7 +477,7 @@ pub unsafe fn _mm512_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m512i, a: __m5
|
|||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_gf2p8affineinv_epi64_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_gf2p8affineinv_epi64_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
|
||||
|
|
@ -506,7 +506,7 @@ pub unsafe fn _mm512_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_gf2p8affineinv_epi64_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_gf2p8affineinv_epi64_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
|
||||
|
|
@ -532,7 +532,7 @@ pub unsafe fn _mm512_mask_gf2p8affineinv_epi64_epi8<const B: i32>(
|
|||
/// The inverse of 0 is 0.
|
||||
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_gf2p8affineinv_epi64_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_gf2p8affineinv_epi64_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
|
||||
|
|
@ -556,7 +556,7 @@ pub unsafe fn _mm256_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m256i, a: __m2
|
|||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_gf2p8affineinv_epi64_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_gf2p8affineinv_epi64_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
|
||||
|
|
@ -585,7 +585,7 @@ pub unsafe fn _mm256_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_gf2p8affineinv_epi64_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_gf2p8affineinv_epi64_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
|
||||
|
|
@ -611,7 +611,7 @@ pub unsafe fn _mm256_mask_gf2p8affineinv_epi64_epi8<const B: i32>(
|
|||
/// The inverse of 0 is 0.
|
||||
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_gf2p8affineinv_epi64_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_gf2p8affineinv_epi64_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni")]
|
||||
#[cfg_attr(test, assert_instr(gf2p8affineinvqb, B = 0))]
|
||||
|
|
@ -635,7 +635,7 @@ pub unsafe fn _mm_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m128i, a: __m128i
|
|||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_gf2p8affineinv_epi64_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_gf2p8affineinv_epi64_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
|
||||
|
|
@ -664,7 +664,7 @@ pub unsafe fn _mm_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
|
|||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_gf2p8affineinv_epi64_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_gf2p8affineinv_epi64_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
|
||||
|
|
@ -698,7 +698,7 @@ mod tests {
|
|||
|
||||
fn mulbyte(left: u8, right: u8) -> u8 {
|
||||
// this implementation follows the description in
|
||||
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_gf2p8mul_epi8
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_gf2p8mul_epi8
|
||||
const REDUCTION_POLYNOMIAL: u16 = 0x11b;
|
||||
let left: u16 = left.into();
|
||||
let right: u16 = right.into();
|
||||
|
|
@ -742,7 +742,7 @@ mod tests {
|
|||
|
||||
fn mat_vec_multiply_affine(matrix: u64, x: u8, b: u8) -> u8 {
|
||||
// this implementation follows the description in
|
||||
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_gf2p8affine_epi64_epi8
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_gf2p8affine_epi64_epi8
|
||||
let mut accumulator = 0;
|
||||
|
||||
for bit in 0..8 {
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ extern "C" {
|
|||
/// The immediate byte is used for determining which halves of `a` and `b`
|
||||
/// should be used. Immediate bits other than 0 and 4 are ignored.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clmulepi64_si128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "pclmulqdq")]
|
||||
#[cfg_attr(all(test, not(target_os = "linux")), assert_instr(pclmulqdq, IMM8 = 0))]
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ use stdarch_test::assert_instr;
|
|||
/// Read a hardware generated 16-bit random value and store the result in val.
|
||||
/// Returns 1 if a random value was generated, and 0 otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand16_step)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_rdrand16_step)
|
||||
#[inline]
|
||||
#[target_feature(enable = "rdrand")]
|
||||
#[cfg_attr(test, assert_instr(rdrand))]
|
||||
|
|
@ -35,7 +35,7 @@ pub unsafe fn _rdrand16_step(val: &mut u16) -> i32 {
|
|||
/// Read a hardware generated 32-bit random value and store the result in val.
|
||||
/// Returns 1 if a random value was generated, and 0 otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand32_step)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_rdrand32_step)
|
||||
#[inline]
|
||||
#[target_feature(enable = "rdrand")]
|
||||
#[cfg_attr(test, assert_instr(rdrand))]
|
||||
|
|
@ -49,7 +49,7 @@ pub unsafe fn _rdrand32_step(val: &mut u32) -> i32 {
|
|||
/// Read a 16-bit NIST SP800-90B and SP800-90C compliant random value and store
|
||||
/// in val. Return 1 if a random value was generated, and 0 otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdseed16_step)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_rdseed16_step)
|
||||
#[inline]
|
||||
#[target_feature(enable = "rdseed")]
|
||||
#[cfg_attr(test, assert_instr(rdseed))]
|
||||
|
|
@ -63,7 +63,7 @@ pub unsafe fn _rdseed16_step(val: &mut u16) -> i32 {
|
|||
/// Read a 32-bit NIST SP800-90B and SP800-90C compliant random value and store
|
||||
/// in val. Return 1 if a random value was generated, and 0 otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdseed32_step)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_rdseed32_step)
|
||||
#[inline]
|
||||
#[target_feature(enable = "rdseed")]
|
||||
#[cfg_attr(test, assert_instr(rdseed))]
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ use stdarch_test::assert_instr;
|
|||
/// On processors that support the Intel 64 architecture, the
|
||||
/// high-order 32 bits of each of RAX and RDX are cleared.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdtsc)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_rdtsc)
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(rdtsc))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
|
|
@ -41,7 +41,7 @@ pub unsafe fn _rdtsc() -> u64 {
|
|||
/// On processors that support the Intel 64 architecture, the
|
||||
/// high-order 32 bits of each of RAX, RDX, and RCX are cleared.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__rdtscp)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=__rdtscp)
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(rdtscp))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ use stdarch_test::assert_instr;
|
|||
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
|
||||
/// and returning the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1msg1_epu32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg1_epu32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha1msg1))]
|
||||
|
|
@ -41,7 +41,7 @@ pub unsafe fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// (unsigned 32-bit integers) using the intermediate result in `a` and the
|
||||
/// previous message values in `b`, and returns the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1msg2_epu32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg2_epu32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha1msg2))]
|
||||
|
|
@ -54,7 +54,7 @@ pub unsafe fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// current SHA1 state variable `a`, add that value to the scheduled values
|
||||
/// (unsigned 32-bit integers) in `b`, and returns the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1nexte_epu32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1nexte_epu32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha1nexte))]
|
||||
|
|
@ -69,7 +69,7 @@ pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// updated SHA1 state (A,B,C,D). `FUNC` contains the logic functions and round
|
||||
/// constants.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1rnds4_epu32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1rnds4_epu32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha1rnds4, FUNC = 0))]
|
||||
|
|
@ -84,7 +84,7 @@ pub unsafe fn _mm_sha1rnds4_epu32<const FUNC: i32>(a: __m128i, b: __m128i) -> __
|
|||
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
|
||||
/// and return the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha256msg1_epu32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg1_epu32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha256msg1))]
|
||||
|
|
@ -97,7 +97,7 @@ pub unsafe fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
|
||||
/// and return the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha256msg2_epu32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg2_epu32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha256msg2))]
|
||||
|
|
@ -112,7 +112,7 @@ pub unsafe fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// integers) and the corresponding round constants from `k`, and store the
|
||||
/// updated SHA256 state (A,B,E,F) in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha256rnds2_epu32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256rnds2_epu32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha256rnds2))]
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ use stdarch_test::assert_instr;
|
|||
/// Adds the first component of `a` and `b`, the other components are copied
|
||||
/// from `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(addss))]
|
||||
|
|
@ -22,7 +22,7 @@ pub unsafe fn _mm_add_ss(a: __m128, b: __m128) -> __m128 {
|
|||
|
||||
/// Adds __m128 vectors.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(addps))]
|
||||
|
|
@ -34,7 +34,7 @@ pub unsafe fn _mm_add_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// Subtracts the first component of `b` from `a`, the other components are
|
||||
/// copied from `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(subss))]
|
||||
|
|
@ -45,7 +45,7 @@ pub unsafe fn _mm_sub_ss(a: __m128, b: __m128) -> __m128 {
|
|||
|
||||
/// Subtracts __m128 vectors.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(subps))]
|
||||
|
|
@ -57,7 +57,7 @@ pub unsafe fn _mm_sub_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// Multiplies the first component of `a` and `b`, the other components are
|
||||
/// copied from `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(mulss))]
|
||||
|
|
@ -68,7 +68,7 @@ pub unsafe fn _mm_mul_ss(a: __m128, b: __m128) -> __m128 {
|
|||
|
||||
/// Multiplies __m128 vectors.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(mulps))]
|
||||
|
|
@ -80,7 +80,7 @@ pub unsafe fn _mm_mul_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// Divides the first component of `a` by `b`, the other components are
|
||||
/// copied from `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(divss))]
|
||||
|
|
@ -91,7 +91,7 @@ pub unsafe fn _mm_div_ss(a: __m128, b: __m128) -> __m128 {
|
|||
|
||||
/// Divides __m128 vectors.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(divps))]
|
||||
|
|
@ -103,7 +103,7 @@ pub unsafe fn _mm_div_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// Returns the square root of the first single-precision (32-bit)
|
||||
/// floating-point element in `a`, the other elements are unchanged.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(sqrtss))]
|
||||
|
|
@ -115,7 +115,7 @@ pub unsafe fn _mm_sqrt_ss(a: __m128) -> __m128 {
|
|||
/// Returns the square root of packed single-precision (32-bit) floating-point
|
||||
/// elements in `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(sqrtps))]
|
||||
|
|
@ -127,7 +127,7 @@ pub unsafe fn _mm_sqrt_ps(a: __m128) -> __m128 {
|
|||
/// Returns the approximate reciprocal of the first single-precision
|
||||
/// (32-bit) floating-point element in `a`, the other elements are unchanged.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(rcpss))]
|
||||
|
|
@ -139,7 +139,7 @@ pub unsafe fn _mm_rcp_ss(a: __m128) -> __m128 {
|
|||
/// Returns the approximate reciprocal of packed single-precision (32-bit)
|
||||
/// floating-point elements in `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(rcpps))]
|
||||
|
|
@ -151,7 +151,7 @@ pub unsafe fn _mm_rcp_ps(a: __m128) -> __m128 {
|
|||
/// Returns the approximate reciprocal square root of the first single-precision
|
||||
/// (32-bit) floating-point element in `a`, the other elements are unchanged.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(rsqrtss))]
|
||||
|
|
@ -163,7 +163,7 @@ pub unsafe fn _mm_rsqrt_ss(a: __m128) -> __m128 {
|
|||
/// Returns the approximate reciprocal square root of packed single-precision
|
||||
/// (32-bit) floating-point elements in `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(rsqrtps))]
|
||||
|
|
@ -176,7 +176,7 @@ pub unsafe fn _mm_rsqrt_ps(a: __m128) -> __m128 {
|
|||
/// and `b`, and return the minimum value in the first element of the return
|
||||
/// value, the other elements are copied from `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(minss))]
|
||||
|
|
@ -188,7 +188,7 @@ pub unsafe fn _mm_min_ss(a: __m128, b: __m128) -> __m128 {
|
|||
/// Compares packed single-precision (32-bit) floating-point elements in `a` and
|
||||
/// `b`, and returns the corresponding minimum values.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(minps))]
|
||||
|
|
@ -202,7 +202,7 @@ pub unsafe fn _mm_min_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// and `b`, and return the maximum value in the first element of the return
|
||||
/// value, the other elements are copied from `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(maxss))]
|
||||
|
|
@ -214,7 +214,7 @@ pub unsafe fn _mm_max_ss(a: __m128, b: __m128) -> __m128 {
|
|||
/// Compares packed single-precision (32-bit) floating-point elements in `a` and
|
||||
/// `b`, and returns the corresponding maximum values.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(maxps))]
|
||||
|
|
@ -226,7 +226,7 @@ pub unsafe fn _mm_max_ps(a: __m128, b: __m128) -> __m128 {
|
|||
|
||||
/// Bitwise AND of packed single-precision (32-bit) floating-point elements.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
// i586 only seems to generate plain `and` instructions, so ignore it.
|
||||
|
|
@ -246,7 +246,7 @@ pub unsafe fn _mm_and_ps(a: __m128, b: __m128) -> __m128 {
|
|||
///
|
||||
/// Computes `!a & b` for each bit in `a` and `b`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
// i586 only seems to generate plain `not` and `and` instructions, so ignore
|
||||
|
|
@ -265,7 +265,7 @@ pub unsafe fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 {
|
|||
|
||||
/// Bitwise OR of packed single-precision (32-bit) floating-point elements.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
// i586 only seems to generate plain `or` instructions, so we ignore it.
|
||||
|
|
@ -283,7 +283,7 @@ pub unsafe fn _mm_or_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// Bitwise exclusive OR of packed single-precision (32-bit) floating-point
|
||||
/// elements.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
// i586 only seems to generate plain `xor` instructions, so we ignore it.
|
||||
|
|
@ -302,7 +302,7 @@ pub unsafe fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// the result will be `0xffffffff` if the two inputs are equal, or `0`
|
||||
/// otherwise. The upper 96 bits of the result are the upper 96 bits of `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpeqss))]
|
||||
|
|
@ -316,7 +316,7 @@ pub unsafe fn _mm_cmpeq_ss(a: __m128, b: __m128) -> __m128 {
|
|||
/// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the
|
||||
/// upper 96 bits of `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpltss))]
|
||||
|
|
@ -330,7 +330,7 @@ pub unsafe fn _mm_cmplt_ss(a: __m128, b: __m128) -> __m128 {
|
|||
/// or equal `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result
|
||||
/// are the upper 96 bits of `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpless))]
|
||||
|
|
@ -344,7 +344,7 @@ pub unsafe fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128 {
|
|||
/// than `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result
|
||||
/// are the upper 96 bits of `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpltss))]
|
||||
|
|
@ -358,7 +358,7 @@ pub unsafe fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 {
|
|||
/// greater than or equal `b.extract(0)`, or `0` otherwise. The upper 96 bits
|
||||
/// of the result are the upper 96 bits of `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpless))]
|
||||
|
|
@ -372,7 +372,7 @@ pub unsafe fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128 {
|
|||
/// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the
|
||||
/// upper 96 bits of `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpneqss))]
|
||||
|
|
@ -386,7 +386,7 @@ pub unsafe fn _mm_cmpneq_ss(a: __m128, b: __m128) -> __m128 {
|
|||
/// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the
|
||||
/// upper 96 bits of `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpnltss))]
|
||||
|
|
@ -400,7 +400,7 @@ pub unsafe fn _mm_cmpnlt_ss(a: __m128, b: __m128) -> __m128 {
|
|||
/// less than or equal to `b.extract(0)`, or `0` otherwise. The upper 96 bits
|
||||
/// of the result are the upper 96 bits of `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpnless))]
|
||||
|
|
@ -414,7 +414,7 @@ pub unsafe fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128 {
|
|||
/// than `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are
|
||||
/// the upper 96 bits of `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpnltss))]
|
||||
|
|
@ -428,7 +428,7 @@ pub unsafe fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 {
|
|||
/// greater than or equal to `b.extract(0)`, or `0` otherwise. The upper 96
|
||||
/// bits of the result are the upper 96 bits of `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpnless))]
|
||||
|
|
@ -442,7 +442,7 @@ pub unsafe fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128 {
|
|||
/// `b.extract(0)` is a NaN, or `0` otherwise. The upper 96 bits of the result
|
||||
/// are the upper 96 bits of `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpordss))]
|
||||
|
|
@ -456,7 +456,7 @@ pub unsafe fn _mm_cmpord_ss(a: __m128, b: __m128) -> __m128 {
|
|||
/// `b.extract(0)` is a NaN, or `0` otherwise. The upper 96 bits of the result
|
||||
/// are the upper 96 bits of `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpunordss))]
|
||||
|
|
@ -469,7 +469,7 @@ pub unsafe fn _mm_cmpunord_ss(a: __m128, b: __m128) -> __m128 {
|
|||
/// The result in the output vector will be `0xffffffff` if the input elements
|
||||
/// were equal, or `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpeqps))]
|
||||
|
|
@ -482,7 +482,7 @@ pub unsafe fn _mm_cmpeq_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// The result in the output vector will be `0xffffffff` if the input element
|
||||
/// in `a` is less than the corresponding element in `b`, or `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpltps))]
|
||||
|
|
@ -496,7 +496,7 @@ pub unsafe fn _mm_cmplt_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// in `a` is less than or equal to the corresponding element in `b`, or `0`
|
||||
/// otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpleps))]
|
||||
|
|
@ -509,7 +509,7 @@ pub unsafe fn _mm_cmple_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// The result in the output vector will be `0xffffffff` if the input element
|
||||
/// in `a` is greater than the corresponding element in `b`, or `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpltps))]
|
||||
|
|
@ -523,7 +523,7 @@ pub unsafe fn _mm_cmpgt_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// in `a` is greater than or equal to the corresponding element in `b`, or `0`
|
||||
/// otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpleps))]
|
||||
|
|
@ -536,7 +536,7 @@ pub unsafe fn _mm_cmpge_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// The result in the output vector will be `0xffffffff` if the input elements
|
||||
/// are **not** equal, or `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpneqps))]
|
||||
|
|
@ -550,7 +550,7 @@ pub unsafe fn _mm_cmpneq_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// in `a` is **not** less than the corresponding element in `b`, or `0`
|
||||
/// otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpnltps))]
|
||||
|
|
@ -564,7 +564,7 @@ pub unsafe fn _mm_cmpnlt_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// in `a` is **not** less than or equal to the corresponding element in `b`, or
|
||||
/// `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpnleps))]
|
||||
|
|
@ -578,7 +578,7 @@ pub unsafe fn _mm_cmpnle_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// in `a` is **not** greater than the corresponding element in `b`, or `0`
|
||||
/// otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpnltps))]
|
||||
|
|
@ -592,7 +592,7 @@ pub unsafe fn _mm_cmpngt_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// in `a` is **not** greater than or equal to the corresponding element in `b`,
|
||||
/// or `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpnleps))]
|
||||
|
|
@ -606,7 +606,7 @@ pub unsafe fn _mm_cmpnge_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// in the output vector will be `0xffffffff` if the input elements in `a` and
|
||||
/// `b` are ordered (i.e., neither of them is a NaN), or 0 otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpordps))]
|
||||
|
|
@ -620,7 +620,7 @@ pub unsafe fn _mm_cmpord_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// in the output vector will be `0xffffffff` if the input elements in `a` and
|
||||
/// `b` are unordered (i.e., at least one of them is a NaN), or 0 otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cmpunordps))]
|
||||
|
|
@ -632,7 +632,7 @@ pub unsafe fn _mm_cmpunord_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns
|
||||
/// `1` if they are equal, or `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comieq_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(comiss))]
|
||||
|
|
@ -644,7 +644,7 @@ pub unsafe fn _mm_comieq_ss(a: __m128, b: __m128) -> i32 {
|
|||
/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns
|
||||
/// `1` if the value from `a` is less than the one from `b`, or `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comilt_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(comiss))]
|
||||
|
|
@ -657,7 +657,7 @@ pub unsafe fn _mm_comilt_ss(a: __m128, b: __m128) -> i32 {
|
|||
/// `1` if the value from `a` is less than or equal to the one from `b`, or `0`
|
||||
/// otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comile_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(comiss))]
|
||||
|
|
@ -670,7 +670,7 @@ pub unsafe fn _mm_comile_ss(a: __m128, b: __m128) -> i32 {
|
|||
/// `1` if the value from `a` is greater than the one from `b`, or `0`
|
||||
/// otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comigt_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(comiss))]
|
||||
|
|
@ -683,7 +683,7 @@ pub unsafe fn _mm_comigt_ss(a: __m128, b: __m128) -> i32 {
|
|||
/// `1` if the value from `a` is greater than or equal to the one from `b`, or
|
||||
/// `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comige_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(comiss))]
|
||||
|
|
@ -695,7 +695,7 @@ pub unsafe fn _mm_comige_ss(a: __m128, b: __m128) -> i32 {
|
|||
/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns
|
||||
/// `1` if they are **not** equal, or `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comineq_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(comiss))]
|
||||
|
|
@ -708,7 +708,7 @@ pub unsafe fn _mm_comineq_ss(a: __m128, b: __m128) -> i32 {
|
|||
/// `1` if they are equal, or `0` otherwise. This instruction will not signal
|
||||
/// an exception if either argument is a quiet NaN.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomieq_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(ucomiss))]
|
||||
|
|
@ -722,7 +722,7 @@ pub unsafe fn _mm_ucomieq_ss(a: __m128, b: __m128) -> i32 {
|
|||
/// This instruction will not signal an exception if either argument is a quiet
|
||||
/// NaN.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomilt_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(ucomiss))]
|
||||
|
|
@ -736,7 +736,7 @@ pub unsafe fn _mm_ucomilt_ss(a: __m128, b: __m128) -> i32 {
|
|||
/// otherwise. This instruction will not signal an exception if either argument
|
||||
/// is a quiet NaN.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomile_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(ucomiss))]
|
||||
|
|
@ -750,7 +750,7 @@ pub unsafe fn _mm_ucomile_ss(a: __m128, b: __m128) -> i32 {
|
|||
/// otherwise. This instruction will not signal an exception if either argument
|
||||
/// is a quiet NaN.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomigt_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(ucomiss))]
|
||||
|
|
@ -764,7 +764,7 @@ pub unsafe fn _mm_ucomigt_ss(a: __m128, b: __m128) -> i32 {
|
|||
/// `0` otherwise. This instruction will not signal an exception if either
|
||||
/// argument is a quiet NaN.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomige_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(ucomiss))]
|
||||
|
|
@ -777,7 +777,7 @@ pub unsafe fn _mm_ucomige_ss(a: __m128, b: __m128) -> i32 {
|
|||
/// `1` if they are **not** equal, or `0` otherwise. This instruction will not
|
||||
/// signal an exception if either argument is a quiet NaN.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomineq_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(ucomiss))]
|
||||
|
|
@ -795,7 +795,7 @@ pub unsafe fn _mm_ucomineq_ss(a: __m128, b: __m128) -> i32 {
|
|||
///
|
||||
/// This corresponds to the `CVTSS2SI` instruction (with 32 bit output).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_si32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_si32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cvtss2si))]
|
||||
|
|
@ -806,7 +806,7 @@ pub unsafe fn _mm_cvtss_si32(a: __m128) -> i32 {
|
|||
|
||||
/// Alias for [`_mm_cvtss_si32`](fn._mm_cvtss_si32.html).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_ss2si)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_ss2si)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cvtss2si))]
|
||||
|
|
@ -826,7 +826,7 @@ pub unsafe fn _mm_cvt_ss2si(a: __m128) -> i32 {
|
|||
///
|
||||
/// This corresponds to the `CVTTSS2SI` instruction (with 32 bit output).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_si32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_si32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cvttss2si))]
|
||||
|
|
@ -837,7 +837,7 @@ pub unsafe fn _mm_cvttss_si32(a: __m128) -> i32 {
|
|||
|
||||
/// Alias for [`_mm_cvttss_si32`](fn._mm_cvttss_si32.html).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_ss2si)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_ss2si)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cvttss2si))]
|
||||
|
|
@ -848,7 +848,7 @@ pub unsafe fn _mm_cvtt_ss2si(a: __m128) -> i32 {
|
|||
|
||||
/// Extracts the lowest 32 bit float from the input vector.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_f32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_f32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
// No point in using assert_instrs. In Unix x86_64 calling convention this is a
|
||||
|
|
@ -864,7 +864,7 @@ pub unsafe fn _mm_cvtss_f32(a: __m128) -> f32 {
|
|||
/// This intrinsic corresponds to the `CVTSI2SS` instruction (with 32 bit
|
||||
/// input).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cvtsi2ss))]
|
||||
|
|
@ -875,7 +875,7 @@ pub unsafe fn _mm_cvtsi32_ss(a: __m128, b: i32) -> __m128 {
|
|||
|
||||
/// Alias for [`_mm_cvtsi32_ss`](fn._mm_cvtsi32_ss.html).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_si2ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_si2ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cvtsi2ss))]
|
||||
|
|
@ -887,7 +887,7 @@ pub unsafe fn _mm_cvt_si2ss(a: __m128, b: i32) -> __m128 {
|
|||
/// Construct a `__m128` with the lowest element set to `a` and the rest set to
|
||||
/// zero.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(movss))]
|
||||
|
|
@ -898,7 +898,7 @@ pub unsafe fn _mm_set_ss(a: f32) -> __m128 {
|
|||
|
||||
/// Construct a `__m128` with all elements set to `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(shufps))]
|
||||
|
|
@ -909,7 +909,7 @@ pub unsafe fn _mm_set1_ps(a: f32) -> __m128 {
|
|||
|
||||
/// Alias for [`_mm_set1_ps`](fn._mm_set1_ps.html)
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ps1)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_ps1)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(shufps))]
|
||||
|
|
@ -936,7 +936,7 @@ pub unsafe fn _mm_set_ps1(a: f32) -> __m128 {
|
|||
/// let v = _mm_set_ps(d, c, b, a);
|
||||
/// ```
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(unpcklps))]
|
||||
|
|
@ -954,7 +954,7 @@ pub unsafe fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
|
|||
/// assert_eq!(__m128::new(a, b, c, d), _mm_setr_ps(a, b, c, d));
|
||||
/// ```
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(
|
||||
|
|
@ -973,7 +973,7 @@ pub unsafe fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
|
|||
|
||||
/// Construct a `__m128` with all elements initialized to zero.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(xorps))]
|
||||
|
|
@ -997,11 +997,11 @@ pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 {
|
|||
/// The lower half of result takes values from `a` and the higher half from
|
||||
/// `b`. Mask is split to 2 control bits each to index the element from inputs.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_ps)
|
||||
///
|
||||
/// Note that there appears to be a mistake within Intel's Intrinsics Guide.
|
||||
/// `_mm_shuffle_ps` is supposed to take an `i32` instead of a `u32`
|
||||
/// as is the case for [other shuffle intrinsics](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_).
|
||||
/// as is the case for [other shuffle intrinsics](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_).
|
||||
/// Performing an implicit type conversion between an unsigned integer and a signed integer
|
||||
/// does not cause a problem in C, however Rust's commitment to strong typing does not allow this.
|
||||
#[inline]
|
||||
|
|
@ -1026,7 +1026,7 @@ pub unsafe fn _mm_shuffle_ps<const MASK: i32>(a: __m128, b: __m128) -> __m128 {
|
|||
/// Unpacks and interleaves single-precision (32-bit) floating-point elements
|
||||
/// from the higher half of `a` and `b`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(unpckhps))]
|
||||
|
|
@ -1038,7 +1038,7 @@ pub unsafe fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// Unpacks and interleaves single-precision (32-bit) floating-point elements
|
||||
/// from the lower half of `a` and `b`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(unpcklps))]
|
||||
|
|
@ -1050,7 +1050,7 @@ pub unsafe fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// Combine higher half of `a` and `b`. The higher half of `b` occupies the
|
||||
/// lower half of result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movehl_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movehl_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movhlps))]
|
||||
|
|
@ -1063,7 +1063,7 @@ pub unsafe fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// Combine lower half of `a` and `b`. The lower half of `b` occupies the
|
||||
/// higher half of result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movelh_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movelh_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
|
||||
|
|
@ -1077,7 +1077,7 @@ pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// The mask is stored in the 4 least significant bits of the return value.
|
||||
/// All other bits are set to `0`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(movmskps))]
|
||||
|
|
@ -1091,7 +1091,7 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
|
|||
///
|
||||
/// This corresponds to instructions `VMOVSS` / `MOVSS`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(movss))]
|
||||
|
|
@ -1106,7 +1106,7 @@ pub unsafe fn _mm_load_ss(p: *const f32) -> __m128 {
|
|||
/// This corresponds to instructions `VMOVSS` / `MOVSS` followed by some
|
||||
/// shuffling.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load1_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load1_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(movss))]
|
||||
|
|
@ -1118,7 +1118,7 @@ pub unsafe fn _mm_load1_ps(p: *const f32) -> __m128 {
|
|||
|
||||
/// Alias for [`_mm_load1_ps`](fn._mm_load1_ps.html)
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ps1)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_ps1)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(movss))]
|
||||
|
|
@ -1136,7 +1136,7 @@ pub unsafe fn _mm_load_ps1(p: *const f32) -> __m128 {
|
|||
///
|
||||
/// This corresponds to instructions `VMOVAPS` / `MOVAPS`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(movaps))]
|
||||
|
|
@ -1154,7 +1154,7 @@ pub unsafe fn _mm_load_ps(p: *const f32) -> __m128 {
|
|||
///
|
||||
/// This corresponds to instructions `VMOVUPS` / `MOVUPS`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(movups))]
|
||||
|
|
@ -1191,7 +1191,7 @@ pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 {
|
|||
/// This corresponds to instructions `VMOVAPS` / `MOVAPS` followed by some
|
||||
/// shuffling.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadr_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(movaps))]
|
||||
|
|
@ -1205,7 +1205,7 @@ pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 {
|
|||
///
|
||||
/// `mem_addr` does not need to be aligned on any particular boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
|
||||
|
|
@ -1217,7 +1217,7 @@ pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
|
|||
///
|
||||
/// This intrinsic corresponds to the `MOVSS` instruction.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(movss))]
|
||||
|
|
@ -1243,7 +1243,7 @@ pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) {
|
|||
/// *p.add(3) = x;
|
||||
/// ```
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store1_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(movaps))]
|
||||
|
|
@ -1256,7 +1256,7 @@ pub unsafe fn _mm_store1_ps(p: *mut f32, a: __m128) {
|
|||
|
||||
/// Alias for [`_mm_store1_ps`](fn._mm_store1_ps.html)
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ps1)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_ps1)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(movaps))]
|
||||
|
|
@ -1275,7 +1275,7 @@ pub unsafe fn _mm_store_ps1(p: *mut f32, a: __m128) {
|
|||
///
|
||||
/// This corresponds to instructions `VMOVAPS` / `MOVAPS`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(movaps))]
|
||||
|
|
@ -1291,7 +1291,7 @@ pub unsafe fn _mm_store_ps(p: *mut f32, a: __m128) {
|
|||
///
|
||||
/// This corresponds to instructions `VMOVUPS` / `MOVUPS`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(movups))]
|
||||
|
|
@ -1319,7 +1319,7 @@ pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) {
|
|||
/// *p.add(3) = a.extract(0);
|
||||
/// ```
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storer_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(movaps))]
|
||||
|
|
@ -1338,7 +1338,7 @@ pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) {
|
|||
/// _mm_move_ss(a, b) == a.replace(0, b.extract(0))
|
||||
/// ```
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(movss))]
|
||||
|
|
@ -1354,7 +1354,7 @@ pub unsafe fn _mm_move_ss(a: __m128, b: __m128) -> __m128 {
|
|||
/// globally visible before any store instruction which follows the fence in
|
||||
/// program order.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sfence)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sfence)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(sfence))]
|
||||
|
|
@ -1367,7 +1367,7 @@ pub unsafe fn _mm_sfence() {
|
|||
///
|
||||
/// For more info see [`_mm_setcsr`](fn._mm_setcsr.html)
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getcsr)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getcsr)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(stmxcsr))]
|
||||
|
|
@ -1504,7 +1504,7 @@ pub unsafe fn _mm_getcsr() -> u32 {
|
|||
/// ```
|
||||
///
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setcsr)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setcsr)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(ldmxcsr))]
|
||||
|
|
@ -1586,7 +1586,7 @@ pub const _MM_FLUSH_ZERO_OFF: u32 = 0x0000;
|
|||
|
||||
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_GET_EXCEPTION_MASK)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_GET_EXCEPTION_MASK)
|
||||
#[inline]
|
||||
#[allow(non_snake_case)]
|
||||
#[target_feature(enable = "sse")]
|
||||
|
|
@ -1597,7 +1597,7 @@ pub unsafe fn _MM_GET_EXCEPTION_MASK() -> u32 {
|
|||
|
||||
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_GET_EXCEPTION_STATE)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_GET_EXCEPTION_STATE)
|
||||
#[inline]
|
||||
#[allow(non_snake_case)]
|
||||
#[target_feature(enable = "sse")]
|
||||
|
|
@ -1608,7 +1608,7 @@ pub unsafe fn _MM_GET_EXCEPTION_STATE() -> u32 {
|
|||
|
||||
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_GET_FLUSH_ZERO_MODE)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_GET_FLUSH_ZERO_MODE)
|
||||
#[inline]
|
||||
#[allow(non_snake_case)]
|
||||
#[target_feature(enable = "sse")]
|
||||
|
|
@ -1619,7 +1619,7 @@ pub unsafe fn _MM_GET_FLUSH_ZERO_MODE() -> u32 {
|
|||
|
||||
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_GET_ROUNDING_MODE)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_GET_ROUNDING_MODE)
|
||||
#[inline]
|
||||
#[allow(non_snake_case)]
|
||||
#[target_feature(enable = "sse")]
|
||||
|
|
@ -1630,7 +1630,7 @@ pub unsafe fn _MM_GET_ROUNDING_MODE() -> u32 {
|
|||
|
||||
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_SET_EXCEPTION_MASK)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_SET_EXCEPTION_MASK)
|
||||
#[inline]
|
||||
#[allow(non_snake_case)]
|
||||
#[target_feature(enable = "sse")]
|
||||
|
|
@ -1641,7 +1641,7 @@ pub unsafe fn _MM_SET_EXCEPTION_MASK(x: u32) {
|
|||
|
||||
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_SET_EXCEPTION_STATE)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_SET_EXCEPTION_STATE)
|
||||
#[inline]
|
||||
#[allow(non_snake_case)]
|
||||
#[target_feature(enable = "sse")]
|
||||
|
|
@ -1652,7 +1652,7 @@ pub unsafe fn _MM_SET_EXCEPTION_STATE(x: u32) {
|
|||
|
||||
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_SET_FLUSH_ZERO_MODE)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_SET_FLUSH_ZERO_MODE)
|
||||
#[inline]
|
||||
#[allow(non_snake_case)]
|
||||
#[target_feature(enable = "sse")]
|
||||
|
|
@ -1665,7 +1665,7 @@ pub unsafe fn _MM_SET_FLUSH_ZERO_MODE(x: u32) {
|
|||
|
||||
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_SET_ROUNDING_MODE)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_SET_ROUNDING_MODE)
|
||||
#[inline]
|
||||
#[allow(non_snake_case)]
|
||||
#[target_feature(enable = "sse")]
|
||||
|
|
@ -1739,7 +1739,7 @@ pub const _MM_HINT_ET1: i32 = 6;
|
|||
/// resources (e.g., request buffers).
|
||||
///
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_prefetch)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_prefetch)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(prefetcht0, STRATEGY = _MM_HINT_T0))]
|
||||
|
|
@ -1756,7 +1756,7 @@ pub unsafe fn _mm_prefetch<const STRATEGY: i32>(p: *const i8) {
|
|||
|
||||
/// Returns vector of type __m128 with undefined elements.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
|
|
@ -1766,7 +1766,7 @@ pub unsafe fn _mm_undefined_ps() -> __m128 {
|
|||
|
||||
/// Transpose the 4x4 matrix formed by 4 rows of __m128 in place.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_TRANSPOSE4_PS)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_TRANSPOSE4_PS)
|
||||
#[inline]
|
||||
#[allow(non_snake_case)]
|
||||
#[target_feature(enable = "sse")]
|
||||
|
|
@ -1869,7 +1869,7 @@ extern "C" {
|
|||
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
|
||||
/// exception _may_ be generated.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(movntps))]
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -11,7 +11,7 @@ use stdarch_test::assert_instr;
|
|||
/// Alternatively add and subtract packed single-precision (32-bit)
|
||||
/// floating-point elements in `a` to/from packed elements in `b`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_addsub_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_addsub_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(addsubps))]
|
||||
|
|
@ -23,7 +23,7 @@ pub unsafe fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// Alternatively add and subtract packed double-precision (64-bit)
|
||||
/// floating-point elements in `a` to/from packed elements in `b`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_addsub_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_addsub_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(addsubpd))]
|
||||
|
|
@ -35,7 +35,7 @@ pub unsafe fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d {
|
|||
/// Horizontally adds adjacent pairs of double-precision (64-bit)
|
||||
/// floating-point elements in `a` and `b`, and pack the results.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(haddpd))]
|
||||
|
|
@ -47,7 +47,7 @@ pub unsafe fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d {
|
|||
/// Horizontally adds adjacent pairs of single-precision (32-bit)
|
||||
/// floating-point elements in `a` and `b`, and pack the results.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(haddps))]
|
||||
|
|
@ -59,7 +59,7 @@ pub unsafe fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// Horizontally subtract adjacent pairs of double-precision (64-bit)
|
||||
/// floating-point elements in `a` and `b`, and pack the results.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(hsubpd))]
|
||||
|
|
@ -71,7 +71,7 @@ pub unsafe fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d {
|
|||
/// Horizontally subtract adjacent pairs of single-precision (32-bit)
|
||||
/// floating-point elements in `a` and `b`, and pack the results.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(hsubps))]
|
||||
|
|
@ -84,7 +84,7 @@ pub unsafe fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// This intrinsic may perform better than `_mm_loadu_si128`
|
||||
/// when the data crosses a cache line boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lddqu_si128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lddqu_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(lddqu))]
|
||||
|
|
@ -96,7 +96,7 @@ pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i {
|
|||
/// Duplicate the low double-precision (64-bit) floating-point element
|
||||
/// from `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movedup_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movedup_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(movddup))]
|
||||
|
|
@ -108,7 +108,7 @@ pub unsafe fn _mm_movedup_pd(a: __m128d) -> __m128d {
|
|||
/// Loads a double-precision (64-bit) floating-point element from memory
|
||||
/// into both elements of return vector.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loaddup_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loaddup_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(movddup))]
|
||||
|
|
@ -120,7 +120,7 @@ pub unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> __m128d {
|
|||
/// Duplicate odd-indexed single-precision (32-bit) floating-point elements
|
||||
/// from `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movehdup_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movehdup_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(movshdup))]
|
||||
|
|
@ -132,7 +132,7 @@ pub unsafe fn _mm_movehdup_ps(a: __m128) -> __m128 {
|
|||
/// Duplicate even-indexed single-precision (32-bit) floating-point elements
|
||||
/// from `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_moveldup_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_moveldup_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(movsldup))]
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ pub const _MM_FROUND_NEARBYINT: i32 = _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTI
|
|||
/// If the high bit is set the element of `a` is selected. The element
|
||||
/// of `b` is selected otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pblendvb))]
|
||||
|
|
@ -71,7 +71,7 @@ pub unsafe fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i
|
|||
/// corresponding element of `a`, and a set bit the corresponding
|
||||
/// element of `b`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
// Note: LLVM7 prefers the single-precision floating-point domain when possible
|
||||
|
|
@ -88,7 +88,7 @@ pub unsafe fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128
|
|||
/// Blend packed double-precision (64-bit) floating-point elements from `a`
|
||||
/// and `b` using `mask`
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(blendvpd))]
|
||||
|
|
@ -100,7 +100,7 @@ pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
|
|||
/// Blend packed single-precision (32-bit) floating-point elements from `a`
|
||||
/// and `b` using `mask`
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(blendvps))]
|
||||
|
|
@ -112,7 +112,7 @@ pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
|
|||
/// Blend packed double-precision (64-bit) floating-point elements from `a`
|
||||
/// and `b` using control mask `IMM2`
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
// Note: LLVM7 prefers the single-precision floating-point domain when possible
|
||||
|
|
@ -129,7 +129,7 @@ pub unsafe fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
|
|||
/// Blend packed single-precision (32-bit) floating-point elements from `a`
|
||||
/// and `b` using mask `IMM4`
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(blendps, IMM4 = 0b0101))]
|
||||
|
|
@ -164,7 +164,7 @@ pub unsafe fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
|
|||
/// # }
|
||||
/// # }
|
||||
/// ```
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(
|
||||
|
|
@ -183,7 +183,7 @@ pub unsafe fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 {
|
|||
///
|
||||
/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pextrb, IMM8 = 0))]
|
||||
|
|
@ -196,7 +196,7 @@ pub unsafe fn _mm_extract_epi8<const IMM8: i32>(a: __m128i) -> i32 {
|
|||
|
||||
/// Extracts a 32-bit integer from `a` selected with `IMM8`
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(
|
||||
|
|
@ -233,7 +233,7 @@ pub unsafe fn _mm_extract_epi32<const IMM8: i32>(a: __m128i) -> i32 {
|
|||
/// * Bits `[3:0]`: If any of these bits are set, the corresponding result
|
||||
/// element is cleared.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(insertps, IMM8 = 0b1010))]
|
||||
|
|
@ -247,7 +247,7 @@ pub unsafe fn _mm_insert_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
|
|||
/// Returns a copy of `a` with the 8-bit integer from `i` inserted at a
|
||||
/// location specified by `IMM8`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pinsrb, IMM8 = 0))]
|
||||
|
|
@ -261,7 +261,7 @@ pub unsafe fn _mm_insert_epi8<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
|
|||
/// Returns a copy of `a` with the 32-bit integer from `i` inserted at a
|
||||
/// location specified by `IMM8`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pinsrd, IMM8 = 0))]
|
||||
|
|
@ -275,7 +275,7 @@ pub unsafe fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
|
|||
/// Compares packed 8-bit integers in `a` and `b` and returns packed maximum
|
||||
/// values in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmaxsb))]
|
||||
|
|
@ -289,7 +289,7 @@ pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
|
||||
/// maximum.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmaxuw))]
|
||||
|
|
@ -303,7 +303,7 @@ pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Compares packed 32-bit integers in `a` and `b`, and returns packed maximum
|
||||
/// values.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmaxsd))]
|
||||
|
|
@ -317,7 +317,7 @@ pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
|
||||
/// maximum values.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmaxud))]
|
||||
|
|
@ -331,7 +331,7 @@ pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Compares packed 8-bit integers in `a` and `b` and returns packed minimum
|
||||
/// values in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pminsb))]
|
||||
|
|
@ -345,7 +345,7 @@ pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
|
||||
/// minimum.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pminuw))]
|
||||
|
|
@ -359,7 +359,7 @@ pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Compares packed 32-bit integers in `a` and `b`, and returns packed minimum
|
||||
/// values.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pminsd))]
|
||||
|
|
@ -373,7 +373,7 @@ pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
|
||||
/// minimum values.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pminud))]
|
||||
|
|
@ -387,7 +387,7 @@ pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
|
||||
/// using unsigned saturation
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(packusdw))]
|
||||
|
|
@ -398,7 +398,7 @@ pub unsafe fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
|
|||
|
||||
/// Compares packed 64-bit integers in `a` and `b` for equality
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pcmpeqq))]
|
||||
|
|
@ -409,7 +409,7 @@ pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
|
|||
|
||||
/// Sign extend packed 8-bit integers in `a` to packed 16-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovsxbw))]
|
||||
|
|
@ -422,7 +422,7 @@ pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
|
|||
|
||||
/// Sign extend packed 8-bit integers in `a` to packed 32-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovsxbd))]
|
||||
|
|
@ -436,7 +436,7 @@ pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
|
|||
/// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed
|
||||
/// 64-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovsxbq))]
|
||||
|
|
@ -449,7 +449,7 @@ pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
|
|||
|
||||
/// Sign extend packed 16-bit integers in `a` to packed 32-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovsxwd))]
|
||||
|
|
@ -462,7 +462,7 @@ pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
|
|||
|
||||
/// Sign extend packed 16-bit integers in `a` to packed 64-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovsxwq))]
|
||||
|
|
@ -475,7 +475,7 @@ pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
|
|||
|
||||
/// Sign extend packed 32-bit integers in `a` to packed 64-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovsxdq))]
|
||||
|
|
@ -488,7 +488,7 @@ pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
|
|||
|
||||
/// Zero-extends packed unsigned 8-bit integers in `a` to packed 16-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovzxbw))]
|
||||
|
|
@ -501,7 +501,7 @@ pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
|
|||
|
||||
/// Zero-extends packed unsigned 8-bit integers in `a` to packed 32-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovzxbd))]
|
||||
|
|
@ -514,7 +514,7 @@ pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
|
|||
|
||||
/// Zero-extends packed unsigned 8-bit integers in `a` to packed 64-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovzxbq))]
|
||||
|
|
@ -528,7 +528,7 @@ pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
|
|||
/// Zero-extends packed unsigned 16-bit integers in `a`
|
||||
/// to packed 32-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu16_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovzxwd))]
|
||||
|
|
@ -542,7 +542,7 @@ pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
|
|||
/// Zero-extends packed unsigned 16-bit integers in `a`
|
||||
/// to packed 64-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu16_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovzxwq))]
|
||||
|
|
@ -556,7 +556,7 @@ pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
|
|||
/// Zero-extends packed unsigned 32-bit integers in `a`
|
||||
/// to packed 64-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovzxdq))]
|
||||
|
|
@ -575,7 +575,7 @@ pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
|
|||
/// the dot product will be stored in the return value component. Otherwise if
|
||||
/// the broadcast mask bit is zero then the return component will be zero.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dp_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(dppd, IMM8 = 0))]
|
||||
|
|
@ -594,7 +594,7 @@ pub unsafe fn _mm_dp_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
|
|||
/// the dot product will be stored in the return value component. Otherwise if
|
||||
/// the broadcast mask bit is zero then the return component will be zero.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dp_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(dpps, IMM8 = 0))]
|
||||
|
|
@ -609,7 +609,7 @@ pub unsafe fn _mm_dp_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
|
|||
/// down to an integer value, and stores the results as packed double-precision
|
||||
/// floating-point elements.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundpd))]
|
||||
|
|
@ -622,7 +622,7 @@ pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d {
|
|||
/// down to an integer value, and stores the results as packed single-precision
|
||||
/// floating-point elements.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundps))]
|
||||
|
|
@ -637,7 +637,7 @@ pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 {
|
|||
/// and copies the upper element from `a` to the upper element of the intrinsic
|
||||
/// result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_sd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_sd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundsd))]
|
||||
|
|
@ -652,7 +652,7 @@ pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d {
|
|||
/// and copies the upper 3 packed elements from `a` to the upper elements
|
||||
/// of the intrinsic result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundss))]
|
||||
|
|
@ -665,7 +665,7 @@ pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 {
|
|||
/// up to an integer value, and stores the results as packed double-precision
|
||||
/// floating-point elements.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundpd))]
|
||||
|
|
@ -678,7 +678,7 @@ pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d {
|
|||
/// up to an integer value, and stores the results as packed single-precision
|
||||
/// floating-point elements.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundps))]
|
||||
|
|
@ -693,7 +693,7 @@ pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 {
|
|||
/// and copies the upper element from `a` to the upper element
|
||||
/// of the intrinsic result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_sd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_sd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundsd))]
|
||||
|
|
@ -708,7 +708,7 @@ pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d {
|
|||
/// and copies the upper 3 packed elements from `a` to the upper elements
|
||||
/// of the intrinsic result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundss))]
|
||||
|
|
@ -747,7 +747,7 @@ pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
|
|||
/// # }
|
||||
/// ```
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_pd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundpd, ROUNDING = 0))]
|
||||
|
|
@ -788,7 +788,7 @@ pub unsafe fn _mm_round_pd<const ROUNDING: i32>(a: __m128d) -> __m128d {
|
|||
/// # }
|
||||
/// ```
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ps)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundps, ROUNDING = 0))]
|
||||
|
|
@ -831,7 +831,7 @@ pub unsafe fn _mm_round_ps<const ROUNDING: i32>(a: __m128) -> __m128 {
|
|||
/// # }
|
||||
/// ```
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_sd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_sd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundsd, ROUNDING = 0))]
|
||||
|
|
@ -874,7 +874,7 @@ pub unsafe fn _mm_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m12
|
|||
/// # }
|
||||
/// ```
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundss, ROUNDING = 0))]
|
||||
|
|
@ -905,7 +905,7 @@ pub unsafe fn _mm_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128
|
|||
/// * bits `[18:16]` - contain the index of the minimum value
|
||||
/// * remaining bits are set to `0`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_minpos_epu16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_minpos_epu16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(phminposuw))]
|
||||
|
|
@ -917,7 +917,7 @@ pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i {
|
|||
/// Multiplies the low 32-bit integers from each packed 64-bit
|
||||
/// element in `a` and `b`, and returns the signed 64-bit result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmuldq))]
|
||||
|
|
@ -933,7 +933,7 @@ pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// arithmetic `pmulld __m128i::splat(i32::MAX), __m128i::splat(2)` would
|
||||
/// return a negative number.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mullo_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmulld))]
|
||||
|
|
@ -974,7 +974,7 @@ pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// * A `__m128i` vector containing the sums of the sets of absolute
|
||||
/// differences between both operands.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mpsadbw_epu8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mpsadbw_epu8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(mpsadbw, IMM8 = 0))]
|
||||
|
|
@ -999,7 +999,7 @@ pub unsafe fn _mm_mpsadbw_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m12
|
|||
/// * `1` - if the specified bits are all zeros,
|
||||
/// * `0` - otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testz_si128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testz_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
|
|
@ -1022,7 +1022,7 @@ pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
|
|||
/// * `1` - if the specified bits are all ones,
|
||||
/// * `0` - otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testc_si128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testc_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
|
|
@ -1045,7 +1045,7 @@ pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
|
|||
/// * `1` - if the specified bits are neither all zeros nor all ones,
|
||||
/// * `0` - otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testnzc_si128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
|
|
@ -1068,7 +1068,7 @@ pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
|
|||
/// * `1` - if the specified bits are all zeros,
|
||||
/// * `0` - otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_zeros)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_all_zeros)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
|
|
@ -1089,7 +1089,7 @@ pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
|
|||
/// * `1` - if the bits specified in the operand are all set to 1,
|
||||
/// * `0` - otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_ones)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_all_ones)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pcmpeqd))]
|
||||
|
|
@ -1113,7 +1113,7 @@ pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
|
|||
/// * `1` - if the specified bits are neither all zeros nor all ones,
|
||||
/// * `0` - otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_mix_ones_zeros)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_mix_ones_zeros)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ pub const _SIDD_UNIT_MASK: i32 = 0b0100_0000;
|
|||
/// Compares packed strings with implicit lengths in `a` and `b` using the
|
||||
/// control in `IMM8`, and return the generated mask.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrm)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrm)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistrm, IMM8 = 0))]
|
||||
|
|
@ -255,7 +255,7 @@ pub unsafe fn _mm_cmpistrm<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// # }
|
||||
/// ```
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistri)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistri)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))]
|
||||
|
|
@ -270,7 +270,7 @@ pub unsafe fn _mm_cmpistri<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 {
|
|||
/// control in `IMM8`, and return `1` if any character in `b` was null,
|
||||
/// and `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrz)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrz)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))]
|
||||
|
|
@ -285,7 +285,7 @@ pub unsafe fn _mm_cmpistrz<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 {
|
|||
/// control in `IMM8`, and return `1` if the resulting mask was non-zero,
|
||||
/// and `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrc)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrc)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))]
|
||||
|
|
@ -300,7 +300,7 @@ pub unsafe fn _mm_cmpistrc<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 {
|
|||
/// control in `IMM8`, and returns `1` if any character in `a` was null,
|
||||
/// and `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrs)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrs)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))]
|
||||
|
|
@ -314,7 +314,7 @@ pub unsafe fn _mm_cmpistrs<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 {
|
|||
/// Compares packed strings with implicit lengths in `a` and `b` using the
|
||||
/// control in `IMM8`, and return bit `0` of the resulting bit mask.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistro)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistro)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))]
|
||||
|
|
@ -329,7 +329,7 @@ pub unsafe fn _mm_cmpistro<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 {
|
|||
/// control in `IMM8`, and return `1` if `b` did not contain a null
|
||||
/// character and the resulting mask was zero, and `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistra)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistra)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))]
|
||||
|
|
@ -343,7 +343,7 @@ pub unsafe fn _mm_cmpistra<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 {
|
|||
/// Compares packed strings in `a` and `b` with lengths `la` and `lb`
|
||||
/// using the control in `IMM8`, and return the generated mask.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrm)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrm)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestrm, IMM8 = 0))]
|
||||
|
|
@ -432,7 +432,7 @@ pub unsafe fn _mm_cmpestrm<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb:
|
|||
/// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html
|
||||
/// [`_mm_cmpistri`]: fn._mm_cmpistri.html
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestri)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestri)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))]
|
||||
|
|
@ -447,7 +447,7 @@ pub unsafe fn _mm_cmpestri<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb:
|
|||
/// using the control in `IMM8`, and return `1` if any character in
|
||||
/// `b` was null, and `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrz)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrz)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))]
|
||||
|
|
@ -462,7 +462,7 @@ pub unsafe fn _mm_cmpestrz<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb:
|
|||
/// using the control in `IMM8`, and return `1` if the resulting mask
|
||||
/// was non-zero, and `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrc)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrc)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))]
|
||||
|
|
@ -477,7 +477,7 @@ pub unsafe fn _mm_cmpestrc<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb:
|
|||
/// using the control in `IMM8`, and return `1` if any character in
|
||||
/// `a` was null, and `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrs)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrs)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))]
|
||||
|
|
@ -492,7 +492,7 @@ pub unsafe fn _mm_cmpestrs<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb:
|
|||
/// using the control in `IMM8`, and return bit `0` of the resulting
|
||||
/// bit mask.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestro)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestro)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))]
|
||||
|
|
@ -508,7 +508,7 @@ pub unsafe fn _mm_cmpestro<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb:
|
|||
/// contain a null character and the resulting mask was zero, and `0`
|
||||
/// otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestra)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestra)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))]
|
||||
|
|
@ -522,7 +522,7 @@ pub unsafe fn _mm_cmpestra<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb:
|
|||
/// Starting with the initial value in `crc`, return the accumulated
|
||||
/// CRC32-C value for unsigned 8-bit integer `v`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(crc32))]
|
||||
|
|
@ -534,7 +534,7 @@ pub unsafe fn _mm_crc32_u8(crc: u32, v: u8) -> u32 {
|
|||
/// Starting with the initial value in `crc`, return the accumulated
|
||||
/// CRC32-C value for unsigned 16-bit integer `v`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(crc32))]
|
||||
|
|
@ -546,7 +546,7 @@ pub unsafe fn _mm_crc32_u16(crc: u32, v: u16) -> u32 {
|
|||
/// Starting with the initial value in `crc`, return the accumulated
|
||||
/// CRC32-C value for unsigned 32-bit integer `v`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(crc32))]
|
||||
|
|
@ -558,7 +558,7 @@ pub unsafe fn _mm_crc32_u32(crc: u32, v: u32) -> u32 {
|
|||
/// Compares packed 64-bit integers in `a` and `b` for greater-than,
|
||||
/// and returns the results.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpgtq))]
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ use stdarch_test::assert_instr;
|
|||
/// Computes the absolute value of packed 8-bit signed integers in `a` and
|
||||
/// returns the unsigned results.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(pabsb))]
|
||||
|
|
@ -24,7 +24,7 @@ pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
|
|||
/// `a` and
|
||||
/// return the 16-bit unsigned integer
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(pabsw))]
|
||||
|
|
@ -37,7 +37,7 @@ pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
|
|||
/// `a` and
|
||||
/// return the 32-bit unsigned integer
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(pabsd))]
|
||||
|
|
@ -71,7 +71,7 @@ pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
|
|||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(pshufb))]
|
||||
|
|
@ -83,7 +83,7 @@ pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result,
|
||||
/// shift the result right by `IMM8` bytes, and returns the low 16 bytes.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(palignr, IMM8 = 15))]
|
||||
|
|
@ -141,7 +141,7 @@ pub unsafe fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128
|
|||
/// Horizontally adds the adjacent pairs of values contained in 2 packed
|
||||
/// 128-bit vectors of `[8 x i16]`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(phaddw))]
|
||||
|
|
@ -154,7 +154,7 @@ pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are
|
||||
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadds_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadds_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(phaddsw))]
|
||||
|
|
@ -166,7 +166,7 @@ pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Horizontally adds the adjacent pairs of values contained in 2 packed
|
||||
/// 128-bit vectors of `[4 x i32]`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(phaddd))]
|
||||
|
|
@ -178,7 +178,7 @@ pub unsafe fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Horizontally subtract the adjacent pairs of values contained in 2
|
||||
/// packed 128-bit vectors of `[8 x i16]`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(phsubw))]
|
||||
|
|
@ -192,7 +192,7 @@ pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
|
||||
/// saturated to 8000h.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsubs_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsubs_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(phsubsw))]
|
||||
|
|
@ -204,7 +204,7 @@ pub unsafe fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Horizontally subtract the adjacent pairs of values contained in 2
|
||||
/// packed 128-bit vectors of `[4 x i32]`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(phsubd))]
|
||||
|
|
@ -219,7 +219,7 @@ pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// contiguous products with signed saturation, and writes the 16-bit sums to
|
||||
/// the corresponding bits in the destination.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maddubs_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(pmaddubsw))]
|
||||
|
|
@ -232,7 +232,7 @@ pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// product to the 18 most significant bits by right-shifting, round the
|
||||
/// truncated value by adding 1, and write bits `[16:1]` to the destination.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhrs_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(pmulhrsw))]
|
||||
|
|
@ -246,7 +246,7 @@ pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Elements in result are zeroed out when the corresponding element in `b`
|
||||
/// is zero.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi8)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(psignb))]
|
||||
|
|
@ -260,7 +260,7 @@ pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Elements in result are zeroed out when the corresponding element in `b`
|
||||
/// is zero.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi16)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(psignw))]
|
||||
|
|
@ -274,7 +274,7 @@ pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Elements in result are zeroed out when the corresponding element in `b`
|
||||
/// is zero.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi32)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(psignd))]
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ extern "C" {
|
|||
/// Performs one round of an AES encryption flow on each 128-bit word (state) in `a` using
|
||||
/// the corresponding 128-bit word (key) in `round_key`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesenc_epi128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_aesenc_epi128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "vaes")]
|
||||
#[cfg_attr(test, assert_instr(vaesenc))]
|
||||
|
|
@ -47,7 +47,7 @@ pub unsafe fn _mm256_aesenc_epi128(a: __m256i, round_key: __m256i) -> __m256i {
|
|||
/// Performs the last round of an AES encryption flow on each 128-bit word (state) in `a` using
|
||||
/// the corresponding 128-bit word (key) in `round_key`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesenclast_epi128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_aesenclast_epi128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "vaes")]
|
||||
#[cfg_attr(test, assert_instr(vaesenclast))]
|
||||
|
|
@ -58,7 +58,7 @@ pub unsafe fn _mm256_aesenclast_epi128(a: __m256i, round_key: __m256i) -> __m256
|
|||
/// Performs one round of an AES decryption flow on each 128-bit word (state) in `a` using
|
||||
/// the corresponding 128-bit word (key) in `round_key`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesdec_epi128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_aesdec_epi128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "vaes")]
|
||||
#[cfg_attr(test, assert_instr(vaesdec))]
|
||||
|
|
@ -69,7 +69,7 @@ pub unsafe fn _mm256_aesdec_epi128(a: __m256i, round_key: __m256i) -> __m256i {
|
|||
/// Performs the last round of an AES decryption flow on each 128-bit word (state) in `a` using
|
||||
/// the corresponding 128-bit word (key) in `round_key`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesdeclast_epi128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_aesdeclast_epi128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "vaes")]
|
||||
#[cfg_attr(test, assert_instr(vaesdeclast))]
|
||||
|
|
@ -80,7 +80,7 @@ pub unsafe fn _mm256_aesdeclast_epi128(a: __m256i, round_key: __m256i) -> __m256
|
|||
/// Performs one round of an AES encryption flow on each 128-bit word (state) in `a` using
|
||||
/// the corresponding 128-bit word (key) in `round_key`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesenc_epi128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_aesenc_epi128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "vaes,avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vaesenc))]
|
||||
|
|
@ -91,7 +91,7 @@ pub unsafe fn _mm512_aesenc_epi128(a: __m512i, round_key: __m512i) -> __m512i {
|
|||
/// Performs the last round of an AES encryption flow on each 128-bit word (state) in `a` using
|
||||
/// the corresponding 128-bit word (key) in `round_key`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesenclast_epi128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_aesenclast_epi128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "vaes,avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vaesenclast))]
|
||||
|
|
@ -102,7 +102,7 @@ pub unsafe fn _mm512_aesenclast_epi128(a: __m512i, round_key: __m512i) -> __m512
|
|||
/// Performs one round of an AES decryption flow on each 128-bit word (state) in `a` using
|
||||
/// the corresponding 128-bit word (key) in `round_key`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesdec_epi128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_aesdec_epi128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "vaes,avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vaesdec))]
|
||||
|
|
@ -113,7 +113,7 @@ pub unsafe fn _mm512_aesdec_epi128(a: __m512i, round_key: __m512i) -> __m512i {
|
|||
/// Performs the last round of an AES decryption flow on each 128-bit word (state) in `a` using
|
||||
/// the corresponding 128-bit word (key) in `round_key`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesdeclast_epi128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_aesdeclast_epi128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "vaes,avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vaesdeclast))]
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ extern "C" {
|
|||
/// should be used. Immediate bits other than 0 and 4 are ignored.
|
||||
/// All lanes share immediate byte.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_clmulepi64_epi128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_clmulepi64_epi128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "vpclmulqdq,avx512f")]
|
||||
// technically according to Intel's documentation we don't need avx512f here, however LLVM gets confused otherwise
|
||||
|
|
@ -48,7 +48,7 @@ pub unsafe fn _mm512_clmulepi64_epi128<const IMM8: i32>(a: __m512i, b: __m512i)
|
|||
/// should be used. Immediate bits other than 0 and 4 are ignored.
|
||||
/// All lanes share immediate byte.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_clmulepi64_epi128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_clmulepi64_epi128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "vpclmulqdq")]
|
||||
#[cfg_attr(test, assert_instr(vpclmul, IMM8 = 0))]
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ extern "C" {
|
|||
/// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of
|
||||
/// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsave)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xsave)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave")]
|
||||
#[cfg_attr(test, assert_instr(xsave))]
|
||||
|
|
@ -49,7 +49,7 @@ pub unsafe fn _xsave(mem_addr: *mut u8, save_mask: u64) {
|
|||
/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
|
||||
/// boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstor)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xrstor)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave")]
|
||||
#[cfg_attr(test, assert_instr(xrstor))]
|
||||
|
|
@ -69,7 +69,7 @@ pub const _XCR_XFEATURE_ENABLED_MASK: u32 = 0;
|
|||
///
|
||||
/// Currently only `XFEATURE_ENABLED_MASK` `XCR` is supported.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsetbv)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xsetbv)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave")]
|
||||
#[cfg_attr(test, assert_instr(xsetbv))]
|
||||
|
|
@ -81,7 +81,7 @@ pub unsafe fn _xsetbv(a: u32, val: u64) {
|
|||
/// Reads the contents of the extended control register `XCR`
|
||||
/// specified in `xcr_no`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xgetbv)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xgetbv)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave")]
|
||||
#[cfg_attr(test, assert_instr(xgetbv))]
|
||||
|
|
@ -98,7 +98,7 @@ pub unsafe fn _xgetbv(xcr_no: u32) -> u64 {
|
|||
/// the manner in which data is saved. The performance of this instruction will
|
||||
/// be equal to or better than using the `XSAVE` instruction.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaveopt)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xsaveopt)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave,xsaveopt")]
|
||||
#[cfg_attr(test, assert_instr(xsaveopt))]
|
||||
|
|
@ -114,7 +114,7 @@ pub unsafe fn _xsaveopt(mem_addr: *mut u8, save_mask: u64) {
|
|||
/// use init optimization. State is saved based on bits `[62:0]` in `save_mask`
|
||||
/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsavec)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xsavec)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave,xsavec")]
|
||||
#[cfg_attr(test, assert_instr(xsavec))]
|
||||
|
|
@ -131,7 +131,7 @@ pub unsafe fn _xsavec(mem_addr: *mut u8, save_mask: u64) {
|
|||
/// modified optimization. State is saved based on bits `[62:0]` in `save_mask`
|
||||
/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaves)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xsaves)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave,xsaves")]
|
||||
#[cfg_attr(test, assert_instr(xsaves))]
|
||||
|
|
@ -150,7 +150,7 @@ pub unsafe fn _xsaves(mem_addr: *mut u8, save_mask: u64) {
|
|||
/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
|
||||
/// boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstors)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xrstors)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave,xsaves")]
|
||||
#[cfg_attr(test, assert_instr(xrstors))]
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ use stdarch_test::assert_instr;
|
|||
///
|
||||
/// When the operand is zero, it returns its size in bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_lzcnt_u64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_lzcnt_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "lzcnt")]
|
||||
#[cfg_attr(test, assert_instr(lzcnt))]
|
||||
|
|
@ -35,7 +35,7 @@ pub unsafe fn _lzcnt_u64(x: u64) -> u64 {
|
|||
|
||||
/// Counts the bits that are set.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_popcnt64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_popcnt64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "popcnt")]
|
||||
#[cfg_attr(test, assert_instr(popcnt))]
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ use crate::{
|
|||
/// Copies `a` to result, and insert the 64-bit integer `i` into result
|
||||
/// at the location specified by `index`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi64)
|
||||
#[inline]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
#[target_feature(enable = "avx")]
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ use crate::core_arch::{simd_llvm::*, x86::*};
|
|||
|
||||
/// Extracts a 64-bit integer from `a`, selected with `INDEX`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ use stdarch_test::assert_instr;
|
|||
|
||||
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_i64&expand=1792)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_i64&expand=1792)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsd2si))]
|
||||
|
|
@ -18,7 +18,7 @@ pub unsafe fn _mm_cvtsd_i64(a: __m128d) -> i64 {
|
|||
|
||||
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_i64&expand=1894)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_i64&expand=1894)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtss2si))]
|
||||
|
|
@ -28,7 +28,7 @@ pub unsafe fn _mm_cvtss_i64(a: __m128) -> i64 {
|
|||
|
||||
/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_u64&expand=1902)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_u64&expand=1902)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtss2usi))]
|
||||
|
|
@ -38,7 +38,7 @@ pub unsafe fn _mm_cvtss_u64(a: __m128) -> u64 {
|
|||
|
||||
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_u64&expand=1800)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_u64&expand=1800)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsd2usi))]
|
||||
|
|
@ -48,7 +48,7 @@ pub unsafe fn _mm_cvtsd_u64(a: __m128d) -> u64 {
|
|||
|
||||
/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvti32_ss&expand=1643)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvti64_ss&expand=1643)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsi2ss))]
|
||||
|
|
@ -60,7 +60,7 @@ pub unsafe fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 {
|
|||
|
||||
/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti64_sd&expand=1644)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvti64_sd&expand=1644)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsi2sd))]
|
||||
|
|
@ -72,7 +72,7 @@ pub unsafe fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d {
|
|||
|
||||
/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_ss&expand=2035)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu64_ss&expand=2035)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtusi2ss))]
|
||||
|
|
@ -84,7 +84,7 @@ pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
|
|||
|
||||
/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sd&expand=2034)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu64_sd&expand=2034)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtusi2sd))]
|
||||
|
|
@ -96,7 +96,7 @@ pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
|
|||
|
||||
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_i64&expand=2016)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_i64&expand=2016)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsd2si))]
|
||||
|
|
@ -106,7 +106,7 @@ pub unsafe fn _mm_cvttsd_i64(a: __m128d) -> i64 {
|
|||
|
||||
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_u64&expand=2021)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_u64&expand=2021)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsd2usi))]
|
||||
|
|
@ -116,7 +116,7 @@ pub unsafe fn _mm_cvttsd_u64(a: __m128d) -> u64 {
|
|||
|
||||
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=#text=_mm_cvttss_i64&expand=2023)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_i64&expand=2023)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtss2si))]
|
||||
|
|
@ -126,7 +126,7 @@ pub unsafe fn _mm_cvttss_i64(a: __m128) -> i64 {
|
|||
|
||||
/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_u64&expand=2027)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_u64&expand=2027)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtss2usi))]
|
||||
|
|
@ -142,7 +142,7 @@ pub unsafe fn _mm_cvttss_u64(a: __m128) -> u64 {
|
|||
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
|
||||
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sd&expand=1313)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundi64_sd&expand=1313)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))]
|
||||
|
|
@ -162,7 +162,7 @@ pub unsafe fn _mm_cvt_roundi64_sd<const ROUNDING: i32>(a: __m128d, b: i64) -> __
|
|||
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
|
||||
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsi64_sd&expand=1367)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsi64_sd&expand=1367)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))]
|
||||
|
|
@ -182,7 +182,7 @@ pub unsafe fn _mm_cvt_roundsi64_sd<const ROUNDING: i32>(a: __m128d, b: i64) -> _
|
|||
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
|
||||
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_ss&expand=1314)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundi64_ss&expand=1314)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
|
||||
|
|
@ -202,7 +202,7 @@ pub unsafe fn _mm_cvt_roundi64_ss<const ROUNDING: i32>(a: __m128, b: i64) -> __m
|
|||
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
|
||||
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_sd&expand=1379)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundu64_sd&expand=1379)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtusi2sd, ROUNDING = 8))]
|
||||
|
|
@ -222,7 +222,7 @@ pub unsafe fn _mm_cvt_roundu64_sd<const ROUNDING: i32>(a: __m128d, b: u64) -> __
|
|||
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
|
||||
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsi64_ss&expand=1368)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsi64_ss&expand=1368)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
|
||||
|
|
@ -242,7 +242,7 @@ pub unsafe fn _mm_cvt_roundsi64_ss<const ROUNDING: i32>(a: __m128, b: i64) -> __
|
|||
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
|
||||
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_ss&expand=1380)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundu64_ss&expand=1380)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
|
||||
|
|
@ -262,7 +262,7 @@ pub unsafe fn _mm_cvt_roundu64_ss<const ROUNDING: i32>(a: __m128, b: u64) -> __m
|
|||
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
|
||||
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_si64&expand=1360)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_si64&expand=1360)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
|
||||
|
|
@ -282,7 +282,7 @@ pub unsafe fn _mm_cvt_roundsd_si64<const ROUNDING: i32>(a: __m128d) -> i64 {
|
|||
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
|
||||
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_i64&expand=1358)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_i64&expand=1358)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
|
||||
|
|
@ -302,7 +302,7 @@ pub unsafe fn _mm_cvt_roundsd_i64<const ROUNDING: i32>(a: __m128d) -> i64 {
|
|||
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
|
||||
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_u64&expand=1365)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_u64&expand=1365)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
|
||||
|
|
@ -322,7 +322,7 @@ pub unsafe fn _mm_cvt_roundsd_u64<const ROUNDING: i32>(a: __m128d) -> u64 {
|
|||
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
|
||||
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_si64&expand=1375)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundss_si64&expand=1375)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
|
||||
|
|
@ -342,7 +342,7 @@ pub unsafe fn _mm_cvt_roundss_si64<const ROUNDING: i32>(a: __m128) -> i64 {
|
|||
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
|
||||
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_i64&expand=1370)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundss_i64&expand=1370)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
|
||||
|
|
@ -362,7 +362,7 @@ pub unsafe fn _mm_cvt_roundss_i64<const ROUNDING: i32>(a: __m128) -> i64 {
|
|||
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
|
||||
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_u64&expand=1377)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundss_u64&expand=1377)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
|
||||
|
|
@ -377,7 +377,7 @@ pub unsafe fn _mm_cvt_roundss_u64<const ROUNDING: i32>(a: __m128) -> u64 {
|
|||
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
|
||||
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_si64&expand=1931)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_si64&expand=1931)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))]
|
||||
|
|
@ -392,7 +392,7 @@ pub unsafe fn _mm_cvtt_roundsd_si64<const SAE: i32>(a: __m128d) -> i64 {
|
|||
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
|
||||
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_i64&expand=1929)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_i64&expand=1929)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))]
|
||||
|
|
@ -407,7 +407,7 @@ pub unsafe fn _mm_cvtt_roundsd_i64<const SAE: i32>(a: __m128d) -> i64 {
|
|||
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\
|
||||
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_u64&expand=1933)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_u64&expand=1933)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsd2usi, SAE = 8))]
|
||||
|
|
@ -422,7 +422,7 @@ pub unsafe fn _mm_cvtt_roundsd_u64<const SAE: i32>(a: __m128d) -> u64 {
|
|||
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
|
||||
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_i64&expand=1935)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundss_i64&expand=1935)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))]
|
||||
|
|
@ -437,7 +437,7 @@ pub unsafe fn _mm_cvtt_roundss_i64<const SAE: i32>(a: __m128) -> i64 {
|
|||
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
|
||||
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_si64&expand=1937)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundss_si64&expand=1937)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))]
|
||||
|
|
@ -452,7 +452,7 @@ pub unsafe fn _mm_cvtt_roundss_si64<const SAE: i32>(a: __m128) -> i64 {
|
|||
/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\
|
||||
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_u64&expand=1939)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundss_u64&expand=1939)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vcvtss2usi, SAE = 8))]
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ use stdarch_test::assert_instr;
|
|||
/// Extracts bits in range [`start`, `start` + `length`) from `a` into
|
||||
/// the least significant bits of the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr_u64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bextr_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(bextr))]
|
||||
|
|
@ -31,7 +31,7 @@ pub unsafe fn _bextr_u64(a: u64, start: u32, len: u32) -> u64 {
|
|||
/// Bits `[7,0]` of `control` specify the index to the first bit in the range
|
||||
/// to be extracted, and bits `[15,8]` specify the length of the range.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr2_u64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bextr2_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(bextr))]
|
||||
|
|
@ -43,7 +43,7 @@ pub unsafe fn _bextr2_u64(a: u64, control: u64) -> u64 {
|
|||
|
||||
/// Bitwise logical `AND` of inverted `a` with `b`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_andn_u64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_andn_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(andn))]
|
||||
|
|
@ -54,7 +54,7 @@ pub unsafe fn _andn_u64(a: u64, b: u64) -> u64 {
|
|||
|
||||
/// Extracts lowest set isolated bit.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsi_u64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_blsi_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(blsi))]
|
||||
|
|
@ -66,7 +66,7 @@ pub unsafe fn _blsi_u64(x: u64) -> u64 {
|
|||
|
||||
/// Gets mask up to lowest set bit.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsmsk_u64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_blsmsk_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(blsmsk))]
|
||||
|
|
@ -80,7 +80,7 @@ pub unsafe fn _blsmsk_u64(x: u64) -> u64 {
|
|||
///
|
||||
/// If `x` is zero, sets CF.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsr_u64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_blsr_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(blsr))]
|
||||
|
|
@ -94,7 +94,7 @@ pub unsafe fn _blsr_u64(x: u64) -> u64 {
|
|||
///
|
||||
/// When the source operand is `0`, it returns its size in bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tzcnt_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(tzcnt))]
|
||||
|
|
@ -107,7 +107,7 @@ pub unsafe fn _tzcnt_u64(x: u64) -> u64 {
|
|||
///
|
||||
/// When the source operand is `0`, it returns its size in bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_tzcnt_64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_tzcnt_64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(tzcnt))]
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ use stdarch_test::assert_instr;
|
|||
/// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with
|
||||
/// the low half and the high half of the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mulx_u64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mulx_u64)
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(mul))]
|
||||
#[target_feature(enable = "bmi2")]
|
||||
|
|
@ -32,7 +32,7 @@ pub unsafe fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 {
|
|||
|
||||
/// Zeroes higher bits of `a` >= `index`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bzhi_u64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bzhi_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi2")]
|
||||
#[cfg_attr(test, assert_instr(bzhi))]
|
||||
|
|
@ -45,7 +45,7 @@ pub unsafe fn _bzhi_u64(a: u64, index: u32) -> u64 {
|
|||
/// Scatter contiguous low order bits of `a` to the result at the positions
|
||||
/// specified by the `mask`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pdep_u64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi2")]
|
||||
#[cfg_attr(test, assert_instr(pdep))]
|
||||
|
|
@ -58,7 +58,7 @@ pub unsafe fn _pdep_u64(a: u64, mask: u64) -> u64 {
|
|||
/// Gathers the bits of `x` specified by the `mask` into the contiguous low
|
||||
/// order bit positions of the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pext_u64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi2")]
|
||||
#[cfg_attr(test, assert_instr(pext))]
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ use stdarch_test::assert_instr;
|
|||
|
||||
/// Returns an integer with the reversed byte order of x
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bswap64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bswap64)
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(bswap))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ extern "C" {
|
|||
/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
|
||||
/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxsave64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_fxsave64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fxsr")]
|
||||
#[cfg_attr(test, assert_instr(fxsave64))]
|
||||
|
|
@ -46,7 +46,7 @@ pub unsafe fn _fxsave64(mem_addr: *mut u8) {
|
|||
/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
|
||||
/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxrstor64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_fxrstor64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fxsr")]
|
||||
#[cfg_attr(test, assert_instr(fxrstor64))]
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ use stdarch_test::assert_instr;
|
|||
/// Read a hardware generated 64-bit random value and store the result in val.
|
||||
/// Returns 1 if a random value was generated, and 0 otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand64_step)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_rdrand64_step)
|
||||
#[inline]
|
||||
#[target_feature(enable = "rdrand")]
|
||||
#[cfg_attr(test, assert_instr(rdrand))]
|
||||
|
|
@ -32,7 +32,7 @@ pub unsafe fn _rdrand64_step(val: &mut u64) -> i32 {
|
|||
/// Read a 64-bit NIST SP800-90B and SP800-90C compliant random value and store
|
||||
/// in val. Return 1 if a random value was generated, and 0 otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdseed64_step)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_rdseed64_step)
|
||||
#[inline]
|
||||
#[target_feature(enable = "rdseed")]
|
||||
#[cfg_attr(test, assert_instr(rdseed))]
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ extern "C" {
|
|||
///
|
||||
/// This corresponds to the `CVTSS2SI` instruction (with 64 bit output).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_si64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_si64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cvtss2si))]
|
||||
|
|
@ -44,7 +44,7 @@ pub unsafe fn _mm_cvtss_si64(a: __m128) -> i64 {
|
|||
///
|
||||
/// This corresponds to the `CVTTSS2SI` instruction (with 64 bit output).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_si64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_si64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cvttss2si))]
|
||||
|
|
@ -59,7 +59,7 @@ pub unsafe fn _mm_cvttss_si64(a: __m128) -> i64 {
|
|||
/// This intrinsic corresponds to the `CVTSI2SS` instruction (with 64 bit
|
||||
/// input).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_ss)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cvtsi2ss))]
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ extern "C" {
|
|||
/// Converts the lower double-precision (64-bit) floating-point element in a to
|
||||
/// a 64-bit integer.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(cvtsd2si))]
|
||||
|
|
@ -30,7 +30,7 @@ pub unsafe fn _mm_cvtsd_si64(a: __m128d) -> i64 {
|
|||
|
||||
/// Alias for `_mm_cvtsd_si64`
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64x)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si64x)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(cvtsd2si))]
|
||||
|
|
@ -42,7 +42,7 @@ pub unsafe fn _mm_cvtsd_si64x(a: __m128d) -> i64 {
|
|||
/// Converts the lower double-precision (64-bit) floating-point element in `a`
|
||||
/// to a 64-bit integer with truncation.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(cvttsd2si))]
|
||||
|
|
@ -53,7 +53,7 @@ pub unsafe fn _mm_cvttsd_si64(a: __m128d) -> i64 {
|
|||
|
||||
/// Alias for `_mm_cvttsd_si64`
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64x)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si64x)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(cvttsd2si))]
|
||||
|
|
@ -66,7 +66,7 @@ pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 {
|
|||
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
|
||||
/// used again soon).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(movnti))]
|
||||
|
|
@ -78,7 +78,7 @@ pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) {
|
|||
/// Returns a vector whose lowest element is `a` and all higher elements are
|
||||
/// `0`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_si128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
|
||||
|
|
@ -90,7 +90,7 @@ pub unsafe fn _mm_cvtsi64_si128(a: i64) -> __m128i {
|
|||
/// Returns a vector whose lowest element is `a` and all higher elements are
|
||||
/// `0`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_si128)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64x_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
|
||||
|
|
@ -101,7 +101,7 @@ pub unsafe fn _mm_cvtsi64x_si128(a: i64) -> __m128i {
|
|||
|
||||
/// Returns the lowest element of `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
|
||||
|
|
@ -112,7 +112,7 @@ pub unsafe fn _mm_cvtsi128_si64(a: __m128i) -> i64 {
|
|||
|
||||
/// Returns the lowest element of `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64x)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si64x)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
|
||||
|
|
@ -124,7 +124,7 @@ pub unsafe fn _mm_cvtsi128_si64x(a: __m128i) -> i64 {
|
|||
/// Returns `a` with its lower element replaced by `b` after converting it to
|
||||
/// an `f64`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_sd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64_sd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(cvtsi2sd))]
|
||||
|
|
@ -136,7 +136,7 @@ pub unsafe fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d {
|
|||
/// Returns `a` with its lower element replaced by `b` after converting it to
|
||||
/// an `f64`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_sd)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64x_sd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(cvtsi2sd))]
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ use stdarch_test::assert_instr;
|
|||
|
||||
/// Extracts a 64-bit integer from `a` selected with `IMM1`
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(pextrq, IMM1 = 1))]
|
||||
|
|
@ -24,7 +24,7 @@ pub unsafe fn _mm_extract_epi64<const IMM1: i32>(a: __m128i) -> i64 {
|
|||
/// Returns a copy of `a` with the 64-bit integer from `i` inserted at a
|
||||
/// location specified by `IMM1`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pinsrq, IMM1 = 0))]
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ extern "C" {
|
|||
/// Starting with the initial value in `crc`, return the accumulated
|
||||
/// CRC32-C value for unsigned 64-bit integer `v`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(crc32))]
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ extern "C" {
|
|||
/// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of
|
||||
/// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsave64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xsave64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave")]
|
||||
#[cfg_attr(test, assert_instr(xsave64))]
|
||||
|
|
@ -46,7 +46,7 @@ pub unsafe fn _xsave64(mem_addr: *mut u8, save_mask: u64) {
|
|||
/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
|
||||
/// boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstor64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xrstor64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave")]
|
||||
#[cfg_attr(test, assert_instr(xrstor64))]
|
||||
|
|
@ -63,7 +63,7 @@ pub unsafe fn _xrstor64(mem_addr: *const u8, rs_mask: u64) {
|
|||
/// the manner in which data is saved. The performance of this instruction will
|
||||
/// be equal to or better than using the `XSAVE64` instruction.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaveopt64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xsaveopt64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave,xsaveopt")]
|
||||
#[cfg_attr(test, assert_instr(xsaveopt64))]
|
||||
|
|
@ -79,7 +79,7 @@ pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) {
|
|||
/// use init optimization. State is saved based on bits `[62:0]` in `save_mask`
|
||||
/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsavec64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xsavec64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave,xsavec")]
|
||||
#[cfg_attr(test, assert_instr(xsavec64))]
|
||||
|
|
@ -96,7 +96,7 @@ pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) {
|
|||
/// modified optimization. State is saved based on bits `[62:0]` in `save_mask`
|
||||
/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaves64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xsaves64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave,xsaves")]
|
||||
#[cfg_attr(test, assert_instr(xsaves64))]
|
||||
|
|
@ -115,7 +115,7 @@ pub unsafe fn _xsaves64(mem_addr: *mut u8, save_mask: u64) {
|
|||
/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
|
||||
/// boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstors64)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xrstors64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave,xsaves")]
|
||||
#[cfg_attr(test, assert_instr(xrstors64))]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue