Merge branch 'master' into master
This commit is contained in:
commit
d5ed881685
27 changed files with 784 additions and 1519 deletions
|
|
@ -19,4 +19,5 @@ debug = true
|
|||
opt-level = 3
|
||||
|
||||
[dev-dependencies]
|
||||
assert-instr = { path = "assert-instr" }
|
||||
stdsimd-test = { path = "stdsimd-test" }
|
||||
cupid = "0.3"
|
||||
|
|
|
|||
|
|
@ -1,926 +0,0 @@
|
|||
**TIP**: Use the following command to generate a section in this list for
|
||||
Intel intrinsics. Replace `SSE4.2` with the name of the instruction set you want to list.
|
||||
|
||||
```
|
||||
rg '^<intrinsic' intel-intrinsics-3.3.15.xml | rg "'SSE4.2'" | rg '^.*name=\x27([^\x27]+)\x27.*$' -r '* [ ] `$1`' >> TODO.md
|
||||
```
|
||||
|
||||
`rg` is the ripgrep command-line tool, which can be installed with `cargo install ripgrep`.
|
||||
|
||||
sse
|
||||
---
|
||||
* [ ] `_MM_TRANSPOSE4_PS`
|
||||
* [ ] `_mm_getcsr`
|
||||
* [ ] `_mm_setcsr`
|
||||
* [ ] `_MM_GET_EXCEPTION_STATE`
|
||||
* [ ] `_MM_SET_EXCEPTION_STATE`
|
||||
* [ ] `_MM_GET_EXCEPTION_MASK`
|
||||
* [ ] `_MM_SET_EXCEPTION_MASK`
|
||||
* [ ] `_MM_GET_ROUNDING_MODE`
|
||||
* [ ] `_MM_SET_ROUNDING_MODE`
|
||||
* [ ] `_MM_GET_FLUSH_ZERO_MODE`
|
||||
* [ ] `_MM_SET_FLUSH_ZERO_MODE`
|
||||
* [ ] `_mm_prefetch`
|
||||
* [ ] `_mm_sfence`
|
||||
* [ ] `_mm_max_pi16`
|
||||
* [ ] `_m_pmaxsw`
|
||||
* [ ] `_mm_max_pu8`
|
||||
* [ ] `_m_pmaxub`
|
||||
* [ ] `_mm_min_pi16`
|
||||
* [ ] `_m_pminsw`
|
||||
* [ ] `_mm_min_pu8`
|
||||
* [ ] `_m_pminub`
|
||||
* [ ] `_mm_mulhi_pu16`
|
||||
* [ ] `_m_pmulhuw`
|
||||
* [ ] `_mm_avg_pu8`
|
||||
* [ ] `_m_pavgb`
|
||||
* [ ] `_mm_avg_pu16`
|
||||
* [ ] `_m_pavgw`
|
||||
* [ ] `_mm_sad_pu8`
|
||||
* [ ] `_m_psadbw`
|
||||
* [ ] `_mm_cvtsi32_ss`
|
||||
* [ ] `_mm_cvt_si2ss`
|
||||
* [ ] `_mm_cvtsi64_ss`
|
||||
* [ ] `_mm_cvtpi32_ps`
|
||||
* [ ] `_mm_cvt_pi2ps`
|
||||
* [ ] `_mm_cvtpi16_ps`
|
||||
* [ ] `_mm_cvtpu16_ps`
|
||||
* [ ] `_mm_cvtpi8_ps`
|
||||
* [ ] `_mm_cvtpu8_ps`
|
||||
* [ ] `_mm_cvtpi32x2_ps`
|
||||
* [ ] `_mm_stream_pi`
|
||||
* [ ] `_mm_maskmove_si64`
|
||||
* [ ] `_m_maskmovq`
|
||||
* [ ] `_mm_extract_pi16`
|
||||
* [ ] `_m_pextrw`
|
||||
* [ ] `_mm_insert_pi16`
|
||||
* [ ] `_m_pinsrw`
|
||||
* [ ] `_mm_movemask_pi8`
|
||||
* [ ] `_m_pmovmskb`
|
||||
* [ ] `_mm_shuffle_pi16`
|
||||
* [ ] `_m_pshufw`
|
||||
* [x] `_mm_add_ss`
|
||||
* [x] `_mm_add_ps`
|
||||
* [x] `_mm_sub_ss`
|
||||
* [x] `_mm_sub_ps`
|
||||
* [x] `_mm_mul_ss`
|
||||
* [x] `_mm_mul_ps`
|
||||
* [x] `_mm_div_ss`
|
||||
* [x] `_mm_div_ps`
|
||||
* [x] `_mm_sqrt_ss`
|
||||
* [x] `_mm_sqrt_ps`
|
||||
* [x] `_mm_rcp_ss`
|
||||
* [x] `_mm_rcp_ps`
|
||||
* [x] `_mm_rsqrt_ss`
|
||||
* [x] `_mm_rsqrt_ps`
|
||||
* [x] `_mm_min_ss`
|
||||
* [x] `_mm_min_ps`
|
||||
* [x] `_mm_max_ss`
|
||||
* [x] `_mm_max_ps`
|
||||
* [ ] `_mm_and_ps`
|
||||
* [ ] `_mm_andnot_ps`
|
||||
* [ ] `_mm_or_ps`
|
||||
* [ ] `_mm_xor_ps`
|
||||
* [ ] `_mm_cmpeq_ss`
|
||||
* [ ] `_mm_cmpeq_ps`
|
||||
* [ ] `_mm_cmplt_ss`
|
||||
* [ ] `_mm_cmplt_ps`
|
||||
* [ ] `_mm_cmple_ss`
|
||||
* [ ] `_mm_cmple_ps`
|
||||
* [ ] `_mm_cmpgt_ss`
|
||||
* [ ] `_mm_cmpgt_ps`
|
||||
* [ ] `_mm_cmpge_ss`
|
||||
* [ ] `_mm_cmpge_ps`
|
||||
* [ ] `_mm_cmpneq_ss`
|
||||
* [ ] `_mm_cmpneq_ps`
|
||||
* [ ] `_mm_cmpnlt_ss`
|
||||
* [ ] `_mm_cmpnlt_ps`
|
||||
* [ ] `_mm_cmpnle_ss`
|
||||
* [ ] `_mm_cmpnle_ps`
|
||||
* [ ] `_mm_cmpngt_ss`
|
||||
* [ ] `_mm_cmpngt_ps`
|
||||
* [ ] `_mm_cmpnge_ss`
|
||||
* [ ] `_mm_cmpnge_ps`
|
||||
* [ ] `_mm_cmpord_ss`
|
||||
* [ ] `_mm_cmpord_ps`
|
||||
* [ ] `_mm_cmpunord_ss`
|
||||
* [ ] `_mm_cmpunord_ps`
|
||||
* [ ] `_mm_comieq_ss`
|
||||
* [ ] `_mm_comilt_ss`
|
||||
* [ ] `_mm_comile_ss`
|
||||
* [ ] `_mm_comigt_ss`
|
||||
* [ ] `_mm_comige_ss`
|
||||
* [ ] `_mm_comineq_ss`
|
||||
* [ ] `_mm_ucomieq_ss`
|
||||
* [ ] `_mm_ucomilt_ss`
|
||||
* [ ] `_mm_ucomile_ss`
|
||||
* [ ] `_mm_ucomigt_ss`
|
||||
* [ ] `_mm_ucomige_ss`
|
||||
* [ ] `_mm_ucomineq_ss`
|
||||
* [ ] `_mm_cvtss_si32`
|
||||
* [ ] `_mm_cvt_ss2si`
|
||||
* [ ] `_mm_cvtss_si64`
|
||||
* [ ] `_mm_cvtss_f32`
|
||||
* [ ] `_mm_cvtps_pi32`
|
||||
* [ ] `_mm_cvt_ps2pi`
|
||||
* [ ] `_mm_cvttss_si32`
|
||||
* [ ] `_mm_cvtt_ss2si`
|
||||
* [ ] `_mm_cvttss_si64`
|
||||
* [ ] `_mm_cvttps_pi32`
|
||||
* [ ] `_mm_cvtt_ps2pi`
|
||||
* [ ] `_mm_cvtps_pi16`
|
||||
* [ ] `_mm_cvtps_pi8`
|
||||
* [ ] `_mm_set_ss`
|
||||
* [ ] `_mm_set1_ps`
|
||||
* [ ] `_mm_set_ps1`
|
||||
* [ ] `_mm_set_ps`
|
||||
* [ ] `_mm_setr_ps`
|
||||
* [ ] `_mm_setzero_ps`
|
||||
* [ ] `_mm_loadh_pi`
|
||||
* [ ] `_mm_loadl_pi`
|
||||
* [ ] `_mm_load_ss`
|
||||
* [ ] `_mm_load1_ps`
|
||||
* [ ] `_mm_load_ps1`
|
||||
* [ ] `_mm_load_ps`
|
||||
* [ ] `_mm_loadu_ps`
|
||||
* [ ] `_mm_loadr_ps`
|
||||
* [ ] `_mm_stream_ps`
|
||||
* [ ] `_mm_storeh_pi`
|
||||
* [ ] `_mm_storel_pi`
|
||||
* [ ] `_mm_store_ss`
|
||||
* [ ] `_mm_store1_ps`
|
||||
* [ ] `_mm_store_ps1`
|
||||
* [ ] `_mm_store_ps`
|
||||
* [ ] `_mm_storeu_ps`
|
||||
* [ ] `_mm_storer_ps`
|
||||
* [ ] `_mm_move_ss`
|
||||
* [ ] `_mm_shuffle_ps`
|
||||
* [x] `_mm_unpackhi_ps`
|
||||
* [ ] `_mm_unpacklo_ps`
|
||||
* [ ] `_mm_movehl_ps`
|
||||
* [ ] `_mm_movelh_ps`
|
||||
* [x] `_mm_movemask_ps`
|
||||
* [ ] `_mm_undefined_ps`
|
||||
|
||||
|
||||
sse2
|
||||
----
|
||||
* [x] `_mm_pause`
|
||||
* [x] `_mm_clflush`
|
||||
* [x] `_mm_lfence`
|
||||
* [x] `_mm_mfence`
|
||||
* [x] `_mm_add_epi8`
|
||||
* [x] `_mm_add_epi16`
|
||||
* [x] `_mm_add_epi32`
|
||||
* [ ] `_mm_add_si64`
|
||||
* [x] `_mm_add_epi64`
|
||||
* [x] `_mm_adds_epi8`
|
||||
* [x] `_mm_adds_epi16`
|
||||
* [x] `_mm_adds_epu8`
|
||||
* [x] `_mm_adds_epu16`
|
||||
* [x] `_mm_avg_epu8`
|
||||
* [x] `_mm_avg_epu16`
|
||||
* [x] `_mm_madd_epi16`
|
||||
* [x] `_mm_max_epi16`
|
||||
* [x] `_mm_max_epu8`
|
||||
* [x] `_mm_min_epi16`
|
||||
* [x] `_mm_min_epu8`
|
||||
* [x] `_mm_mulhi_epi16`
|
||||
* [x] `_mm_mulhi_epu16`
|
||||
* [x] `_mm_mullo_epi16`
|
||||
* [ ] `_mm_mul_su32`
|
||||
* [x] `_mm_mul_epu32`
|
||||
* [x] `_mm_sad_epu8`
|
||||
* [x] `_mm_sub_epi8`
|
||||
* [x] `_mm_sub_epi16`
|
||||
* [x] `_mm_sub_epi32`
|
||||
* [ ] `_mm_sub_si64`
|
||||
* [x] `_mm_sub_epi64`
|
||||
* [x] `_mm_subs_epi8`
|
||||
* [x] `_mm_subs_epi16`
|
||||
* [x] `_mm_subs_epu8`
|
||||
* [x] `_mm_subs_epu16`
|
||||
* [x] `_mm_slli_si128`
|
||||
* [x] `_mm_bslli_si128`
|
||||
* [x] `_mm_bsrli_si128`
|
||||
* [x] `_mm_slli_epi16`
|
||||
* [x] `_mm_sll_epi16`
|
||||
* [x] `_mm_slli_epi32`
|
||||
* [x] `_mm_sll_epi32`
|
||||
* [x] `_mm_slli_epi64`
|
||||
* [x] `_mm_sll_epi64`
|
||||
* [x] `_mm_srai_epi16`
|
||||
* [x] `_mm_sra_epi16`
|
||||
* [x] `_mm_srai_epi32`
|
||||
* [x] `_mm_sra_epi32`
|
||||
* [x] `_mm_srli_si128`
|
||||
* [x] `_mm_srli_epi16`
|
||||
* [x] `_mm_srl_epi16`
|
||||
* [x] `_mm_srli_epi32`
|
||||
* [x] `_mm_srl_epi32`
|
||||
* [x] `_mm_srli_epi64`
|
||||
* [x] `_mm_srl_epi64`
|
||||
* [x] `_mm_and_si128`
|
||||
* [x] `_mm_andnot_si128`
|
||||
* [x] `_mm_or_si128`
|
||||
* [x] `_mm_xor_si128`
|
||||
* [x] `_mm_cmpeq_epi8`
|
||||
* [x] `_mm_cmpeq_epi16`
|
||||
* [x] `_mm_cmpeq_epi32`
|
||||
* [x] `_mm_cmpgt_epi8`
|
||||
* [x] `_mm_cmpgt_epi16`
|
||||
* [x] `_mm_cmpgt_epi32`
|
||||
* [x] `_mm_cmplt_epi8`
|
||||
* [x] `_mm_cmplt_epi16`
|
||||
* [x] `_mm_cmplt_epi32`
|
||||
* [x] `_mm_cvtepi32_pd`
|
||||
* [x] `_mm_cvtsi32_sd`
|
||||
* [x] `_mm_cvtsi64_sd`
|
||||
* [x] `_mm_cvtsi64x_sd`
|
||||
* [x] `_mm_cvtepi32_ps`
|
||||
* [ ] `_mm_cvtpi32_pd`
|
||||
* [x] `_mm_cvtsi32_si128`
|
||||
* [x] `_mm_cvtsi64_si128`
|
||||
* [x] `_mm_cvtsi64x_si128`
|
||||
* [x] `_mm_cvtsi128_si32`
|
||||
* [x] `_mm_cvtsi128_si64`
|
||||
* [x] `_mm_cvtsi128_si64x`
|
||||
* [ ] `_mm_set_epi64`
|
||||
* [x] `_mm_set_epi64x`
|
||||
* [x] `_mm_set_epi32`
|
||||
* [x] `_mm_set_epi16`
|
||||
* [x] `_mm_set_epi8`
|
||||
* [ ] `_mm_set1_epi64`
|
||||
* [x] `_mm_set1_epi64x`
|
||||
* [x] `_mm_set1_epi32`
|
||||
* [x] `_mm_set1_epi16`
|
||||
* [x] `_mm_set1_epi8`
|
||||
* [ ] `_mm_setr_epi64`
|
||||
* [x] `_mm_setr_epi32`
|
||||
* [x] `_mm_setr_epi16`
|
||||
* [x] `_mm_setr_epi8`
|
||||
* [x] `_mm_setzero_si128`
|
||||
* [x] `_mm_loadl_epi64`
|
||||
* [x] `_mm_load_si128`
|
||||
* [x] `_mm_loadu_si128`
|
||||
* [x] `_mm_maskmoveu_si128`
|
||||
* [x] `_mm_store_si128`
|
||||
* [x] `_mm_storeu_si128`
|
||||
* [x] `_mm_storel_epi64`
|
||||
* [ ] `_mm_stream_si128`
|
||||
* [ ] `_mm_stream_si32`
|
||||
* [ ] `_mm_stream_si64`
|
||||
* [ ] `_mm_movepi64_pi64`
|
||||
* [ ] `_mm_movpi64_epi64`
|
||||
* [x] `_mm_move_epi64`
|
||||
* [x] `_mm_packs_epi16`
|
||||
* [x] `_mm_packs_epi32`
|
||||
* [x] `_mm_packus_epi16`
|
||||
* [x] `_mm_extract_epi16`
|
||||
* [x] `_mm_insert_epi16`
|
||||
* [x] `_mm_movemask_epi8`
|
||||
* [x] `_mm_shuffle_epi32`
|
||||
* [x] `_mm_shufflehi_epi16`
|
||||
* [x] `_mm_shufflelo_epi16`
|
||||
* [x] `_mm_unpackhi_epi8`
|
||||
* [x] `_mm_unpackhi_epi16`
|
||||
* [x] `_mm_unpackhi_epi32`
|
||||
* [x] `_mm_unpackhi_epi64`
|
||||
* [x] `_mm_unpacklo_epi8`
|
||||
* [x] `_mm_unpacklo_epi16`
|
||||
* [x] `_mm_unpacklo_epi32`
|
||||
* [x] `_mm_unpacklo_epi64`
|
||||
* [x] `_mm_add_sd`
|
||||
* [x] `_mm_add_pd`
|
||||
* [x] `_mm_div_sd`
|
||||
* [x] `_mm_div_pd`
|
||||
* [x] `_mm_max_sd`
|
||||
* [x] `_mm_max_pd`
|
||||
* [x] `_mm_min_sd`
|
||||
* [x] `_mm_min_pd`
|
||||
* [x] `_mm_mul_sd`
|
||||
* [x] `_mm_mul_pd`
|
||||
* [x] `_mm_sqrt_sd`
|
||||
* [x] `_mm_sqrt_pd`
|
||||
* [x] `_mm_sub_sd`
|
||||
* [x] `_mm_sub_pd`
|
||||
* [x] `_mm_and_pd`
|
||||
* [x] `_mm_andnot_pd`
|
||||
* [x] `_mm_or_pd`
|
||||
* [x] `_mm_xor_pd`
|
||||
* [x] `_mm_cmpeq_sd`
|
||||
* [x] `_mm_cmplt_sd`
|
||||
* [x] `_mm_cmple_sd`
|
||||
* [x] `_mm_cmpgt_sd`
|
||||
* [x] `_mm_cmpge_sd`
|
||||
* [x] `_mm_cmpord_sd`
|
||||
* [x] `_mm_cmpunord_sd`
|
||||
* [x] `_mm_cmpneq_sd`
|
||||
* [x] `_mm_cmpnlt_sd`
|
||||
* [x] `_mm_cmpnle_sd`
|
||||
* [x] `_mm_cmpngt_sd`
|
||||
* [x] `_mm_cmpnge_sd`
|
||||
* [x] `_mm_cmpeq_pd`
|
||||
* [x] `_mm_cmplt_pd`
|
||||
* [x] `_mm_cmple_pd`
|
||||
* [x] `_mm_cmpgt_pd`
|
||||
* [x] `_mm_cmpge_pd`
|
||||
* [x] `_mm_cmpord_pd`
|
||||
* [x] `_mm_cmpunord_pd`
|
||||
* [x] `_mm_cmpneq_pd`
|
||||
* [x] `_mm_cmpnlt_pd`
|
||||
* [x] `_mm_cmpnle_pd`
|
||||
* [x] `_mm_cmpngt_pd`
|
||||
* [x] `_mm_cmpnge_pd`
|
||||
* [x] `_mm_comieq_sd`
|
||||
* [x] `_mm_comilt_sd`
|
||||
* [x] `_mm_comile_sd`
|
||||
* [x] `_mm_comigt_sd`
|
||||
* [x] `_mm_comige_sd`
|
||||
* [x] `_mm_comineq_sd`
|
||||
* [x] `_mm_ucomieq_sd`
|
||||
* [x] `_mm_ucomilt_sd`
|
||||
* [x] `_mm_ucomile_sd`
|
||||
* [x] `_mm_ucomigt_sd`
|
||||
* [x] `_mm_ucomige_sd`
|
||||
* [x] `_mm_ucomineq_sd`
|
||||
* [ ] `_mm_cvtpd_ps`
|
||||
* [ ] `_mm_cvtps_pd`
|
||||
* [ ] `_mm_cvtpd_epi32`
|
||||
* [ ] `_mm_cvtsd_si32`
|
||||
* [ ] `_mm_cvtsd_si64`
|
||||
* [ ] `_mm_cvtsd_si64x`
|
||||
* [ ] `_mm_cvtsd_ss`
|
||||
* [ ] `_mm_cvtsd_f64`
|
||||
* [ ] `_mm_cvtss_sd`
|
||||
* [ ] `_mm_cvttpd_epi32`
|
||||
* [ ] `_mm_cvttsd_si32`
|
||||
* [ ] `_mm_cvttsd_si64`
|
||||
* [ ] `_mm_cvttsd_si64x`
|
||||
* [ ] `_mm_cvtps_epi32`
|
||||
* [ ] `_mm_cvttps_epi32`
|
||||
* [ ] `_mm_cvtpd_pi32`
|
||||
* [ ] `_mm_cvttpd_pi32`
|
||||
* [ ] `_mm_set_sd`
|
||||
* [ ] `_mm_set1_pd`
|
||||
* [ ] `_mm_set_pd1`
|
||||
* [ ] `_mm_set_pd`
|
||||
* [ ] `_mm_setr_pd`
|
||||
* [ ] `_mm_setzero_pd`
|
||||
* [ ] `_mm_load_pd`
|
||||
* [ ] `_mm_load1_pd`
|
||||
* [ ] `_mm_load_pd1`
|
||||
* [ ] `_mm_loadr_pd`
|
||||
* [ ] `_mm_loadu_pd`
|
||||
* [ ] `_mm_load_sd`
|
||||
* [ ] `_mm_loadh_pd`
|
||||
* [ ] `_mm_loadl_pd`
|
||||
* [ ] `_mm_stream_pd`
|
||||
* [ ] `_mm_store_sd`
|
||||
* [ ] `_mm_store1_pd`
|
||||
* [ ] `_mm_store_pd1`
|
||||
* [ ] `_mm_store_pd`
|
||||
* [ ] `_mm_storeu_pd`
|
||||
* [ ] `_mm_storer_pd`
|
||||
* [ ] `_mm_storeh_pd`
|
||||
* [ ] `_mm_storel_pd`
|
||||
* [ ] `_mm_unpackhi_pd`
|
||||
* [ ] `_mm_unpacklo_pd`
|
||||
* [x] `_mm_movemask_pd`
|
||||
* [ ] `_mm_shuffle_pd`
|
||||
* [ ] `_mm_move_sd`
|
||||
* [ ] `_mm_castpd_ps`
|
||||
* [ ] `_mm_castpd_si128`
|
||||
* [ ] `_mm_castps_pd`
|
||||
* [ ] `_mm_castps_si128`
|
||||
* [ ] `_mm_castsi128_pd`
|
||||
* [ ] `_mm_castsi128_ps`
|
||||
* [ ] `_mm_undefined_pd`
|
||||
* [ ] `_mm_undefined_si128`
|
||||
|
||||
|
||||
sse3
|
||||
----
|
||||
* [ ] `_mm_addsub_ps`
|
||||
* [ ] `_mm_addsub_pd`
|
||||
* [ ] `_mm_hadd_pd`
|
||||
* [ ] `_mm_hadd_ps`
|
||||
* [ ] `_mm_hsub_pd`
|
||||
* [ ] `_mm_hsub_ps`
|
||||
* [ ] `_mm_lddqu_si128`
|
||||
* [ ] `_mm_movedup_pd`
|
||||
* [ ] `_mm_loaddup_pd`
|
||||
* [ ] `_mm_movehdup_ps`
|
||||
* [ ] `_mm_moveldup_ps`
|
||||
|
||||
|
||||
ssse3
|
||||
-----
|
||||
* [ ] `_mm_abs_pi8`
|
||||
* [x] `_mm_abs_epi8`
|
||||
* [ ] `_mm_abs_pi16`
|
||||
* [ ] `_mm_abs_epi16`
|
||||
* [ ] `_mm_abs_pi32`
|
||||
* [ ] `_mm_abs_epi32`
|
||||
* [x] `_mm_shuffle_epi8`
|
||||
* [ ] `_mm_shuffle_pi8`
|
||||
* [ ] `_mm_alignr_epi8`
|
||||
* [ ] `_mm_alignr_pi8`
|
||||
* [ ] `_mm_hadd_epi16`
|
||||
* [ ] `_mm_hadds_epi16`
|
||||
* [ ] `_mm_hadd_epi32`
|
||||
* [ ] `_mm_hadd_pi16`
|
||||
* [ ] `_mm_hadd_pi32`
|
||||
* [ ] `_mm_hadds_pi16`
|
||||
* [ ] `_mm_hsub_epi16`
|
||||
* [ ] `_mm_hsubs_epi16`
|
||||
* [ ] `_mm_hsub_epi32`
|
||||
* [ ] `_mm_hsub_pi16`
|
||||
* [ ] `_mm_hsub_pi32`
|
||||
* [ ] `_mm_hsubs_pi16`
|
||||
* [ ] `_mm_maddubs_epi16`
|
||||
* [ ] `_mm_maddubs_pi16`
|
||||
* [ ] `_mm_mulhrs_epi16`
|
||||
* [ ] `_mm_mulhrs_pi16`
|
||||
* [ ] `_mm_sign_epi8`
|
||||
* [ ] `_mm_sign_epi16`
|
||||
* [ ] `_mm_sign_epi32`
|
||||
* [ ] `_mm_sign_pi8`
|
||||
* [ ] `_mm_sign_pi16`
|
||||
* [ ] `_mm_sign_pi32`
|
||||
|
||||
|
||||
sse4.1
|
||||
------
|
||||
* [ ] `_mm_blend_pd`
|
||||
* [ ] `_mm_blend_ps`
|
||||
* [ ] `_mm_blendv_pd`
|
||||
* [ ] `_mm_blendv_ps`
|
||||
* [x] `_mm_blendv_epi8`
|
||||
* [ ] `_mm_blend_epi16`
|
||||
* [x] `_mm_dp_pd`
|
||||
* [x] `_mm_dp_ps`
|
||||
* [ ] `_mm_extract_ps`
|
||||
* [ ] `_mm_extract_epi8`
|
||||
* [ ] `_mm_extract_epi32`
|
||||
* [ ] `_mm_extract_epi64`
|
||||
* [ ] `_mm_insert_ps`
|
||||
* [ ] `_mm_insert_epi8`
|
||||
* [ ] `_mm_insert_epi32`
|
||||
* [ ] `_mm_insert_epi64`
|
||||
* [ ] `_mm_max_epi8`
|
||||
* [ ] `_mm_max_epi32`
|
||||
* [ ] `_mm_max_epu32`
|
||||
* [ ] `_mm_max_epu16`
|
||||
* [ ] `_mm_min_epi8`
|
||||
* [ ] `_mm_min_epi32`
|
||||
* [ ] `_mm_min_epu32`
|
||||
* [ ] `_mm_min_epu16`
|
||||
* [ ] `_mm_packus_epi32`
|
||||
* [ ] `_mm_cmpeq_epi64`
|
||||
* [ ] `_mm_cvtepi8_epi16`
|
||||
* [ ] `_mm_cvtepi8_epi32`
|
||||
* [ ] `_mm_cvtepi8_epi64`
|
||||
* [ ] `_mm_cvtepi16_epi32`
|
||||
* [ ] `_mm_cvtepi16_epi64`
|
||||
* [ ] `_mm_cvtepi32_epi64`
|
||||
* [ ] `_mm_cvtepu8_epi16`
|
||||
* [ ] `_mm_cvtepu8_epi32`
|
||||
* [ ] `_mm_cvtepu8_epi64`
|
||||
* [ ] `_mm_cvtepu16_epi32`
|
||||
* [ ] `_mm_cvtepu16_epi64`
|
||||
* [ ] `_mm_cvtepu32_epi64`
|
||||
* [ ] `_mm_mul_epi32`
|
||||
* [ ] `_mm_mullo_epi32`
|
||||
* [ ] `_mm_testz_si128`
|
||||
* [ ] `_mm_testc_si128`
|
||||
* [ ] `_mm_testnzc_si128`
|
||||
* [ ] `_mm_test_all_zeros`
|
||||
* [ ] `_mm_test_mix_ones_zeros`
|
||||
* [ ] `_mm_test_all_ones`
|
||||
* [ ] `_mm_round_pd`
|
||||
* [ ] `_mm_floor_pd`
|
||||
* [ ] `_mm_ceil_pd`
|
||||
* [ ] `_mm_round_ps`
|
||||
* [ ] `_mm_floor_ps`
|
||||
* [ ] `_mm_ceil_ps`
|
||||
* [ ] `_mm_round_sd`
|
||||
* [ ] `_mm_floor_sd`
|
||||
* [ ] `_mm_ceil_sd`
|
||||
* [ ] `_mm_round_ss`
|
||||
* [ ] `_mm_floor_ss`
|
||||
* [ ] `_mm_ceil_ss`
|
||||
* [ ] `_mm_minpos_epu16`
|
||||
* [ ] `_mm_mpsadbw_epu8`
|
||||
* [ ] `_mm_stream_load_si128`
|
||||
|
||||
|
||||
sse4.2
|
||||
------
|
||||
* [ ] `_mm_cmpistrm`
|
||||
* [ ] `_mm_cmpistri`
|
||||
* [ ] `_mm_cmpistrz`
|
||||
* [ ] `_mm_cmpistrc`
|
||||
* [ ] `_mm_cmpistrs`
|
||||
* [ ] `_mm_cmpistro`
|
||||
* [ ] `_mm_cmpistra`
|
||||
* [ ] `_mm_cmpestrm`
|
||||
* [ ] `_mm_cmpestri`
|
||||
* [ ] `_mm_cmpestrz`
|
||||
* [ ] `_mm_cmpestrc`
|
||||
* [ ] `_mm_cmpestrs`
|
||||
* [ ] `_mm_cmpestro`
|
||||
* [ ] `_mm_cmpestra`
|
||||
* [ ] `_mm_cmpgt_epi64`
|
||||
* [ ] `_mm_crc32_u8`
|
||||
* [ ] `_mm_crc32_u16`
|
||||
* [ ] `_mm_crc32_u32`
|
||||
* [ ] `_mm_crc32_u64`
|
||||
|
||||
|
||||
avx
|
||||
---
|
||||
* [x] `_mm256_add_pd`
|
||||
* [x] `_mm256_add_ps`
|
||||
* [x] `_mm256_addsub_pd`
|
||||
* [ ] `_mm256_addsub_ps`
|
||||
* [ ] `_mm256_and_pd`
|
||||
* [ ] `_mm256_and_ps`
|
||||
* [ ] `_mm256_andnot_pd`
|
||||
* [ ] `_mm256_andnot_ps`
|
||||
* [ ] `_mm256_blend_pd`
|
||||
* [ ] `_mm256_blend_ps`
|
||||
* [ ] `_mm256_blendv_pd`
|
||||
* [ ] `_mm256_blendv_ps`
|
||||
* [ ] `_mm256_div_pd`
|
||||
* [ ] `_mm256_div_ps`
|
||||
* [ ] `_mm256_dp_ps`
|
||||
* [ ] `_mm256_hadd_pd`
|
||||
* [ ] `_mm256_hadd_ps`
|
||||
* [ ] `_mm256_hsub_pd`
|
||||
* [ ] `_mm256_hsub_ps`
|
||||
* [ ] `_mm256_max_pd`
|
||||
* [ ] `_mm256_max_ps`
|
||||
* [ ] `_mm256_min_pd`
|
||||
* [ ] `_mm256_min_ps`
|
||||
* [ ] `_mm256_mul_pd`
|
||||
* [ ] `_mm256_mul_ps`
|
||||
* [ ] `_mm256_or_pd`
|
||||
* [ ] `_mm256_or_ps`
|
||||
* [ ] `_mm256_shuffle_pd`
|
||||
* [ ] `_mm256_shuffle_ps`
|
||||
* [ ] `_mm256_sub_pd`
|
||||
* [ ] `_mm256_sub_ps`
|
||||
* [ ] `_mm256_xor_pd`
|
||||
* [ ] `_mm256_xor_ps`
|
||||
* [ ] `_mm_cmp_pd`
|
||||
* [ ] `_mm256_cmp_pd`
|
||||
* [ ] `_mm_cmp_ps`
|
||||
* [ ] `_mm256_cmp_ps`
|
||||
* [ ] `_mm_cmp_sd`
|
||||
* [ ] `_mm_cmp_ss`
|
||||
* [ ] `_mm256_cvtepi32_pd`
|
||||
* [ ] `_mm256_cvtepi32_ps`
|
||||
* [ ] `_mm256_cvtpd_ps`
|
||||
* [ ] `_mm256_cvtps_epi32`
|
||||
* [ ] `_mm256_cvtps_pd`
|
||||
* [ ] `_mm256_cvttpd_epi32`
|
||||
* [ ] `_mm256_cvtpd_epi32`
|
||||
* [ ] `_mm256_cvttps_epi32`
|
||||
* [ ] `_mm256_extractf128_ps`
|
||||
* [ ] `_mm256_extractf128_pd`
|
||||
* [ ] `_mm256_extractf128_si256`
|
||||
* [ ] `_mm256_extract_epi8`
|
||||
* [ ] `_mm256_extract_epi16`
|
||||
* [ ] `_mm256_extract_epi32`
|
||||
* [ ] `_mm256_extract_epi64`
|
||||
* [ ] `_mm256_zeroall`
|
||||
* [ ] `_mm256_zeroupper`
|
||||
* [ ] `_mm256_permutevar_ps`
|
||||
* [ ] `_mm_permutevar_ps`
|
||||
* [ ] `_mm256_permute_ps`
|
||||
* [ ] `_mm_permute_ps`
|
||||
* [ ] `_mm256_permutevar_pd`
|
||||
* [ ] `_mm_permutevar_pd`
|
||||
* [ ] `_mm256_permute_pd`
|
||||
* [ ] `_mm_permute_pd`
|
||||
* [ ] `_mm256_permute2f128_ps`
|
||||
* [ ] `_mm256_permute2f128_pd`
|
||||
* [ ] `_mm256_permute2f128_si256`
|
||||
* [ ] `_mm256_broadcast_ss`
|
||||
* [ ] `_mm_broadcast_ss`
|
||||
* [ ] `_mm256_broadcast_sd`
|
||||
* [ ] `_mm256_broadcast_ps`
|
||||
* [ ] `_mm256_broadcast_pd`
|
||||
* [ ] `_mm256_insertf128_ps`
|
||||
* [ ] `_mm256_insertf128_pd`
|
||||
* [ ] `_mm256_insertf128_si256`
|
||||
* [ ] `_mm256_insert_epi8`
|
||||
* [ ] `_mm256_insert_epi16`
|
||||
* [ ] `_mm256_insert_epi32`
|
||||
* [ ] `_mm256_insert_epi64`
|
||||
* [ ] `_mm256_load_pd`
|
||||
* [ ] `_mm256_store_pd`
|
||||
* [ ] `_mm256_load_ps`
|
||||
* [ ] `_mm256_store_ps`
|
||||
* [ ] `_mm256_loadu_pd`
|
||||
* [ ] `_mm256_storeu_pd`
|
||||
* [ ] `_mm256_loadu_ps`
|
||||
* [ ] `_mm256_storeu_ps`
|
||||
* [ ] `_mm256_load_si256`
|
||||
* [ ] `_mm256_store_si256`
|
||||
* [ ] `_mm256_loadu_si256`
|
||||
* [ ] `_mm256_storeu_si256`
|
||||
* [ ] `_mm256_maskload_pd`
|
||||
* [ ] `_mm256_maskstore_pd`
|
||||
* [ ] `_mm_maskload_pd`
|
||||
* [ ] `_mm_maskstore_pd`
|
||||
* [ ] `_mm256_maskload_ps`
|
||||
* [ ] `_mm256_maskstore_ps`
|
||||
* [ ] `_mm_maskload_ps`
|
||||
* [ ] `_mm_maskstore_ps`
|
||||
* [ ] `_mm256_movehdup_ps`
|
||||
* [ ] `_mm256_moveldup_ps`
|
||||
* [ ] `_mm256_movedup_pd`
|
||||
* [ ] `_mm256_lddqu_si256`
|
||||
* [ ] `_mm256_stream_si256`
|
||||
* [ ] `_mm256_stream_pd`
|
||||
* [ ] `_mm256_stream_ps`
|
||||
* [ ] `_mm256_rcp_ps`
|
||||
* [ ] `_mm256_rsqrt_ps`
|
||||
* [ ] `_mm256_sqrt_pd`
|
||||
* [ ] `_mm256_sqrt_ps`
|
||||
* [ ] `_mm256_round_pd`
|
||||
* [ ] `_mm256_round_ps`
|
||||
* [ ] `_mm256_unpackhi_pd`
|
||||
* [ ] `_mm256_unpackhi_ps`
|
||||
* [ ] `_mm256_unpacklo_pd`
|
||||
* [ ] `_mm256_unpacklo_ps`
|
||||
* [ ] `_mm256_testz_si256`
|
||||
* [ ] `_mm256_testc_si256`
|
||||
* [ ] `_mm256_testnzc_si256`
|
||||
* [ ] `_mm256_testz_pd`
|
||||
* [ ] `_mm256_testc_pd`
|
||||
* [ ] `_mm256_testnzc_pd`
|
||||
* [ ] `_mm_testz_pd`
|
||||
* [ ] `_mm_testc_pd`
|
||||
* [ ] `_mm_testnzc_pd`
|
||||
* [ ] `_mm256_testz_ps`
|
||||
* [ ] `_mm256_testc_ps`
|
||||
* [ ] `_mm256_testnzc_ps`
|
||||
* [ ] `_mm_testz_ps`
|
||||
* [ ] `_mm_testc_ps`
|
||||
* [ ] `_mm_testnzc_ps`
|
||||
* [ ] `_mm256_movemask_pd`
|
||||
* [ ] `_mm256_movemask_ps`
|
||||
* [ ] `_mm256_setzero_pd`
|
||||
* [ ] `_mm256_setzero_ps`
|
||||
* [ ] `_mm256_setzero_si256`
|
||||
* [ ] `_mm256_set_pd`
|
||||
* [ ] `_mm256_set_ps`
|
||||
* [ ] `_mm256_set_epi8`
|
||||
* [ ] `_mm256_set_epi16`
|
||||
* [ ] `_mm256_set_epi32`
|
||||
* [ ] `_mm256_set_epi64x`
|
||||
* [ ] `_mm256_setr_pd`
|
||||
* [ ] `_mm256_setr_ps`
|
||||
* [ ] `_mm256_setr_epi8`
|
||||
* [ ] `_mm256_setr_epi16`
|
||||
* [ ] `_mm256_setr_epi32`
|
||||
* [ ] `_mm256_setr_epi64x`
|
||||
* [ ] `_mm256_set1_pd`
|
||||
* [ ] `_mm256_set1_ps`
|
||||
* [ ] `_mm256_set1_epi8`
|
||||
* [ ] `_mm256_set1_epi16`
|
||||
* [ ] `_mm256_set1_epi32`
|
||||
* [ ] `_mm256_set1_epi64x`
|
||||
* [ ] `_mm256_castpd_ps`
|
||||
* [ ] `_mm256_castps_pd`
|
||||
* [ ] `_mm256_castps_si256`
|
||||
* [ ] `_mm256_castpd_si256`
|
||||
* [ ] `_mm256_castsi256_ps`
|
||||
* [ ] `_mm256_castsi256_pd`
|
||||
* [ ] `_mm256_castps256_ps128`
|
||||
* [ ] `_mm256_castpd256_pd128`
|
||||
* [ ] `_mm256_castsi256_si128`
|
||||
* [ ] `_mm256_castps128_ps256`
|
||||
* [ ] `_mm256_castpd128_pd256`
|
||||
* [ ] `_mm256_castsi128_si256`
|
||||
* [ ] `_mm256_zextps128_ps256`
|
||||
* [ ] `_mm256_zextpd128_pd256`
|
||||
* [ ] `_mm256_zextsi128_si256`
|
||||
* [ ] `_mm256_floor_ps`
|
||||
* [ ] `_mm256_ceil_ps`
|
||||
* [ ] `_mm256_floor_pd`
|
||||
* [ ] `_mm256_ceil_pd`
|
||||
* [ ] `_mm256_undefined_ps`
|
||||
* [ ] `_mm256_undefined_pd`
|
||||
* [ ] `_mm256_undefined_si256`
|
||||
* [ ] `_mm256_set_m128`
|
||||
* [ ] `_mm256_set_m128d`
|
||||
* [ ] `_mm256_set_m128i`
|
||||
* [ ] `_mm256_setr_m128`
|
||||
* [ ] `_mm256_setr_m128d`
|
||||
* [ ] `_mm256_setr_m128i`
|
||||
* [ ] `_mm256_loadu2_m128`
|
||||
* [ ] `_mm256_loadu2_m128d`
|
||||
* [ ] `_mm256_loadu2_m128i`
|
||||
* [ ] `_mm256_storeu2_m128`
|
||||
* [ ] `_mm256_storeu2_m128d`
|
||||
* [ ] `_mm256_storeu2_m128i`
|
||||
|
||||
|
||||
|
||||
avx2
|
||||
----
|
||||
* [x] `_mm256_abs_epi8`
|
||||
* [x] `_mm256_abs_epi16`
|
||||
* [x] `_mm256_abs_epi32`
|
||||
* [x] `_mm256_add_epi8`
|
||||
* [x] `_mm256_add_epi16`
|
||||
* [x] `_mm256_add_epi32`
|
||||
* [x] `_mm256_add_epi64`
|
||||
* [x] `_mm256_adds_epi8`
|
||||
* [x] `_mm256_adds_epi16`
|
||||
* [x] `_mm256_adds_epu8`
|
||||
* [x] `_mm256_adds_epu16`
|
||||
* [ ] `_mm256_alignr_epi8`
|
||||
* [x] `_mm256_and_si256`
|
||||
* [x] `_mm256_andnot_si256`
|
||||
* [x] `_mm256_avg_epu8`
|
||||
* [x] `_mm256_avg_epu16`
|
||||
* [ ] `_mm256_blend_epi16`
|
||||
* [ ] `_mm_blend_epi32`
|
||||
* [ ] `_mm256_blend_epi32`
|
||||
* [x] `_mm256_blendv_epi8`
|
||||
* [ ] `_mm_broadcastb_epi8`
|
||||
* [ ] `_mm256_broadcastb_epi8`
|
||||
* [ ] `_mm_broadcastd_epi32`
|
||||
* [ ] `_mm256_broadcastd_epi32`
|
||||
* [ ] `_mm_broadcastq_epi64`
|
||||
* [ ] `_mm256_broadcastq_epi64`
|
||||
* [ ] `_mm_broadcastsd_pd`
|
||||
* [ ] `_mm256_broadcastsd_pd`
|
||||
* [ ] `_mm_broadcastsi128_si256`
|
||||
* [ ] `_mm256_broadcastsi128_si256`
|
||||
* [ ] `_mm_broadcastss_ps`
|
||||
* [ ] `_mm256_broadcastss_ps`
|
||||
* [ ] `_mm_broadcastw_epi16`
|
||||
* [ ] `_mm256_broadcastw_epi16`
|
||||
* [x] `_mm256_cmpeq_epi8`
|
||||
* [x] `_mm256_cmpeq_epi16`
|
||||
* [x] `_mm256_cmpeq_epi32`
|
||||
* [x] `_mm256_cmpeq_epi64`
|
||||
* [x] `_mm256_cmpgt_epi8`
|
||||
* [x] `_mm256_cmpgt_epi16`
|
||||
* [x] `_mm256_cmpgt_epi32`
|
||||
* [x] `_mm256_cmpgt_epi64`
|
||||
* [ ] `_mm256_cvtepi16_epi32`
|
||||
* [ ] `_mm256_cvtepi16_epi64`
|
||||
* [ ] `_mm256_cvtepi32_epi64`
|
||||
* [ ] `_mm256_cvtepi8_epi16`
|
||||
* [ ] `_mm256_cvtepi8_epi32`
|
||||
* [ ] `_mm256_cvtepi8_epi64`
|
||||
* [ ] `_mm256_cvtepu16_epi32`
|
||||
* [ ] `_mm256_cvtepu16_epi64`
|
||||
* [ ] `_mm256_cvtepu32_epi64`
|
||||
* [ ] `_mm256_cvtepu8_epi16`
|
||||
* [ ] `_mm256_cvtepu8_epi32`
|
||||
* [ ] `_mm256_cvtepu8_epi64`
|
||||
* [ ] `_mm256_extracti128_si256`
|
||||
* [x] `_mm256_hadd_epi16`
|
||||
* [x] `_mm256_hadd_epi32`
|
||||
* [x] `_mm256_hadds_epi16`
|
||||
* [x] `_mm256_hsub_epi16`
|
||||
* [x] `_mm256_hsub_epi32`
|
||||
* [x] `_mm256_hsubs_epi16`
|
||||
* [ ] `_mm_i32gather_pd`
|
||||
* [ ] `_mm256_i32gather_pd`
|
||||
* [ ] `_mm_i32gather_ps`
|
||||
* [ ] `_mm256_i32gather_ps`
|
||||
* [ ] `_mm_i32gather_epi32`
|
||||
* [ ] `_mm256_i32gather_epi32`
|
||||
* [ ] `_mm_i32gather_epi64`
|
||||
* [ ] `_mm256_i32gather_epi64`
|
||||
* [ ] `_mm_i64gather_pd`
|
||||
* [ ] `_mm256_i64gather_pd`
|
||||
* [ ] `_mm_i64gather_ps`
|
||||
* [ ] `_mm256_i64gather_ps`
|
||||
* [ ] `_mm_i64gather_epi32`
|
||||
* [ ] `_mm256_i64gather_epi32`
|
||||
* [ ] `_mm_i64gather_epi64`
|
||||
* [ ] `_mm256_i64gather_epi64`
|
||||
* [ ] `_mm256_inserti128_si256`
|
||||
* [x] `_mm256_madd_epi16`
|
||||
* [x] `_mm256_maddubs_epi16`
|
||||
* [ ] `_mm_mask_i32gather_pd`
|
||||
* [ ] `_mm256_mask_i32gather_pd`
|
||||
* [ ] `_mm_mask_i32gather_ps`
|
||||
* [ ] `_mm256_mask_i32gather_ps`
|
||||
* [ ] `_mm_mask_i32gather_epi32`
|
||||
* [ ] `_mm256_mask_i32gather_epi32`
|
||||
* [ ] `_mm_mask_i32gather_epi64`
|
||||
* [ ] `_mm256_mask_i32gather_epi64`
|
||||
* [ ] `_mm_mask_i64gather_pd`
|
||||
* [ ] `_mm256_mask_i64gather_pd`
|
||||
* [ ] `_mm_mask_i64gather_ps`
|
||||
* [ ] `_mm256_mask_i64gather_ps`
|
||||
* [ ] `_mm_mask_i64gather_epi32`
|
||||
* [ ] `_mm256_mask_i64gather_epi32`
|
||||
* [ ] `_mm_mask_i64gather_epi64`
|
||||
* [ ] `_mm256_mask_i64gather_epi64`
|
||||
* [ ] `_mm_maskload_epi32`
|
||||
* [ ] `_mm256_maskload_epi32`
|
||||
* [ ] `_mm_maskload_epi64`
|
||||
* [ ] `_mm256_maskload_epi64`
|
||||
* [ ] `_mm_maskstore_epi32`
|
||||
* [ ] `_mm256_maskstore_epi32`
|
||||
* [ ] `_mm_maskstore_epi64`
|
||||
* [ ] `_mm256_maskstore_epi64`
|
||||
* [x] `_mm256_max_epi8`
|
||||
* [x] `_mm256_max_epi16`
|
||||
* [x] `_mm256_max_epi32`
|
||||
* [x] `_mm256_max_epu8`
|
||||
* [x] `_mm256_max_epu16`
|
||||
* [x] `_mm256_max_epu32`
|
||||
* [x] `_mm256_min_epi8`
|
||||
* [x] `_mm256_min_epi16`
|
||||
* [x] `_mm256_min_epi32`
|
||||
* [x] `_mm256_min_epu8`
|
||||
* [x] `_mm256_min_epu16`
|
||||
* [x] `_mm256_min_epu32`
|
||||
* [ ] `_mm256_movemask_epi8`
|
||||
* [ ] `_mm256_mpsadbw_epu8`
|
||||
* [x] `_mm256_mul_epi32`
|
||||
* [x] `_mm256_mul_epu32`
|
||||
* [x] `_mm256_mulhi_epi16`
|
||||
* [x] `_mm256_mulhi_epu16`
|
||||
* [x] `_mm256_mulhrs_epi16`
|
||||
* [x] `_mm256_mullo_epi16`
|
||||
* [x] `_mm256_mullo_epi32`
|
||||
* [x] `_mm256_or_si256`
|
||||
* [x] `_mm256_packs_epi16`
|
||||
* [x] `_mm256_packs_epi32`
|
||||
* [x] `_mm256_packus_epi16`
|
||||
* [x] `_mm256_packus_epi32`
|
||||
* [ ] `_mm256_permute2x128_si256`
|
||||
* [ ] `_mm256_permute4x64_epi64`
|
||||
* [ ] `_mm256_permute4x64_pd`
|
||||
* [ ] `_mm256_permutevar8x32_epi32`
|
||||
* [ ] `_mm256_permutevar8x32_ps`
|
||||
* [x] `_mm256_sad_epu8`
|
||||
* [ ] `_mm256_shuffle_epi32`
|
||||
* [ ] `_mm256_shuffle_epi8`
|
||||
* [ ] `_mm256_shufflehi_epi16`
|
||||
* [ ] `_mm256_shufflelo_epi16`
|
||||
* [x] `_mm256_sign_epi8`
|
||||
* [x] `_mm256_sign_epi16`
|
||||
* [x] `_mm256_sign_epi32`
|
||||
* [ ] `_mm256_slli_si256`
|
||||
* [ ] `_mm256_bslli_epi128`
|
||||
* [x] `_mm256_sll_epi16`
|
||||
* [x] `_mm256_slli_epi16`
|
||||
* [x] `_mm256_sll_epi32`
|
||||
* [x] `_mm256_slli_epi32`
|
||||
* [x] `_mm256_sll_epi64`
|
||||
* [x] `_mm256_slli_epi64`
|
||||
* [x] `_mm_sllv_epi32`
|
||||
* [x] `_mm256_sllv_epi32`
|
||||
* [x] `_mm_sllv_epi64`
|
||||
* [x] `_mm256_sllv_epi64`
|
||||
* [x] `_mm256_sra_epi16`
|
||||
* [x] `_mm256_srai_epi16`
|
||||
* [x] `_mm256_sra_epi32`
|
||||
* [x] `_mm256_srai_epi32`
|
||||
* [x] `_mm_srav_epi32`
|
||||
* [x] `_mm256_srav_epi32`
|
||||
* [x] `_mm256_srli_si256`
|
||||
* [ ] `_mm256_bsrli_epi128`
|
||||
* [x] `_mm256_srl_epi16`
|
||||
* [x] `_mm256_srli_epi16`
|
||||
* [x] `_mm256_srl_epi32`
|
||||
* [x] `_mm256_srli_epi32`
|
||||
* [x] `_mm256_srl_epi64`
|
||||
* [x] `_mm256_srli_epi64`
|
||||
* [x] `_mm_srlv_epi32`
|
||||
* [x] `_mm256_srlv_epi32`
|
||||
* [x] `_mm_srlv_epi64`
|
||||
* [x] `_mm256_srlv_epi64`
|
||||
* [ ] `_mm256_stream_load_si256`
|
||||
* [x] `_mm256_sub_epi8`
|
||||
* [x] `_mm256_sub_epi16`
|
||||
* [x] `_mm256_sub_epi32`
|
||||
* [x] `_mm256_sub_epi64`
|
||||
* [x] `_mm256_subs_epi8`
|
||||
* [x] `_mm256_subs_epi16`
|
||||
* [x] `_mm256_subs_epu8`
|
||||
* [x] `_mm256_subs_epu16`
|
||||
* [x] `_mm256_xor_si256`
|
||||
* [ ] `_mm256_unpackhi_epi8`
|
||||
* [ ] `_mm256_unpackhi_epi16`
|
||||
* [ ] `_mm256_unpackhi_epi32`
|
||||
* [ ] `_mm256_unpackhi_epi64`
|
||||
* [ ] `_mm256_unpacklo_epi8`
|
||||
* [ ] `_mm256_unpacklo_epi16`
|
||||
* [ ] `_mm256_unpacklo_epi32`
|
||||
* [ ] `_mm256_unpacklo_epi64`
|
||||
|
|
@ -4,7 +4,7 @@
|
|||
//! Manual](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0419c/index.html).
|
||||
|
||||
#[cfg(test)]
|
||||
use assert_instr::assert_instr;
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
/// Reverse the order of the bytes.
|
||||
#[inline(always)]
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
pub use super::v6::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use assert_instr::assert_instr;
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
/// Count Leading Zeros.
|
||||
#[inline(always)]
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
pub use super::v7::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use assert_instr::assert_instr;
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
/// Reverse the order of the bytes.
|
||||
#[inline(always)]
|
||||
|
|
|
|||
|
|
@ -89,7 +89,7 @@
|
|||
#![cfg_attr(test, feature(proc_macro))]
|
||||
|
||||
#[cfg(test)]
|
||||
extern crate assert_instr;
|
||||
extern crate stdsimd_test;
|
||||
|
||||
/// Platform independent SIMD vector types and operations.
|
||||
pub mod simd {
|
||||
|
|
|
|||
|
|
@ -23,12 +23,12 @@ macro_rules! define_impl {
|
|||
$($elname:ident),+
|
||||
) => {
|
||||
impl $name {
|
||||
#[inline]
|
||||
#[inline(always)]
|
||||
pub fn new($($elname: $elemty),*) -> $name {
|
||||
$name($($elname),*)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[inline(always)]
|
||||
pub fn splat(value: $elemty) -> $name {
|
||||
$name($({
|
||||
#[allow(non_camel_case_types, dead_code)]
|
||||
|
|
@ -37,25 +37,25 @@ macro_rules! define_impl {
|
|||
}),*)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[inline(always)]
|
||||
pub fn extract(self, idx: u32) -> $elemty {
|
||||
assert!(idx < $nelems);
|
||||
unsafe { simd_extract(self, idx) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[inline(always)]
|
||||
pub fn replace(self, idx: u32, val: $elemty) -> $name {
|
||||
assert!(idx < $nelems);
|
||||
unsafe { simd_insert(self, idx, val) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[inline(always)]
|
||||
pub fn store(self, slice: &mut [$elemty], offset: usize) {
|
||||
assert!(slice[offset..].len() >= $nelems);
|
||||
unsafe { self.store_unchecked(slice, offset) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[inline(always)]
|
||||
pub unsafe fn store_unchecked(
|
||||
self,
|
||||
slice: &mut [$elemty],
|
||||
|
|
@ -70,13 +70,13 @@ macro_rules! define_impl {
|
|||
size_of::<$name>());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[inline(always)]
|
||||
pub fn load(slice: &[$elemty], offset: usize) -> $name {
|
||||
assert!(slice[offset..].len() >= $nelems);
|
||||
unsafe { $name::load_unchecked(slice, offset) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[inline(always)]
|
||||
pub unsafe fn load_unchecked(
|
||||
slice: &[$elemty],
|
||||
offset: usize,
|
||||
|
|
@ -92,32 +92,32 @@ macro_rules! define_impl {
|
|||
x
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[inline(always)]
|
||||
pub fn eq(self, other: $name) -> $boolname {
|
||||
unsafe { simd_eq(self, other) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[inline(always)]
|
||||
pub fn ne(self, other: $name) -> $boolname {
|
||||
unsafe { simd_ne(self, other) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[inline(always)]
|
||||
pub fn lt(self, other: $name) -> $boolname {
|
||||
unsafe { simd_lt(self, other) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[inline(always)]
|
||||
pub fn le(self, other: $name) -> $boolname {
|
||||
unsafe { simd_le(self, other) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[inline(always)]
|
||||
pub fn gt(self, other: $name) -> $boolname {
|
||||
unsafe { simd_gt(self, other) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[inline(always)]
|
||||
pub fn ge(self, other: $name) -> $boolname {
|
||||
unsafe { simd_ge(self, other) }
|
||||
}
|
||||
|
|
@ -129,6 +129,7 @@ macro_rules! define_from {
|
|||
($to:ident, $($from:ident),+) => {
|
||||
$(
|
||||
impl From<$from> for $to {
|
||||
#[inline(always)]
|
||||
fn from(f: $from) -> $to {
|
||||
unsafe { ::std::mem::transmute(f) }
|
||||
}
|
||||
|
|
@ -259,7 +260,7 @@ macro_rules! define_casts {
|
|||
($(($fromty:ident, $toty:ident, $cast:ident)),+) => {
|
||||
$(
|
||||
impl $fromty {
|
||||
#[inline]
|
||||
#[inline(always)]
|
||||
pub fn $cast(self) -> ::simd::$toty {
|
||||
unsafe { simd_cast(self) }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@
|
|||
//! provides a quick overview of the instructions available.
|
||||
|
||||
#[cfg(test)]
|
||||
use assert_instr::assert_instr;
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
/// Counts the leading most significant zero bits.
|
||||
///
|
||||
|
|
@ -41,30 +41,28 @@ pub fn _popcnt32(x: u32) -> u32 { x.count_ones() }
|
|||
#[cfg_attr(test, assert_instr(popcnt))]
|
||||
pub fn _popcnt64(x: u64) -> u64 { x.count_ones() as u64 }
|
||||
|
||||
#[cfg(all(test, target_feature = "bmi", any(target_arch = "x86", target_arch = "x86_64")))]
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::abm;
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+lzcnt"]
|
||||
#[simd_test = "lzcnt"]
|
||||
fn _lzcnt_u32() {
|
||||
assert_eq!(abm::_lzcnt_u32(0b0101_1010u32), 25u32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+lzcnt"]
|
||||
#[simd_test = "lzcnt"]
|
||||
fn _lzcnt_u64() {
|
||||
assert_eq!(abm::_lzcnt_u64(0b0101_1010u64), 57u64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+popcnt"]
|
||||
#[simd_test = "popcnt"]
|
||||
fn _popcnt32() {
|
||||
assert_eq!(abm::_popcnt32(0b0101_1010u32), 4);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+popcnt"]
|
||||
#[simd_test = "popcnt"]
|
||||
fn _popcnt64() {
|
||||
assert_eq!(abm::_popcnt64(0b0101_1010u64), 4);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -94,38 +94,14 @@ extern "C" {
|
|||
fn roundpd256(a: f64x4, b: i32) -> f64x4;
|
||||
}
|
||||
|
||||
// Function stubs: work around assert_instr issues in expanded forms
|
||||
// ref: https://github.com/rust-lang-nursery/stdsimd/issues/49
|
||||
// ref: https://github.com/rust-lang-nursery/stdsimd/issues/47
|
||||
|
||||
// #[cfg(test)]
|
||||
// #[target_feature = "+avx"]
|
||||
// #[cfg_attr(test, assert_instr(vroundpd))]
|
||||
// pub fn _mm256_round_pd_auto(a: f64x4, b: i32) -> f64x4 {
|
||||
// return _mm256_round_pd(a, b);
|
||||
// }
|
||||
|
||||
// #[cfg(test)]
|
||||
// #[target_feature = "+avx"]
|
||||
// #[cfg_attr(test, assert_instr(vroundpd))]
|
||||
// pub fn _mm256_ceil_pd_auto(a: f64x4) -> f64x4 {
|
||||
// return _mm256_ceil_pd(a);
|
||||
// }
|
||||
|
||||
// #[cfg(test)]
|
||||
// #[target_feature = "+avx"]
|
||||
// #[cfg_attr(test, assert_instr(vroundpd))]
|
||||
// pub fn _mm256_floor_pd_auto(a: f64x4) -> f64x4 {
|
||||
// return _mm256_floor_pd(a);
|
||||
// }
|
||||
|
||||
#[cfg(all(test, target_feature = "avx", any(target_arch = "x86", target_arch = "x86_64")))]
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use v256::*;
|
||||
use x86::avx;
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+avx"]
|
||||
#[simd_test = "avx"]
|
||||
fn _mm256_add_pd() {
|
||||
let a = f64x4::new(1.0, 2.0, 3.0, 4.0);
|
||||
let b = f64x4::new(5.0, 6.0, 7.0, 8.0);
|
||||
|
|
@ -134,8 +110,7 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+avx"]
|
||||
#[simd_test = "avx"]
|
||||
fn _mm256_add_ps() {
|
||||
let a = f32x8::new(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
|
||||
let b = f32x8::new(9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
|
||||
|
|
@ -144,8 +119,7 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+avx"]
|
||||
#[simd_test = "avx"]
|
||||
fn _mm256_addsub_pd() {
|
||||
let a = f64x4::new(1.0, 2.0, 3.0, 4.0);
|
||||
let b = f64x4::new(5.0, 6.0, 7.0, 8.0);
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -8,7 +8,7 @@
|
|||
//! provides a quick overview of the available instructions.
|
||||
|
||||
#[cfg(test)]
|
||||
use assert_instr::assert_instr;
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
#[allow(dead_code)]
|
||||
extern "C" {
|
||||
|
|
@ -183,24 +183,24 @@ pub fn _mm_tzcnt_u64(x: u64) -> u64 {
|
|||
x.trailing_zeros() as u64
|
||||
}
|
||||
|
||||
#[cfg(all(test, target_feature = "bmi", any(target_arch = "x86", target_arch = "x86_64")))]
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::bmi;
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi"]
|
||||
#[simd_test = "bmi"]
|
||||
fn _bextr_u32() {
|
||||
assert_eq!(bmi::_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi"]
|
||||
#[simd_test = "bmi"]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
fn _bextr_u64() {
|
||||
assert_eq!(bmi::_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi"]
|
||||
#[simd_test = "bmi"]
|
||||
fn _andn_u32() {
|
||||
assert_eq!(bmi::_andn_u32(0, 0), 0);
|
||||
assert_eq!(bmi::_andn_u32(0, 1), 1);
|
||||
|
|
@ -214,8 +214,8 @@ mod tests {
|
|||
assert_eq!(bmi::_andn_u32(0b0100_0000u32, 0b0101_1101u32), 0b0001_1101u32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi"]
|
||||
#[simd_test = "bmi"]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
fn _andn_u64() {
|
||||
assert_eq!(bmi::_andn_u64(0, 0), 0);
|
||||
assert_eq!(bmi::_andn_u64(0, 1), 1);
|
||||
|
|
@ -229,62 +229,57 @@ mod tests {
|
|||
assert_eq!(bmi::_andn_u64(0b0100_0000u64, 0b0101_1101u64), 0b0001_1101u64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi"]
|
||||
#[simd_test = "bmi"]
|
||||
fn _blsi_u32() {
|
||||
assert_eq!(bmi::_blsi_u32(0b1101_0000u32), 0b0001_0000u32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi"]
|
||||
#[simd_test = "bmi"]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
fn _blsi_u64() {
|
||||
assert_eq!(bmi::_blsi_u64(0b1101_0000u64), 0b0001_0000u64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi"]
|
||||
#[simd_test = "bmi"]
|
||||
fn _blsmsk_u32() {
|
||||
assert_eq!(bmi::_blsmsk_u32(0b0011_0000u32), 0b0001_1111u32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi"]
|
||||
#[simd_test = "bmi"]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
fn _blsmsk_u64() {
|
||||
assert_eq!(bmi::_blsmsk_u64(0b0011_0000u64), 0b0001_1111u64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi"]
|
||||
#[simd_test = "bmi"]
|
||||
fn _blsr_u32() {
|
||||
// TODO: test the behavior when the input is 0
|
||||
assert_eq!(bmi::_blsr_u32(0b0011_0000u32), 0b0010_0000u32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi"]
|
||||
#[simd_test = "bmi"]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
fn _blsr_u64() {
|
||||
// TODO: test the behavior when the input is 0
|
||||
assert_eq!(bmi::_blsr_u64(0b0011_0000u64), 0b0010_0000u64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi"]
|
||||
#[simd_test = "bmi"]
|
||||
fn _tzcnt_u16() {
|
||||
assert_eq!(bmi::_tzcnt_u16(0b0000_0001u16), 0u16);
|
||||
assert_eq!(bmi::_tzcnt_u16(0b0000_0000u16), 16u16);
|
||||
assert_eq!(bmi::_tzcnt_u16(0b1001_0000u16), 4u16);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi"]
|
||||
#[simd_test = "bmi"]
|
||||
fn _tzcnt_u32() {
|
||||
assert_eq!(bmi::_tzcnt_u32(0b0000_0001u32), 0u32);
|
||||
assert_eq!(bmi::_tzcnt_u32(0b0000_0000u32), 32u32);
|
||||
assert_eq!(bmi::_tzcnt_u32(0b1001_0000u32), 4u32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi"]
|
||||
#[simd_test = "bmi"]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
fn _tzcnt_u64() {
|
||||
assert_eq!(bmi::_tzcnt_u64(0b0000_0001u64), 0u64);
|
||||
assert_eq!(bmi::_tzcnt_u64(0b0000_0000u64), 64u64);
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
//! provides a quick overview of the available instructions.
|
||||
|
||||
#[cfg(test)]
|
||||
use assert_instr::assert_instr;
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
/// Unsigned multiply without affecting flags.
|
||||
///
|
||||
|
|
@ -112,12 +112,13 @@ pub fn _pext_u64(a: u64, mask: u64) -> u64 {
|
|||
unsafe { x86_bmi2_pext_64(a, mask) }
|
||||
}
|
||||
|
||||
#[cfg(all(test, target_feature = "bmi2", any(target_arch = "x86", target_arch = "x86_64")))]
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::bmi2;
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi2"]
|
||||
#[simd_test = "bmi2"]
|
||||
fn _pext_u32() {
|
||||
let n = 0b1011_1110_1001_0011u32;
|
||||
|
||||
|
|
@ -131,8 +132,8 @@ mod tests {
|
|||
assert_eq!(bmi2::_pext_u32(n, m1), s1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi2"]
|
||||
#[simd_test = "bmi2"]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
fn _pext_u64() {
|
||||
let n = 0b1011_1110_1001_0011u64;
|
||||
|
||||
|
|
@ -146,8 +147,7 @@ mod tests {
|
|||
assert_eq!(bmi2::_pext_u64(n, m1), s1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi2"]
|
||||
#[simd_test = "bmi2"]
|
||||
fn _pdep_u32() {
|
||||
let n = 0b1011_1110_1001_0011u32;
|
||||
|
||||
|
|
@ -161,8 +161,8 @@ mod tests {
|
|||
assert_eq!(bmi2::_pdep_u32(n, m1), s1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi2"]
|
||||
#[simd_test = "bmi2"]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
fn _pdep_u64() {
|
||||
let n = 0b1011_1110_1001_0011u64;
|
||||
|
||||
|
|
@ -176,24 +176,22 @@ mod tests {
|
|||
assert_eq!(bmi2::_pdep_u64(n, m1), s1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi2"]
|
||||
#[simd_test = "bmi2"]
|
||||
fn _bzhi_u32() {
|
||||
let n = 0b1111_0010u32;
|
||||
let s = 0b0001_0010u32;
|
||||
assert_eq!(bmi2::_bzhi_u32(n, 5), s);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi2"]
|
||||
#[simd_test = "bmi2"]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
fn _bzhi_u64() {
|
||||
let n = 0b1111_0010u64;
|
||||
let s = 0b0001_0010u64;
|
||||
assert_eq!(bmi2::_bzhi_u64(n, 5), s);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi2"]
|
||||
#[simd_test = "bmi2"]
|
||||
fn _mulx_u32() {
|
||||
let a: u32 = 4_294_967_200;
|
||||
let b: u32 = 2;
|
||||
|
|
@ -205,8 +203,8 @@ mod tests {
|
|||
assert_eq!(hi, 0b0001u32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+bmi2"]
|
||||
#[simd_test = "bmi2"]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
fn _mulx_u64() {
|
||||
let a: u64 = 9_223_372_036_854_775_800;
|
||||
let b: u64 = 100;
|
||||
|
|
|
|||
|
|
@ -20,6 +20,9 @@ pub type __m256i = ::v256::i8x32;
|
|||
|
||||
#[macro_use]
|
||||
mod macros;
|
||||
#[macro_use]
|
||||
mod runtime;
|
||||
|
||||
mod sse;
|
||||
mod sse2;
|
||||
mod ssse3;
|
||||
|
|
@ -32,6 +35,3 @@ mod abm;
|
|||
mod bmi;
|
||||
mod bmi2;
|
||||
mod tbm;
|
||||
|
||||
#[macro_use]
|
||||
mod runtime;
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ use simd_llvm::simd_shuffle4;
|
|||
use v128::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use assert_instr::assert_instr;
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
/// Adds the first component of `a` and `b`, the other components are copied
|
||||
/// from `a`.
|
||||
|
|
@ -127,7 +127,7 @@ pub fn _mm_rsqrt_ps(a: f32x4) -> f32x4 {
|
|||
}
|
||||
|
||||
/// Compare the first single-precision (32-bit) floating-point element of `a`
|
||||
/// and `b`, and return the minimum value in the first element of the return
|
||||
/// and `b`, and return the minimum value in the first element of the return
|
||||
/// value, the other elements are copied from `a`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
|
|
@ -146,7 +146,7 @@ pub fn _mm_min_ps(a: f32x4, b: f32x4) -> f32x4 {
|
|||
}
|
||||
|
||||
/// Compare the first single-precision (32-bit) floating-point element of `a`
|
||||
/// and `b`, and return the maximum value in the first element of the return
|
||||
/// and `b`, and return the maximum value in the first element of the return
|
||||
/// value, the other elements are copied from `a`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
|
|
@ -164,14 +164,103 @@ pub fn _mm_max_ps(a: f32x4, b: f32x4) -> f32x4 {
|
|||
unsafe { maxps(a, b) }
|
||||
}
|
||||
|
||||
/// Unpack and interleave single-precision (32-bit) floating-point elements
|
||||
/// from the high half of `a` and `b`;
|
||||
// Shuffle packed single-precision (32-bit) floating-point elements in `a` and `b`
|
||||
// using `mask`.
|
||||
// The lower half of the result takes values from `a` and the higher half from `b`.
|
||||
// `mask` is split into four 2-bit control fields, each selecting one element from the inputs.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
pub fn _mm_shuffle_ps(a: f32x4, b: f32x4, mask: i32) -> f32x4 {
|
||||
let mask = (mask & 0xFF) as u8;
|
||||
|
||||
macro_rules! shuffle_done {
|
||||
($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
|
||||
unsafe {
|
||||
simd_shuffle4(a, b, [$x01, $x23, $x45, $x67])
|
||||
}
|
||||
}
|
||||
}
|
||||
macro_rules! shuffle_x67 {
|
||||
($x01:expr, $x23:expr, $x45:expr) => {
|
||||
match (mask >> 6) & 0b11 {
|
||||
0b00 => shuffle_done!($x01, $x23, $x45, 4),
|
||||
0b01 => shuffle_done!($x01, $x23, $x45, 5),
|
||||
0b10 => shuffle_done!($x01, $x23, $x45, 6),
|
||||
_ => shuffle_done!($x01, $x23, $x45, 7),
|
||||
}
|
||||
}
|
||||
}
|
||||
macro_rules! shuffle_x45 {
|
||||
($x01:expr, $x23:expr) => {
|
||||
match (mask >> 4) & 0b11 {
|
||||
0b00 => shuffle_x67!($x01, $x23, 4),
|
||||
0b01 => shuffle_x67!($x01, $x23, 5),
|
||||
0b10 => shuffle_x67!($x01, $x23, 6),
|
||||
_ => shuffle_x67!($x01, $x23, 7),
|
||||
}
|
||||
}
|
||||
}
|
||||
macro_rules! shuffle_x23 {
|
||||
($x01:expr) => {
|
||||
match (mask >> 2) & 0b11 {
|
||||
0b00 => shuffle_x45!($x01, 0),
|
||||
0b01 => shuffle_x45!($x01, 1),
|
||||
0b10 => shuffle_x45!($x01, 2),
|
||||
_ => shuffle_x45!($x01, 3),
|
||||
}
|
||||
}
|
||||
}
|
||||
match mask & 0b11 {
|
||||
0b00 => shuffle_x23!(0),
|
||||
0b01 => shuffle_x23!(1),
|
||||
0b10 => shuffle_x23!(2),
|
||||
_ => shuffle_x23!(3),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[cfg_attr(test, assert_instr(shufps))]
|
||||
#[target_feature = "+sse"]
|
||||
fn _test_mm_shuffle_ps(a: f32x4, b: f32x4) -> f32x4 {
|
||||
_mm_shuffle_ps(a, b, 3)
|
||||
}
|
||||
|
||||
/// Unpack and interleave single-precision (32-bit) floating-point elements
|
||||
/// from the higher half of `a` and `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(unpckhps))]
|
||||
pub fn _mm_unpackhi_ps(a: f32x4, b: f32x4) -> f32x4 {
|
||||
unsafe { simd_shuffle4(a, b, [2, 6, 3, 7]) }
|
||||
}
|
||||
|
||||
/// Unpack and interleave single-precision (32-bit) floating-point elements
|
||||
/// from the lower half of `a` and `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(unpcklps))]
|
||||
pub fn _mm_unpacklo_ps(a: f32x4, b: f32x4) -> f32x4 {
|
||||
unsafe { simd_shuffle4(a, b, [0, 4, 1, 5]) }
|
||||
}
|
||||
|
||||
/// Combine higher half of `a` and `b`. The higher half of `b` occupies the lower
|
||||
/// half of result.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(movhlps))]
|
||||
pub fn _mm_movehl_ps(a: f32x4, b: f32x4) -> f32x4 {
|
||||
unsafe { simd_shuffle4(a, b, [6, 7, 2, 3]) }
|
||||
}
|
||||
|
||||
/// Combine lower half of `a` and `b`. The lower half of `b` occupies the higher
|
||||
/// half of result.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(unpcklpd))]
|
||||
pub fn _mm_movelh_ps(a: f32x4, b: f32x4) -> f32x4 {
|
||||
unsafe { simd_shuffle4(a, b, [0, 1, 4, 5]) }
|
||||
}
|
||||
|
||||
/// Return a mask of the most significant bit of each element in `a`.
|
||||
///
|
||||
/// The mask is stored in the 4 least significant bits of the return value.
|
||||
|
|
@ -217,13 +306,13 @@ extern {
|
|||
fn movmskps(a: f32x4) -> i32;
|
||||
}
|
||||
|
||||
#[cfg(all(test, target_feature = "sse", any(target_arch = "x86", target_arch = "x86_64")))]
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use v128::*;
|
||||
use x86::sse;
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_add_ps() {
|
||||
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
|
|
@ -231,8 +320,7 @@ mod tests {
|
|||
assert_eq!(r, f32x4::new(-101.0, 25.0, 0.0, -15.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_add_ss() {
|
||||
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
|
|
@ -240,8 +328,7 @@ mod tests {
|
|||
assert_eq!(r, f32x4::new(-101.0, 5.0, 0.0, -10.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_sub_ps() {
|
||||
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
|
|
@ -249,8 +336,7 @@ mod tests {
|
|||
assert_eq!(r, f32x4::new(99.0, -15.0, 0.0, -5.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_sub_ss() {
|
||||
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
|
|
@ -258,8 +344,7 @@ mod tests {
|
|||
assert_eq!(r, f32x4::new(99.0, 5.0, 0.0, -10.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_mul_ps() {
|
||||
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
|
|
@ -267,8 +352,7 @@ mod tests {
|
|||
assert_eq!(r, f32x4::new(100.0, 100.0, 0.0, 50.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_mul_ss() {
|
||||
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
|
|
@ -276,8 +360,7 @@ mod tests {
|
|||
assert_eq!(r, f32x4::new(100.0, 5.0, 0.0, -10.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_div_ps() {
|
||||
let a = f32x4::new(-1.0, 5.0, 2.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.2, -5.0);
|
||||
|
|
@ -285,8 +368,7 @@ mod tests {
|
|||
assert_eq!(r, f32x4::new(0.01, 0.25, 10.0, 2.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_div_ss() {
|
||||
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
|
|
@ -294,8 +376,7 @@ mod tests {
|
|||
assert_eq!(r, f32x4::new(0.01, 5.0, 0.0, -10.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_sqrt_ss() {
|
||||
let a = f32x4::new(4.0, 13.0, 16.0, 100.0);
|
||||
let r = sse::_mm_sqrt_ss(a);
|
||||
|
|
@ -303,8 +384,7 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_sqrt_ps() {
|
||||
let a = f32x4::new(4.0, 13.0, 16.0, 100.0);
|
||||
let r = sse::_mm_sqrt_ps(a);
|
||||
|
|
@ -312,8 +392,7 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_rcp_ss() {
|
||||
let a = f32x4::new(4.0, 13.0, 16.0, 100.0);
|
||||
let r = sse::_mm_rcp_ss(a);
|
||||
|
|
@ -321,8 +400,7 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_rcp_ps() {
|
||||
let a = f32x4::new(4.0, 13.0, 16.0, 100.0);
|
||||
let r = sse::_mm_rcp_ps(a);
|
||||
|
|
@ -330,8 +408,7 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_rsqrt_ss() {
|
||||
let a = f32x4::new(4.0, 13.0, 16.0, 100.0);
|
||||
let r = sse::_mm_rsqrt_ss(a);
|
||||
|
|
@ -339,8 +416,7 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_rsqrt_ps() {
|
||||
let a = f32x4::new(4.0, 13.0, 16.0, 100.0);
|
||||
let r = sse::_mm_rsqrt_ps(a);
|
||||
|
|
@ -348,8 +424,7 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_min_ss() {
|
||||
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
|
|
@ -357,8 +432,7 @@ mod tests {
|
|||
assert_eq!(r, f32x4::new(-100.0, 5.0, 0.0, -10.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_min_ps() {
|
||||
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
|
|
@ -366,8 +440,7 @@ mod tests {
|
|||
assert_eq!(r, f32x4::new(-100.0, 5.0, 0.0, -10.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_max_ss() {
|
||||
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
|
|
@ -375,8 +448,7 @@ mod tests {
|
|||
assert_eq!(r, f32x4::new(-1.0, 5.0, 0.0, -10.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_max_ps() {
|
||||
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
|
|
@ -384,8 +456,16 @@ mod tests {
|
|||
assert_eq!(r, f32x4::new(-1.0, 20.0, 0.0, -5.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_shuffle_ps() {
|
||||
let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
|
||||
let b = f32x4::new(5.0, 6.0, 7.0, 8.0);
|
||||
let mask = 0b00_01_01_11;
|
||||
let r = sse::_mm_shuffle_ps(a, b, mask);
|
||||
assert_eq!(r, f32x4::new(4.0, 2.0, 6.0, 5.0));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_unpackhi_ps() {
|
||||
let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
|
||||
let b = f32x4::new(5.0, 6.0, 7.0, 8.0);
|
||||
|
|
@ -393,8 +473,31 @@ mod tests {
|
|||
assert_eq!(r, f32x4::new(3.0, 7.0, 4.0, 8.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_unpacklo_ps() {
|
||||
let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
|
||||
let b = f32x4::new(5.0, 6.0, 7.0, 8.0);
|
||||
let r = sse::_mm_unpacklo_ps(a, b);
|
||||
assert_eq!(r, f32x4::new(1.0, 5.0, 2.0, 6.0));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_movehl_ps() {
|
||||
let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
|
||||
let b = f32x4::new(5.0, 6.0, 7.0, 8.0);
|
||||
let r = sse::_mm_movehl_ps(a, b);
|
||||
assert_eq!(r, f32x4::new(7.0, 8.0, 3.0, 4.0));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_movelh_ps() {
|
||||
let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
|
||||
let b = f32x4::new(5.0, 6.0, 7.0, 8.0);
|
||||
let r = sse::_mm_movelh_ps(a, b);
|
||||
assert_eq!(r, f32x4::new(1.0, 2.0, 5.0, 6.0));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
fn _mm_movemask_ps() {
|
||||
let r = sse::_mm_movemask_ps(f32x4::new(-1.0, 5.0, -5.0, 0.0));
|
||||
assert_eq!(r, 0b0101);
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,8 +1,12 @@
|
|||
use v128::*;
|
||||
use x86::__m128i;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(pblendvb))]
|
||||
pub fn _mm_blendv_epi8(
|
||||
a: __m128i,
|
||||
b: __m128i,
|
||||
|
|
@ -57,13 +61,14 @@ extern {
|
|||
fn dpps(a: f32x4, b: f32x4, imm8: u8) -> f32x4;
|
||||
}
|
||||
|
||||
#[cfg(all(test, target_feature = "sse4.1", any(target_arch = "x86", target_arch = "x86_64")))]
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use v128::*;
|
||||
use x86::sse41;
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[simd_test = "sse4.1"]
|
||||
fn _mm_blendv_epi8() {
|
||||
let a = i8x16::new(
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
|
|
@ -76,8 +81,7 @@ mod tests {
|
|||
assert_eq!(sse41::_mm_blendv_epi8(a, b, mask), e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[simd_test = "sse4.1"]
|
||||
fn _mm_dp_pd() {
|
||||
let a = f64x2::new(2.0, 3.0);
|
||||
let b = f64x2::new(1.0, 4.0);
|
||||
|
|
@ -85,8 +89,7 @@ mod tests {
|
|||
assert_eq!(sse41::_mm_dp_pd(a, b, 0b00110001), e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[simd_test = "sse4.1"]
|
||||
fn _mm_dp_ps() {
|
||||
let a = f32x4::new(2.0, 3.0, 1.0, 10.0);
|
||||
let b = f32x4::new(1.0, 4.0, 0.5, 10.0);
|
||||
|
|
|
|||
|
|
@ -40,13 +40,14 @@ extern {
|
|||
fn pcmpestri128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> i32;
|
||||
}
|
||||
|
||||
#[cfg(all(test, target_feature = "sse4.2", any(target_arch = "x86", target_arch = "x86_64")))]
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use v128::*;
|
||||
use x86::{__m128i, sse42};
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse4.2"]
|
||||
#[simd_test = "sse4.2"]
|
||||
fn _mm_cmpestri() {
|
||||
let a = &b"bar "[..];
|
||||
let b = &b"foobar "[..];
|
||||
|
|
|
|||
|
|
@ -1,15 +1,17 @@
|
|||
use v128::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
/// Compute the absolute value of packed 8-bit signed integers in `a` and
|
||||
/// return the unsigned results.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(pabsb))]
|
||||
pub fn _mm_abs_epi8(a: i8x16) -> u8x16 {
|
||||
unsafe { pabsb128(a) }
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Shuffle bytes from `a` according to the content of `b`.
|
||||
///
|
||||
/// The last 4 bits of each byte of `b` are used as addresses
|
||||
|
|
@ -36,6 +38,7 @@ pub fn _mm_abs_epi8(a: i8x16) -> u8x16 {
|
|||
/// ```
|
||||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(pshufb))]
|
||||
pub fn _mm_shuffle_epi8(a: u8x16, b: u8x16) -> u8x16 {
|
||||
unsafe { pshufb128(a, b) }
|
||||
}
|
||||
|
|
@ -50,20 +53,20 @@ extern {
|
|||
fn pshufb128(a: u8x16, b: u8x16) -> u8x16;
|
||||
}
|
||||
|
||||
#[cfg(all(test, target_feature = "ssse3", any(target_arch = "x86", target_arch = "x86_64")))]
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use v128::*;
|
||||
use x86::ssse3 as ssse3;
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[simd_test = "ssse3"]
|
||||
fn _mm_abs_epi8() {
|
||||
let r = ssse3::_mm_abs_epi8(i8x16::splat(-5));
|
||||
assert_eq!(r, u8x16::splat(5));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[simd_test = "ssse3"]
|
||||
fn _mm_shuffle_epi8() {
|
||||
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let b = u8x16::new(4, 128, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0);
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
//! provides a quick overview of the available instructions.
|
||||
|
||||
#[cfg(test)]
|
||||
use assert_instr::assert_instr;
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
// TODO: LLVM-CODEGEN ERROR: LLVM ERROR: Cannot select: intrinsic %llvm.x86.tbm.bextri.u32
|
||||
/*
|
||||
|
|
@ -252,40 +252,38 @@ pub fn _tzmsk_u64(x: u64) -> u64 {
|
|||
!x & (x.wrapping_sub(1))
|
||||
}
|
||||
|
||||
#[cfg(all(test, target_feature = "tbm", any(target_arch = "x86", target_arch = "x86_64")))]
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::tbm;
|
||||
|
||||
/*
|
||||
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
fn _bextr_u32() {
|
||||
assert_eq!(tbm::_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
fn _bextr_u64() {
|
||||
assert_eq!(tbm::_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64);
|
||||
}
|
||||
*/
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
fn _blcfill_u32() {
|
||||
assert_eq!(tbm::_blcfill_u32(0b0101_0111u32), 0b0101_0000u32);
|
||||
assert_eq!(tbm::_blcfill_u32(0b1111_1111u32), 0u32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
fn _blcfill_u64() {
|
||||
assert_eq!(tbm::_blcfill_u64(0b0101_0111u64), 0b0101_0000u64);
|
||||
assert_eq!(tbm::_blcfill_u64(0b1111_1111u64), 0u64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
fn _blci_u32() {
|
||||
assert_eq!(tbm::_blci_u32(0b0101_0000u32),
|
||||
0b1111_1111_1111_1111_1111_1111_1111_1110u32);
|
||||
|
|
@ -293,8 +291,8 @@ mod tests {
|
|||
0b1111_1111_1111_1111_1111_1110_1111_1111u32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
fn _blci_u64() {
|
||||
assert_eq!(tbm::_blci_u64(0b0101_0000u64),
|
||||
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110u64);
|
||||
|
|
@ -302,99 +300,92 @@ mod tests {
|
|||
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110_1111_1111u64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
fn _blcic_u32() {
|
||||
assert_eq!(tbm::_blcic_u32(0b0101_0001u32), 0b0000_0010u32);
|
||||
assert_eq!(tbm::_blcic_u32(0b1111_1111u32), 0b1_0000_0000u32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
fn _blcic_u64() {
|
||||
assert_eq!(tbm::_blcic_u64(0b0101_0001u64), 0b0000_0010u64);
|
||||
assert_eq!(tbm::_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
fn _blcmsk_u32() {
|
||||
assert_eq!(tbm::_blcmsk_u32(0b0101_0001u32), 0b0000_0011u32);
|
||||
assert_eq!(tbm::_blcmsk_u32(0b1111_1111u32), 0b1_1111_1111u32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
fn _blcmsk_u64() {
|
||||
assert_eq!(tbm::_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64);
|
||||
assert_eq!(tbm::_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
fn _blcs_u32() {
|
||||
assert_eq!(tbm::_blcs_u32(0b0101_0001u32), 0b0101_0011u32);
|
||||
assert_eq!(tbm::_blcs_u32(0b1111_1111u32), 0b1_1111_1111u32);
|
||||
}
|
||||
|
||||
// 64-bit counterpart of the `_blcs_u32` test, compiled out when
// `target_arch = "x86"`. The expected values equal `x | (x + 1)`: the input
// with its lowest clear bit set.
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
fn _blcs_u64() {
|
||||
assert_eq!(tbm::_blcs_u64(0b0101_0001u64), 0b0101_0011u64);
|
||||
assert_eq!(tbm::_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64);
|
||||
}
|
||||
|
||||
// Checks `tbm::_blsfill_u32` on a typical value and on zero. The expected
// values equal `x | x.wrapping_sub(1)`: every bit below the lowest set bit is
// filled in, so a zero input yields all ones.
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
fn _blsfill_u32() {
|
||||
assert_eq!(tbm::_blsfill_u32(0b0101_0100u32), 0b0101_0111u32);
|
||||
assert_eq!(tbm::_blsfill_u32(0u32), 0b1111_1111_1111_1111_1111_1111_1111_1111u32);
|
||||
}
|
||||
|
||||
// 64-bit counterpart of the `_blsfill_u32` test, compiled out when
// `target_arch = "x86"`. The expected values equal `x | x.wrapping_sub(1)`:
// every bit below the lowest set bit is filled in, so zero yields all ones.
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
fn _blsfill_u64() {
|
||||
assert_eq!(tbm::_blsfill_u64(0b0101_0100u64), 0b0101_0111u64);
|
||||
assert_eq!(tbm::_blsfill_u64(0u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64);
|
||||
}
|
||||
|
||||
// Checks `tbm::_blsic_u32` on a typical value and on zero. The expected
// values equal `!x | x.wrapping_sub(1)`: all ones except the input's lowest
// set bit, which is cleared; a zero input yields all ones.
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
fn _blsic_u32() {
|
||||
assert_eq!(tbm::_blsic_u32(0b0101_0100u32), 0b1111_1111_1111_1111_1111_1111_1111_1011u32);
|
||||
assert_eq!(tbm::_blsic_u32(0u32), 0b1111_1111_1111_1111_1111_1111_1111_1111u32);
|
||||
}
|
||||
|
||||
// 64-bit counterpart of the `_blsic_u32` test, compiled out when
// `target_arch = "x86"`. The expected values equal `!x | x.wrapping_sub(1)`:
// all ones except the input's lowest set bit; zero yields all ones.
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
fn _blsic_u64() {
|
||||
assert_eq!(tbm::_blsic_u64(0b0101_0100u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1011u64);
|
||||
assert_eq!(tbm::_blsic_u64(0u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64);
|
||||
}
|
||||
|
||||
// Checks `tbm::_t1mskc_u32` on a value with trailing ones and on zero. The
// expected values equal `!x | (x + 1)`: the run of trailing one bits is
// cleared and every other bit is set; a zero input yields all ones.
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
fn _t1mskc_u32() {
|
||||
assert_eq!(tbm::_t1mskc_u32(0b0101_0111u32), 0b1111_1111_1111_1111_1111_1111_1111_1000u32);
|
||||
assert_eq!(tbm::_t1mskc_u32(0u32), 0b1111_1111_1111_1111_1111_1111_1111_1111u32);
|
||||
}
|
||||
|
||||
// 64-bit counterpart of the `_t1mskc_u32` test, compiled out when
// `target_arch = "x86"`. The expected values equal `!x | (x + 1)`: the run of
// trailing one bits is cleared and every other bit is set.
// The test is named after the function it exercises, `tbm::_t1mskc_u64`.
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
fn _t1mskc_u64() {
|
||||
assert_eq!(tbm::_t1mskc_u64(0b0101_0111u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1000u64);
|
||||
assert_eq!(tbm::_t1mskc_u64(0u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64);
|
||||
}
|
||||
|
||||
// Checks `tbm::_tzmsk_u32` on a value with trailing zeros and on one without.
// The expected values equal `!x & x.wrapping_sub(1)`: only the positions of
// the input's trailing zero bits are set, so an odd input yields zero.
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
fn _tzmsk_u32() {
|
||||
assert_eq!(tbm::_tzmsk_u32(0b0101_1000u32), 0b0000_0111u32);
|
||||
assert_eq!(tbm::_tzmsk_u32(0b0101_1001u32), 0b0000_0000u32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+tbm"]
|
||||
#[simd_test = "tbm"]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
fn _tzmsk_u64() {
|
||||
assert_eq!(tbm::_tzmsk_u64(0b0101_1000u64), 0b0000_0111u64);
|
||||
assert_eq!(tbm::_tzmsk_u64(0b0101_1001u64), 0b0000_0000u64);
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
[package]
|
||||
name = "assert-instr"
|
||||
name = "stdsimd-test"
|
||||
version = "0.1.0"
|
||||
authors = ["Alex Crichton <alex@alexcrichton.com>"]
|
||||
|
||||
[dependencies]
|
||||
assert-instr-macro = { path = "assert-instr-macro" }
|
||||
simd-test-macro = { path = "simd-test-macro" }
|
||||
backtrace = "0.3"
|
||||
cc = "1.0"
|
||||
lazy_static = "0.2"
|
||||
|
|
@ -44,7 +44,7 @@ pub fn assert_instr(attr: TokenStream, item: TokenStream) -> TokenStream {
|
|||
#[allow(non_snake_case)]
|
||||
{ignore}
|
||||
fn assert_instr_{name}() {{
|
||||
::assert_instr::assert({name} as usize,
|
||||
::stdsimd_test::assert({name} as usize,
|
||||
\"{name}\",
|
||||
\"{instr}\");
|
||||
}}
|
||||
11
library/stdarch/stdsimd-test/simd-test-macro/Cargo.toml
Normal file
11
library/stdarch/stdsimd-test/simd-test-macro/Cargo.toml
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
[package]
|
||||
name = "simd-test-macro"
|
||||
version = "0.1.0"
|
||||
authors = ["Alex Crichton <alex@alexcrichton.com>"]
|
||||
|
||||
[lib]
|
||||
proc-macro = true
|
||||
|
||||
[dependencies]
|
||||
proc-macro2 = { version = "0.1", features = ["unstable"] }
|
||||
quote = { git = 'https://github.com/dtolnay/quote' }
|
||||
76
library/stdarch/stdsimd-test/simd-test-macro/src/lib.rs
Normal file
76
library/stdarch/stdsimd-test/simd-test-macro/src/lib.rs
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
//! Implementation of the `#[simd_test]` macro
|
||||
//!
|
||||
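//! This macro expands to a `#[test]` function which checks at runtime (via
|
||||
//! `cfg_feature_enabled!`) that the local machine supports the requested target
|
||||
//! feature before calling the inner test function; when the feature is missing
|
||||
//! the inner function is not called.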
|
||||
|
||||
#![feature(proc_macro)]
|
||||
|
||||
#[macro_use]
|
||||
extern crate quote;
|
||||
extern crate proc_macro;
|
||||
extern crate proc_macro2;
|
||||
|
||||
use proc_macro2::{TokenStream, Term, TokenNode, TokenTree};
|
||||
use proc_macro2::Literal;
|
||||
|
||||
fn string(s: &str) -> TokenTree {
|
||||
TokenTree {
|
||||
kind: TokenNode::Literal(Literal::string(s)),
|
||||
|
||||
span: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[proc_macro_attribute]
|
||||
pub fn simd_test(attr: proc_macro::TokenStream,
|
||||
item: proc_macro::TokenStream) -> proc_macro::TokenStream {
|
||||
let tokens = TokenStream::from(attr).into_iter().collect::<Vec<_>>();
|
||||
if tokens.len() != 2 {
|
||||
panic!("expected #[simd_test = \"feature\"]");
|
||||
}
|
||||
match tokens[0].kind {
|
||||
TokenNode::Op('=', _) => {}
|
||||
_ => panic!("expected #[simd_test = \"feature\"]"),
|
||||
}
|
||||
let target_feature = &tokens[1];
|
||||
let enable_feature = match tokens[1].kind {
|
||||
TokenNode::Literal(ref l) => l.to_string(),
|
||||
_ => panic!("expected #[simd_test = \"feature\"]"),
|
||||
};
|
||||
let enable_feature = enable_feature.trim_left_matches('"')
|
||||
.trim_right_matches('"');
|
||||
let enable_feature = string(&format!("+{}", enable_feature));
|
||||
let item = TokenStream::from(item);
|
||||
let name = find_name(item.clone());
|
||||
|
||||
let name: TokenStream = name.as_str().parse().unwrap();
|
||||
|
||||
let ret: TokenStream = quote! {
|
||||
#[test]
|
||||
fn #name() {
|
||||
if cfg_feature_enabled!(#target_feature) {
|
||||
return #name();
|
||||
}
|
||||
|
||||
#[target_feature = #enable_feature]
|
||||
#item
|
||||
}
|
||||
}.into();
|
||||
ret.into()
|
||||
}
|
||||
|
||||
fn find_name(item: TokenStream) -> Term {
|
||||
let mut tokens = item.into_iter();
|
||||
while let Some(tok) = tokens.next() {
|
||||
if let TokenNode::Term(word) = tok.kind {
|
||||
if word.as_str() == "fn" {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match tokens.next().map(|t| t.kind) {
|
||||
Some(TokenNode::Term(word)) => word,
|
||||
_ => panic!("failed to find function name"),
|
||||
}
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
//! Runtime support needed for the `#![assert_instr]` macro
|
||||
//! Runtime support needed for testing the stdsimd crate.
|
||||
//!
|
||||
//! This basically just disassembles the current executable and then parses the
|
||||
//! output once globally and then provides the `assert` function which makes
|
||||
|
|
@ -7,6 +7,7 @@
|
|||
#![feature(proc_macro)]
|
||||
|
||||
extern crate assert_instr_macro;
|
||||
extern crate simd_test_macro;
|
||||
extern crate backtrace;
|
||||
extern crate cc;
|
||||
extern crate rustc_demangle;
|
||||
|
|
@ -19,6 +20,7 @@ use std::process::Command;
|
|||
use std::str;
|
||||
|
||||
pub use assert_instr_macro::*;
|
||||
pub use simd_test_macro::*;
|
||||
|
||||
lazy_static! {
|
||||
static ref DISASSEMBLY: HashMap<String, Vec<Function>> = disassemble_myself();
|
||||
25
library/stdarch/tests/cpu-detection.rs
Normal file
25
library/stdarch/tests/cpu-detection.rs
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
#![feature(cfg_target_feature)]
|
||||
|
||||
#[macro_use]
|
||||
extern crate stdsimd;
|
||||
extern crate cupid;
|
||||
|
||||
// Cross-checks stdsimd's `cfg_feature_enabled!` against the `cupid` crate:
// for each listed x86 feature, both must give the same answer on the machine
// running the test. Only compiled for `x86` and `x86_64` targets.
#[test]
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
fn works() {
|
||||
// Panics (via `unwrap`) if `cupid::master()` yields no information.
let information = cupid::master().unwrap();
|
||||
assert_eq!(cfg_feature_enabled!("sse"), information.sse());
|
||||
assert_eq!(cfg_feature_enabled!("sse2"), information.sse2());
|
||||
assert_eq!(cfg_feature_enabled!("sse3"), information.sse3());
|
||||
assert_eq!(cfg_feature_enabled!("ssse3"), information.ssse3());
|
||||
assert_eq!(cfg_feature_enabled!("sse4.1"), information.sse4_1());
|
||||
assert_eq!(cfg_feature_enabled!("sse4.2"), information.sse4_2());
|
||||
assert_eq!(cfg_feature_enabled!("avx"), information.avx());
|
||||
assert_eq!(cfg_feature_enabled!("avx2"), information.avx2());
|
||||
assert_eq!(cfg_feature_enabled!("fma"), information.fma());
|
||||
// The feature named "bmi" here is compared with cupid's `bmi1()` accessor.
assert_eq!(cfg_feature_enabled!("bmi"), information.bmi1());
|
||||
assert_eq!(cfg_feature_enabled!("bmi2"), information.bmi2());
|
||||
assert_eq!(cfg_feature_enabled!("popcnt"), information.popcnt());
|
||||
|
||||
// TODO: tbm, abm, lzcnt
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue