diff --git a/library/stdarch/crates/core_arch/src/x86/sse41.rs b/library/stdarch/crates/core_arch/src/x86/sse41.rs
index 21a580e44f00..1970bdf7b0ba 100644
--- a/library/stdarch/crates/core_arch/src/x86/sse41.rs
+++ b/library/stdarch/crates/core_arch/src/x86/sse41.rs
@@ -59,9 +59,11 @@ pub const _MM_FROUND_NEARBYINT: i32 = _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTI
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pblendvb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i {
-    let mask: i8x16 = simd_lt(mask.as_i8x16(), i8x16::ZERO);
-    transmute(simd_select(mask, b.as_i8x16(), a.as_i8x16()))
+pub fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i {
+    unsafe {
+        let mask: i8x16 = simd_lt(mask.as_i8x16(), i8x16::ZERO);
+        transmute(simd_select(mask, b.as_i8x16(), a.as_i8x16()))
+    }
 }
 
 /// Blend packed 16-bit integers from `a` and `b` using the mask `IMM8`.
@@ -76,22 +78,24 @@ pub unsafe fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i
 #[cfg_attr(test, assert_instr(pblendw, IMM8 = 0xB1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
+pub fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
-    transmute::<i16x8, _>(simd_shuffle!(
-        a.as_i16x8(),
-        b.as_i16x8(),
-        [
-            [0, 8][IMM8 as usize & 1],
-            [1, 9][(IMM8 >> 1) as usize & 1],
-            [2, 10][(IMM8 >> 2) as usize & 1],
-            [3, 11][(IMM8 >> 3) as usize & 1],
-            [4, 12][(IMM8 >> 4) as usize & 1],
-            [5, 13][(IMM8 >> 5) as usize & 1],
-            [6, 14][(IMM8 >> 6) as usize & 1],
-            [7, 15][(IMM8 >> 7) as usize & 1],
-        ]
-    ))
+    unsafe {
+        transmute::<i16x8, _>(simd_shuffle!(
+            a.as_i16x8(),
+            b.as_i16x8(),
+            [
+                [0, 8][IMM8 as usize & 1],
+                [1, 9][(IMM8 >> 1) as usize & 1],
+                [2, 10][(IMM8 >> 2) as usize & 1],
+                [3, 11][(IMM8 >> 3) as usize & 1],
+                [4, 12][(IMM8 >> 4) as usize & 1],
+                [5, 13][(IMM8 >> 5) as usize & 1],
+                [6, 14][(IMM8 >> 6) as usize & 1],
+                [7, 15][(IMM8 >> 7) as usize & 1],
+            ]
+        ))
+    }
 }
 
 /// Blend packed double-precision (64-bit) floating-point elements from `a`
@@ -102,9 +106,11 @@ pub unsafe fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(blendvpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
-    let mask: i64x2 = simd_lt(transmute::<_, i64x2>(mask), i64x2::ZERO);
-    transmute(simd_select(mask, b.as_f64x2(), a.as_f64x2()))
+pub fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
+    unsafe {
+        let mask: i64x2 = simd_lt(transmute::<_, i64x2>(mask), i64x2::ZERO);
+        transmute(simd_select(mask, b.as_f64x2(), a.as_f64x2()))
+    }
 }
 
 /// Blend packed single-precision (32-bit) floating-point elements from `a`
@@ -115,9 +121,11 @@ pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(blendvps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
-    let mask: i32x4 = simd_lt(transmute::<_, i32x4>(mask), i32x4::ZERO);
-    transmute(simd_select(mask, b.as_f32x4(), a.as_f32x4()))
+pub fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
+    unsafe {
+        let mask: i32x4 = simd_lt(transmute::<_, i32x4>(mask), i32x4::ZERO);
+        transmute(simd_select(mask, b.as_f32x4(), a.as_f32x4()))
+    }
 }
 
 /// Blend packed double-precision (64-bit) floating-point elements from `a`
@@ -132,13 +140,15 @@ pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
 #[cfg_attr(test, assert_instr(blendps, IMM2 = 0b10))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
+pub fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
     static_assert_uimm_bits!(IMM2, 2);
-    transmute::<f64x2, _>(simd_shuffle!(
-        a.as_f64x2(),
-        b.as_f64x2(),
-        [[0, 2][IMM2 as usize & 1], [1, 3][(IMM2 >> 1) as usize & 1]]
-    ))
+    unsafe {
+        transmute::<f64x2, _>(simd_shuffle!(
+            a.as_f64x2(),
+            b.as_f64x2(),
+            [[0, 2][IMM2 as usize & 1], [1, 3][(IMM2 >> 1) as usize & 1]]
+        ))
+    }
 }
 
 /// Blend packed single-precision (32-bit) floating-point elements from `a`
@@ -150,18 +160,20 @@ pub unsafe fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
 #[cfg_attr(test, assert_instr(blendps, IMM4 = 0b0101))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
+pub fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
     static_assert_uimm_bits!(IMM4, 4);
-    transmute::<f32x4, _>(simd_shuffle!(
-        a.as_f32x4(),
-        b.as_f32x4(),
-        [
-            [0, 4][IMM4 as usize & 1],
-            [1, 5][(IMM4 >> 1) as usize & 1],
-            [2, 6][(IMM4 >> 2) as usize & 1],
-            [3, 7][(IMM4 >> 3) as usize & 1],
-        ]
-    ))
+    unsafe {
+        transmute::<f32x4, _>(simd_shuffle!(
+            a.as_f32x4(),
+            b.as_f32x4(),
+            [
+                [0, 4][IMM4 as usize & 1],
+                [1, 5][(IMM4 >> 1) as usize & 1],
+                [2, 6][(IMM4 >> 2) as usize & 1],
+                [3, 7][(IMM4 >> 3) as usize & 1],
+            ]
+        ))
+    }
 }
 
 /// Extracts a single-precision (32-bit) floating-point element from `a`,
@@ -194,9 +206,9 @@ pub unsafe fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
 #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(extractps, IMM8 = 0))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 {
+pub fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 {
     static_assert_uimm_bits!(IMM8, 2);
-    simd_extract!(a, IMM8 as u32, f32).to_bits() as i32
+    unsafe { simd_extract!(a, IMM8 as u32, f32).to_bits() as i32 }
 }
 
 /// Extracts an 8-bit integer from `a`, selected with `IMM8`. Returns a 32-bit
@@ -210,9 +222,9 @@ pub unsafe fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 {
 #[cfg_attr(test, assert_instr(pextrb, IMM8 = 0))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_extract_epi8<const IMM8: i32>(a: __m128i) -> i32 {
+pub fn _mm_extract_epi8<const IMM8: i32>(a: __m128i) -> i32 {
     static_assert_uimm_bits!(IMM8, 4);
-    simd_extract!(a.as_u8x16(), IMM8 as u32, u8) as i32
+    unsafe { simd_extract!(a.as_u8x16(), IMM8 as u32, u8) as i32 }
 }
 
 /// Extracts an 32-bit integer from `a` selected with `IMM8`
@@ -223,9 +235,9 @@ pub unsafe fn _mm_extract_epi8<const IMM8: i32>(a: __m128i) -> i32 {
 #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(extractps, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_extract_epi32<const IMM8: i32>(a: __m128i) -> i32 {
+pub fn _mm_extract_epi32<const IMM8: i32>(a: __m128i) -> i32 {
     static_assert_uimm_bits!(IMM8, 2);
-    simd_extract!(a.as_i32x4(), IMM8 as u32, i32)
+    unsafe { simd_extract!(a.as_i32x4(), IMM8 as u32, i32) }
 }
 
 /// Select a single value in `b` to store at some position in `a`,
@@ -257,9 +269,9 @@ pub unsafe fn _mm_extract_epi32<const IMM8: i32>(a: __m128i) -> i32 {
 #[cfg_attr(test, assert_instr(insertps, IMM8 = 0b1010))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_insert_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
+pub fn _mm_insert_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
     static_assert_uimm_bits!(IMM8, 8);
-    insertps(a, b, IMM8 as u8)
+    unsafe { insertps(a, b, IMM8 as u8) }
 }
 
 /// Returns a copy of `a` with the 8-bit integer from `i` inserted at a
@@ -271,9 +283,9 @@ pub unsafe fn _mm_insert_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
 #[cfg_attr(test, assert_instr(pinsrb, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_insert_epi8<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
+pub fn _mm_insert_epi8<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
     static_assert_uimm_bits!(IMM8, 4);
-    transmute(simd_insert!(a.as_i8x16(), IMM8 as u32, i as i8))
+    unsafe { transmute(simd_insert!(a.as_i8x16(), IMM8 as u32, i as i8)) }
 }
 
 /// Returns a copy of `a` with the 32-bit integer from `i` inserted at a
@@ -285,9 +297,9 @@ pub unsafe fn _mm_insert_epi8<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
 #[cfg_attr(test, assert_instr(pinsrd, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
+pub fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
     static_assert_uimm_bits!(IMM8, 2);
-    transmute(simd_insert!(a.as_i32x4(), IMM8 as u32, i))
+    unsafe { transmute(simd_insert!(a.as_i32x4(), IMM8 as u32, i)) }
 }
 
 /// Compares packed 8-bit integers in `a` and `b` and returns packed maximum
@@ -298,10 +310,12 @@ pub unsafe fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmaxsb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
-    let a = a.as_i8x16();
-    let b = b.as_i8x16();
-    transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
+pub fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_i8x16();
+        let b = b.as_i8x16();
+        transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
+    }
 }
 
 /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
@@ -312,10 +326,12 @@ pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_u16x8(); - let b = b.as_u16x8(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u16x8(); + let b = b.as_u16x8(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compares packed 32-bit integers in `a` and `b`, and returns packed maximum @@ -326,10 +342,12 @@ pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmaxsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_i32x4(); - let b = b.as_i32x4(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_i32x4(); + let b = b.as_i32x4(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed @@ -340,10 +358,12 @@ pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmaxud))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_u32x4(); - let b = b.as_u32x4(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u32x4(); + let b = b.as_u32x4(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compares packed 8-bit integers in `a` and `b` and returns packed minimum @@ -354,10 +374,12 @@ pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pminsb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_i8x16(); - let b = b.as_i8x16(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_i8x16(); + let b = b.as_i8x16(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed @@ -368,10 +390,12 @@ pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pminuw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_u16x8(); - let b = b.as_u16x8(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u16x8(); + let b = b.as_u16x8(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compares packed 32-bit integers in `a` and `b`, and returns packed minimum @@ -382,10 +406,12 @@ pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pminsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_i32x4(); - let b = b.as_i32x4(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_i32x4(); + let b = b.as_i32x4(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compares packed unsigned 32-bit 
@@ -396,10 +422,12 @@ pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pminud))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
-    let a = a.as_u32x4();
-    let b = b.as_u32x4();
-    transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
+pub fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_u32x4();
+        let b = b.as_u32x4();
+        transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
+    }
 }
 
 /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
@@ -410,8 +438,8 @@ pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(packusdw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
-    transmute(packusdw(a.as_i32x4(), b.as_i32x4()))
+pub fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute(packusdw(a.as_i32x4(), b.as_i32x4())) }
 }
 
 /// Compares packed 64-bit integers in `a` and `b` for equality
@@ -421,8 +449,8 @@ pub unsafe fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pcmpeqq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
-    transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2()))
+pub fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) }
 }
 
 /// Sign extend packed 8-bit integers in `a` to packed 16-bit integers
@@ -432,10 +460,12 @@ pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovsxbw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
-    let a = a.as_i8x16();
-    let a: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
-    transmute(simd_cast::<_, i16x8>(a))
+pub fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_i8x16();
+        let a: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+        transmute(simd_cast::<_, i16x8>(a))
+    }
 }
 
 /// Sign extend packed 8-bit integers in `a` to packed 32-bit integers
@@ -445,10 +475,12 @@ pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovsxbd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
-    let a = a.as_i8x16();
-    let a: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
-    transmute(simd_cast::<_, i32x4>(a))
+pub fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_i8x16();
+        let a: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
+        transmute(simd_cast::<_, i32x4>(a))
+    }
 }
 
 /// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed
@@ -459,10 +491,12 @@ pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovsxbq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
-    let a = a.as_i8x16();
-    let a: i8x2 = simd_shuffle!(a, a, [0, 1]);
-    transmute(simd_cast::<_, i64x2>(a))
+pub fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_i8x16();
+        let a: i8x2 = simd_shuffle!(a, a, [0, 1]);
+        transmute(simd_cast::<_, i64x2>(a))
+    }
 }
 
 /// Sign extend packed 16-bit integers in `a` to packed 32-bit integers
@@ -472,10 +506,12 @@ pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovsxwd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
-    let a = a.as_i16x8();
-    let a: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
-    transmute(simd_cast::<_, i32x4>(a))
+pub fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_i16x8();
+        let a: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
+        transmute(simd_cast::<_, i32x4>(a))
+    }
 }
 
 /// Sign extend packed 16-bit integers in `a` to packed 64-bit integers
@@ -485,10 +521,12 @@ pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovsxwq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
-    let a = a.as_i16x8();
-    let a: i16x2 = simd_shuffle!(a, a, [0, 1]);
-    transmute(simd_cast::<_, i64x2>(a))
+pub fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_i16x8();
+        let a: i16x2 = simd_shuffle!(a, a, [0, 1]);
+        transmute(simd_cast::<_, i64x2>(a))
+    }
 }
 
 /// Sign extend packed 32-bit integers in `a` to packed 64-bit integers
@@ -498,10 +536,12 @@ pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovsxdq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
-    let a = a.as_i32x4();
-    let a: i32x2 = simd_shuffle!(a, a, [0, 1]);
-    transmute(simd_cast::<_, i64x2>(a))
+pub fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_i32x4();
+        let a: i32x2 = simd_shuffle!(a, a, [0, 1]);
+        transmute(simd_cast::<_, i64x2>(a))
+    }
 }
 
 /// Zeroes extend packed unsigned 8-bit integers in `a` to packed 16-bit integers
@@ -511,10 +551,12 @@ pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovzxbw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
-    let a = a.as_u8x16();
-    let a: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
-    transmute(simd_cast::<_, i16x8>(a))
+pub fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_u8x16();
+        let a: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+        transmute(simd_cast::<_, i16x8>(a))
+    }
 }
 
 /// Zeroes extend packed unsigned 8-bit integers in `a` to packed 32-bit integers
@@ -524,10 +566,12 @@ pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovzxbd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
-    let a = a.as_u8x16();
-    let a: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
-    transmute(simd_cast::<_, i32x4>(a))
+pub fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_u8x16();
+        let a: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
+        transmute(simd_cast::<_, i32x4>(a))
+    }
 }
 
 /// Zeroes extend packed unsigned 8-bit integers in `a` to packed 64-bit integers
@@ -537,10 +581,12 @@ pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovzxbq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
-    let a = a.as_u8x16();
-    let a: u8x2 = simd_shuffle!(a, a, [0, 1]);
-    transmute(simd_cast::<_, i64x2>(a))
+pub fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_u8x16();
+        let a: u8x2 = simd_shuffle!(a, a, [0, 1]);
+        transmute(simd_cast::<_, i64x2>(a))
+    }
 }
 
 /// Zeroes extend packed unsigned 16-bit integers in `a`
@@ -551,10 +597,12 @@ pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovzxwd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
-    let a = a.as_u16x8();
-    let a: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
-    transmute(simd_cast::<_, i32x4>(a))
+pub fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_u16x8();
+        let a: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
+        transmute(simd_cast::<_, i32x4>(a))
+    }
 }
 
 /// Zeroes extend packed unsigned 16-bit integers in `a`
@@ -565,10 +613,12 @@ pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovzxwq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
-    let a = a.as_u16x8();
-    let a: u16x2 = simd_shuffle!(a, a, [0, 1]);
-    transmute(simd_cast::<_, i64x2>(a))
+pub fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_u16x8();
+        let a: u16x2 = simd_shuffle!(a, a, [0, 1]);
+        transmute(simd_cast::<_, i64x2>(a))
+    }
 }
 
 /// Zeroes extend packed unsigned 32-bit integers in `a`
@@ -579,10 +629,12 @@ pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovzxdq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
-    let a = a.as_u32x4();
-    let a: u32x2 = simd_shuffle!(a, a, [0, 1]);
-    transmute(simd_cast::<_, i64x2>(a))
+pub fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_u32x4();
+        let a: u32x2 = simd_shuffle!(a, a, [0, 1]);
+        transmute(simd_cast::<_, i64x2>(a))
+    }
 }
 
 /// Returns the dot product of two __m128d vectors.
@@ -599,9 +651,11 @@ pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(dppd, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_dp_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
-    static_assert_uimm_bits!(IMM8, 8);
-    dppd(a, b, IMM8 as u8)
+pub fn _mm_dp_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        dppd(a, b, IMM8 as u8)
+    }
 }
 
 /// Returns the dot product of two __m128 vectors.
@@ -618,9 +672,9 @@ pub unsafe fn _mm_dp_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
 #[cfg_attr(test, assert_instr(dpps, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_dp_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
+pub fn _mm_dp_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
     static_assert_uimm_bits!(IMM8, 8);
-    dpps(a, b, IMM8 as u8)
+    unsafe { dpps(a, b, IMM8 as u8) }
 }
 
 /// Round the packed double-precision (64-bit) floating-point elements in `a`
@@ -632,8 +686,8 @@ pub unsafe fn _mm_dp_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d {
-    simd_floor(a)
+pub fn _mm_floor_pd(a: __m128d) -> __m128d {
+    unsafe { simd_floor(a) }
 }
 
 /// Round the packed single-precision (32-bit) floating-point elements in `a`
@@ -645,8 +699,8 @@ pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 {
-    simd_floor(a)
+pub fn _mm_floor_ps(a: __m128) -> __m128 {
+    unsafe { simd_floor(a) }
 }
 
 /// Round the lower double-precision (64-bit) floating-point element in `b`
@@ -660,8 +714,8 @@ pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundsd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d {
-    roundsd(a, b, _MM_FROUND_FLOOR)
+pub fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d {
+    unsafe { roundsd(a, b, _MM_FROUND_FLOOR) }
 }
 
 /// Round the lower single-precision (32-bit) floating-point element in `b`
@@ -675,8 +729,8 @@ pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 {
-    roundss(a, b, _MM_FROUND_FLOOR)
+pub fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 {
+    unsafe { roundss(a, b, _MM_FROUND_FLOOR) }
 }
 
 /// Round the packed double-precision (64-bit) floating-point elements in `a`
@@ -688,8 +742,8 @@ pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d {
-    simd_ceil(a)
+pub fn _mm_ceil_pd(a: __m128d) -> __m128d {
+    unsafe { simd_ceil(a) }
 }
 
 /// Round the packed single-precision (32-bit) floating-point elements in `a`
@@ -701,8 +755,8 @@ pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 {
-    simd_ceil(a)
+pub fn _mm_ceil_ps(a: __m128) -> __m128 {
+    unsafe { simd_ceil(a) }
 }
 
 /// Round the lower double-precision (64-bit) floating-point element in `b`
@@ -716,8 +770,8 @@ pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundsd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d {
-    roundsd(a, b, _MM_FROUND_CEIL)
+pub fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d {
+    unsafe { roundsd(a, b, _MM_FROUND_CEIL) }
 }
 
 /// Round the lower single-precision (32-bit) floating-point element in `b`
@@ -731,8 +785,8 @@ pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
-    roundss(a, b, _MM_FROUND_CEIL)
+pub fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
+    unsafe { roundss(a, b, _MM_FROUND_CEIL) }
 }
 
 /// Round the packed double-precision (64-bit) floating-point elements in `a`
@@ -752,9 +806,9 @@ pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
 #[cfg_attr(test, assert_instr(roundpd, ROUNDING = 0))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_round_pd<const ROUNDING: i32>(a: __m128d) -> __m128d {
+pub fn _mm_round_pd<const ROUNDING: i32>(a: __m128d) -> __m128d {
     static_assert_uimm_bits!(ROUNDING, 4);
-    roundpd(a, ROUNDING)
+    unsafe { roundpd(a, ROUNDING) }
 }
 
 /// Round the packed single-precision (32-bit) floating-point elements in `a`
@@ -774,9 +828,9 @@ pub unsafe fn _mm_round_pd<const ROUNDING: i32>(a: __m128d) -> __m128d {
 #[cfg_attr(test, assert_instr(roundps, ROUNDING = 0))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_round_ps<const ROUNDING: i32>(a: __m128) -> __m128 {
+pub fn _mm_round_ps<const ROUNDING: i32>(a: __m128) -> __m128 {
     static_assert_uimm_bits!(ROUNDING, 4);
-    roundps(a, ROUNDING)
+    unsafe { roundps(a, ROUNDING) }
 }
 
 /// Round the lower double-precision (64-bit) floating-point element in `b`
@@ -798,9 +852,9 @@ pub unsafe fn _mm_round_ps<const ROUNDING: i32>(a: __m128) -> __m128 {
 #[cfg_attr(test, assert_instr(roundsd, ROUNDING = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
+pub fn _mm_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
     static_assert_uimm_bits!(ROUNDING, 4);
-    roundsd(a, b, ROUNDING)
+    unsafe { roundsd(a, b, ROUNDING) }
 }
 
 /// Round the lower single-precision (32-bit) floating-point element in `b`
@@ -822,9 +876,9 @@ pub unsafe fn _mm_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m12
 #[cfg_attr(test, assert_instr(roundss, ROUNDING = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
+pub fn _mm_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
     static_assert_uimm_bits!(ROUNDING, 4);
-    roundss(a, b, ROUNDING)
+    unsafe { roundss(a, b, ROUNDING) }
 }
 
 /// Finds the minimum unsigned 16-bit element in the 128-bit __m128i vector,
@@ -852,8 +906,8 @@ pub unsafe fn _mm_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(phminposuw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i {
-    transmute(phminposuw(a.as_u16x8()))
+pub fn _mm_minpos_epu16(a: __m128i) -> __m128i {
+    unsafe { transmute(phminposuw(a.as_u16x8())) }
 }
 
 /// Multiplies the low 32-bit integers from each packed 64-bit
@@ -864,10 +918,12 @@ pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmuldq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
-    let a = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(a.as_i64x2()));
-    let b = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(b.as_i64x2()));
-    transmute(simd_mul(a, b))
+pub fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let a = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(a.as_i64x2()));
+        let b = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(b.as_i64x2()));
+        transmute(simd_mul(a, b))
+    }
 }
 
 /// Multiplies the packed 32-bit integers in `a` and `b`, producing intermediate
@@ -882,8 +938,8 @@ pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmulld))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
-    transmute(simd_mul(a.as_i32x4(), b.as_i32x4()))
+pub fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute(simd_mul(a.as_i32x4(), b.as_i32x4())) }
 }
 
 /// Subtracts 8-bit unsigned integer values and computes the absolute
@@ -924,9 +980,9 @@ pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(mpsadbw, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_mpsadbw_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
+pub fn _mm_mpsadbw_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 3);
-    transmute(mpsadbw(a.as_u8x16(), b.as_u8x16(), IMM8 as u8))
+    unsafe { transmute(mpsadbw(a.as_u8x16(), b.as_u8x16(), IMM8 as u8)) }
 }
 
 /// Tests whether the specified bits in a 128-bit integer vector are all
@@ -948,8 +1004,8 @@ pub unsafe fn _mm_mpsadbw_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m12
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(ptest))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
-    ptestz(a.as_i64x2(), mask.as_i64x2())
+pub fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
+    unsafe { ptestz(a.as_i64x2(), mask.as_i64x2()) }
 }
 
 /// Tests whether the specified bits in a 128-bit integer vector are all
@@ -971,8 +1027,8 @@ pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(ptest))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
-    ptestc(a.as_i64x2(), mask.as_i64x2())
+pub fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
+    unsafe { ptestc(a.as_i64x2(), mask.as_i64x2()) }
 }
 
 /// Tests whether the specified bits in a 128-bit integer vector are
@@ -994,8 +1050,8 @@ pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(ptest))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
-    ptestnzc(a.as_i64x2(), mask.as_i64x2())
+pub fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
+    unsafe { ptestnzc(a.as_i64x2(), mask.as_i64x2()) }
 }
 
 /// Tests whether the specified bits in a 128-bit integer vector are all
@@ -1017,7 +1073,7 @@ pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(ptest))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
+pub fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
     _mm_testz_si128(a, mask)
 }
 
@@ -1039,7 +1095,7 @@ pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
 #[cfg_attr(test, assert_instr(pcmpeqd))]
 #[cfg_attr(test, assert_instr(ptest))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
+pub fn _mm_test_all_ones(a: __m128i) -> i32 {
     _mm_testc_si128(a, _mm_cmpeq_epi32(a, a))
 }
 
@@ -1062,7 +1118,7 @@ pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(ptest))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
+pub fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
     _mm_testnzc_si128(a, mask)
 }
 
diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse41.rs b/library/stdarch/crates/core_arch/src/x86_64/sse41.rs
index 5b6c72e3fc25..e57ffac1ca01 100644
--- a/library/stdarch/crates/core_arch/src/x86_64/sse41.rs
+++ b/library/stdarch/crates/core_arch/src/x86_64/sse41.rs
@@ -13,9 +13,9 @@ use stdarch_test::assert_instr;
 #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(pextrq, IMM1 = 1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_extract_epi64<const IMM1: i32>(a: __m128i) -> i64 {
+pub fn _mm_extract_epi64<const IMM1: i32>(a: __m128i) -> i64 {
     static_assert_uimm_bits!(IMM1, 1);
-    simd_extract!(a.as_i64x2(), IMM1 as u32)
+    unsafe { simd_extract!(a.as_i64x2(), IMM1 as u32) }
 }
 
 /// Returns a copy of `a` with the 64-bit integer from `i` inserted at a
@@ -27,9 +27,9 @@ pub unsafe fn _mm_extract_epi64<const IMM1: i32>(a: __m128i) -> i64 {
 #[cfg_attr(test, assert_instr(pinsrq, IMM1 = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_insert_epi64<const IMM1: i32>(a: __m128i, i: i64) -> __m128i {
+pub fn _mm_insert_epi64<const IMM1: i32>(a: __m128i, i: i64) -> __m128i {
     static_assert_uimm_bits!(IMM1, 1);
-    transmute(simd_insert!(a.as_i64x2(), IMM1 as u32, i))
+    unsafe { transmute(simd_insert!(a.as_i64x2(), IMM1 as u32, i)) }
 }
 
 #[cfg(test)]
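Note (not part of the patch): with the signatures above changed from `pub unsafe fn`
to `pub fn`, callers that themselves enable SSE4.1 can invoke these intrinsics
without an `unsafe` block, relying on the safe-`#[target_feature]` rules stabilized
in Rust 1.86. A minimal sketch, assuming an x86_64 target; the helper name
`clamp_u16` is hypothetical:

    use core::arch::x86_64::*;

    // `#[target_feature]` on a safe fn is allowed on stable Rust.
    #[target_feature(enable = "sse4.1")]
    fn clamp_u16(v: __m128i, lo: __m128i, hi: __m128i) -> __m128i {
        // No `unsafe` block needed here: this caller already enables sse4.1,
        // and the intrinsics are now safe fns whose only precondition is that
        // the CPU supports the feature.
        _mm_min_epu16(_mm_max_epu16(v, lo), hi)
    }

Calling `clamp_u16` from code that does not enable sse4.1 still requires `unsafe`
and should be guarded by runtime detection, e.g.
`if is_x86_feature_detected!("sse4.1") { unsafe { clamp_u16(v, lo, hi) } }`.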