manually const-ify shuffle arguments (#1160)
This commit is contained in:
parent
7516a80c31
commit
a34883b5d3
17 changed files with 1655 additions and 1549 deletions
File diff suppressed because it is too large
Load diff
|
|
@ -1595,7 +1595,7 @@ pub unsafe fn vext_f64<const N: i32>(a: float64x1_t, _b: float64x1_t) -> float64
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(mov))]
|
||||
pub unsafe fn vcombine_s8(low: int8x8_t, high: int8x8_t) -> int8x16_t {
|
||||
simd_shuffle16(
|
||||
simd_shuffle16!(
|
||||
low,
|
||||
high,
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
|
||||
|
|
@ -1607,7 +1607,7 @@ pub unsafe fn vcombine_s8(low: int8x8_t, high: int8x8_t) -> int8x16_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(mov))]
|
||||
pub unsafe fn vcombine_s16(low: int16x4_t, high: int16x4_t) -> int16x8_t {
|
||||
simd_shuffle8(low, high, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
simd_shuffle8!(low, high, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
}
|
||||
|
||||
/// Vector combine
|
||||
|
|
@ -1615,7 +1615,7 @@ pub unsafe fn vcombine_s16(low: int16x4_t, high: int16x4_t) -> int16x8_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(mov))]
|
||||
pub unsafe fn vcombine_s32(low: int32x2_t, high: int32x2_t) -> int32x4_t {
|
||||
simd_shuffle4(low, high, [0, 1, 2, 3])
|
||||
simd_shuffle4!(low, high, [0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Vector combine
|
||||
|
|
@ -1623,7 +1623,7 @@ pub unsafe fn vcombine_s32(low: int32x2_t, high: int32x2_t) -> int32x4_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(mov))]
|
||||
pub unsafe fn vcombine_s64(low: int64x1_t, high: int64x1_t) -> int64x2_t {
|
||||
simd_shuffle2(low, high, [0, 1])
|
||||
simd_shuffle2!(low, high, [0, 1])
|
||||
}
|
||||
|
||||
/// Vector combine
|
||||
|
|
@ -1631,7 +1631,7 @@ pub unsafe fn vcombine_s64(low: int64x1_t, high: int64x1_t) -> int64x2_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(mov))]
|
||||
pub unsafe fn vcombine_u8(low: uint8x8_t, high: uint8x8_t) -> uint8x16_t {
|
||||
simd_shuffle16(
|
||||
simd_shuffle16!(
|
||||
low,
|
||||
high,
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
|
||||
|
|
@ -1643,7 +1643,7 @@ pub unsafe fn vcombine_u8(low: uint8x8_t, high: uint8x8_t) -> uint8x16_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(mov))]
|
||||
pub unsafe fn vcombine_u16(low: uint16x4_t, high: uint16x4_t) -> uint16x8_t {
|
||||
simd_shuffle8(low, high, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
simd_shuffle8!(low, high, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
}
|
||||
|
||||
/// Vector combine
|
||||
|
|
@ -1651,7 +1651,7 @@ pub unsafe fn vcombine_u16(low: uint16x4_t, high: uint16x4_t) -> uint16x8_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(mov))]
|
||||
pub unsafe fn vcombine_u32(low: uint32x2_t, high: uint32x2_t) -> uint32x4_t {
|
||||
simd_shuffle4(low, high, [0, 1, 2, 3])
|
||||
simd_shuffle4!(low, high, [0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Vector combine
|
||||
|
|
@ -1659,7 +1659,7 @@ pub unsafe fn vcombine_u32(low: uint32x2_t, high: uint32x2_t) -> uint32x4_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(mov))]
|
||||
pub unsafe fn vcombine_u64(low: uint64x1_t, high: uint64x1_t) -> uint64x2_t {
|
||||
simd_shuffle2(low, high, [0, 1])
|
||||
simd_shuffle2!(low, high, [0, 1])
|
||||
}
|
||||
|
||||
/// Vector combine
|
||||
|
|
@ -1667,7 +1667,7 @@ pub unsafe fn vcombine_u64(low: uint64x1_t, high: uint64x1_t) -> uint64x2_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(mov))]
|
||||
pub unsafe fn vcombine_p64(low: poly64x1_t, high: poly64x1_t) -> poly64x2_t {
|
||||
simd_shuffle2(low, high, [0, 1])
|
||||
simd_shuffle2!(low, high, [0, 1])
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
|
|
@ -1772,7 +1772,7 @@ pub unsafe fn vget_low_p64(a: poly64x2_t) -> poly64x1_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(mov))]
|
||||
pub unsafe fn vcombine_f16 ( low: float16x4_t, high: float16x4_t) -> float16x8_t {
|
||||
simd_shuffle8(low, high, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
simd_shuffle8!(low, high, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
}
|
||||
*/
|
||||
|
||||
|
|
@ -1781,7 +1781,7 @@ pub unsafe fn vcombine_f16 ( low: float16x4_t, high: float16x4_t) -> float16x8_
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(mov))]
|
||||
pub unsafe fn vcombine_f32(low: float32x2_t, high: float32x2_t) -> float32x4_t {
|
||||
simd_shuffle4(low, high, [0, 1, 2, 3])
|
||||
simd_shuffle4!(low, high, [0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Vector combine
|
||||
|
|
@ -1789,7 +1789,7 @@ pub unsafe fn vcombine_f32(low: float32x2_t, high: float32x2_t) -> float32x4_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(mov))]
|
||||
pub unsafe fn vcombine_p8(low: poly8x8_t, high: poly8x8_t) -> poly8x16_t {
|
||||
simd_shuffle16(
|
||||
simd_shuffle16!(
|
||||
low,
|
||||
high,
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
|
||||
|
|
@ -1801,7 +1801,7 @@ pub unsafe fn vcombine_p8(low: poly8x8_t, high: poly8x8_t) -> poly8x16_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(mov))]
|
||||
pub unsafe fn vcombine_p16(low: poly16x4_t, high: poly16x4_t) -> poly16x8_t {
|
||||
simd_shuffle8(low, high, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
simd_shuffle8!(low, high, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
}
|
||||
|
||||
/// Vector combine
|
||||
|
|
@ -1809,7 +1809,7 @@ pub unsafe fn vcombine_p16(low: poly16x4_t, high: poly16x4_t) -> poly16x8_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(mov))]
|
||||
pub unsafe fn vcombine_f64(low: float64x1_t, high: float64x1_t) -> float64x2_t {
|
||||
simd_shuffle2(low, high, [0, 1])
|
||||
simd_shuffle2!(low, high, [0, 1])
|
||||
}
|
||||
|
||||
/// Table look-up
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -580,7 +580,7 @@ pub unsafe fn vld1q_lane_f32<const LANE: i32>(ptr: *const f32, src: float32x4_t)
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1_dup_s8(ptr: *const i8) -> int8x8_t {
|
||||
let x = vld1_lane_s8::<0>(ptr, transmute(i8x8::splat(0)));
|
||||
simd_shuffle8(x, x, [0, 0, 0, 0, 0, 0, 0, 0])
|
||||
simd_shuffle8!(x, x, [0, 0, 0, 0, 0, 0, 0, 0])
|
||||
}
|
||||
|
||||
/// Load one single-element structure and Replicate to all lanes (of one register).
|
||||
|
|
@ -591,7 +591,7 @@ pub unsafe fn vld1_dup_s8(ptr: *const i8) -> int8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1q_dup_s8(ptr: *const i8) -> int8x16_t {
|
||||
let x = vld1q_lane_s8::<0>(ptr, transmute(i8x16::splat(0)));
|
||||
simd_shuffle16(x, x, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
|
||||
simd_shuffle16!(x, x, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
|
||||
}
|
||||
|
||||
/// Load one single-element structure and Replicate to all lanes (of one register).
|
||||
|
|
@ -602,7 +602,7 @@ pub unsafe fn vld1q_dup_s8(ptr: *const i8) -> int8x16_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1_dup_s16(ptr: *const i16) -> int16x4_t {
|
||||
let x = vld1_lane_s16::<0>(ptr, transmute(i16x4::splat(0)));
|
||||
simd_shuffle4(x, x, [0, 0, 0, 0])
|
||||
simd_shuffle4!(x, x, [0, 0, 0, 0])
|
||||
}
|
||||
|
||||
/// Load one single-element structure and Replicate to all lanes (of one register).
|
||||
|
|
@ -613,7 +613,7 @@ pub unsafe fn vld1_dup_s16(ptr: *const i16) -> int16x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1q_dup_s16(ptr: *const i16) -> int16x8_t {
|
||||
let x = vld1q_lane_s16::<0>(ptr, transmute(i16x8::splat(0)));
|
||||
simd_shuffle8(x, x, [0, 0, 0, 0, 0, 0, 0, 0])
|
||||
simd_shuffle8!(x, x, [0, 0, 0, 0, 0, 0, 0, 0])
|
||||
}
|
||||
|
||||
/// Load one single-element structure and Replicate to all lanes (of one register).
|
||||
|
|
@ -624,7 +624,7 @@ pub unsafe fn vld1q_dup_s16(ptr: *const i16) -> int16x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1_dup_s32(ptr: *const i32) -> int32x2_t {
|
||||
let x = vld1_lane_s32::<0>(ptr, transmute(i32x2::splat(0)));
|
||||
simd_shuffle2(x, x, [0, 0])
|
||||
simd_shuffle2!(x, x, [0, 0])
|
||||
}
|
||||
|
||||
/// Load one single-element structure and Replicate to all lanes (of one register).
|
||||
|
|
@ -635,7 +635,7 @@ pub unsafe fn vld1_dup_s32(ptr: *const i32) -> int32x2_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1q_dup_s32(ptr: *const i32) -> int32x4_t {
|
||||
let x = vld1q_lane_s32::<0>(ptr, transmute(i32x4::splat(0)));
|
||||
simd_shuffle4(x, x, [0, 0, 0, 0])
|
||||
simd_shuffle4!(x, x, [0, 0, 0, 0])
|
||||
}
|
||||
|
||||
/// Load one single-element structure and Replicate to all lanes (of one register).
|
||||
|
|
@ -663,7 +663,7 @@ pub unsafe fn vld1_dup_s64(ptr: *const i64) -> int64x1_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1q_dup_s64(ptr: *const i64) -> int64x2_t {
|
||||
let x = vld1q_lane_s64::<0>(ptr, transmute(i64x2::splat(0)));
|
||||
simd_shuffle2(x, x, [0, 0])
|
||||
simd_shuffle2!(x, x, [0, 0])
|
||||
}
|
||||
|
||||
/// Load one single-element structure and Replicate to all lanes (of one register).
|
||||
|
|
@ -674,7 +674,7 @@ pub unsafe fn vld1q_dup_s64(ptr: *const i64) -> int64x2_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1_dup_u8(ptr: *const u8) -> uint8x8_t {
|
||||
let x = vld1_lane_u8::<0>(ptr, transmute(u8x8::splat(0)));
|
||||
simd_shuffle8(x, x, [0, 0, 0, 0, 0, 0, 0, 0])
|
||||
simd_shuffle8!(x, x, [0, 0, 0, 0, 0, 0, 0, 0])
|
||||
}
|
||||
|
||||
/// Load one single-element structure and Replicate to all lanes (of one register).
|
||||
|
|
@ -685,7 +685,7 @@ pub unsafe fn vld1_dup_u8(ptr: *const u8) -> uint8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1q_dup_u8(ptr: *const u8) -> uint8x16_t {
|
||||
let x = vld1q_lane_u8::<0>(ptr, transmute(u8x16::splat(0)));
|
||||
simd_shuffle16(x, x, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
|
||||
simd_shuffle16!(x, x, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
|
||||
}
|
||||
|
||||
/// Load one single-element structure and Replicate to all lanes (of one register).
|
||||
|
|
@ -696,7 +696,7 @@ pub unsafe fn vld1q_dup_u8(ptr: *const u8) -> uint8x16_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1_dup_u16(ptr: *const u16) -> uint16x4_t {
|
||||
let x = vld1_lane_u16::<0>(ptr, transmute(u16x4::splat(0)));
|
||||
simd_shuffle4(x, x, [0, 0, 0, 0])
|
||||
simd_shuffle4!(x, x, [0, 0, 0, 0])
|
||||
}
|
||||
|
||||
/// Load one single-element structure and Replicate to all lanes (of one register).
|
||||
|
|
@ -707,7 +707,7 @@ pub unsafe fn vld1_dup_u16(ptr: *const u16) -> uint16x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1q_dup_u16(ptr: *const u16) -> uint16x8_t {
|
||||
let x = vld1q_lane_u16::<0>(ptr, transmute(u16x8::splat(0)));
|
||||
simd_shuffle8(x, x, [0, 0, 0, 0, 0, 0, 0, 0])
|
||||
simd_shuffle8!(x, x, [0, 0, 0, 0, 0, 0, 0, 0])
|
||||
}
|
||||
|
||||
/// Load one single-element structure and Replicate to all lanes (of one register).
|
||||
|
|
@ -718,7 +718,7 @@ pub unsafe fn vld1q_dup_u16(ptr: *const u16) -> uint16x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1_dup_u32(ptr: *const u32) -> uint32x2_t {
|
||||
let x = vld1_lane_u32::<0>(ptr, transmute(u32x2::splat(0)));
|
||||
simd_shuffle2(x, x, [0, 0])
|
||||
simd_shuffle2!(x, x, [0, 0])
|
||||
}
|
||||
|
||||
/// Load one single-element structure and Replicate to all lanes (of one register).
|
||||
|
|
@ -729,7 +729,7 @@ pub unsafe fn vld1_dup_u32(ptr: *const u32) -> uint32x2_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1q_dup_u32(ptr: *const u32) -> uint32x4_t {
|
||||
let x = vld1q_lane_u32::<0>(ptr, transmute(u32x4::splat(0)));
|
||||
simd_shuffle4(x, x, [0, 0, 0, 0])
|
||||
simd_shuffle4!(x, x, [0, 0, 0, 0])
|
||||
}
|
||||
|
||||
/// Load one single-element structure and Replicate to all lanes (of one register).
|
||||
|
|
@ -757,7 +757,7 @@ pub unsafe fn vld1_dup_u64(ptr: *const u64) -> uint64x1_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1q_dup_u64(ptr: *const u64) -> uint64x2_t {
|
||||
let x = vld1q_lane_u64::<0>(ptr, transmute(u64x2::splat(0)));
|
||||
simd_shuffle2(x, x, [0, 0])
|
||||
simd_shuffle2!(x, x, [0, 0])
|
||||
}
|
||||
|
||||
/// Load one single-element structure and Replicate to all lanes (of one register).
|
||||
|
|
@ -768,7 +768,7 @@ pub unsafe fn vld1q_dup_u64(ptr: *const u64) -> uint64x2_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1_dup_p8(ptr: *const p8) -> poly8x8_t {
|
||||
let x = vld1_lane_p8::<0>(ptr, transmute(u8x8::splat(0)));
|
||||
simd_shuffle8(x, x, [0, 0, 0, 0, 0, 0, 0, 0])
|
||||
simd_shuffle8!(x, x, [0, 0, 0, 0, 0, 0, 0, 0])
|
||||
}
|
||||
|
||||
/// Load one single-element structure and Replicate to all lanes (of one register).
|
||||
|
|
@ -779,7 +779,7 @@ pub unsafe fn vld1_dup_p8(ptr: *const p8) -> poly8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1q_dup_p8(ptr: *const p8) -> poly8x16_t {
|
||||
let x = vld1q_lane_p8::<0>(ptr, transmute(u8x16::splat(0)));
|
||||
simd_shuffle16(x, x, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
|
||||
simd_shuffle16!(x, x, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
|
||||
}
|
||||
|
||||
/// Load one single-element structure and Replicate to all lanes (of one register).
|
||||
|
|
@ -790,7 +790,7 @@ pub unsafe fn vld1q_dup_p8(ptr: *const p8) -> poly8x16_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1_dup_p16(ptr: *const p16) -> poly16x4_t {
|
||||
let x = vld1_lane_p16::<0>(ptr, transmute(u16x4::splat(0)));
|
||||
simd_shuffle4(x, x, [0, 0, 0, 0])
|
||||
simd_shuffle4!(x, x, [0, 0, 0, 0])
|
||||
}
|
||||
|
||||
/// Load one single-element structure and Replicate to all lanes (of one register).
|
||||
|
|
@ -801,7 +801,7 @@ pub unsafe fn vld1_dup_p16(ptr: *const p16) -> poly16x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1q_dup_p16(ptr: *const p16) -> poly16x8_t {
|
||||
let x = vld1q_lane_p16::<0>(ptr, transmute(u16x8::splat(0)));
|
||||
simd_shuffle8(x, x, [0, 0, 0, 0, 0, 0, 0, 0])
|
||||
simd_shuffle8!(x, x, [0, 0, 0, 0, 0, 0, 0, 0])
|
||||
}
|
||||
|
||||
/// Load one single-element structure and Replicate to all lanes (of one register).
|
||||
|
|
@ -812,7 +812,7 @@ pub unsafe fn vld1q_dup_p16(ptr: *const p16) -> poly16x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1_dup_f32(ptr: *const f32) -> float32x2_t {
|
||||
let x = vld1_lane_f32::<0>(ptr, transmute(f32x2::splat(0.)));
|
||||
simd_shuffle2(x, x, [0, 0])
|
||||
simd_shuffle2!(x, x, [0, 0])
|
||||
}
|
||||
|
||||
/// Load one single-element structure and Replicate to all lanes (of one register).
|
||||
|
|
@ -823,7 +823,7 @@ pub unsafe fn vld1_dup_f32(ptr: *const f32) -> float32x2_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
|
||||
pub unsafe fn vld1q_dup_f32(ptr: *const f32) -> float32x4_t {
|
||||
let x = vld1q_lane_f32::<0>(ptr, transmute(f32x4::splat(0.)));
|
||||
simd_shuffle4(x, x, [0, 0, 0, 0])
|
||||
simd_shuffle4!(x, x, [0, 0, 0, 0])
|
||||
}
|
||||
|
||||
// signed absolute difference and accumulate (64-bit)
|
||||
|
|
@ -1284,8 +1284,8 @@ pub unsafe fn vaddl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl2))]
|
||||
pub unsafe fn vaddl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
|
||||
let a: int8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let b: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let a: int8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let b: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let a: int16x8_t = simd_cast(a);
|
||||
let b: int16x8_t = simd_cast(b);
|
||||
simd_add(a, b)
|
||||
|
|
@ -1298,8 +1298,8 @@ pub unsafe fn vaddl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl2))]
|
||||
pub unsafe fn vaddl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
|
||||
let a: int16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]);
|
||||
let b: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
|
||||
let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
|
||||
let b: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
|
||||
let a: int32x4_t = simd_cast(a);
|
||||
let b: int32x4_t = simd_cast(b);
|
||||
simd_add(a, b)
|
||||
|
|
@ -1312,8 +1312,8 @@ pub unsafe fn vaddl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl2))]
|
||||
pub unsafe fn vaddl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
|
||||
let a: int32x2_t = simd_shuffle2(a, a, [2, 3]);
|
||||
let b: int32x2_t = simd_shuffle2(b, b, [2, 3]);
|
||||
let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]);
|
||||
let b: int32x2_t = simd_shuffle2!(b, b, [2, 3]);
|
||||
let a: int64x2_t = simd_cast(a);
|
||||
let b: int64x2_t = simd_cast(b);
|
||||
simd_add(a, b)
|
||||
|
|
@ -1326,8 +1326,8 @@ pub unsafe fn vaddl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl2))]
|
||||
pub unsafe fn vaddl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
|
||||
let a: uint8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let b: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let a: uint8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let b: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let a: uint16x8_t = simd_cast(a);
|
||||
let b: uint16x8_t = simd_cast(b);
|
||||
simd_add(a, b)
|
||||
|
|
@ -1340,8 +1340,8 @@ pub unsafe fn vaddl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl2))]
|
||||
pub unsafe fn vaddl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
|
||||
let a: uint16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]);
|
||||
let b: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
|
||||
let a: uint16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
|
||||
let b: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
|
||||
let a: uint32x4_t = simd_cast(a);
|
||||
let b: uint32x4_t = simd_cast(b);
|
||||
simd_add(a, b)
|
||||
|
|
@ -1354,8 +1354,8 @@ pub unsafe fn vaddl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl2))]
|
||||
pub unsafe fn vaddl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
|
||||
let a: uint32x2_t = simd_shuffle2(a, a, [2, 3]);
|
||||
let b: uint32x2_t = simd_shuffle2(b, b, [2, 3]);
|
||||
let a: uint32x2_t = simd_shuffle2!(a, a, [2, 3]);
|
||||
let b: uint32x2_t = simd_shuffle2!(b, b, [2, 3]);
|
||||
let a: uint64x2_t = simd_cast(a);
|
||||
let b: uint64x2_t = simd_cast(b);
|
||||
simd_add(a, b)
|
||||
|
|
@ -1434,7 +1434,7 @@ pub unsafe fn vaddw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddw2))]
|
||||
pub unsafe fn vaddw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
|
||||
let b: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let b: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let b: int16x8_t = simd_cast(b);
|
||||
simd_add(a, b)
|
||||
}
|
||||
|
|
@ -1446,7 +1446,7 @@ pub unsafe fn vaddw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddw2))]
|
||||
pub unsafe fn vaddw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
|
||||
let b: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
|
||||
let b: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
|
||||
let b: int32x4_t = simd_cast(b);
|
||||
simd_add(a, b)
|
||||
}
|
||||
|
|
@ -1458,7 +1458,7 @@ pub unsafe fn vaddw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddw2))]
|
||||
pub unsafe fn vaddw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
|
||||
let b: int32x2_t = simd_shuffle2(b, b, [2, 3]);
|
||||
let b: int32x2_t = simd_shuffle2!(b, b, [2, 3]);
|
||||
let b: int64x2_t = simd_cast(b);
|
||||
simd_add(a, b)
|
||||
}
|
||||
|
|
@ -1470,7 +1470,7 @@ pub unsafe fn vaddw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddw2))]
|
||||
pub unsafe fn vaddw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
|
||||
let b: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let b: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let b: uint16x8_t = simd_cast(b);
|
||||
simd_add(a, b)
|
||||
}
|
||||
|
|
@ -1482,7 +1482,7 @@ pub unsafe fn vaddw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddw2))]
|
||||
pub unsafe fn vaddw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
|
||||
let b: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
|
||||
let b: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
|
||||
let b: uint32x4_t = simd_cast(b);
|
||||
simd_add(a, b)
|
||||
}
|
||||
|
|
@ -1494,7 +1494,7 @@ pub unsafe fn vaddw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddw2))]
|
||||
pub unsafe fn vaddw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
|
||||
let b: uint32x2_t = simd_shuffle2(b, b, [2, 3]);
|
||||
let b: uint32x2_t = simd_shuffle2!(b, b, [2, 3]);
|
||||
let b: uint64x2_t = simd_cast(b);
|
||||
simd_add(a, b)
|
||||
}
|
||||
|
|
@ -1567,7 +1567,7 @@ pub unsafe fn vaddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn2))]
|
||||
pub unsafe fn vaddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x16_t {
|
||||
let x = simd_cast(simd_shr(simd_add(a, b), int16x8_t(8, 8, 8, 8, 8, 8, 8, 8)));
|
||||
simd_shuffle16(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
|
||||
simd_shuffle16!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
|
||||
}
|
||||
|
||||
/// Add returning High Narrow (high half).
|
||||
|
|
@ -1578,7 +1578,7 @@ pub unsafe fn vaddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x1
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn2))]
|
||||
pub unsafe fn vaddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16x8_t {
|
||||
let x = simd_cast(simd_shr(simd_add(a, b), int32x4_t(16, 16, 16, 16)));
|
||||
simd_shuffle8(r, x, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
simd_shuffle8!(r, x, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
}
|
||||
|
||||
/// Add returning High Narrow (high half).
|
||||
|
|
@ -1589,7 +1589,7 @@ pub unsafe fn vaddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn2))]
|
||||
pub unsafe fn vaddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32x4_t {
|
||||
let x = simd_cast(simd_shr(simd_add(a, b), int64x2_t(32, 32)));
|
||||
simd_shuffle4(r, x, [0, 1, 2, 3])
|
||||
simd_shuffle4!(r, x, [0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Add returning High Narrow (high half).
|
||||
|
|
@ -1600,7 +1600,7 @@ pub unsafe fn vaddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn2))]
|
||||
pub unsafe fn vaddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uint8x16_t {
|
||||
let x = simd_cast(simd_shr(simd_add(a, b), uint16x8_t(8, 8, 8, 8, 8, 8, 8, 8)));
|
||||
simd_shuffle16(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
|
||||
simd_shuffle16!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
|
||||
}
|
||||
|
||||
/// Add returning High Narrow (high half).
|
||||
|
|
@ -1611,7 +1611,7 @@ pub unsafe fn vaddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uin
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn2))]
|
||||
pub unsafe fn vaddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> uint16x8_t {
|
||||
let x = simd_cast(simd_shr(simd_add(a, b), uint32x4_t(16, 16, 16, 16)));
|
||||
simd_shuffle8(r, x, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
simd_shuffle8!(r, x, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
}
|
||||
|
||||
/// Add returning High Narrow (high half).
|
||||
|
|
@ -1622,7 +1622,7 @@ pub unsafe fn vaddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> ui
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn2))]
|
||||
pub unsafe fn vaddhn_high_u64(r: uint32x2_t, a: uint64x2_t, b: uint64x2_t) -> uint32x4_t {
|
||||
let x = simd_cast(simd_shr(simd_add(a, b), uint64x2_t(32, 32)));
|
||||
simd_shuffle4(r, x, [0, 1, 2, 3])
|
||||
simd_shuffle4!(r, x, [0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Rounding Add returning High Narrow.
|
||||
|
|
@ -1693,7 +1693,7 @@ pub unsafe fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn2))]
|
||||
pub unsafe fn vraddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x16_t {
|
||||
let x = vraddhn_s16_(a, b);
|
||||
simd_shuffle16(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
|
||||
simd_shuffle16!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
|
||||
}
|
||||
|
||||
/// Rounding Add returning High Narrow (high half).
|
||||
|
|
@ -1704,7 +1704,7 @@ pub unsafe fn vraddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn2))]
|
||||
pub unsafe fn vraddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16x8_t {
|
||||
let x = vraddhn_s32_(a, b);
|
||||
simd_shuffle8(r, x, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
simd_shuffle8!(r, x, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
}
|
||||
|
||||
/// Rounding Add returning High Narrow (high half).
|
||||
|
|
@ -1715,7 +1715,7 @@ pub unsafe fn vraddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int1
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn2))]
|
||||
pub unsafe fn vraddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32x4_t {
|
||||
let x = vraddhn_s64_(a, b);
|
||||
simd_shuffle4(r, x, [0, 1, 2, 3])
|
||||
simd_shuffle4!(r, x, [0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Rounding Add returning High Narrow (high half).
|
||||
|
|
@ -1726,7 +1726,7 @@ pub unsafe fn vraddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int3
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn2))]
|
||||
pub unsafe fn vraddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uint8x16_t {
|
||||
let x: uint8x8_t = transmute(vraddhn_s16_(transmute(a), transmute(b)));
|
||||
simd_shuffle16(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
|
||||
simd_shuffle16!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
|
||||
}
|
||||
|
||||
/// Rounding Add returning High Narrow (high half).
|
||||
|
|
@ -1737,7 +1737,7 @@ pub unsafe fn vraddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> ui
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn2))]
|
||||
pub unsafe fn vraddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> uint16x8_t {
|
||||
let x: uint16x4_t = transmute(vraddhn_s32_(transmute(a), transmute(b)));
|
||||
simd_shuffle8(r, x, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
simd_shuffle8!(r, x, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
}
|
||||
|
||||
/// Rounding Add returning High Narrow (high half).
|
||||
|
|
@ -1748,7 +1748,7 @@ pub unsafe fn vraddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> u
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn2))]
|
||||
pub unsafe fn vraddhn_high_u64(r: uint32x2_t, a: uint64x2_t, b: uint64x2_t) -> uint32x4_t {
|
||||
let x: uint32x2_t = transmute(vraddhn_s64_(transmute(a), transmute(b)));
|
||||
simd_shuffle4(r, x, [0, 1, 2, 3])
|
||||
simd_shuffle4!(r, x, [0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Signed Add Long Pairwise.
|
||||
|
|
@ -2961,7 +2961,7 @@ pub unsafe fn vget_lane_u8<const IMM5: i32>(v: uint8x8_t) -> u8 {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
|
||||
pub unsafe fn vget_high_s8(a: int8x16_t) -> int8x8_t {
|
||||
simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15])
|
||||
simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15])
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
|
|
@ -2971,7 +2971,7 @@ pub unsafe fn vget_high_s8(a: int8x16_t) -> int8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
|
||||
pub unsafe fn vget_high_s16(a: int16x8_t) -> int16x4_t {
|
||||
simd_shuffle4(a, a, [4, 5, 6, 7])
|
||||
simd_shuffle4!(a, a, [4, 5, 6, 7])
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
|
|
@ -2981,7 +2981,7 @@ pub unsafe fn vget_high_s16(a: int16x8_t) -> int16x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
|
||||
pub unsafe fn vget_high_s32(a: int32x4_t) -> int32x2_t {
|
||||
simd_shuffle2(a, a, [2, 3])
|
||||
simd_shuffle2!(a, a, [2, 3])
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
|
|
@ -3001,7 +3001,7 @@ pub unsafe fn vget_high_s64(a: int64x2_t) -> int64x1_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
|
||||
pub unsafe fn vget_high_u8(a: uint8x16_t) -> uint8x8_t {
|
||||
simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15])
|
||||
simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15])
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
|
|
@ -3011,7 +3011,7 @@ pub unsafe fn vget_high_u8(a: uint8x16_t) -> uint8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
|
||||
pub unsafe fn vget_high_u16(a: uint16x8_t) -> uint16x4_t {
|
||||
// Indices 4..=7: the upper four of `a`'s eight lanes (same list in both call spellings below).
simd_shuffle4(a, a, [4, 5, 6, 7])
|
||||
simd_shuffle4!(a, a, [4, 5, 6, 7])
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
|
|
@ -3021,7 +3021,7 @@ pub unsafe fn vget_high_u16(a: uint16x8_t) -> uint16x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
|
||||
pub unsafe fn vget_high_u32(a: uint32x4_t) -> uint32x2_t {
|
||||
// Indices 2 and 3: the upper two of `a`'s four lanes (same list in both call spellings below).
simd_shuffle2(a, a, [2, 3])
|
||||
simd_shuffle2!(a, a, [2, 3])
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
|
|
@ -3041,7 +3041,7 @@ pub unsafe fn vget_high_u64(a: uint64x2_t) -> uint64x1_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
|
||||
pub unsafe fn vget_high_p8(a: poly8x16_t) -> poly8x8_t {
|
||||
// Indices 8..=15: the upper eight of `a`'s sixteen lanes (same list in both call spellings below).
simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15])
|
||||
simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15])
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
|
|
@ -3051,7 +3051,7 @@ pub unsafe fn vget_high_p8(a: poly8x16_t) -> poly8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
|
||||
pub unsafe fn vget_high_p16(a: poly16x8_t) -> poly16x4_t {
|
||||
// Indices 4..=7: the upper four of `a`'s eight lanes (same list in both call spellings below).
simd_shuffle4(a, a, [4, 5, 6, 7])
|
||||
simd_shuffle4!(a, a, [4, 5, 6, 7])
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
|
|
@ -3061,7 +3061,7 @@ pub unsafe fn vget_high_p16(a: poly16x8_t) -> poly16x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
|
||||
pub unsafe fn vget_high_f32(a: float32x4_t) -> float32x2_t {
|
||||
// Indices 2 and 3: the upper two of `a`'s four lanes (same list in both call spellings below).
simd_shuffle2(a, a, [2, 3])
|
||||
simd_shuffle2!(a, a, [2, 3])
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
|
|
@ -3071,7 +3071,7 @@ pub unsafe fn vget_high_f32(a: float32x4_t) -> float32x2_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
|
||||
pub unsafe fn vget_low_s8(a: int8x16_t) -> int8x8_t {
|
||||
// Indices 0..=7: the lower eight of `a`'s sixteen lanes (same list in both call spellings below).
simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
|
|
@ -3081,7 +3081,7 @@ pub unsafe fn vget_low_s8(a: int8x16_t) -> int8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
|
||||
pub unsafe fn vget_low_s16(a: int16x8_t) -> int16x4_t {
|
||||
// Indices 0..=3: the lower four of `a`'s eight lanes (same list in both call spellings below).
simd_shuffle4(a, a, [0, 1, 2, 3])
|
||||
simd_shuffle4!(a, a, [0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
|
|
@ -3091,7 +3091,7 @@ pub unsafe fn vget_low_s16(a: int16x8_t) -> int16x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
|
||||
pub unsafe fn vget_low_s32(a: int32x4_t) -> int32x2_t {
|
||||
// Indices 0 and 1: the lower two of `a`'s four lanes (same list in both call spellings below).
simd_shuffle2(a, a, [0, 1])
|
||||
simd_shuffle2!(a, a, [0, 1])
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
|
|
@ -3111,7 +3111,7 @@ pub unsafe fn vget_low_s64(a: int64x2_t) -> int64x1_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
|
||||
pub unsafe fn vget_low_u8(a: uint8x16_t) -> uint8x8_t {
|
||||
// Indices 0..=7: the lower eight of `a`'s sixteen lanes (same list in both call spellings below).
simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
|
|
@ -3121,7 +3121,7 @@ pub unsafe fn vget_low_u8(a: uint8x16_t) -> uint8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
|
||||
pub unsafe fn vget_low_u16(a: uint16x8_t) -> uint16x4_t {
|
||||
// Indices 0..=3: the lower four of `a`'s eight lanes (same list in both call spellings below).
simd_shuffle4(a, a, [0, 1, 2, 3])
|
||||
simd_shuffle4!(a, a, [0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
|
|
@ -3131,7 +3131,7 @@ pub unsafe fn vget_low_u16(a: uint16x8_t) -> uint16x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
|
||||
pub unsafe fn vget_low_u32(a: uint32x4_t) -> uint32x2_t {
|
||||
// Indices 0 and 1: the lower two of `a`'s four lanes (same list in both call spellings below).
simd_shuffle2(a, a, [0, 1])
|
||||
simd_shuffle2!(a, a, [0, 1])
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
|
|
@ -3151,7 +3151,7 @@ pub unsafe fn vget_low_u64(a: uint64x2_t) -> uint64x1_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
|
||||
pub unsafe fn vget_low_p8(a: poly8x16_t) -> poly8x8_t {
|
||||
// Indices 0..=7: the lower eight of `a`'s sixteen lanes (same list in both call spellings below).
simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
|
|
@ -3161,7 +3161,7 @@ pub unsafe fn vget_low_p8(a: poly8x16_t) -> poly8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
|
||||
pub unsafe fn vget_low_p16(a: poly16x8_t) -> poly16x4_t {
|
||||
// Indices 0..=3: the lower four of `a`'s eight lanes (same list in both call spellings below).
simd_shuffle4(a, a, [0, 1, 2, 3])
|
||||
simd_shuffle4!(a, a, [0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
|
|
@ -3171,7 +3171,7 @@ pub unsafe fn vget_low_p16(a: poly16x8_t) -> poly16x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
|
||||
pub unsafe fn vget_low_f32(a: float32x4_t) -> float32x2_t {
|
||||
// Indices 0 and 1: the lower two of `a`'s four lanes (same list in both call spellings below).
simd_shuffle2(a, a, [0, 1])
|
||||
simd_shuffle2!(a, a, [0, 1])
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
|
|
@ -3713,7 +3713,7 @@ pub unsafe fn vcntq_p8(a: poly8x16_t) -> poly8x16_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev16))]
|
||||
pub unsafe fn vrev16_s8(a: int8x8_t) -> int8x8_t {
|
||||
// The index list swaps every adjacent lane pair (1,0 / 3,2 / ...): the two 8-bit lanes of each 16-bit group trade places.
simd_shuffle8(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
|
||||
simd_shuffle8!(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3723,7 +3723,7 @@ pub unsafe fn vrev16_s8(a: int8x8_t) -> int8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev16))]
|
||||
pub unsafe fn vrev16q_s8(a: int8x16_t) -> int8x16_t {
|
||||
// The index list swaps every adjacent lane pair (1,0 / 3,2 / ...): the two 8-bit lanes of each 16-bit group trade places.
simd_shuffle16(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14])
|
||||
simd_shuffle16!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3733,7 +3733,7 @@ pub unsafe fn vrev16q_s8(a: int8x16_t) -> int8x16_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev16))]
|
||||
pub unsafe fn vrev16_u8(a: uint8x8_t) -> uint8x8_t {
|
||||
// The index list swaps every adjacent lane pair (1,0 / 3,2 / ...): the two 8-bit lanes of each 16-bit group trade places.
simd_shuffle8(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
|
||||
simd_shuffle8!(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3743,7 +3743,7 @@ pub unsafe fn vrev16_u8(a: uint8x8_t) -> uint8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev16))]
|
||||
pub unsafe fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t {
|
||||
// The index list swaps every adjacent lane pair (1,0 / 3,2 / ...): the two 8-bit lanes of each 16-bit group trade places.
simd_shuffle16(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14])
|
||||
simd_shuffle16!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3753,7 +3753,7 @@ pub unsafe fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev16))]
|
||||
pub unsafe fn vrev16_p8(a: poly8x8_t) -> poly8x8_t {
|
||||
// The index list swaps every adjacent lane pair (1,0 / 3,2 / ...): the two 8-bit lanes of each 16-bit group trade places.
simd_shuffle8(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
|
||||
simd_shuffle8!(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3763,7 +3763,7 @@ pub unsafe fn vrev16_p8(a: poly8x8_t) -> poly8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev16))]
|
||||
pub unsafe fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t {
|
||||
// The index list swaps every adjacent lane pair (1,0 / 3,2 / ...): the two 8-bit lanes of each 16-bit group trade places.
simd_shuffle16(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14])
|
||||
simd_shuffle16!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3773,7 +3773,7 @@ pub unsafe fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
|
||||
pub unsafe fn vrev32_s8(a: int8x8_t) -> int8x8_t {
|
||||
// The index list reverses lanes inside each run of four: the four 8-bit lanes of every 32-bit group.
simd_shuffle8(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
|
||||
simd_shuffle8!(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3783,7 +3783,7 @@ pub unsafe fn vrev32_s8(a: int8x8_t) -> int8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
|
||||
pub unsafe fn vrev32q_s8(a: int8x16_t) -> int8x16_t {
|
||||
// The index list reverses lanes inside each run of four: the four 8-bit lanes of every 32-bit group.
simd_shuffle16(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12])
|
||||
simd_shuffle16!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3793,7 +3793,7 @@ pub unsafe fn vrev32q_s8(a: int8x16_t) -> int8x16_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
|
||||
pub unsafe fn vrev32_u8(a: uint8x8_t) -> uint8x8_t {
|
||||
// The index list reverses lanes inside each run of four: the four 8-bit lanes of every 32-bit group.
simd_shuffle8(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
|
||||
simd_shuffle8!(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3803,7 +3803,7 @@ pub unsafe fn vrev32_u8(a: uint8x8_t) -> uint8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
|
||||
pub unsafe fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t {
|
||||
// The index list reverses lanes inside each run of four: the four 8-bit lanes of every 32-bit group.
simd_shuffle16(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12])
|
||||
simd_shuffle16!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3813,7 +3813,7 @@ pub unsafe fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
|
||||
pub unsafe fn vrev32_s16(a: int16x4_t) -> int16x4_t {
|
||||
// The index list swaps adjacent lane pairs: the two 16-bit lanes of every 32-bit group.
simd_shuffle4(a, a, [1, 0, 3, 2])
|
||||
simd_shuffle4!(a, a, [1, 0, 3, 2])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3823,7 +3823,7 @@ pub unsafe fn vrev32_s16(a: int16x4_t) -> int16x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
|
||||
pub unsafe fn vrev32q_s16(a: int16x8_t) -> int16x8_t {
|
||||
// The index list swaps adjacent lane pairs: the two 16-bit lanes of every 32-bit group.
simd_shuffle8(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
|
||||
simd_shuffle8!(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3833,7 +3833,7 @@ pub unsafe fn vrev32q_s16(a: int16x8_t) -> int16x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
|
||||
pub unsafe fn vrev32_p16(a: poly16x4_t) -> poly16x4_t {
|
||||
// The index list swaps adjacent lane pairs: the two 16-bit lanes of every 32-bit group.
simd_shuffle4(a, a, [1, 0, 3, 2])
|
||||
simd_shuffle4!(a, a, [1, 0, 3, 2])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3843,7 +3843,7 @@ pub unsafe fn vrev32_p16(a: poly16x4_t) -> poly16x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
|
||||
pub unsafe fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t {
|
||||
// The index list swaps adjacent lane pairs: the two 16-bit lanes of every 32-bit group.
simd_shuffle8(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
|
||||
simd_shuffle8!(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3853,7 +3853,7 @@ pub unsafe fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
|
||||
pub unsafe fn vrev32_u16(a: uint16x4_t) -> uint16x4_t {
|
||||
// The index list swaps adjacent lane pairs: the two 16-bit lanes of every 32-bit group.
simd_shuffle4(a, a, [1, 0, 3, 2])
|
||||
simd_shuffle4!(a, a, [1, 0, 3, 2])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3863,7 +3863,7 @@ pub unsafe fn vrev32_u16(a: uint16x4_t) -> uint16x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
|
||||
pub unsafe fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t {
|
||||
// The index list swaps adjacent lane pairs: the two 16-bit lanes of every 32-bit group.
simd_shuffle8(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
|
||||
simd_shuffle8!(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3873,7 +3873,7 @@ pub unsafe fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
|
||||
pub unsafe fn vrev32_p8(a: poly8x8_t) -> poly8x8_t {
|
||||
// The index list reverses lanes inside each run of four: the four 8-bit lanes of every 32-bit group.
simd_shuffle8(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
|
||||
simd_shuffle8!(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3883,7 +3883,7 @@ pub unsafe fn vrev32_p8(a: poly8x8_t) -> poly8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
|
||||
pub unsafe fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t {
|
||||
// The index list reverses lanes inside each run of four: the four 8-bit lanes of every 32-bit group.
simd_shuffle16(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12])
|
||||
simd_shuffle16!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3893,7 +3893,7 @@ pub unsafe fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))]
|
||||
pub unsafe fn vrev64_s8(a: int8x8_t) -> int8x8_t {
|
||||
// The index list reverses lanes inside each run of eight: the eight 8-bit lanes of every 64-bit group.
simd_shuffle8(a, a, [7, 6, 5, 4, 3, 2, 1, 0])
|
||||
simd_shuffle8!(a, a, [7, 6, 5, 4, 3, 2, 1, 0])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3903,7 +3903,7 @@ pub unsafe fn vrev64_s8(a: int8x8_t) -> int8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))]
|
||||
pub unsafe fn vrev64q_s8(a: int8x16_t) -> int8x16_t {
|
||||
// The index list reverses lanes inside each run of eight: the eight 8-bit lanes of every 64-bit group.
simd_shuffle16(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8])
|
||||
simd_shuffle16!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3913,7 +3913,7 @@ pub unsafe fn vrev64q_s8(a: int8x16_t) -> int8x16_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))]
|
||||
pub unsafe fn vrev64_s16(a: int16x4_t) -> int16x4_t {
|
||||
// The index list reverses lanes inside each run of four: the four 16-bit lanes of every 64-bit group.
simd_shuffle4(a, a, [3, 2, 1, 0])
|
||||
simd_shuffle4!(a, a, [3, 2, 1, 0])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3923,7 +3923,7 @@ pub unsafe fn vrev64_s16(a: int16x4_t) -> int16x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))]
|
||||
pub unsafe fn vrev64q_s16(a: int16x8_t) -> int16x8_t {
|
||||
// The index list reverses lanes inside each run of four: the four 16-bit lanes of every 64-bit group.
simd_shuffle8(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
|
||||
simd_shuffle8!(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3933,7 +3933,7 @@ pub unsafe fn vrev64q_s16(a: int16x8_t) -> int16x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))]
|
||||
pub unsafe fn vrev64_s32(a: int32x2_t) -> int32x2_t {
|
||||
// The index list swaps adjacent lane pairs: the two 32-bit lanes of every 64-bit group.
simd_shuffle2(a, a, [1, 0])
|
||||
simd_shuffle2!(a, a, [1, 0])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3943,7 +3943,7 @@ pub unsafe fn vrev64_s32(a: int32x2_t) -> int32x2_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))]
|
||||
pub unsafe fn vrev64q_s32(a: int32x4_t) -> int32x4_t {
|
||||
// The index list swaps adjacent lane pairs: the two 32-bit lanes of every 64-bit group.
simd_shuffle4(a, a, [1, 0, 3, 2])
|
||||
simd_shuffle4!(a, a, [1, 0, 3, 2])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3953,7 +3953,7 @@ pub unsafe fn vrev64q_s32(a: int32x4_t) -> int32x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))]
|
||||
pub unsafe fn vrev64_u8(a: uint8x8_t) -> uint8x8_t {
|
||||
// The index list reverses lanes inside each run of eight: the eight 8-bit lanes of every 64-bit group.
simd_shuffle8(a, a, [7, 6, 5, 4, 3, 2, 1, 0])
|
||||
simd_shuffle8!(a, a, [7, 6, 5, 4, 3, 2, 1, 0])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3963,7 +3963,7 @@ pub unsafe fn vrev64_u8(a: uint8x8_t) -> uint8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))]
|
||||
pub unsafe fn vrev64q_u8(a: uint8x16_t) -> uint8x16_t {
|
||||
// The index list reverses lanes inside each run of eight: the eight 8-bit lanes of every 64-bit group.
simd_shuffle16(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8])
|
||||
simd_shuffle16!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3973,7 +3973,7 @@ pub unsafe fn vrev64q_u8(a: uint8x16_t) -> uint8x16_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))]
|
||||
pub unsafe fn vrev64_u16(a: uint16x4_t) -> uint16x4_t {
|
||||
// The index list reverses lanes inside each run of four: the four 16-bit lanes of every 64-bit group.
simd_shuffle4(a, a, [3, 2, 1, 0])
|
||||
simd_shuffle4!(a, a, [3, 2, 1, 0])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3983,7 +3983,7 @@ pub unsafe fn vrev64_u16(a: uint16x4_t) -> uint16x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))]
|
||||
pub unsafe fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t {
|
||||
// The index list reverses lanes inside each run of four: the four 16-bit lanes of every 64-bit group.
simd_shuffle8(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
|
||||
simd_shuffle8!(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -3993,7 +3993,7 @@ pub unsafe fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))]
|
||||
pub unsafe fn vrev64_u32(a: uint32x2_t) -> uint32x2_t {
|
||||
// The index list swaps adjacent lane pairs: the two 32-bit lanes of every 64-bit group.
simd_shuffle2(a, a, [1, 0])
|
||||
simd_shuffle2!(a, a, [1, 0])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -4003,7 +4003,7 @@ pub unsafe fn vrev64_u32(a: uint32x2_t) -> uint32x2_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))]
|
||||
pub unsafe fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t {
|
||||
// The index list swaps adjacent lane pairs: the two 32-bit lanes of every 64-bit group.
simd_shuffle4(a, a, [1, 0, 3, 2])
|
||||
simd_shuffle4!(a, a, [1, 0, 3, 2])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -4013,7 +4013,7 @@ pub unsafe fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))]
|
||||
pub unsafe fn vrev64_f32(a: float32x2_t) -> float32x2_t {
|
||||
// The index list swaps adjacent lane pairs: the two 32-bit lanes of every 64-bit group.
simd_shuffle2(a, a, [1, 0])
|
||||
simd_shuffle2!(a, a, [1, 0])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -4023,7 +4023,7 @@ pub unsafe fn vrev64_f32(a: float32x2_t) -> float32x2_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))]
|
||||
pub unsafe fn vrev64q_f32(a: float32x4_t) -> float32x4_t {
|
||||
// The index list swaps adjacent lane pairs: the two 32-bit lanes of every 64-bit group.
simd_shuffle4(a, a, [1, 0, 3, 2])
|
||||
simd_shuffle4!(a, a, [1, 0, 3, 2])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -4033,7 +4033,7 @@ pub unsafe fn vrev64q_f32(a: float32x4_t) -> float32x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))]
|
||||
pub unsafe fn vrev64_p8(a: poly8x8_t) -> poly8x8_t {
|
||||
// The index list reverses lanes inside each run of eight: the eight 8-bit lanes of every 64-bit group.
simd_shuffle8(a, a, [7, 6, 5, 4, 3, 2, 1, 0])
|
||||
simd_shuffle8!(a, a, [7, 6, 5, 4, 3, 2, 1, 0])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -4043,7 +4043,7 @@ pub unsafe fn vrev64_p8(a: poly8x8_t) -> poly8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))]
|
||||
pub unsafe fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t {
|
||||
// The index list reverses lanes inside each run of eight: the eight 8-bit lanes of every 64-bit group.
simd_shuffle16(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8])
|
||||
simd_shuffle16!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -4053,7 +4053,7 @@ pub unsafe fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))]
|
||||
pub unsafe fn vrev64_p16(a: poly16x4_t) -> poly16x4_t {
|
||||
// The index list reverses lanes inside each run of four: the four 16-bit lanes of every 64-bit group.
simd_shuffle4(a, a, [3, 2, 1, 0])
|
||||
simd_shuffle4!(a, a, [3, 2, 1, 0])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
|
|
@ -4063,7 +4063,7 @@ pub unsafe fn vrev64_p16(a: poly16x4_t) -> poly16x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))]
|
||||
pub unsafe fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t {
|
||||
// The index list reverses lanes inside each run of four: the four 16-bit lanes of every 64-bit group.
simd_shuffle8(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
|
||||
simd_shuffle8!(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
|
||||
}
|
||||
|
||||
/// Signed Add and Accumulate Long Pairwise.
|
||||
|
|
|
|||
|
|
@ -92,3 +92,99 @@ macro_rules! types {
|
|||
pub struct $name($($fields)*);
|
||||
)*)
|
||||
}
|
||||
|
||||
/// Calls `simd_shuffle2($x, $y, idx)` with the two-entry index array hoisted into a
/// `const` item, so the indices are always a compile-time constant.
///
/// Two call shapes are accepted (a trailing comma is allowed in both):
///
/// * `simd_shuffle2!(x, y, [i0, i1])` — the index expression stands alone.
/// * `simd_shuffle2!(x, y, <const IMM: i32> [ ...uses IMM... ])` — the index expression
///   mentions const generic parameters of the enclosing function; list each one, with
///   its type, between the angle brackets.
// Presumably not every module that can see this macro uses it, hence the allow.
#[allow(unused_macros)]
macro_rules! simd_shuffle2 {
    // A `const` item nested in a function cannot name that function's generic
    // parameters, so the index array becomes an associated const of a helper struct
    // that re-declares the listed const parameters.
    ($x:expr, $y:expr, <$(const $imm:ident : $ty:ty),+> $idx:expr $(,)?) => {{
        struct ConstParam<$(const $imm: $ty),+>;
        impl<$(const $imm: $ty),+> ConstParam<$($imm),+> {
            const IDX: [u32; 2] = $idx;
        }

        simd_shuffle2($x, $y, ConstParam::<$($imm),+>::IDX)
    }};
    // No generics involved: a plain block-local `const` is enough.
    ($x:expr, $y:expr, $idx:expr $(,)?) => {{
        const IDX: [u32; 2] = $idx;
        simd_shuffle2($x, $y, IDX)
    }};
}
|
||||
|
||||
/// Calls `simd_shuffle4($x, $y, idx)` with the four-entry index array hoisted into a
/// `const` item, so the indices are always a compile-time constant.
///
/// Two call shapes are accepted (a trailing comma is allowed in both):
///
/// * `simd_shuffle4!(x, y, [i0, i1, i2, i3])` — the index expression stands alone.
/// * `simd_shuffle4!(x, y, <const IMM: i32> [ ...uses IMM... ])` — the index expression
///   mentions const generic parameters of the enclosing function; list each one, with
///   its type, between the angle brackets.
// Presumably not every module that can see this macro uses it, hence the allow.
#[allow(unused_macros)]
macro_rules! simd_shuffle4 {
    // A `const` item nested in a function cannot name that function's generic
    // parameters, so the index array becomes an associated const of a helper struct
    // that re-declares the listed const parameters.
    ($x:expr, $y:expr, <$(const $imm:ident : $ty:ty),+> $idx:expr $(,)?) => {{
        struct ConstParam<$(const $imm: $ty),+>;
        impl<$(const $imm: $ty),+> ConstParam<$($imm),+> {
            const IDX: [u32; 4] = $idx;
        }

        simd_shuffle4($x, $y, ConstParam::<$($imm),+>::IDX)
    }};
    // No generics involved: a plain block-local `const` is enough.
    ($x:expr, $y:expr, $idx:expr $(,)?) => {{
        const IDX: [u32; 4] = $idx;
        simd_shuffle4($x, $y, IDX)
    }};
}
|
||||
|
||||
/// Calls `simd_shuffle8($x, $y, idx)` with the eight-entry index array hoisted into a
/// `const` item, so the indices are always a compile-time constant.
///
/// Two call shapes are accepted (a trailing comma is allowed in both):
///
/// * `simd_shuffle8!(x, y, [i0, ..., i7])` — the index expression stands alone.
/// * `simd_shuffle8!(x, y, <const IMM: i32> [ ...uses IMM... ])` — the index expression
///   mentions const generic parameters of the enclosing function; list each one, with
///   its type, between the angle brackets.
// Presumably not every module that can see this macro uses it, hence the allow.
#[allow(unused_macros)]
macro_rules! simd_shuffle8 {
    // A `const` item nested in a function cannot name that function's generic
    // parameters, so the index array becomes an associated const of a helper struct
    // that re-declares the listed const parameters.
    ($x:expr, $y:expr, <$(const $imm:ident : $ty:ty),+> $idx:expr $(,)?) => {{
        struct ConstParam<$(const $imm: $ty),+>;
        impl<$(const $imm: $ty),+> ConstParam<$($imm),+> {
            const IDX: [u32; 8] = $idx;
        }

        simd_shuffle8($x, $y, ConstParam::<$($imm),+>::IDX)
    }};
    // No generics involved: a plain block-local `const` is enough.
    ($x:expr, $y:expr, $idx:expr $(,)?) => {{
        const IDX: [u32; 8] = $idx;
        simd_shuffle8($x, $y, IDX)
    }};
}
|
||||
|
||||
/// Calls `simd_shuffle16($x, $y, idx)` with the sixteen-entry index array hoisted into
/// a `const` item, so the indices are always a compile-time constant.
///
/// Two call shapes are accepted (a trailing comma is allowed in both):
///
/// * `simd_shuffle16!(x, y, [i0, ..., i15])` — the index expression stands alone.
/// * `simd_shuffle16!(x, y, <const IMM: i32> [ ...uses IMM... ])` — the index expression
///   mentions const generic parameters of the enclosing function; list each one, with
///   its type, between the angle brackets.
// Presumably not every module that can see this macro uses it, hence the allow.
#[allow(unused_macros)]
macro_rules! simd_shuffle16 {
    // A `const` item nested in a function cannot name that function's generic
    // parameters, so the index array becomes an associated const of a helper struct
    // that re-declares the listed const parameters.
    ($x:expr, $y:expr, <$(const $imm:ident : $ty:ty),+> $idx:expr $(,)?) => {{
        struct ConstParam<$(const $imm: $ty),+>;
        impl<$(const $imm: $ty),+> ConstParam<$($imm),+> {
            const IDX: [u32; 16] = $idx;
        }

        simd_shuffle16($x, $y, ConstParam::<$($imm),+>::IDX)
    }};
    // No generics involved: a plain block-local `const` is enough.
    ($x:expr, $y:expr, $idx:expr $(,)?) => {{
        const IDX: [u32; 16] = $idx;
        simd_shuffle16($x, $y, IDX)
    }};
}
|
||||
|
||||
/// Calls `simd_shuffle32($x, $y, idx)` with the 32-entry index array hoisted into a
/// `const` item, so the indices are always a compile-time constant.
///
/// Two call shapes are accepted (a trailing comma is allowed in both):
///
/// * `simd_shuffle32!(x, y, [i0, ..., i31])` — the index expression stands alone.
/// * `simd_shuffle32!(x, y, <const IMM: i32> [ ...uses IMM... ])` — the index expression
///   mentions const generic parameters of the enclosing function; list each one, with
///   its type, between the angle brackets.
// Presumably not every module that can see this macro uses it, hence the allow.
#[allow(unused_macros)]
macro_rules! simd_shuffle32 {
    // A `const` item nested in a function cannot name that function's generic
    // parameters, so the index array becomes an associated const of a helper struct
    // that re-declares the listed const parameters.
    ($x:expr, $y:expr, <$(const $imm:ident : $ty:ty),+> $idx:expr $(,)?) => {{
        struct ConstParam<$(const $imm: $ty),+>;
        impl<$(const $imm: $ty),+> ConstParam<$($imm),+> {
            const IDX: [u32; 32] = $idx;
        }

        simd_shuffle32($x, $y, ConstParam::<$($imm),+>::IDX)
    }};
    // No generics involved: a plain block-local `const` is enough.
    ($x:expr, $y:expr, $idx:expr $(,)?) => {{
        const IDX: [u32; 32] = $idx;
        simd_shuffle32($x, $y, IDX)
    }};
}
|
||||
|
||||
/// Calls `simd_shuffle64($x, $y, idx)` with the 64-entry index array hoisted into a
/// `const` item, so the indices are always a compile-time constant.
///
/// Two call shapes are accepted (a trailing comma is allowed in both):
///
/// * `simd_shuffle64!(x, y, [i0, ..., i63])` — the index expression stands alone.
/// * `simd_shuffle64!(x, y, <const IMM: i32> [ ...uses IMM... ])` — the index expression
///   mentions const generic parameters of the enclosing function; list each one, with
///   its type, between the angle brackets.
// Presumably not every module that can see this macro uses it, hence the allow.
#[allow(unused_macros)]
macro_rules! simd_shuffle64 {
    // A `const` item nested in a function cannot name that function's generic
    // parameters, so the index array becomes an associated const of a helper struct
    // that re-declares the listed const parameters.
    ($x:expr, $y:expr, <$(const $imm:ident : $ty:ty),+> $idx:expr $(,)?) => {{
        struct ConstParam<$(const $imm: $ty),+>;
        impl<$(const $imm: $ty),+> ConstParam<$($imm),+> {
            const IDX: [u32; 64] = $idx;
        }

        simd_shuffle64($x, $y, ConstParam::<$($imm),+>::IDX)
    }};
    // No generics involved: a plain block-local `const` is enough.
    ($x:expr, $y:expr, $idx:expr $(,)?) => {{
        const IDX: [u32; 64] = $idx;
        simd_shuffle64($x, $y, IDX)
    }};
}
|
||||
|
|
|
|||
|
|
@ -47,10 +47,10 @@ mod sealed {
|
|||
#[cfg_attr(all(test, target_endian = "big"), assert_instr(xxspltd, dm = 0x0))]
|
||||
unsafe fn xxpermdi(a: i64x2, b: i64x2, dm: u8) -> i64x2 {
|
||||
// Only the low two bits of `dm` are inspected; the catch-all arm therefore handles 3.
// Every arm passes a literal index pair: the first entry is 0 or 1 and the second is
// 2 or 3, which under the shuffle's index-into-(a, b) convention means one lane from
// `a` followed by one lane from `b`.
// The four arms are listed twice: function-call spelling first, macro spelling second.
match dm & 0b11 {
|
||||
0 => simd_shuffle2(a, b, [0b00, 0b10]),
|
||||
1 => simd_shuffle2(a, b, [0b01, 0b10]),
|
||||
2 => simd_shuffle2(a, b, [0b00, 0b11]),
|
||||
_ => simd_shuffle2(a, b, [0b01, 0b11]),
|
||||
0 => simd_shuffle2!(a, b, [0b00, 0b10]),
|
||||
1 => simd_shuffle2!(a, b, [0b01, 0b10]),
|
||||
2 => simd_shuffle2!(a, b, [0b00, 0b11]),
|
||||
_ => simd_shuffle2!(a, b, [0b01, 0b11]),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -118,10 +118,10 @@ pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
|
||||
static_assert_imm8!(MASK);
|
||||
simd_shuffle4(
|
||||
simd_shuffle4!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
<const MASK: i32> [
|
||||
MASK as u32 & 0b1,
|
||||
((MASK as u32 >> 1) & 0b1) + 4,
|
||||
((MASK as u32 >> 2) & 0b1) + 2,
|
||||
|
|
@ -141,10 +141,10 @@ pub unsafe fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m2
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
|
||||
static_assert_imm8!(MASK);
|
||||
simd_shuffle8(
|
||||
simd_shuffle8!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
<const MASK: i32> [
|
||||
MASK as u32 & 0b11,
|
||||
(MASK as u32 >> 2) & 0b11,
|
||||
((MASK as u32 >> 4) & 0b11) + 8,
|
||||
|
|
@ -463,10 +463,10 @@ pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256d {
|
||||
static_assert_imm4!(IMM4);
|
||||
simd_shuffle4(
|
||||
simd_shuffle4!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
<const IMM4: i32> [
|
||||
((IMM4 as u32 >> 0) & 1) * 4 + 0,
|
||||
((IMM4 as u32 >> 1) & 1) * 4 + 1,
|
||||
((IMM4 as u32 >> 2) & 1) * 4 + 2,
|
||||
|
|
@ -486,10 +486,10 @@ pub unsafe fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
|
||||
static_assert_imm8!(IMM8);
|
||||
simd_shuffle8(
|
||||
simd_shuffle8!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
<const IMM8: i32> [
|
||||
((IMM8 as u32 >> 0) & 1) * 8 + 0,
|
||||
((IMM8 as u32 >> 1) & 1) * 8 + 1,
|
||||
((IMM8 as u32 >> 2) & 1) * 8 + 2,
|
||||
|
|
@ -930,10 +930,10 @@ pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128 {
|
||||
// NOTE(review): presumably a compile-time check that IMM1 fits in one bit — confirm
// against the macro's definition.
static_assert_imm1!(IMM1);
|
||||
simd_shuffle4(
|
||||
simd_shuffle4!(
|
||||
a,
|
||||
_mm256_undefined_ps(),
|
||||
// `IMM1` picks a row of a two-row table: row 0 is indices 0..=3, row 1 is 4..=7.
// Both rows stay below 8, so only lanes of `a` are named.
// The `<const IMM1: i32>` prefix on the second spelling selects the macro arm that
// evaluates the index expression as an associated const generic over `IMM1`.
[[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize],
|
||||
<const IMM1: i32> [[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize],
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -951,7 +951,7 @@ pub unsafe fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128 {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
|
||||
// NOTE(review): presumably a compile-time check that IMM1 fits in one bit — confirm
// against the macro's definition.
static_assert_imm1!(IMM1);
|
||||
// `IMM1` picks a row of a two-row table: row 0 is indices 0 and 1, row 1 is 2 and 3.
// Both rows stay below 4, so only lanes of `a` are named.
// The `<const IMM1: i32>` prefix on the second spelling selects the macro arm that
// evaluates the index expression as an associated const generic over `IMM1`.
simd_shuffle2(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize])
|
||||
simd_shuffle2!(a, _mm256_undefined_pd(), <const IMM1: i32> [[0, 1], [2, 3]][IMM1 as usize])
|
||||
}
|
||||
|
||||
/// Extracts 128 bits (composed of integer data) from `a`, selected with `imm8`.
|
||||
|
|
@ -967,10 +967,10 @@ pub unsafe fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
|
||||
// NOTE(review): presumably a compile-time check that IMM1 fits in one bit — confirm
// against the macro's definition.
static_assert_imm1!(IMM1);
|
||||
// The shuffle runs on the i64x4 view of the input; `dst` is transmuted back to
// `__m128i` on the way out.
let dst: i64x2 = simd_shuffle2(
|
||||
let dst: i64x2 = simd_shuffle2!(
|
||||
a.as_i64x4(),
|
||||
_mm256_undefined_si256().as_i64x4(),
|
||||
// `IMM1` picks a row of a two-row table: row 0 is indices 0 and 1, row 1 is 2 and 3.
// The `<const IMM1: i32>` prefix on the second spelling selects the macro arm that
// evaluates the index expression as an associated const generic over `IMM1`.
[[0, 1], [2, 3]][IMM1 as usize],
|
||||
<const IMM1: i32> [[0, 1], [2, 3]][IMM1 as usize],
|
||||
);
|
||||
transmute(dst)
|
||||
}
|
||||
|
|
@ -1033,10 +1033,10 @@ pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
|
||||
static_assert_imm8!(IMM8);
|
||||
simd_shuffle8(
|
||||
simd_shuffle8!(
|
||||
a,
|
||||
_mm256_undefined_ps(),
|
||||
[
|
||||
<const IMM8: i32> [
|
||||
(IMM8 as u32 >> 0) & 0b11,
|
||||
(IMM8 as u32 >> 2) & 0b11,
|
||||
(IMM8 as u32 >> 4) & 0b11,
|
||||
|
|
@ -1060,10 +1060,10 @@ pub unsafe fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
|
||||
static_assert_imm8!(IMM8);
|
||||
simd_shuffle4(
|
||||
simd_shuffle4!(
|
||||
a,
|
||||
_mm_undefined_ps(),
|
||||
[
|
||||
<const IMM8: i32> [
|
||||
(IMM8 as u32 >> 0) & 0b11,
|
||||
(IMM8 as u32 >> 2) & 0b11,
|
||||
(IMM8 as u32 >> 4) & 0b11,
|
||||
|
|
@ -1107,10 +1107,10 @@ pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
|
||||
static_assert_imm4!(IMM4);
|
||||
simd_shuffle4(
|
||||
simd_shuffle4!(
|
||||
a,
|
||||
_mm256_undefined_pd(),
|
||||
[
|
||||
<const IMM4: i32> [
|
||||
((IMM4 as u32 >> 0) & 1),
|
||||
((IMM4 as u32 >> 1) & 1),
|
||||
((IMM4 as u32 >> 2) & 1) + 2,
|
||||
|
|
@ -1130,10 +1130,10 @@ pub unsafe fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d {
|
||||
static_assert_imm2!(IMM2);
|
||||
simd_shuffle2(
|
||||
simd_shuffle2!(
|
||||
a,
|
||||
_mm_undefined_pd(),
|
||||
[(IMM2 as u32) & 1, (IMM2 as u32 >> 1) & 1],
|
||||
<const IMM2: i32> [(IMM2 as u32) & 1, (IMM2 as u32 >> 1) & 1],
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -1257,10 +1257,10 @@ pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
|
||||
static_assert_imm1!(IMM1);
|
||||
simd_shuffle8(
|
||||
simd_shuffle8!(
|
||||
a,
|
||||
_mm256_castps128_ps256(b),
|
||||
[[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize],
|
||||
<const IMM1: i32> [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize],
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -1279,10 +1279,10 @@ pub unsafe fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d {
|
||||
static_assert_imm1!(IMM1);
|
||||
simd_shuffle4(
|
||||
simd_shuffle4!(
|
||||
a,
|
||||
_mm256_castpd128_pd256(b),
|
||||
[[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
|
||||
<const IMM1: i32> [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -1300,10 +1300,10 @@ pub unsafe fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> _
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
|
||||
static_assert_imm1!(IMM1);
|
||||
let dst: i64x4 = simd_shuffle4(
|
||||
let dst: i64x4 = simd_shuffle4!(
|
||||
a.as_i64x4(),
|
||||
_mm256_castsi128_si256(b).as_i64x4(),
|
||||
[[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
|
||||
<const IMM1: i32> [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
|
||||
);
|
||||
transmute(dst)
|
||||
}
|
||||
|
|
@ -1639,7 +1639,7 @@ pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
|
|||
#[cfg_attr(test, assert_instr(vmovshdup))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 {
|
||||
simd_shuffle8(a, a, [1, 1, 3, 3, 5, 5, 7, 7])
|
||||
simd_shuffle8!(a, a, [1, 1, 3, 3, 5, 5, 7, 7])
|
||||
}
|
||||
|
||||
/// Duplicates even-indexed single-precision (32-bit) floating-point elements
|
||||
|
|
@ -1651,7 +1651,7 @@ pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 {
|
|||
#[cfg_attr(test, assert_instr(vmovsldup))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 {
|
||||
simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6])
|
||||
simd_shuffle8!(a, a, [0, 0, 2, 2, 4, 4, 6, 6])
|
||||
}
|
||||
|
||||
/// Duplicates even-indexed double-precision (64-bit) floating-point elements
|
||||
|
|
@ -1663,7 +1663,7 @@ pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 {
|
|||
#[cfg_attr(test, assert_instr(vmovddup))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_movedup_pd(a: __m256d) -> __m256d {
|
||||
simd_shuffle4(a, a, [0, 0, 2, 2])
|
||||
simd_shuffle4!(a, a, [0, 0, 2, 2])
|
||||
}
|
||||
|
||||
/// Loads 256-bits of integer data from unaligned memory into result.
|
||||
|
|
@ -1756,7 +1756,7 @@ pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
|
|||
#[cfg_attr(test, assert_instr(vunpckhpd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
|
||||
simd_shuffle4(a, b, [1, 5, 3, 7])
|
||||
simd_shuffle4!(a, b, [1, 5, 3, 7])
|
||||
}
|
||||
|
||||
/// Unpacks and interleaves single-precision (32-bit) floating-point elements
|
||||
|
|
@ -1768,7 +1768,7 @@ pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
|
|||
#[cfg_attr(test, assert_instr(vunpckhps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
|
||||
simd_shuffle8(a, b, [2, 10, 3, 11, 6, 14, 7, 15])
|
||||
simd_shuffle8!(a, b, [2, 10, 3, 11, 6, 14, 7, 15])
|
||||
}
|
||||
|
||||
/// Unpacks and interleaves double-precision (64-bit) floating-point elements
|
||||
|
|
@ -1780,7 +1780,7 @@ pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
|
|||
#[cfg_attr(test, assert_instr(vunpcklpd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
|
||||
simd_shuffle4(a, b, [0, 4, 2, 6])
|
||||
simd_shuffle4!(a, b, [0, 4, 2, 6])
|
||||
}
|
||||
|
||||
/// Unpacks and interleaves single-precision (32-bit) floating-point elements
|
||||
|
|
@ -1792,7 +1792,7 @@ pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
|
|||
#[cfg_attr(test, assert_instr(vunpcklps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
|
||||
simd_shuffle8(a, b, [0, 8, 1, 9, 4, 12, 5, 13])
|
||||
simd_shuffle8!(a, b, [0, 8, 1, 9, 4, 12, 5, 13])
|
||||
}
|
||||
|
||||
/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
|
||||
|
|
@ -2572,7 +2572,7 @@ pub unsafe fn _mm256_castsi256_pd(a: __m256i) -> __m256d {
|
|||
// instructions, thus it has zero latency.
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 {
|
||||
simd_shuffle4(a, a, [0, 1, 2, 3])
|
||||
simd_shuffle4!(a, a, [0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Casts vector of type __m256d to type __m128d.
|
||||
|
|
@ -2584,7 +2584,7 @@ pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 {
|
|||
// instructions, thus it has zero latency.
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
|
||||
simd_shuffle2(a, a, [0, 1])
|
||||
simd_shuffle2!(a, a, [0, 1])
|
||||
}
|
||||
|
||||
/// Casts vector of type __m256i to type __m128i.
|
||||
|
|
@ -2597,7 +2597,7 @@ pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
|
||||
let a = a.as_i64x4();
|
||||
let dst: i64x2 = simd_shuffle2(a, a, [0, 1]);
|
||||
let dst: i64x2 = simd_shuffle2!(a, a, [0, 1]);
|
||||
transmute(dst)
|
||||
}
|
||||
|
||||
|
|
@ -2611,8 +2611,8 @@ pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
|
|||
// instructions, thus it has zero latency.
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
|
||||
// FIXME simd_shuffle8(a, a, [0, 1, 2, 3, -1, -1, -1, -1])
|
||||
simd_shuffle8(a, a, [0, 1, 2, 3, 0, 0, 0, 0])
|
||||
// FIXME simd_shuffle8!(a, a, [0, 1, 2, 3, -1, -1, -1, -1])
|
||||
simd_shuffle8!(a, a, [0, 1, 2, 3, 0, 0, 0, 0])
|
||||
}
|
||||
|
||||
/// Casts vector of type __m128d to type __m256d;
|
||||
|
|
@ -2625,8 +2625,8 @@ pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
|
|||
// instructions, thus it has zero latency.
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
|
||||
// FIXME simd_shuffle4(a, a, [0, 1, -1, -1])
|
||||
simd_shuffle4(a, a, [0, 1, 0, 0])
|
||||
// FIXME simd_shuffle4!(a, a, [0, 1, -1, -1])
|
||||
simd_shuffle4!(a, a, [0, 1, 0, 0])
|
||||
}
|
||||
|
||||
/// Casts vector of type __m128i to type __m256i;
|
||||
|
|
@ -2640,8 +2640,8 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
|
||||
let a = a.as_i64x2();
|
||||
// FIXME simd_shuffle4(a, a, [0, 1, -1, -1])
|
||||
let dst: i64x4 = simd_shuffle4(a, a, [0, 1, 0, 0]);
|
||||
// FIXME simd_shuffle4!(a, a, [0, 1, -1, -1])
|
||||
let dst: i64x4 = simd_shuffle4!(a, a, [0, 1, 0, 0]);
|
||||
transmute(dst)
|
||||
}
|
||||
|
||||
|
|
@ -2656,7 +2656,7 @@ pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
|
|||
// instructions, thus it has zero latency.
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
|
||||
simd_shuffle8(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
simd_shuffle8!(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
}
|
||||
|
||||
/// Constructs a 256-bit integer vector from a 128-bit integer vector.
|
||||
|
|
@ -2671,7 +2671,7 @@ pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
|
||||
let b = _mm_setzero_si128().as_i64x2();
|
||||
let dst: i64x4 = simd_shuffle4(a.as_i64x2(), b, [0, 1, 2, 3]);
|
||||
let dst: i64x4 = simd_shuffle4!(a.as_i64x2(), b, [0, 1, 2, 3]);
|
||||
transmute(dst)
|
||||
}
|
||||
|
||||
|
|
@ -2687,7 +2687,7 @@ pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
|
|||
// instructions, thus it has zero latency.
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
|
||||
simd_shuffle4(a, _mm_setzero_pd(), [0, 1, 2, 3])
|
||||
simd_shuffle4!(a, _mm_setzero_pd(), [0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Returns vector of type `__m256` with undefined elements.
|
||||
|
|
@ -2732,7 +2732,7 @@ pub unsafe fn _mm256_undefined_si256() -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vinsertf128))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 {
|
||||
simd_shuffle8(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
simd_shuffle8!(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
}
|
||||
|
||||
/// Sets packed __m256d returned vector with the supplied values.
|
||||
|
|
|
|||
|
|
@ -175,7 +175,7 @@ pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
|
|||
let b = b.as_i8x32();
|
||||
|
||||
let r: i8x32 = match IMM8 % 16 {
|
||||
0 => simd_shuffle32(
|
||||
0 => simd_shuffle32!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -183,7 +183,7 @@ pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
|
|||
23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
],
|
||||
),
|
||||
1 => simd_shuffle32(
|
||||
1 => simd_shuffle32!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -191,7 +191,7 @@ pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
|
|||
24, 25, 26, 27, 28, 29, 30, 31, 48,
|
||||
],
|
||||
),
|
||||
2 => simd_shuffle32(
|
||||
2 => simd_shuffle32!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -199,7 +199,7 @@ pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
|
|||
25, 26, 27, 28, 29, 30, 31, 48, 49,
|
||||
],
|
||||
),
|
||||
3 => simd_shuffle32(
|
||||
3 => simd_shuffle32!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -207,7 +207,7 @@ pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
|
|||
25, 26, 27, 28, 29, 30, 31, 48, 49, 50,
|
||||
],
|
||||
),
|
||||
4 => simd_shuffle32(
|
||||
4 => simd_shuffle32!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -215,7 +215,7 @@ pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
|
|||
26, 27, 28, 29, 30, 31, 48, 49, 50, 51,
|
||||
],
|
||||
),
|
||||
5 => simd_shuffle32(
|
||||
5 => simd_shuffle32!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -223,7 +223,7 @@ pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
|
|||
27, 28, 29, 30, 31, 48, 49, 50, 51, 52,
|
||||
],
|
||||
),
|
||||
6 => simd_shuffle32(
|
||||
6 => simd_shuffle32!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -231,7 +231,7 @@ pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
|
|||
28, 29, 30, 31, 48, 49, 50, 51, 52, 53,
|
||||
],
|
||||
),
|
||||
7 => simd_shuffle32(
|
||||
7 => simd_shuffle32!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -239,7 +239,7 @@ pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
|
|||
28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54,
|
||||
],
|
||||
),
|
||||
8 => simd_shuffle32(
|
||||
8 => simd_shuffle32!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -247,7 +247,7 @@ pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
|
|||
29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55,
|
||||
],
|
||||
),
|
||||
9 => simd_shuffle32(
|
||||
9 => simd_shuffle32!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -255,7 +255,7 @@ pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
|
|||
30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56,
|
||||
],
|
||||
),
|
||||
10 => simd_shuffle32(
|
||||
10 => simd_shuffle32!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -263,7 +263,7 @@ pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
|
|||
31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
|
||||
],
|
||||
),
|
||||
11 => simd_shuffle32(
|
||||
11 => simd_shuffle32!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -271,7 +271,7 @@ pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
|
|||
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
|
||||
],
|
||||
),
|
||||
12 => simd_shuffle32(
|
||||
12 => simd_shuffle32!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -279,7 +279,7 @@ pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
|
|||
49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
|
||||
],
|
||||
),
|
||||
13 => simd_shuffle32(
|
||||
13 => simd_shuffle32!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -287,7 +287,7 @@ pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
|
|||
50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
|
||||
],
|
||||
),
|
||||
14 => simd_shuffle32(
|
||||
14 => simd_shuffle32!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -295,7 +295,7 @@ pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
|
|||
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
|
||||
],
|
||||
),
|
||||
15 => simd_shuffle32(
|
||||
15 => simd_shuffle32!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -370,10 +370,10 @@ pub unsafe fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128
|
|||
static_assert_imm4!(IMM4);
|
||||
let a = a.as_i32x4();
|
||||
let b = b.as_i32x4();
|
||||
let r: i32x4 = simd_shuffle4(
|
||||
let r: i32x4 = simd_shuffle4!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
<const IMM4: i32> [
|
||||
[0, 4, 0, 4][IMM4 as usize & 0b11],
|
||||
[1, 1, 5, 5][IMM4 as usize & 0b11],
|
||||
[2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11],
|
||||
|
|
@ -395,10 +395,10 @@ pub unsafe fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
|
|||
static_assert_imm8!(IMM8);
|
||||
let a = a.as_i32x8();
|
||||
let b = b.as_i32x8();
|
||||
let r: i32x8 = simd_shuffle8(
|
||||
let r: i32x8 = simd_shuffle8!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
<const IMM8: i32> [
|
||||
[0, 8, 0, 8][IMM8 as usize & 0b11],
|
||||
[1, 1, 9, 9][IMM8 as usize & 0b11],
|
||||
[2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11],
|
||||
|
|
@ -424,10 +424,11 @@ pub unsafe fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
|
|||
static_assert_imm8!(IMM8);
|
||||
let a = a.as_i16x16();
|
||||
let b = b.as_i16x16();
|
||||
let r: i16x16 = simd_shuffle16(
|
||||
|
||||
let r: i16x16 = simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
<const IMM8: i32> [
|
||||
[0, 16, 0, 16][IMM8 as usize & 0b11],
|
||||
[1, 1, 17, 17][IMM8 as usize & 0b11],
|
||||
[2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11],
|
||||
|
|
@ -470,7 +471,7 @@ pub unsafe fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m25
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
|
||||
let zero = _mm_setzero_si128();
|
||||
let ret = simd_shuffle16(a.as_i8x16(), zero.as_i8x16(), [0_u32; 16]);
|
||||
let ret = simd_shuffle16!(a.as_i8x16(), zero.as_i8x16(), [0_u32; 16]);
|
||||
transmute::<i8x16, _>(ret)
|
||||
}
|
||||
|
||||
|
|
@ -484,7 +485,7 @@ pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
|
||||
let zero = _mm_setzero_si128();
|
||||
let ret = simd_shuffle32(a.as_i8x16(), zero.as_i8x16(), [0_u32; 32]);
|
||||
let ret = simd_shuffle32!(a.as_i8x16(), zero.as_i8x16(), [0_u32; 32]);
|
||||
transmute::<i8x32, _>(ret)
|
||||
}
|
||||
|
||||
|
|
@ -500,7 +501,7 @@ pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
|
||||
let zero = _mm_setzero_si128();
|
||||
let ret = simd_shuffle4(a.as_i32x4(), zero.as_i32x4(), [0_u32; 4]);
|
||||
let ret = simd_shuffle4!(a.as_i32x4(), zero.as_i32x4(), [0_u32; 4]);
|
||||
transmute::<i32x4, _>(ret)
|
||||
}
|
||||
|
||||
|
|
@ -516,7 +517,7 @@ pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
|
||||
let zero = _mm_setzero_si128();
|
||||
let ret = simd_shuffle8(a.as_i32x4(), zero.as_i32x4(), [0_u32; 8]);
|
||||
let ret = simd_shuffle8!(a.as_i32x4(), zero.as_i32x4(), [0_u32; 8]);
|
||||
transmute::<i32x8, _>(ret)
|
||||
}
|
||||
|
||||
|
|
@ -530,7 +531,7 @@ pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vmovddup))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
|
||||
let ret = simd_shuffle2(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
|
||||
let ret = simd_shuffle2!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
|
||||
transmute::<i64x2, _>(ret)
|
||||
}
|
||||
|
||||
|
|
@ -543,7 +544,7 @@ pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(vbroadcastsd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
|
||||
let ret = simd_shuffle4(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
|
||||
let ret = simd_shuffle4!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
|
||||
transmute::<i64x4, _>(ret)
|
||||
}
|
||||
|
||||
|
|
@ -556,7 +557,7 @@ pub unsafe fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vmovddup))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
|
||||
simd_shuffle2(a, _mm_setzero_pd(), [0_u32; 2])
|
||||
simd_shuffle2!(a, _mm_setzero_pd(), [0_u32; 2])
|
||||
}
|
||||
|
||||
/// Broadcasts the low double-precision (64-bit) floating-point element
|
||||
|
|
@ -568,7 +569,7 @@ pub unsafe fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
|
|||
#[cfg_attr(test, assert_instr(vbroadcastsd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
|
||||
simd_shuffle4(a, _mm_setzero_pd(), [0_u32; 4])
|
||||
simd_shuffle4!(a, _mm_setzero_pd(), [0_u32; 4])
|
||||
}
|
||||
|
||||
// N.B., `broadcastsi128_si256` is often compiled to `vinsertf128` or
|
||||
|
|
@ -582,7 +583,7 @@ pub unsafe fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
|
||||
let zero = _mm_setzero_si128();
|
||||
let ret = simd_shuffle4(a.as_i64x2(), zero.as_i64x2(), [0, 1, 0, 1]);
|
||||
let ret = simd_shuffle4!(a.as_i64x2(), zero.as_i64x2(), [0, 1, 0, 1]);
|
||||
transmute::<i64x4, _>(ret)
|
||||
}
|
||||
|
||||
|
|
@ -595,7 +596,7 @@ pub unsafe fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vbroadcastss))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_broadcastss_ps(a: __m128) -> __m128 {
|
||||
simd_shuffle4(a, _mm_setzero_ps(), [0_u32; 4])
|
||||
simd_shuffle4!(a, _mm_setzero_ps(), [0_u32; 4])
|
||||
}
|
||||
|
||||
/// Broadcasts the low single-precision (32-bit) floating-point element
|
||||
|
|
@ -607,7 +608,7 @@ pub unsafe fn _mm_broadcastss_ps(a: __m128) -> __m128 {
|
|||
#[cfg_attr(test, assert_instr(vbroadcastss))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
|
||||
simd_shuffle8(a, _mm_setzero_ps(), [0_u32; 8])
|
||||
simd_shuffle8!(a, _mm_setzero_ps(), [0_u32; 8])
|
||||
}
|
||||
|
||||
/// Broadcasts the low packed 16-bit integer from a to all elements of
|
||||
|
|
@ -620,7 +621,7 @@ pub unsafe fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
|
||||
let zero = _mm_setzero_si128();
|
||||
let ret = simd_shuffle8(a.as_i16x8(), zero.as_i16x8(), [0_u32; 8]);
|
||||
let ret = simd_shuffle8!(a.as_i16x8(), zero.as_i16x8(), [0_u32; 8]);
|
||||
transmute::<i16x8, _>(ret)
|
||||
}
|
||||
|
||||
|
|
@ -634,7 +635,7 @@ pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
|
||||
let zero = _mm_setzero_si128();
|
||||
let ret = simd_shuffle16(a.as_i16x8(), zero.as_i16x8(), [0_u32; 16]);
|
||||
let ret = simd_shuffle16!(a.as_i16x8(), zero.as_i16x8(), [0_u32; 16]);
|
||||
transmute::<i16x16, _>(ret)
|
||||
}
|
||||
|
||||
|
|
@ -746,7 +747,7 @@ pub unsafe fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
|
||||
let a = a.as_i16x8();
|
||||
let v64: i16x4 = simd_shuffle4(a, a, [0, 1, 2, 3]);
|
||||
let v64: i16x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]);
|
||||
transmute::<i64x4, _>(simd_cast(v64))
|
||||
}
|
||||
|
||||
|
|
@ -781,7 +782,7 @@ pub unsafe fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
|
||||
let a = a.as_i8x16();
|
||||
let v64: i8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
let v64: i8x8 = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
transmute::<i32x8, _>(simd_cast(v64))
|
||||
}
|
||||
|
||||
|
|
@ -794,7 +795,7 @@ pub unsafe fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
|
||||
let a = a.as_i8x16();
|
||||
let v32: i8x4 = simd_shuffle4(a, a, [0, 1, 2, 3]);
|
||||
let v32: i8x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]);
|
||||
transmute::<i64x4, _>(simd_cast(v32))
|
||||
}
|
||||
|
||||
|
|
@ -820,7 +821,7 @@ pub unsafe fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
|
||||
let a = a.as_u16x8();
|
||||
let v64: u16x4 = simd_shuffle4(a, a, [0, 1, 2, 3]);
|
||||
let v64: u16x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]);
|
||||
transmute::<i64x4, _>(simd_cast(v64))
|
||||
}
|
||||
|
||||
|
|
@ -856,7 +857,7 @@ pub unsafe fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
|
||||
let a = a.as_u8x16();
|
||||
let v64: u8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
let v64: u8x8 = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
transmute::<i32x8, _>(simd_cast(v64))
|
||||
}
|
||||
|
||||
|
|
@ -870,7 +871,7 @@ pub unsafe fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
|
||||
let a = a.as_u8x16();
|
||||
let v32: u8x4 = simd_shuffle4(a, a, [0, 1, 2, 3]);
|
||||
let v32: u8x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]);
|
||||
transmute::<i64x4, _>(simd_cast(v32))
|
||||
}
|
||||
|
||||
|
|
@ -889,7 +890,7 @@ pub unsafe fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
|
|||
static_assert_imm1!(IMM1);
|
||||
let a = a.as_i64x4();
|
||||
let b = _mm256_undefined_si256().as_i64x4();
|
||||
let dst: i64x2 = simd_shuffle2(a, b, [[0, 1], [2, 3]][IMM1 as usize]);
|
||||
let dst: i64x2 = simd_shuffle2!(a, b, <const IMM1: i32> [[0, 1], [2, 3]][IMM1 as usize]);
|
||||
transmute(dst)
|
||||
}
|
||||
|
||||
|
|
@ -1711,7 +1712,8 @@ pub unsafe fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -
|
|||
static_assert_imm1!(IMM1);
|
||||
let a = a.as_i64x4();
|
||||
let b = _mm256_castsi128_si256(b).as_i64x4();
|
||||
let dst: i64x4 = simd_shuffle4(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]);
|
||||
let dst: i64x4 =
|
||||
simd_shuffle4!(a, b, <const IMM1: i32> [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]);
|
||||
transmute(dst)
|
||||
}
|
||||
|
||||
|
|
@ -2200,10 +2202,10 @@ pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
|
|||
pub unsafe fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
|
||||
static_assert_imm8!(IMM8);
|
||||
let zero = _mm256_setzero_si256().as_i64x4();
|
||||
let r: i64x4 = simd_shuffle4(
|
||||
let r: i64x4 = simd_shuffle4!(
|
||||
a.as_i64x4(),
|
||||
zero,
|
||||
[
|
||||
<const IMM8: i32> [
|
||||
IMM8 as u32 & 0b11,
|
||||
(IMM8 as u32 >> 2) & 0b11,
|
||||
(IMM8 as u32 >> 4) & 0b11,
|
||||
|
|
@ -2237,10 +2239,10 @@ pub unsafe fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i)
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
|
||||
static_assert_imm8!(IMM8);
|
||||
simd_shuffle4(
|
||||
simd_shuffle4!(
|
||||
a,
|
||||
_mm256_undefined_pd(),
|
||||
[
|
||||
<const IMM8: i32> [
|
||||
IMM8 as u32 & 0b11,
|
||||
(IMM8 as u32 >> 2) & 0b11,
|
||||
(IMM8 as u32 >> 4) & 0b11,
|
||||
|
|
@ -2350,10 +2352,10 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
|
||||
static_assert_imm8!(MASK);
|
||||
let r: i32x8 = simd_shuffle8(
|
||||
let r: i32x8 = simd_shuffle8!(
|
||||
a.as_i32x8(),
|
||||
a.as_i32x8(),
|
||||
[
|
||||
<const MASK: i32> [
|
||||
MASK as u32 & 0b11,
|
||||
(MASK as u32 >> 2) & 0b11,
|
||||
(MASK as u32 >> 4) & 0b11,
|
||||
|
|
@ -2380,10 +2382,10 @@ pub unsafe fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
|
|||
pub unsafe fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
|
||||
static_assert_imm8!(IMM8);
|
||||
let a = a.as_i16x16();
|
||||
let r: i16x16 = simd_shuffle16(
|
||||
let r: i16x16 = simd_shuffle16!(
|
||||
a,
|
||||
a,
|
||||
[
|
||||
<const IMM8: i32> [
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
|
|
@ -2418,10 +2420,10 @@ pub unsafe fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
pub unsafe fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
|
||||
static_assert_imm8!(IMM8);
|
||||
let a = a.as_i16x16();
|
||||
let r: i16x16 = simd_shuffle16(
|
||||
let r: i16x16 = simd_shuffle16!(
|
||||
a,
|
||||
a,
|
||||
[
|
||||
<const IMM8: i32> [
|
||||
0 + (IMM8 as u32 & 0b11),
|
||||
0 + ((IMM8 as u32 >> 2) & 0b11),
|
||||
0 + ((IMM8 as u32 >> 4) & 0b11),
|
||||
|
|
@ -2585,10 +2587,10 @@ pub unsafe fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
static_assert_imm8!(IMM8);
|
||||
let a = a.as_i8x32();
|
||||
let zero = _mm256_setzero_si256().as_i8x32();
|
||||
let r: i8x32 = simd_shuffle32(
|
||||
let r: i8x32 = simd_shuffle32!(
|
||||
zero,
|
||||
a,
|
||||
[
|
||||
<const IMM8: i32> [
|
||||
32 - (IMM8 as u32 & 0xff),
|
||||
33 - (IMM8 as u32 & 0xff),
|
||||
34 - (IMM8 as u32 & 0xff),
|
||||
|
|
@ -2780,7 +2782,7 @@ pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
let a = a.as_i8x32();
|
||||
let zero = _mm256_setzero_si256().as_i8x32();
|
||||
let r: i8x32 = match IMM8 % 16 {
|
||||
0 => simd_shuffle32(
|
||||
0 => simd_shuffle32!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -2788,7 +2790,7 @@ pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
],
|
||||
),
|
||||
1 => simd_shuffle32(
|
||||
1 => simd_shuffle32!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -2796,7 +2798,7 @@ pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
24, 25, 26, 27, 28, 29, 30, 31, 32,
|
||||
],
|
||||
),
|
||||
2 => simd_shuffle32(
|
||||
2 => simd_shuffle32!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -2804,7 +2806,7 @@ pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
25, 26, 27, 28, 29, 30, 31, 32, 32,
|
||||
],
|
||||
),
|
||||
3 => simd_shuffle32(
|
||||
3 => simd_shuffle32!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -2812,7 +2814,7 @@ pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
25, 26, 27, 28, 29, 30, 31, 32, 32, 32,
|
||||
],
|
||||
),
|
||||
4 => simd_shuffle32(
|
||||
4 => simd_shuffle32!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -2820,7 +2822,7 @@ pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
26, 27, 28, 29, 30, 31, 32, 32, 32, 32,
|
||||
],
|
||||
),
|
||||
5 => simd_shuffle32(
|
||||
5 => simd_shuffle32!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -2828,7 +2830,7 @@ pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
27, 28, 29, 30, 31, 32, 32, 32, 32, 32,
|
||||
],
|
||||
),
|
||||
6 => simd_shuffle32(
|
||||
6 => simd_shuffle32!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -2836,7 +2838,7 @@ pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
28, 29, 30, 31, 32, 32, 32, 32, 32, 32,
|
||||
],
|
||||
),
|
||||
7 => simd_shuffle32(
|
||||
7 => simd_shuffle32!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -2844,7 +2846,7 @@ pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32,
|
||||
],
|
||||
),
|
||||
8 => simd_shuffle32(
|
||||
8 => simd_shuffle32!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -2852,7 +2854,7 @@ pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
],
|
||||
),
|
||||
9 => simd_shuffle32(
|
||||
9 => simd_shuffle32!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -2860,7 +2862,7 @@ pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
],
|
||||
),
|
||||
10 => simd_shuffle32(
|
||||
10 => simd_shuffle32!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -2868,7 +2870,7 @@ pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
],
|
||||
),
|
||||
11 => simd_shuffle32(
|
||||
11 => simd_shuffle32!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -2876,7 +2878,7 @@ pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
],
|
||||
),
|
||||
12 => simd_shuffle32(
|
||||
12 => simd_shuffle32!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -2884,7 +2886,7 @@ pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
],
|
||||
),
|
||||
13 => simd_shuffle32(
|
||||
13 => simd_shuffle32!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -2892,7 +2894,7 @@ pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
],
|
||||
),
|
||||
14 => simd_shuffle32(
|
||||
14 => simd_shuffle32!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -2900,7 +2902,7 @@ pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
],
|
||||
),
|
||||
15 => simd_shuffle32(
|
||||
15 => simd_shuffle32!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -3178,7 +3180,7 @@ pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
|
||||
#[rustfmt::skip]
|
||||
let r: i8x32 = simd_shuffle32(a.as_i8x32(), b.as_i8x32(), [
|
||||
let r: i8x32 = simd_shuffle32!(a.as_i8x32(), b.as_i8x32(), [
|
||||
8, 40, 9, 41, 10, 42, 11, 43,
|
||||
12, 44, 13, 45, 14, 46, 15, 47,
|
||||
24, 56, 25, 57, 26, 58, 27, 59,
|
||||
|
|
@ -3231,7 +3233,7 @@ pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
|
||||
#[rustfmt::skip]
|
||||
let r: i8x32 = simd_shuffle32(a.as_i8x32(), b.as_i8x32(), [
|
||||
let r: i8x32 = simd_shuffle32!(a.as_i8x32(), b.as_i8x32(), [
|
||||
0, 32, 1, 33, 2, 34, 3, 35,
|
||||
4, 36, 5, 37, 6, 38, 7, 39,
|
||||
16, 48, 17, 49, 18, 50, 19, 51,
|
||||
|
|
@ -3279,7 +3281,7 @@ pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vpunpckhwd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
|
||||
let r: i16x16 = simd_shuffle16(
|
||||
let r: i16x16 = simd_shuffle16!(
|
||||
a.as_i16x16(),
|
||||
b.as_i16x16(),
|
||||
[4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
|
||||
|
|
@ -3327,7 +3329,7 @@ pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vpunpcklwd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
|
||||
let r: i16x16 = simd_shuffle16(
|
||||
let r: i16x16 = simd_shuffle16!(
|
||||
a.as_i16x16(),
|
||||
b.as_i16x16(),
|
||||
[0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
|
||||
|
|
@ -3368,7 +3370,7 @@ pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vunpckhps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
|
||||
let r: i32x8 = simd_shuffle8(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
|
||||
let r: i32x8 = simd_shuffle8!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
|
|
@ -3405,7 +3407,7 @@ pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vunpcklps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
|
||||
let r: i32x8 = simd_shuffle8(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
|
||||
let r: i32x8 = simd_shuffle8!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
|
|
@ -3442,7 +3444,7 @@ pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vunpckhpd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
|
||||
let r: i64x4 = simd_shuffle4(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
|
||||
let r: i64x4 = simd_shuffle4!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
|
|
@ -3479,7 +3481,7 @@ pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vunpcklpd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
|
||||
let r: i64x4 = simd_shuffle4(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
|
||||
let r: i64x4 = simd_shuffle4!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6218,7 +6218,7 @@ pub unsafe fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m12
|
|||
#[cfg_attr(test, assert_instr(vpbroadcastw))]
|
||||
pub unsafe fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i {
|
||||
let a = _mm512_castsi128_si512(a).as_i16x32();
|
||||
let ret: i16x32 = simd_shuffle32(
|
||||
let ret: i16x32 = simd_shuffle32!(
|
||||
a,
|
||||
a,
|
||||
[
|
||||
|
|
@ -6306,7 +6306,7 @@ pub unsafe fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(vpbroadcastb))]
|
||||
pub unsafe fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i {
|
||||
let a = _mm512_castsi128_si512(a).as_i8x64();
|
||||
let ret: i8x64 = simd_shuffle64(
|
||||
let ret: i8x64 = simd_shuffle64!(
|
||||
a,
|
||||
a,
|
||||
[
|
||||
|
|
@ -6397,7 +6397,7 @@ pub unsafe fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i {
|
|||
let a = a.as_i16x32();
|
||||
let b = b.as_i16x32();
|
||||
#[rustfmt::skip]
|
||||
let r: i16x32 = simd_shuffle32(
|
||||
let r: i16x32 = simd_shuffle32!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
|
|
@ -6508,7 +6508,7 @@ pub unsafe fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i {
|
|||
let a = a.as_i8x64();
|
||||
let b = b.as_i8x64();
|
||||
#[rustfmt::skip]
|
||||
let r: i8x64 = simd_shuffle64(
|
||||
let r: i8x64 = simd_shuffle64!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
|
|
@ -6627,7 +6627,7 @@ pub unsafe fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i {
|
|||
let a = a.as_i16x32();
|
||||
let b = b.as_i16x32();
|
||||
#[rustfmt::skip]
|
||||
let r: i16x32 = simd_shuffle32(
|
||||
let r: i16x32 = simd_shuffle32!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
|
|
@ -6738,7 +6738,7 @@ pub unsafe fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i {
|
|||
let a = a.as_i8x64();
|
||||
let b = b.as_i8x64();
|
||||
#[rustfmt::skip]
|
||||
let r: i8x64 = simd_shuffle64(
|
||||
let r: i8x64 = simd_shuffle64!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
|
|
@ -7133,10 +7133,10 @@ pub unsafe fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i {
|
|||
pub unsafe fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
|
||||
static_assert_imm8!(IMM8);
|
||||
let a = a.as_i16x32();
|
||||
let r: i16x32 = simd_shuffle32(
|
||||
let r: i16x32 = simd_shuffle32!(
|
||||
a,
|
||||
a,
|
||||
[
|
||||
<const IMM8: i32> [
|
||||
IMM8 as u32 & 0b11,
|
||||
(IMM8 as u32 >> 2) & 0b11,
|
||||
(IMM8 as u32 >> 4) & 0b11,
|
||||
|
|
@ -7277,10 +7277,10 @@ pub unsafe fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i
|
|||
pub unsafe fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
|
||||
static_assert_imm8!(IMM8);
|
||||
let a = a.as_i16x32();
|
||||
let r: i16x32 = simd_shuffle32(
|
||||
let r: i16x32 = simd_shuffle32!(
|
||||
a,
|
||||
a,
|
||||
[
|
||||
<const IMM8: i32> [
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
|
|
@ -8433,7 +8433,7 @@ pub unsafe fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
|
|||
pub unsafe fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i {
|
||||
let a = a.as_i16x8();
|
||||
let zero = _mm_setzero_si128().as_i16x8();
|
||||
let v256: i16x16 = simd_shuffle16(a, zero, [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]);
|
||||
let v256: i16x16 = simd_shuffle16!(a, zero, [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]);
|
||||
transmute::<i8x16, _>(simd_cast(v256))
|
||||
}
|
||||
|
||||
|
|
@ -8875,10 +8875,10 @@ pub unsafe fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
|
|||
static_assert_imm8!(IMM8);
|
||||
let a = a.as_i8x64();
|
||||
let zero = _mm512_setzero_si512().as_i8x64();
|
||||
let r: i8x64 = simd_shuffle64(
|
||||
let r: i8x64 = simd_shuffle64!(
|
||||
zero,
|
||||
a,
|
||||
[
|
||||
<const IMM8: i32> [
|
||||
64 - (IMM8 as u32 & 0xff),
|
||||
65 - (IMM8 as u32 & 0xff),
|
||||
66 - (IMM8 as u32 & 0xff),
|
||||
|
|
@ -8960,7 +8960,7 @@ pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
|
|||
let a = a.as_i8x64();
|
||||
let zero = _mm512_setzero_si512().as_i8x64();
|
||||
let r: i8x64 = match IMM8 % 16 {
|
||||
0 => simd_shuffle64(
|
||||
0 => simd_shuffle64!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -8969,7 +8969,7 @@ pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
|
|||
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
||||
],
|
||||
),
|
||||
1 => simd_shuffle64(
|
||||
1 => simd_shuffle64!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -8978,7 +8978,7 @@ pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
|
|||
45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112,
|
||||
],
|
||||
),
|
||||
2 => simd_shuffle64(
|
||||
2 => simd_shuffle64!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -8987,7 +8987,7 @@ pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
|
|||
46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113,
|
||||
],
|
||||
),
|
||||
3 => simd_shuffle64(
|
||||
3 => simd_shuffle64!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -8997,7 +8997,7 @@ pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
|
|||
114,
|
||||
],
|
||||
),
|
||||
4 => simd_shuffle64(
|
||||
4 => simd_shuffle64!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -9007,7 +9007,7 @@ pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
|
|||
115,
|
||||
],
|
||||
),
|
||||
5 => simd_shuffle64(
|
||||
5 => simd_shuffle64!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -9017,7 +9017,7 @@ pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
|
|||
115, 116,
|
||||
],
|
||||
),
|
||||
6 => simd_shuffle64(
|
||||
6 => simd_shuffle64!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -9027,7 +9027,7 @@ pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
|
|||
116, 117,
|
||||
],
|
||||
),
|
||||
7 => simd_shuffle64(
|
||||
7 => simd_shuffle64!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -9037,7 +9037,7 @@ pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
|
|||
116, 117, 118,
|
||||
],
|
||||
),
|
||||
8 => simd_shuffle64(
|
||||
8 => simd_shuffle64!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -9047,7 +9047,7 @@ pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
|
|||
116, 117, 118, 119,
|
||||
],
|
||||
),
|
||||
9 => simd_shuffle64(
|
||||
9 => simd_shuffle64!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -9057,7 +9057,7 @@ pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
|
|||
117, 118, 119, 120,
|
||||
],
|
||||
),
|
||||
10 => simd_shuffle64(
|
||||
10 => simd_shuffle64!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -9067,7 +9067,7 @@ pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
|
|||
118, 119, 120, 121,
|
||||
],
|
||||
),
|
||||
11 => simd_shuffle64(
|
||||
11 => simd_shuffle64!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -9077,7 +9077,7 @@ pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
|
|||
117, 118, 119, 120, 121, 122,
|
||||
],
|
||||
),
|
||||
12 => simd_shuffle64(
|
||||
12 => simd_shuffle64!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -9087,7 +9087,7 @@ pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
|
|||
118, 119, 120, 121, 122, 123,
|
||||
],
|
||||
),
|
||||
13 => simd_shuffle64(
|
||||
13 => simd_shuffle64!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -9097,7 +9097,7 @@ pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
|
|||
119, 120, 121, 122, 123, 124,
|
||||
],
|
||||
),
|
||||
14 => simd_shuffle64(
|
||||
14 => simd_shuffle64!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -9107,7 +9107,7 @@ pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
|
|||
120, 121, 122, 123, 124, 125,
|
||||
],
|
||||
),
|
||||
15 => simd_shuffle64(
|
||||
15 => simd_shuffle64!(
|
||||
a,
|
||||
zero,
|
||||
[
|
||||
|
|
@ -9146,7 +9146,7 @@ pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m
|
|||
let b = b.as_i8x64();
|
||||
|
||||
let r: i8x64 = match IMM8 % 16 {
|
||||
0 => simd_shuffle64(
|
||||
0 => simd_shuffle64!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -9155,7 +9155,7 @@ pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m
|
|||
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
||||
],
|
||||
),
|
||||
1 => simd_shuffle64(
|
||||
1 => simd_shuffle64!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -9164,7 +9164,7 @@ pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m
|
|||
45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112,
|
||||
],
|
||||
),
|
||||
2 => simd_shuffle64(
|
||||
2 => simd_shuffle64!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -9173,7 +9173,7 @@ pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m
|
|||
46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113,
|
||||
],
|
||||
),
|
||||
3 => simd_shuffle64(
|
||||
3 => simd_shuffle64!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -9183,7 +9183,7 @@ pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m
|
|||
114,
|
||||
],
|
||||
),
|
||||
4 => simd_shuffle64(
|
||||
4 => simd_shuffle64!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -9193,7 +9193,7 @@ pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m
|
|||
115,
|
||||
],
|
||||
),
|
||||
5 => simd_shuffle64(
|
||||
5 => simd_shuffle64!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -9203,7 +9203,7 @@ pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m
|
|||
115, 116,
|
||||
],
|
||||
),
|
||||
6 => simd_shuffle64(
|
||||
6 => simd_shuffle64!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -9213,7 +9213,7 @@ pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m
|
|||
116, 117,
|
||||
],
|
||||
),
|
||||
7 => simd_shuffle64(
|
||||
7 => simd_shuffle64!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -9223,7 +9223,7 @@ pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m
|
|||
116, 117, 118,
|
||||
],
|
||||
),
|
||||
8 => simd_shuffle64(
|
||||
8 => simd_shuffle64!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -9233,7 +9233,7 @@ pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m
|
|||
116, 117, 118, 119,
|
||||
],
|
||||
),
|
||||
9 => simd_shuffle64(
|
||||
9 => simd_shuffle64!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -9243,7 +9243,7 @@ pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m
|
|||
117, 118, 119, 120,
|
||||
],
|
||||
),
|
||||
10 => simd_shuffle64(
|
||||
10 => simd_shuffle64!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -9253,7 +9253,7 @@ pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m
|
|||
118, 119, 120, 121,
|
||||
],
|
||||
),
|
||||
11 => simd_shuffle64(
|
||||
11 => simd_shuffle64!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -9263,7 +9263,7 @@ pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m
|
|||
117, 118, 119, 120, 121, 122,
|
||||
],
|
||||
),
|
||||
12 => simd_shuffle64(
|
||||
12 => simd_shuffle64!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -9273,7 +9273,7 @@ pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m
|
|||
118, 119, 120, 121, 122, 123,
|
||||
],
|
||||
),
|
||||
13 => simd_shuffle64(
|
||||
13 => simd_shuffle64!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -9283,7 +9283,7 @@ pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m
|
|||
119, 120, 121, 122, 123, 124,
|
||||
],
|
||||
),
|
||||
14 => simd_shuffle64(
|
||||
14 => simd_shuffle64!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
@ -9293,7 +9293,7 @@ pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m
|
|||
120, 121, 122, 123, 124, 125,
|
||||
],
|
||||
),
|
||||
15 => simd_shuffle64(
|
||||
15 => simd_shuffle64!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
|
|
|
|||
|
|
@ -10529,7 +10529,7 @@ pub unsafe fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
|
|||
0b11111111,
|
||||
_MM_FROUND_CUR_DIRECTION,
|
||||
);
|
||||
simd_shuffle16(
|
||||
simd_shuffle16!(
|
||||
r,
|
||||
_mm256_setzero_ps().as_f32x8(),
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
|
||||
|
|
@ -10549,7 +10549,7 @@ pub unsafe fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> _
|
|||
k,
|
||||
_MM_FROUND_CUR_DIRECTION,
|
||||
);
|
||||
simd_shuffle16(
|
||||
simd_shuffle16!(
|
||||
r,
|
||||
_mm256_setzero_ps().as_f32x8(),
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
|
||||
|
|
@ -10644,7 +10644,7 @@ pub unsafe fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(vpmovsxbq))]
|
||||
pub unsafe fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
|
||||
let a = a.as_i8x16();
|
||||
let v64: i8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
let v64: i8x8 = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
transmute::<i64x8, _>(simd_cast(v64))
|
||||
}
|
||||
|
||||
|
|
@ -10805,7 +10805,7 @@ pub unsafe fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(vpmovzxbq))]
|
||||
pub unsafe fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
|
||||
let a = a.as_u8x16();
|
||||
let v64: u8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
let v64: u8x8 = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
transmute::<i64x8, _>(simd_cast(v64))
|
||||
}
|
||||
|
||||
|
|
@ -11628,7 +11628,7 @@ pub unsafe fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
|
|||
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
|
||||
pub unsafe fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
|
||||
let a = a.as_u32x4();
|
||||
let u64: u32x2 = simd_shuffle2(a, a, [0, 1]);
|
||||
let u64: u32x2 = simd_shuffle2!(a, a, [0, 1]);
|
||||
transmute::<f64x2, _>(simd_cast(u64))
|
||||
}
|
||||
|
||||
|
|
@ -11663,7 +11663,7 @@ pub unsafe fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
|
|||
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
|
||||
pub unsafe fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
|
||||
let v2 = v2.as_i32x16();
|
||||
let v256: i32x8 = simd_shuffle8(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
let v256: i32x8 = simd_shuffle8!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
transmute::<f64x8, _>(simd_cast(v256))
|
||||
}
|
||||
|
||||
|
|
@ -11686,7 +11686,7 @@ pub unsafe fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i)
|
|||
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
|
||||
pub unsafe fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
|
||||
let v2 = v2.as_u32x16();
|
||||
let v256: u32x8 = simd_shuffle8(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
let v256: u32x8 = simd_shuffle8!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
transmute::<f64x8, _>(simd_cast(v256))
|
||||
}
|
||||
|
||||
|
|
@ -19215,10 +19215,10 @@ pub unsafe fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> _
|
|||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
|
||||
static_assert_imm8!(MASK);
|
||||
simd_shuffle16(
|
||||
simd_shuffle16!(
|
||||
a,
|
||||
a,
|
||||
[
|
||||
<const MASK: i32> [
|
||||
MASK as u32 & 0b11,
|
||||
(MASK as u32 >> 2) & 0b11,
|
||||
((MASK as u32 >> 4) & 0b11),
|
||||
|
|
@ -19333,10 +19333,10 @@ pub unsafe fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> _
|
|||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
|
||||
static_assert_imm8!(MASK);
|
||||
simd_shuffle8(
|
||||
simd_shuffle8!(
|
||||
a,
|
||||
a,
|
||||
[
|
||||
<const MASK: i32> [
|
||||
MASK as u32 & 0b1,
|
||||
((MASK as u32 >> 1) & 0b1),
|
||||
((MASK as u32 >> 2) & 0b1) + 2,
|
||||
|
|
@ -19451,10 +19451,10 @@ pub unsafe fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) ->
|
|||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
|
||||
static_assert_imm8!(MASK);
|
||||
simd_shuffle8(
|
||||
simd_shuffle8!(
|
||||
a,
|
||||
a,
|
||||
[
|
||||
<const MASK: i32> [
|
||||
MASK as u32 & 0b11,
|
||||
(MASK as u32 >> 2) & 0b11,
|
||||
((MASK as u32 >> 4) & 0b11),
|
||||
|
|
@ -19507,10 +19507,10 @@ pub unsafe fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m51
|
|||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
|
||||
static_assert_imm8!(MASK);
|
||||
simd_shuffle4(
|
||||
simd_shuffle4!(
|
||||
a,
|
||||
a,
|
||||
[
|
||||
<const MASK: i32> [
|
||||
MASK as u32 & 0b11,
|
||||
(MASK as u32 >> 2) & 0b11,
|
||||
((MASK as u32 >> 4) & 0b11),
|
||||
|
|
@ -19559,10 +19559,10 @@ pub unsafe fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m25
|
|||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
|
||||
static_assert_imm8!(MASK);
|
||||
simd_shuffle8(
|
||||
simd_shuffle8!(
|
||||
a,
|
||||
a,
|
||||
[
|
||||
<const MASK: i32> [
|
||||
MASK as u32 & 0b11,
|
||||
(MASK as u32 >> 2) & 0b11,
|
||||
((MASK as u32 >> 4) & 0b11),
|
||||
|
|
@ -19613,10 +19613,10 @@ pub unsafe fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d)
|
|||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
|
||||
static_assert_imm8!(MASK);
|
||||
simd_shuffle4(
|
||||
simd_shuffle4!(
|
||||
a,
|
||||
a,
|
||||
[
|
||||
<const MASK: i32> [
|
||||
MASK as u32 & 0b11,
|
||||
(MASK as u32 >> 2) & 0b11,
|
||||
((MASK as u32 >> 4) & 0b11),
|
||||
|
|
@ -20867,10 +20867,10 @@ pub unsafe fn _mm_mask2_permutex2var_pd(
|
|||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
|
||||
static_assert_imm8!(MASK);
|
||||
let r: i32x16 = simd_shuffle16(
|
||||
let r: i32x16 = simd_shuffle16!(
|
||||
a.as_i32x16(),
|
||||
a.as_i32x16(),
|
||||
[
|
||||
<const MASK: _MM_PERM_ENUM> [
|
||||
MASK as u32 & 0b11,
|
||||
(MASK as u32 >> 2) & 0b11,
|
||||
(MASK as u32 >> 4) & 0b11,
|
||||
|
|
@ -21003,10 +21003,10 @@ pub unsafe fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
|
|||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
|
||||
static_assert_imm8!(MASK);
|
||||
simd_shuffle16(
|
||||
simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
<const MASK: i32> [
|
||||
MASK as u32 & 0b11,
|
||||
(MASK as u32 >> 2) & 0b11,
|
||||
((MASK as u32 >> 4) & 0b11) + 16,
|
||||
|
|
@ -21140,10 +21140,10 @@ pub unsafe fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: _
|
|||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
|
||||
static_assert_imm8!(MASK);
|
||||
simd_shuffle8(
|
||||
simd_shuffle8!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
<const MASK: i32> [
|
||||
MASK as u32 & 0b1,
|
||||
((MASK as u32 >> 1) & 0b1) + 8,
|
||||
((MASK as u32 >> 2) & 0b1) + 2,
|
||||
|
|
@ -21275,10 +21275,10 @@ pub unsafe fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> _
|
|||
static_assert_imm8!(MASK);
|
||||
let a = a.as_i32x16();
|
||||
let b = b.as_i32x16();
|
||||
let r: i32x16 = simd_shuffle16(
|
||||
let r: i32x16 = simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
<const MASK: i32> [
|
||||
(MASK as u32 & 0b11) * 4 + 0,
|
||||
(MASK as u32 & 0b11) * 4 + 1,
|
||||
(MASK as u32 & 0b11) * 4 + 2,
|
||||
|
|
@ -21347,10 +21347,10 @@ pub unsafe fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> _
|
|||
static_assert_imm8!(MASK);
|
||||
let a = a.as_i32x8();
|
||||
let b = b.as_i32x8();
|
||||
let r: i32x8 = simd_shuffle8(
|
||||
let r: i32x8 = simd_shuffle8!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
<const MASK: i32> [
|
||||
(MASK as u32 & 0b1) * 4 + 0,
|
||||
(MASK as u32 & 0b1) * 4 + 1,
|
||||
(MASK as u32 & 0b1) * 4 + 2,
|
||||
|
|
@ -21411,10 +21411,10 @@ pub unsafe fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> _
|
|||
static_assert_imm8!(MASK);
|
||||
let a = a.as_i64x8();
|
||||
let b = b.as_i64x8();
|
||||
let r: i64x8 = simd_shuffle8(
|
||||
let r: i64x8 = simd_shuffle8!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
<const MASK: i32> [
|
||||
(MASK as u32 & 0b11) * 2 + 0,
|
||||
(MASK as u32 & 0b11) * 2 + 1,
|
||||
((MASK as u32 >> 2) & 0b11) * 2 + 0,
|
||||
|
|
@ -21475,10 +21475,10 @@ pub unsafe fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> _
|
|||
static_assert_imm8!(MASK);
|
||||
let a = a.as_i64x4();
|
||||
let b = b.as_i64x4();
|
||||
let r: i64x4 = simd_shuffle4(
|
||||
let r: i64x4 = simd_shuffle4!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
<const MASK: i32> [
|
||||
(MASK as u32 & 0b1) * 2 + 0,
|
||||
(MASK as u32 & 0b1) * 2 + 1,
|
||||
((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
|
||||
|
|
@ -21535,10 +21535,10 @@ pub unsafe fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m
|
|||
static_assert_imm8!(MASK);
|
||||
let a = a.as_f32x16();
|
||||
let b = b.as_f32x16();
|
||||
let r: f32x16 = simd_shuffle16(
|
||||
let r: f32x16 = simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
<const MASK: i32> [
|
||||
(MASK as u32 & 0b11) * 4 + 0,
|
||||
(MASK as u32 & 0b11) * 4 + 1,
|
||||
(MASK as u32 & 0b11) * 4 + 2,
|
||||
|
|
@ -21607,10 +21607,10 @@ pub unsafe fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m
|
|||
static_assert_imm8!(MASK);
|
||||
let a = a.as_f32x8();
|
||||
let b = b.as_f32x8();
|
||||
let r: f32x8 = simd_shuffle8(
|
||||
let r: f32x8 = simd_shuffle8!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
<const MASK: i32> [
|
||||
(MASK as u32 & 0b1) * 4 + 0,
|
||||
(MASK as u32 & 0b1) * 4 + 1,
|
||||
(MASK as u32 & 0b1) * 4 + 2,
|
||||
|
|
@ -21671,10 +21671,10 @@ pub unsafe fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> _
|
|||
static_assert_imm8!(MASK);
|
||||
let a = a.as_f64x8();
|
||||
let b = b.as_f64x8();
|
||||
let r: f64x8 = simd_shuffle8(
|
||||
let r: f64x8 = simd_shuffle8!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
<const MASK: i32> [
|
||||
(MASK as u32 & 0b11) * 2 + 0,
|
||||
(MASK as u32 & 0b11) * 2 + 1,
|
||||
((MASK as u32 >> 2) & 0b11) * 2 + 0,
|
||||
|
|
@ -21735,10 +21735,10 @@ pub unsafe fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> _
|
|||
static_assert_imm8!(MASK);
|
||||
let a = a.as_f64x4();
|
||||
let b = b.as_f64x4();
|
||||
let r: f64x4 = simd_shuffle4(
|
||||
let r: f64x4 = simd_shuffle4!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
<const MASK: i32> [
|
||||
(MASK as u32 & 0b1) * 2 + 0,
|
||||
(MASK as u32 & 0b1) * 2 + 1,
|
||||
((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
|
||||
|
|
@ -21797,10 +21797,10 @@ pub unsafe fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(
|
|||
pub unsafe fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
|
||||
static_assert_imm2!(IMM8);
|
||||
match IMM8 & 0x3 {
|
||||
0 => simd_shuffle4(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
|
||||
1 => simd_shuffle4(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
|
||||
2 => simd_shuffle4(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
|
||||
_ => simd_shuffle4(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
|
||||
0 => simd_shuffle4!(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
|
||||
1 => simd_shuffle4!(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
|
||||
2 => simd_shuffle4!(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
|
||||
_ => simd_shuffle4!(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -21854,8 +21854,8 @@ pub unsafe fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m5
|
|||
pub unsafe fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
|
||||
static_assert_imm1!(IMM8);
|
||||
match IMM8 & 0x1 {
|
||||
0 => simd_shuffle4(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
|
||||
_ => simd_shuffle4(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
|
||||
0 => simd_shuffle4!(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
|
||||
_ => simd_shuffle4!(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -21909,8 +21909,8 @@ pub unsafe fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m2
|
|||
pub unsafe fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
|
||||
static_assert_imm1!(IMM1);
|
||||
match IMM1 {
|
||||
0 => simd_shuffle4(a, _mm512_set1_epi64(0), [0, 1, 2, 3]),
|
||||
_ => simd_shuffle4(a, _mm512_set1_epi64(0), [4, 5, 6, 7]),
|
||||
0 => simd_shuffle4!(a, _mm512_set1_epi64(0), [0, 1, 2, 3]),
|
||||
_ => simd_shuffle4!(a, _mm512_set1_epi64(0), [4, 5, 6, 7]),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -21964,8 +21964,8 @@ pub unsafe fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: _
|
|||
pub unsafe fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
|
||||
static_assert_imm1!(IMM8);
|
||||
match IMM8 & 0x1 {
|
||||
0 => simd_shuffle4(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
|
||||
_ => simd_shuffle4(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
|
||||
0 => simd_shuffle4!(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
|
||||
_ => simd_shuffle4!(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -22021,10 +22021,10 @@ pub unsafe fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i
|
|||
let a = a.as_i32x16();
|
||||
let undefined = _mm512_undefined_epi32().as_i32x16();
|
||||
let extract: i32x4 = match IMM2 {
|
||||
0 => simd_shuffle4(a, undefined, [0, 1, 2, 3]),
|
||||
1 => simd_shuffle4(a, undefined, [4, 5, 6, 7]),
|
||||
2 => simd_shuffle4(a, undefined, [8, 9, 10, 11]),
|
||||
_ => simd_shuffle4(a, undefined, [12, 13, 14, 15]),
|
||||
0 => simd_shuffle4!(a, undefined, [0, 1, 2, 3]),
|
||||
1 => simd_shuffle4!(a, undefined, [4, 5, 6, 7]),
|
||||
2 => simd_shuffle4!(a, undefined, [8, 9, 10, 11]),
|
||||
_ => simd_shuffle4!(a, undefined, [12, 13, 14, 15]),
|
||||
};
|
||||
transmute(extract)
|
||||
}
|
||||
|
|
@ -22081,8 +22081,8 @@ pub unsafe fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i
|
|||
let a = a.as_i32x8();
|
||||
let undefined = _mm256_undefined_si256().as_i32x8();
|
||||
let extract: i32x4 = match IMM1 {
|
||||
0 => simd_shuffle4(a, undefined, [0, 1, 2, 3]),
|
||||
_ => simd_shuffle4(a, undefined, [4, 5, 6, 7]),
|
||||
0 => simd_shuffle4!(a, undefined, [0, 1, 2, 3]),
|
||||
_ => simd_shuffle4!(a, undefined, [4, 5, 6, 7]),
|
||||
};
|
||||
transmute(extract)
|
||||
}
|
||||
|
|
@ -22131,7 +22131,7 @@ pub unsafe fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: _
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vmovsldup))]
|
||||
pub unsafe fn _mm512_moveldup_ps(a: __m512) -> __m512 {
|
||||
let r: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
|
||||
let r: f32x16 = simd_shuffle16!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
|
|
@ -22142,7 +22142,7 @@ pub unsafe fn _mm512_moveldup_ps(a: __m512) -> __m512 {
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vmovsldup))]
|
||||
pub unsafe fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
|
||||
let mov: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
|
||||
let mov: f32x16 = simd_shuffle16!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
|
||||
transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
|
||||
}
|
||||
|
||||
|
|
@ -22153,7 +22153,7 @@ pub unsafe fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> _
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vmovsldup))]
|
||||
pub unsafe fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
|
||||
let mov: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
|
||||
let mov: f32x16 = simd_shuffle16!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
|
||||
let zero = _mm512_setzero_ps().as_f32x16();
|
||||
transmute(simd_select_bitmask(k, mov, zero))
|
||||
}
|
||||
|
|
@ -22211,7 +22211,7 @@ pub unsafe fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vmovshdup))]
|
||||
pub unsafe fn _mm512_movehdup_ps(a: __m512) -> __m512 {
|
||||
let r: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
|
||||
let r: f32x16 = simd_shuffle16!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
|
|
@ -22222,7 +22222,7 @@ pub unsafe fn _mm512_movehdup_ps(a: __m512) -> __m512 {
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vmovshdup))]
|
||||
pub unsafe fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
|
||||
let mov: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
|
||||
let mov: f32x16 = simd_shuffle16!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
|
||||
transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
|
||||
}
|
||||
|
||||
|
|
@ -22233,7 +22233,7 @@ pub unsafe fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> _
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vmovshdup))]
|
||||
pub unsafe fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
|
||||
let mov: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
|
||||
let mov: f32x16 = simd_shuffle16!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
|
||||
let zero = _mm512_setzero_ps().as_f32x16();
|
||||
transmute(simd_select_bitmask(k, mov, zero))
|
||||
}
|
||||
|
|
@ -22291,7 +22291,7 @@ pub unsafe fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vmovddup))]
|
||||
pub unsafe fn _mm512_movedup_pd(a: __m512d) -> __m512d {
|
||||
let r: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
|
||||
let r: f64x8 = simd_shuffle8!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
|
|
@ -22302,7 +22302,7 @@ pub unsafe fn _mm512_movedup_pd(a: __m512d) -> __m512d {
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vmovddup))]
|
||||
pub unsafe fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
|
||||
let mov: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
|
||||
let mov: f64x8 = simd_shuffle8!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
|
||||
transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
|
||||
}
|
||||
|
||||
|
|
@ -22313,7 +22313,7 @@ pub unsafe fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> _
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vmovddup))]
|
||||
pub unsafe fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
|
||||
let mov: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
|
||||
let mov: f64x8 = simd_shuffle8!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
|
||||
let zero = _mm512_setzero_pd().as_f64x8();
|
||||
transmute(simd_select_bitmask(k, mov, zero))
|
||||
}
|
||||
|
|
@ -22376,22 +22376,22 @@ pub unsafe fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m
|
|||
let a = a.as_i32x16();
|
||||
let b = _mm512_castsi128_si512(b).as_i32x16();
|
||||
let ret: i32x16 = match IMM8 & 0b11 {
|
||||
0 => simd_shuffle16(
|
||||
0 => simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
|
||||
),
|
||||
1 => simd_shuffle16(
|
||||
1 => simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
|
||||
),
|
||||
2 => simd_shuffle16(
|
||||
2 => simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
|
||||
),
|
||||
_ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
|
||||
_ => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
|
||||
};
|
||||
transmute(ret)
|
||||
}
|
||||
|
|
@ -22447,8 +22447,8 @@ pub unsafe fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m
|
|||
let a = a.as_i32x8();
|
||||
let b = _mm256_castsi128_si256(b).as_i32x8();
|
||||
let ret: i32x8 = match IMM8 & 0b1 {
|
||||
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
|
||||
_ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
|
||||
0 => simd_shuffle8!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
|
||||
_ => simd_shuffle8!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
|
||||
};
|
||||
transmute(ret)
|
||||
}
|
||||
|
|
@ -22506,8 +22506,8 @@ pub unsafe fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m
|
|||
static_assert_imm1!(IMM8);
|
||||
let b = _mm512_castsi256_si512(b);
|
||||
match IMM8 & 0b1 {
|
||||
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
|
||||
_ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
|
||||
0 => simd_shuffle8!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
|
||||
_ => simd_shuffle8!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -22558,22 +22558,22 @@ pub unsafe fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m51
|
|||
static_assert_imm2!(IMM8);
|
||||
let b = _mm512_castps128_ps512(b);
|
||||
match IMM8 & 0b11 {
|
||||
0 => simd_shuffle16(
|
||||
0 => simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
|
||||
),
|
||||
1 => simd_shuffle16(
|
||||
1 => simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
|
||||
),
|
||||
2 => simd_shuffle16(
|
||||
2 => simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
|
||||
),
|
||||
_ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
|
||||
_ => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -22627,8 +22627,8 @@ pub unsafe fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m25
|
|||
static_assert_imm1!(IMM8);
|
||||
let b = _mm256_castps128_ps256(b);
|
||||
match IMM8 & 0b1 {
|
||||
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
|
||||
_ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
|
||||
0 => simd_shuffle8!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
|
||||
_ => simd_shuffle8!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -22685,8 +22685,8 @@ pub unsafe fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m
|
|||
static_assert_imm1!(IMM8);
|
||||
let b = _mm512_castpd256_pd512(b);
|
||||
match IMM8 & 0b1 {
|
||||
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
|
||||
_ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
|
||||
0 => simd_shuffle8!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
|
||||
_ => simd_shuffle8!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -22736,7 +22736,7 @@ pub unsafe fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
|
|||
let a = a.as_i32x16();
|
||||
let b = b.as_i32x16();
|
||||
#[rustfmt::skip]
|
||||
let r: i32x16 = simd_shuffle16(
|
||||
let r: i32x16 = simd_shuffle16!(
|
||||
a, b,
|
||||
[ 2, 18, 3, 19,
|
||||
2 + 4, 18 + 4, 3 + 4, 19 + 4,
|
||||
|
|
@ -22837,7 +22837,7 @@ pub unsafe fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> _
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq
|
||||
pub unsafe fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
|
||||
simd_shuffle8(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6])
|
||||
simd_shuffle8!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6])
|
||||
}
|
||||
|
||||
/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -22932,7 +22932,7 @@ pub unsafe fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> _
|
|||
#[cfg_attr(test, assert_instr(vunpckhps))]
|
||||
pub unsafe fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
|
||||
#[rustfmt::skip]
|
||||
simd_shuffle16(
|
||||
simd_shuffle16!(
|
||||
a, b,
|
||||
[ 2, 18, 3, 19,
|
||||
2 + 4, 18 + 4, 3 + 4, 19 + 4,
|
||||
|
|
@ -23017,7 +23017,7 @@ pub unsafe fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vunpckhpd))]
|
||||
pub unsafe fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
|
||||
simd_shuffle8(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6])
|
||||
simd_shuffle8!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6])
|
||||
}
|
||||
|
||||
/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -23109,7 +23109,7 @@ pub unsafe fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
|
|||
let a = a.as_i32x16();
|
||||
let b = b.as_i32x16();
|
||||
#[rustfmt::skip]
|
||||
let r: i32x16 = simd_shuffle16(
|
||||
let r: i32x16 = simd_shuffle16!(
|
||||
a, b,
|
||||
[ 0, 16, 1, 17,
|
||||
0 + 4, 16 + 4, 1 + 4, 17 + 4,
|
||||
|
|
@ -23210,7 +23210,7 @@ pub unsafe fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> _
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq
|
||||
pub unsafe fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
|
||||
simd_shuffle8(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6])
|
||||
simd_shuffle8!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6])
|
||||
}
|
||||
|
||||
/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -23305,7 +23305,7 @@ pub unsafe fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> _
|
|||
#[cfg_attr(test, assert_instr(vunpcklps))]
|
||||
pub unsafe fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
|
||||
#[rustfmt::skip]
|
||||
simd_shuffle16(a, b,
|
||||
simd_shuffle16!(a, b,
|
||||
[ 0, 16, 1, 17,
|
||||
0 + 4, 16 + 4, 1 + 4, 17 + 4,
|
||||
0 + 8, 16 + 8, 1 + 8, 17 + 8,
|
||||
|
|
@ -23389,7 +23389,7 @@ pub unsafe fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vunpcklpd))]
|
||||
pub unsafe fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
|
||||
simd_shuffle8(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6])
|
||||
simd_shuffle8!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6])
|
||||
}
|
||||
|
||||
/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -23477,7 +23477,7 @@ pub unsafe fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m1
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_castps128_ps512(a: __m128) -> __m512 {
|
||||
simd_shuffle16(
|
||||
simd_shuffle16!(
|
||||
a,
|
||||
_mm_set1_ps(-1.),
|
||||
[0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
|
||||
|
|
@ -23490,7 +23490,7 @@ pub unsafe fn _mm512_castps128_ps512(a: __m128) -> __m512 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_castps256_ps512(a: __m256) -> __m512 {
|
||||
simd_shuffle16(
|
||||
simd_shuffle16!(
|
||||
a,
|
||||
_mm256_set1_ps(-1.),
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
|
||||
|
|
@ -23503,7 +23503,7 @@ pub unsafe fn _mm512_castps256_ps512(a: __m256) -> __m512 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
|
||||
simd_shuffle16(
|
||||
simd_shuffle16!(
|
||||
a,
|
||||
_mm_set1_ps(0.),
|
||||
[0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
|
||||
|
|
@ -23516,7 +23516,7 @@ pub unsafe fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
|
||||
simd_shuffle16(
|
||||
simd_shuffle16!(
|
||||
a,
|
||||
_mm256_set1_ps(0.),
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
|
||||
|
|
@ -23529,7 +23529,7 @@ pub unsafe fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_castps512_ps128(a: __m512) -> __m128 {
|
||||
simd_shuffle4(a, a, [0, 1, 2, 3])
|
||||
simd_shuffle4!(a, a, [0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
|
||||
|
|
@ -23538,7 +23538,7 @@ pub unsafe fn _mm512_castps512_ps128(a: __m512) -> __m128 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_castps512_ps256(a: __m512) -> __m256 {
|
||||
simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
}
|
||||
|
||||
/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
|
||||
|
|
@ -23565,7 +23565,7 @@ pub unsafe fn _mm512_castps_si512(a: __m512) -> __m512i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
|
||||
simd_shuffle8(a, _mm_set1_pd(-1.), [0, 1, 2, 2, 2, 2, 2, 2])
|
||||
simd_shuffle8!(a, _mm_set1_pd(-1.), [0, 1, 2, 2, 2, 2, 2, 2])
|
||||
}
|
||||
|
||||
/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
|
||||
|
|
@ -23574,7 +23574,7 @@ pub unsafe fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
|
||||
simd_shuffle8(a, _mm256_set1_pd(-1.), [0, 1, 2, 3, 4, 4, 4, 4])
|
||||
simd_shuffle8!(a, _mm256_set1_pd(-1.), [0, 1, 2, 3, 4, 4, 4, 4])
|
||||
}
|
||||
|
||||
/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
|
||||
|
|
@ -23583,7 +23583,7 @@ pub unsafe fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
|
||||
simd_shuffle8(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2])
|
||||
simd_shuffle8!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2])
|
||||
}
|
||||
|
||||
/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
|
||||
|
|
@ -23592,7 +23592,7 @@ pub unsafe fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
|
||||
simd_shuffle8(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4])
|
||||
simd_shuffle8!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4])
|
||||
}
|
||||
|
||||
/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
|
||||
|
|
@ -23601,7 +23601,7 @@ pub unsafe fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
|
||||
simd_shuffle2(a, a, [0, 1])
|
||||
simd_shuffle2!(a, a, [0, 1])
|
||||
}
|
||||
|
||||
/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
|
||||
|
|
@ -23610,7 +23610,7 @@ pub unsafe fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
|
||||
simd_shuffle4(a, a, [0, 1, 2, 3])
|
||||
simd_shuffle4!(a, a, [0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
|
||||
|
|
@ -23637,7 +23637,7 @@ pub unsafe fn _mm512_castpd_si512(a: __m512d) -> __m512i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
|
||||
simd_shuffle8(a, _mm_set1_epi64x(-1), [0, 1, 2, 2, 2, 2, 2, 2])
|
||||
simd_shuffle8!(a, _mm_set1_epi64x(-1), [0, 1, 2, 2, 2, 2, 2, 2])
|
||||
}
|
||||
|
||||
/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
|
||||
|
|
@ -23646,7 +23646,7 @@ pub unsafe fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
|
||||
simd_shuffle8(a, _mm256_set1_epi64x(-1), [0, 1, 2, 3, 4, 4, 4, 4])
|
||||
simd_shuffle8!(a, _mm256_set1_epi64x(-1), [0, 1, 2, 3, 4, 4, 4, 4])
|
||||
}
|
||||
|
||||
/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
|
||||
|
|
@ -23655,7 +23655,7 @@ pub unsafe fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
|
||||
simd_shuffle8(a, _mm_set1_epi64x(0), [0, 1, 2, 2, 2, 2, 2, 2])
|
||||
simd_shuffle8!(a, _mm_set1_epi64x(0), [0, 1, 2, 2, 2, 2, 2, 2])
|
||||
}
|
||||
|
||||
/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
|
||||
|
|
@ -23664,7 +23664,7 @@ pub unsafe fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
|
||||
simd_shuffle8(a, _mm256_set1_epi64x(0), [0, 1, 2, 3, 4, 4, 4, 4])
|
||||
simd_shuffle8!(a, _mm256_set1_epi64x(0), [0, 1, 2, 3, 4, 4, 4, 4])
|
||||
}
|
||||
|
||||
/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
|
||||
|
|
@ -23673,7 +23673,7 @@ pub unsafe fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
|
||||
simd_shuffle2(a, a, [0, 1])
|
||||
simd_shuffle2!(a, a, [0, 1])
|
||||
}
|
||||
|
||||
/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
|
||||
|
|
@ -23682,7 +23682,7 @@ pub unsafe fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
|
||||
simd_shuffle4(a, a, [0, 1, 2, 3])
|
||||
simd_shuffle4!(a, a, [0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
|
||||
|
|
@ -23722,7 +23722,7 @@ pub unsafe fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
|
|||
#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
|
||||
pub unsafe fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
|
||||
let a = _mm512_castsi128_si512(a).as_i32x16();
|
||||
let ret: i32x16 = simd_shuffle16(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
|
||||
let ret: i32x16 = simd_shuffle16!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
|
||||
transmute(ret)
|
||||
}
|
||||
|
||||
|
|
@ -23802,7 +23802,7 @@ pub unsafe fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vbroadcas))] //should be vpbroadcastq
|
||||
pub unsafe fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
|
||||
simd_shuffle8(a, a, [0, 0, 0, 0, 0, 0, 0, 0])
|
||||
simd_shuffle8!(a, a, [0, 0, 0, 0, 0, 0, 0, 0])
|
||||
}
|
||||
|
||||
/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -23881,7 +23881,7 @@ pub unsafe fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vbroadcastss))]
|
||||
pub unsafe fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
|
||||
simd_shuffle16(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
|
||||
simd_shuffle16!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
|
||||
}
|
||||
|
||||
/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -23960,7 +23960,7 @@ pub unsafe fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vbroadcastsd))]
|
||||
pub unsafe fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
|
||||
simd_shuffle8(a, a, [0, 0, 0, 0, 0, 0, 0, 0])
|
||||
simd_shuffle8!(a, a, [0, 0, 0, 0, 0, 0, 0, 0])
|
||||
}
|
||||
|
||||
/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -24016,7 +24016,7 @@ pub unsafe fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
|
|||
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
|
||||
pub unsafe fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
|
||||
let a = a.as_i32x4();
|
||||
let ret: i32x16 = simd_shuffle16(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
|
||||
let ret: i32x16 = simd_shuffle16!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
|
||||
transmute(ret)
|
||||
}
|
||||
|
||||
|
|
@ -24048,7 +24048,7 @@ pub unsafe fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i
|
|||
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
|
||||
pub unsafe fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
|
||||
let a = a.as_i32x4();
|
||||
let ret: i32x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
|
||||
let ret: i32x8 = simd_shuffle8!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
|
||||
transmute(ret)
|
||||
}
|
||||
|
||||
|
|
@ -24079,7 +24079,7 @@ pub unsafe fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
|
||||
pub unsafe fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
|
||||
simd_shuffle8(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
|
||||
simd_shuffle8!(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -24109,7 +24109,7 @@ pub unsafe fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
|
||||
pub unsafe fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
|
||||
simd_shuffle16(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3])
|
||||
simd_shuffle16!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -24139,7 +24139,7 @@ pub unsafe fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
|
||||
pub unsafe fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
|
||||
simd_shuffle8(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
|
||||
simd_shuffle8!(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -24169,7 +24169,7 @@ pub unsafe fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
|
||||
pub unsafe fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
|
||||
simd_shuffle8(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
|
||||
simd_shuffle8!(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -24326,66 +24326,62 @@ pub unsafe fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __
|
|||
let b = b.as_i32x16();
|
||||
let imm8: i32 = IMM8 % 16;
|
||||
let r: i32x16 = match imm8 {
|
||||
0 => simd_shuffle16(
|
||||
0 => simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
],
|
||||
[16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,],
|
||||
),
|
||||
1 => simd_shuffle16(
|
||||
1 => simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
|
||||
],
|
||||
[17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,],
|
||||
),
|
||||
2 => simd_shuffle16(
|
||||
2 => simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
|
||||
),
|
||||
3 => simd_shuffle16(
|
||||
3 => simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
|
||||
),
|
||||
4 => simd_shuffle16(
|
||||
4 => simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
|
||||
),
|
||||
5 => simd_shuffle16(
|
||||
5 => simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
|
||||
),
|
||||
6 => simd_shuffle16(
|
||||
6 => simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
|
||||
),
|
||||
7 => simd_shuffle16(
|
||||
7 => simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
|
||||
),
|
||||
8 => simd_shuffle16(
|
||||
8 => simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
|
||||
),
|
||||
9 => simd_shuffle16(
|
||||
9 => simd_shuffle16!(
|
||||
a,
|
||||
b,
|
||||
[25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
|
||||
),
|
||||
10 => simd_shuffle16(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
|
||||
11 => simd_shuffle16(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
|
||||
12 => simd_shuffle16(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
|
||||
13 => simd_shuffle16(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
|
||||
14 => simd_shuffle16(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
|
||||
_ => simd_shuffle16(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
|
||||
10 => simd_shuffle16!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
|
||||
11 => simd_shuffle16!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
|
||||
12 => simd_shuffle16!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
|
||||
13 => simd_shuffle16!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
|
||||
14 => simd_shuffle16!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
|
||||
_ => simd_shuffle16!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
|
||||
};
|
||||
transmute(r)
|
||||
}
|
||||
|
|
@ -24439,22 +24435,22 @@ pub unsafe fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __
|
|||
let b = b.as_i32x8();
|
||||
let imm8: i32 = IMM8 % 16;
|
||||
let r: i32x8 = match imm8 {
|
||||
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
|
||||
1 => simd_shuffle8(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
|
||||
2 => simd_shuffle8(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
|
||||
3 => simd_shuffle8(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
|
||||
4 => simd_shuffle8(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
|
||||
5 => simd_shuffle8(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
|
||||
6 => simd_shuffle8(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
|
||||
7 => simd_shuffle8(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
|
||||
8 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
|
||||
9 => simd_shuffle8(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
|
||||
10 => simd_shuffle8(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
|
||||
11 => simd_shuffle8(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
|
||||
12 => simd_shuffle8(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
|
||||
13 => simd_shuffle8(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
|
||||
14 => simd_shuffle8(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
|
||||
_ => simd_shuffle8(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
|
||||
0 => simd_shuffle8!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
|
||||
1 => simd_shuffle8!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
|
||||
2 => simd_shuffle8!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
|
||||
3 => simd_shuffle8!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
|
||||
4 => simd_shuffle8!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
|
||||
5 => simd_shuffle8!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
|
||||
6 => simd_shuffle8!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
|
||||
7 => simd_shuffle8!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
|
||||
8 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
|
||||
9 => simd_shuffle8!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
|
||||
10 => simd_shuffle8!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
|
||||
11 => simd_shuffle8!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
|
||||
12 => simd_shuffle8!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
|
||||
13 => simd_shuffle8!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
|
||||
14 => simd_shuffle8!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
|
||||
_ => simd_shuffle8!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
|
||||
};
|
||||
transmute(r)
|
||||
}
|
||||
|
|
@ -24508,14 +24504,14 @@ pub unsafe fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m12
|
|||
let b = b.as_i32x4();
|
||||
let imm8: i32 = IMM8 % 8;
|
||||
let r: i32x4 = match imm8 {
|
||||
0 => simd_shuffle4(a, b, [4, 5, 6, 7]),
|
||||
1 => simd_shuffle4(a, b, [5, 6, 7, 0]),
|
||||
2 => simd_shuffle4(a, b, [6, 7, 0, 1]),
|
||||
3 => simd_shuffle4(a, b, [7, 0, 1, 2]),
|
||||
4 => simd_shuffle4(a, b, [0, 1, 2, 3]),
|
||||
5 => simd_shuffle4(a, b, [1, 2, 3, 0]),
|
||||
6 => simd_shuffle4(a, b, [2, 3, 0, 1]),
|
||||
_ => simd_shuffle4(a, b, [3, 0, 1, 2]),
|
||||
0 => simd_shuffle4!(a, b, [4, 5, 6, 7]),
|
||||
1 => simd_shuffle4!(a, b, [5, 6, 7, 0]),
|
||||
2 => simd_shuffle4!(a, b, [6, 7, 0, 1]),
|
||||
3 => simd_shuffle4!(a, b, [7, 0, 1, 2]),
|
||||
4 => simd_shuffle4!(a, b, [0, 1, 2, 3]),
|
||||
5 => simd_shuffle4!(a, b, [1, 2, 3, 0]),
|
||||
6 => simd_shuffle4!(a, b, [2, 3, 0, 1]),
|
||||
_ => simd_shuffle4!(a, b, [3, 0, 1, 2]),
|
||||
};
|
||||
transmute(r)
|
||||
}
|
||||
|
|
@ -24567,14 +24563,14 @@ pub unsafe fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __
|
|||
static_assert_imm8!(IMM8);
|
||||
let imm8: i32 = IMM8 % 8;
|
||||
let r: i64x8 = match imm8 {
|
||||
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
|
||||
1 => simd_shuffle8(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
|
||||
2 => simd_shuffle8(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
|
||||
3 => simd_shuffle8(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
|
||||
4 => simd_shuffle8(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
|
||||
5 => simd_shuffle8(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
|
||||
6 => simd_shuffle8(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
|
||||
_ => simd_shuffle8(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
|
||||
0 => simd_shuffle8!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
|
||||
1 => simd_shuffle8!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
|
||||
2 => simd_shuffle8!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
|
||||
3 => simd_shuffle8!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
|
||||
4 => simd_shuffle8!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
|
||||
5 => simd_shuffle8!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
|
||||
6 => simd_shuffle8!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
|
||||
_ => simd_shuffle8!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
|
||||
};
|
||||
transmute(r)
|
||||
}
|
||||
|
|
@ -24626,14 +24622,14 @@ pub unsafe fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __
|
|||
static_assert_imm8!(IMM8);
|
||||
let imm8: i32 = IMM8 % 8;
|
||||
let r: i64x4 = match imm8 {
|
||||
0 => simd_shuffle4(a, b, [4, 5, 6, 7]),
|
||||
1 => simd_shuffle4(a, b, [5, 6, 7, 0]),
|
||||
2 => simd_shuffle4(a, b, [6, 7, 0, 1]),
|
||||
3 => simd_shuffle4(a, b, [7, 0, 1, 2]),
|
||||
4 => simd_shuffle4(a, b, [0, 1, 2, 3]),
|
||||
5 => simd_shuffle4(a, b, [1, 2, 3, 4]),
|
||||
6 => simd_shuffle4(a, b, [2, 3, 4, 5]),
|
||||
_ => simd_shuffle4(a, b, [3, 4, 5, 6]),
|
||||
0 => simd_shuffle4!(a, b, [4, 5, 6, 7]),
|
||||
1 => simd_shuffle4!(a, b, [5, 6, 7, 0]),
|
||||
2 => simd_shuffle4!(a, b, [6, 7, 0, 1]),
|
||||
3 => simd_shuffle4!(a, b, [7, 0, 1, 2]),
|
||||
4 => simd_shuffle4!(a, b, [0, 1, 2, 3]),
|
||||
5 => simd_shuffle4!(a, b, [1, 2, 3, 4]),
|
||||
6 => simd_shuffle4!(a, b, [2, 3, 4, 5]),
|
||||
_ => simd_shuffle4!(a, b, [3, 4, 5, 6]),
|
||||
};
|
||||
transmute(r)
|
||||
}
|
||||
|
|
@ -24685,10 +24681,10 @@ pub unsafe fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m12
|
|||
static_assert_imm8!(IMM8);
|
||||
let imm8: i32 = IMM8 % 4;
|
||||
let r: i64x2 = match imm8 {
|
||||
0 => simd_shuffle2(a, b, [2, 3]),
|
||||
1 => simd_shuffle2(a, b, [3, 0]),
|
||||
2 => simd_shuffle2(a, b, [0, 1]),
|
||||
_ => simd_shuffle2(a, b, [1, 2]),
|
||||
0 => simd_shuffle2!(a, b, [2, 3]),
|
||||
1 => simd_shuffle2!(a, b, [3, 0]),
|
||||
2 => simd_shuffle2!(a, b, [0, 1]),
|
||||
_ => simd_shuffle2!(a, b, [1, 2]),
|
||||
};
|
||||
transmute(r)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -350,7 +350,7 @@ pub unsafe fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128 {
|
|||
#[cfg_attr(test, assert_instr(cmpltss))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 {
|
||||
simd_shuffle4(a, cmpss(b, a, 1), [4, 1, 2, 3])
|
||||
simd_shuffle4!(a, cmpss(b, a, 1), [4, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Compares the lowest `f32` of both inputs for greater than or equal. The
|
||||
|
|
@ -364,7 +364,7 @@ pub unsafe fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 {
|
|||
#[cfg_attr(test, assert_instr(cmpless))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128 {
|
||||
simd_shuffle4(a, cmpss(b, a, 2), [4, 1, 2, 3])
|
||||
simd_shuffle4!(a, cmpss(b, a, 2), [4, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Compares the lowest `f32` of both inputs for inequality. The lowest 32 bits
|
||||
|
|
@ -420,7 +420,7 @@ pub unsafe fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128 {
|
|||
#[cfg_attr(test, assert_instr(cmpnltss))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 {
|
||||
simd_shuffle4(a, cmpss(b, a, 5), [4, 1, 2, 3])
|
||||
simd_shuffle4!(a, cmpss(b, a, 5), [4, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Compares the lowest `f32` of both inputs for not-greater-than-or-equal. The
|
||||
|
|
@ -434,7 +434,7 @@ pub unsafe fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 {
|
|||
#[cfg_attr(test, assert_instr(cmpnless))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128 {
|
||||
simd_shuffle4(a, cmpss(b, a, 6), [4, 1, 2, 3])
|
||||
simd_shuffle4!(a, cmpss(b, a, 6), [4, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Checks if the lowest `f32` of both inputs are ordered. The lowest 32 bits of
|
||||
|
|
@ -1011,10 +1011,10 @@ pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_shuffle_ps<const MASK: i32>(a: __m128, b: __m128) -> __m128 {
|
||||
static_assert_imm8!(MASK);
|
||||
simd_shuffle4(
|
||||
simd_shuffle4!(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
<const MASK: i32> [
|
||||
MASK as u32 & 0b11,
|
||||
(MASK as u32 >> 2) & 0b11,
|
||||
((MASK as u32 >> 4) & 0b11) + 4,
|
||||
|
|
@ -1032,7 +1032,7 @@ pub unsafe fn _mm_shuffle_ps<const MASK: i32>(a: __m128, b: __m128) -> __m128 {
|
|||
#[cfg_attr(test, assert_instr(unpckhps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 {
|
||||
simd_shuffle4(a, b, [2, 6, 3, 7])
|
||||
simd_shuffle4!(a, b, [2, 6, 3, 7])
|
||||
}
|
||||
|
||||
/// Unpacks and interleaves single-precision (32-bit) floating-point elements
|
||||
|
|
@ -1044,7 +1044,7 @@ pub unsafe fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 {
|
|||
#[cfg_attr(test, assert_instr(unpcklps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 {
|
||||
simd_shuffle4(a, b, [0, 4, 1, 5])
|
||||
simd_shuffle4!(a, b, [0, 4, 1, 5])
|
||||
}
|
||||
|
||||
/// Combine higher half of `a` and `b`. The higher half of `b` occupies the
|
||||
|
|
@ -1057,7 +1057,7 @@ pub unsafe fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 {
|
||||
// TODO: figure out why this is a different instruction on Windows.
|
||||
simd_shuffle4(a, b, [6, 7, 2, 3])
|
||||
simd_shuffle4!(a, b, [6, 7, 2, 3])
|
||||
}
|
||||
|
||||
/// Combine lower half of `a` and `b`. The lower half of `b` occupies the
|
||||
|
|
@ -1069,7 +1069,7 @@ pub unsafe fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 {
|
|||
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 {
|
||||
simd_shuffle4(a, b, [0, 1, 4, 5])
|
||||
simd_shuffle4!(a, b, [0, 1, 4, 5])
|
||||
}
|
||||
|
||||
/// Returns a mask of the most significant bit of each element in `a`.
|
||||
|
|
@ -1201,7 +1201,7 @@ pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 {
|
||||
let a = _mm_load_ps(p);
|
||||
simd_shuffle4(a, a, [3, 2, 1, 0])
|
||||
simd_shuffle4!(a, a, [3, 2, 1, 0])
|
||||
}
|
||||
|
||||
/// Loads unaligned 64-bits of integer data from memory into new vector.
|
||||
|
|
@ -1253,7 +1253,7 @@ pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn _mm_store1_ps(p: *mut f32, a: __m128) {
|
||||
let b: __m128 = simd_shuffle4(a, a, [0, 0, 0, 0]);
|
||||
let b: __m128 = simd_shuffle4!(a, a, [0, 0, 0, 0]);
|
||||
*(p as *mut __m128) = b;
|
||||
}
|
||||
|
||||
|
|
@ -1329,7 +1329,7 @@ pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) {
|
||||
let b: __m128 = simd_shuffle4(a, a, [3, 2, 1, 0]);
|
||||
let b: __m128 = simd_shuffle4!(a, a, [3, 2, 1, 0]);
|
||||
*(p as *mut __m128) = b;
|
||||
}
|
||||
|
||||
|
|
@ -1347,7 +1347,7 @@ pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) {
|
|||
#[cfg_attr(test, assert_instr(movss))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_move_ss(a: __m128, b: __m128) -> __m128 {
|
||||
simd_shuffle4(a, b, [4, 1, 2, 3])
|
||||
simd_shuffle4!(a, b, [4, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Performs a serializing operation on all store-to-memory instructions that
|
||||
|
|
|
|||
|
|
@ -432,10 +432,10 @@ unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
|
|||
}
|
||||
}
|
||||
let zero = _mm_set1_epi8(0).as_i8x16();
|
||||
transmute(simd_shuffle16::<i8x16, i8x16>(
|
||||
transmute::<i8x16, _>(simd_shuffle16!(
|
||||
zero,
|
||||
a.as_i8x16(),
|
||||
[
|
||||
<const IMM8: i32> [
|
||||
mask(IMM8, 0),
|
||||
mask(IMM8, 1),
|
||||
mask(IMM8, 2),
|
||||
|
|
@ -635,10 +635,10 @@ unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
|
|||
}
|
||||
}
|
||||
let zero = _mm_set1_epi8(0).as_i8x16();
|
||||
let x: i8x16 = simd_shuffle16(
|
||||
let x: i8x16 = simd_shuffle16!(
|
||||
a.as_i8x16(),
|
||||
zero,
|
||||
[
|
||||
<const IMM8: i32> [
|
||||
mask(IMM8, 0),
|
||||
mask(IMM8, 1),
|
||||
mask(IMM8, 2),
|
||||
|
|
@ -895,7 +895,7 @@ pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
|
||||
let a = a.as_i32x4();
|
||||
simd_cast::<i32x2, __m128d>(simd_shuffle2(a, a, [0, 1]))
|
||||
simd_cast::<i32x2, __m128d>(simd_shuffle2!(a, a, [0, 1]))
|
||||
}
|
||||
|
||||
/// Returns `a` with its lower element replaced by `b` after converting it to
|
||||
|
|
@ -1303,7 +1303,7 @@ pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i {
|
||||
let zero = _mm_setzero_si128();
|
||||
let r: i64x2 = simd_shuffle2(a.as_i64x2(), zero.as_i64x2(), [0, 2]);
|
||||
let r: i64x2 = simd_shuffle2!(a.as_i64x2(), zero.as_i64x2(), [0, 2]);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
|
|
@ -1391,10 +1391,10 @@ pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 {
|
|||
pub unsafe fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
|
||||
static_assert_imm8!(IMM8);
|
||||
let a = a.as_i32x4();
|
||||
let x: i32x4 = simd_shuffle4(
|
||||
let x: i32x4 = simd_shuffle4!(
|
||||
a,
|
||||
a,
|
||||
[
|
||||
<const IMM8: i32> [
|
||||
IMM8 as u32 & 0b11,
|
||||
(IMM8 as u32 >> 2) & 0b11,
|
||||
(IMM8 as u32 >> 4) & 0b11,
|
||||
|
|
@ -1419,10 +1419,10 @@ pub unsafe fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
|
|||
pub unsafe fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
|
||||
static_assert_imm8!(IMM8);
|
||||
let a = a.as_i16x8();
|
||||
let x: i16x8 = simd_shuffle8(
|
||||
let x: i16x8 = simd_shuffle8!(
|
||||
a,
|
||||
a,
|
||||
[
|
||||
<const IMM8: i32> [
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
|
|
@ -1451,10 +1451,10 @@ pub unsafe fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
|
|||
pub unsafe fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
|
||||
static_assert_imm8!(IMM8);
|
||||
let a = a.as_i16x8();
|
||||
let x: i16x8 = simd_shuffle8(
|
||||
let x: i16x8 = simd_shuffle8!(
|
||||
a,
|
||||
a,
|
||||
[
|
||||
<const IMM8: i32> [
|
||||
IMM8 as u32 & 0b11,
|
||||
(IMM8 as u32 >> 2) & 0b11,
|
||||
(IMM8 as u32 >> 4) & 0b11,
|
||||
|
|
@ -1476,7 +1476,7 @@ pub unsafe fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(punpckhbw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute::<i8x16, _>(simd_shuffle16(
|
||||
transmute::<i8x16, _>(simd_shuffle16!(
|
||||
a.as_i8x16(),
|
||||
b.as_i8x16(),
|
||||
[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
|
||||
|
|
@ -1491,7 +1491,7 @@ pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(punpckhwd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||
let x = simd_shuffle8(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
|
||||
let x = simd_shuffle8!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
|
||||
transmute::<i16x8, _>(x)
|
||||
}
|
||||
|
||||
|
|
@ -1503,7 +1503,7 @@ pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(unpckhps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute::<i32x4, _>(simd_shuffle4(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7]))
|
||||
transmute::<i32x4, _>(simd_shuffle4!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7]))
|
||||
}
|
||||
|
||||
/// Unpacks and interleaves 64-bit integers from the high half of `a` and `b`.
|
||||
|
|
@ -1514,7 +1514,7 @@ pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(unpckhpd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute::<i64x2, _>(simd_shuffle2(a.as_i64x2(), b.as_i64x2(), [1, 3]))
|
||||
transmute::<i64x2, _>(simd_shuffle2!(a.as_i64x2(), b.as_i64x2(), [1, 3]))
|
||||
}
|
||||
|
||||
/// Unpacks and interleaves 8-bit integers from the low half of `a` and `b`.
|
||||
|
|
@ -1525,7 +1525,7 @@ pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(punpcklbw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute::<i8x16, _>(simd_shuffle16(
|
||||
transmute::<i8x16, _>(simd_shuffle16!(
|
||||
a.as_i8x16(),
|
||||
b.as_i8x16(),
|
||||
[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
|
||||
|
|
@ -1540,7 +1540,7 @@ pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(punpcklwd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||
let x = simd_shuffle8(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
|
||||
let x = simd_shuffle8!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
|
||||
transmute::<i16x8, _>(x)
|
||||
}
|
||||
|
||||
|
|
@ -1552,7 +1552,7 @@ pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(unpcklps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute::<i32x4, _>(simd_shuffle4(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5]))
|
||||
transmute::<i32x4, _>(simd_shuffle4!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5]))
|
||||
}
|
||||
|
||||
/// Unpacks and interleaves 64-bit integers from the low half of `a` and `b`.
|
||||
|
|
@ -1563,7 +1563,7 @@ pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
|
|||
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute::<i64x2, _>(simd_shuffle2(a.as_i64x2(), b.as_i64x2(), [0, 2]))
|
||||
transmute::<i64x2, _>(simd_shuffle2!(a.as_i64x2(), b.as_i64x2(), [0, 2]))
|
||||
}
|
||||
|
||||
/// Returns a new vector with the low element of `a` replaced by the sum of the
|
||||
|
|
@ -2519,7 +2519,7 @@ pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
|
||||
let b: __m128d = simd_shuffle2(a, a, [0, 0]);
|
||||
let b: __m128d = simd_shuffle2!(a, a, [0, 0]);
|
||||
*(mem_addr as *mut __m128d) = b;
|
||||
}
|
||||
|
||||
|
|
@ -2533,7 +2533,7 @@ pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
|
||||
let b: __m128d = simd_shuffle2(a, a, [0, 0]);
|
||||
let b: __m128d = simd_shuffle2!(a, a, [0, 0]);
|
||||
*(mem_addr as *mut __m128d) = b;
|
||||
}
|
||||
|
||||
|
|
@ -2548,7 +2548,7 @@ pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
|
||||
let b: __m128d = simd_shuffle2(a, a, [1, 0]);
|
||||
let b: __m128d = simd_shuffle2!(a, a, [1, 0]);
|
||||
*(mem_addr as *mut __m128d) = b;
|
||||
}
|
||||
|
||||
|
|
@ -2612,7 +2612,7 @@ pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
|
||||
let a = _mm_load_pd(mem_addr);
|
||||
simd_shuffle2(a, a, [1, 0])
|
||||
simd_shuffle2!(a, a, [1, 0])
|
||||
}
|
||||
|
||||
/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
|
||||
|
|
@ -2653,7 +2653,7 @@ pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
|
||||
static_assert_imm8!(MASK);
|
||||
simd_shuffle2(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2])
|
||||
simd_shuffle2!(a, b, <const MASK: i32> [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2])
|
||||
}
|
||||
|
||||
/// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower
|
||||
|
|
@ -2777,7 +2777,7 @@ pub unsafe fn _mm_undefined_si128() -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(unpckhpd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
|
||||
simd_shuffle2(a, b, [1, 3])
|
||||
simd_shuffle2!(a, b, [1, 3])
|
||||
}
|
||||
|
||||
/// The resulting `__m128d` element is composed by the high-order values of
|
||||
|
|
@ -2792,7 +2792,7 @@ pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
|
|||
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
|
||||
simd_shuffle2(a, b, [0, 2])
|
||||
simd_shuffle2!(a, b, [0, 2])
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(movddup))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_movedup_pd(a: __m128d) -> __m128d {
|
||||
simd_shuffle2(a, a, [0, 0])
|
||||
simd_shuffle2!(a, a, [0, 0])
|
||||
}
|
||||
|
||||
/// Loads a double-precision (64-bit) floating-point element from memory
|
||||
|
|
@ -130,7 +130,7 @@ pub unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> __m128d {
|
|||
#[cfg_attr(test, assert_instr(movshdup))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_movehdup_ps(a: __m128) -> __m128 {
|
||||
simd_shuffle4(a, a, [1, 1, 3, 3])
|
||||
simd_shuffle4!(a, a, [1, 1, 3, 3])
|
||||
}
|
||||
|
||||
/// Duplicate even-indexed single-precision (32-bit) floating-point elements
|
||||
|
|
@ -142,7 +142,7 @@ pub unsafe fn _mm_movehdup_ps(a: __m128) -> __m128 {
|
|||
#[cfg_attr(test, assert_instr(movsldup))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_moveldup_ps(a: __m128) -> __m128 {
|
||||
simd_shuffle4(a, a, [0, 0, 2, 2])
|
||||
simd_shuffle4!(a, a, [0, 0, 2, 2])
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
|
|
|
|||
|
|
@ -379,7 +379,7 @@ pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
|
||||
let a = a.as_i8x16();
|
||||
let a = simd_shuffle8::<_, i8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
let a: i8x8 = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
transmute(simd_cast::<_, i16x8>(a))
|
||||
}
|
||||
|
||||
|
|
@ -392,7 +392,7 @@ pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
|
||||
let a = a.as_i8x16();
|
||||
let a = simd_shuffle4::<_, i8x4>(a, a, [0, 1, 2, 3]);
|
||||
let a: i8x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]);
|
||||
transmute(simd_cast::<_, i32x4>(a))
|
||||
}
|
||||
|
||||
|
|
@ -406,7 +406,7 @@ pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
|
||||
let a = a.as_i8x16();
|
||||
let a = simd_shuffle2::<_, i8x2>(a, a, [0, 1]);
|
||||
let a: i8x2 = simd_shuffle2!(a, a, [0, 1]);
|
||||
transmute(simd_cast::<_, i64x2>(a))
|
||||
}
|
||||
|
||||
|
|
@ -419,7 +419,7 @@ pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
|
||||
let a = a.as_i16x8();
|
||||
let a = simd_shuffle4::<_, i16x4>(a, a, [0, 1, 2, 3]);
|
||||
let a: i16x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]);
|
||||
transmute(simd_cast::<_, i32x4>(a))
|
||||
}
|
||||
|
||||
|
|
@ -432,7 +432,7 @@ pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
|
||||
let a = a.as_i16x8();
|
||||
let a = simd_shuffle2::<_, i16x2>(a, a, [0, 1]);
|
||||
let a: i16x2 = simd_shuffle2!(a, a, [0, 1]);
|
||||
transmute(simd_cast::<_, i64x2>(a))
|
||||
}
|
||||
|
||||
|
|
@ -445,7 +445,7 @@ pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
|
||||
let a = a.as_i32x4();
|
||||
let a = simd_shuffle2::<_, i32x2>(a, a, [0, 1]);
|
||||
let a: i32x2 = simd_shuffle2!(a, a, [0, 1]);
|
||||
transmute(simd_cast::<_, i64x2>(a))
|
||||
}
|
||||
|
||||
|
|
@ -458,7 +458,7 @@ pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
|
||||
let a = a.as_u8x16();
|
||||
let a = simd_shuffle8::<_, u8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
let a: u8x8 = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
transmute(simd_cast::<_, i16x8>(a))
|
||||
}
|
||||
|
||||
|
|
@ -471,7 +471,7 @@ pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
|
||||
let a = a.as_u8x16();
|
||||
let a = simd_shuffle4::<_, u8x4>(a, a, [0, 1, 2, 3]);
|
||||
let a: u8x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]);
|
||||
transmute(simd_cast::<_, i32x4>(a))
|
||||
}
|
||||
|
||||
|
|
@ -484,7 +484,7 @@ pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
|
||||
let a = a.as_u8x16();
|
||||
let a = simd_shuffle2::<_, u8x2>(a, a, [0, 1]);
|
||||
let a: u8x2 = simd_shuffle2!(a, a, [0, 1]);
|
||||
transmute(simd_cast::<_, i64x2>(a))
|
||||
}
|
||||
|
||||
|
|
@ -498,7 +498,7 @@ pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
|
||||
let a = a.as_u16x8();
|
||||
let a = simd_shuffle4::<_, u16x4>(a, a, [0, 1, 2, 3]);
|
||||
let a: u16x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]);
|
||||
transmute(simd_cast::<_, i32x4>(a))
|
||||
}
|
||||
|
||||
|
|
@ -512,7 +512,7 @@ pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
|
||||
let a = a.as_u16x8();
|
||||
let a = simd_shuffle2::<_, u16x2>(a, a, [0, 1]);
|
||||
let a: u16x2 = simd_shuffle2!(a, a, [0, 1]);
|
||||
transmute(simd_cast::<_, i64x2>(a))
|
||||
}
|
||||
|
||||
|
|
@ -526,7 +526,7 @@ pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
|
||||
let a = a.as_u32x4();
|
||||
let a = simd_shuffle2::<_, u32x2>(a, a, [0, 1]);
|
||||
let a: u32x2 = simd_shuffle2!(a, a, [0, 1]);
|
||||
transmute(simd_cast::<_, i64x2>(a))
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -113,10 +113,10 @@ pub unsafe fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128
|
|||
shift + i
|
||||
}
|
||||
}
|
||||
let r: i8x16 = simd_shuffle16(
|
||||
let r: i8x16 = simd_shuffle16!(
|
||||
b.as_i8x16(),
|
||||
a.as_i8x16(),
|
||||
[
|
||||
<const IMM8: i32> [
|
||||
mask(IMM8 as u32, 0),
|
||||
mask(IMM8 as u32, 1),
|
||||
mask(IMM8 as u32, 2),
|
||||
|
|
|
|||
|
|
@ -194,8 +194,8 @@ generate int32x2_t:int32x2_t:int64x2_t
|
|||
/// Unsigned Absolute difference Long
|
||||
name = vabdl_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle8, c:uint8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_shuffle8, d:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_shuffle8!, c:uint8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_shuffle8!, d:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_cast, {vabd_u8, c, d}
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
|
||||
|
|
@ -207,8 +207,8 @@ generate uint8x16_t:uint8x16_t:uint16x8_t
|
|||
/// Unsigned Absolute difference Long
|
||||
name = vabdl_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle4, c:uint16x4_t, a, a, [4, 5, 6, 7]
|
||||
multi_fn = simd_shuffle4, d:uint16x4_t, b, b, [4, 5, 6, 7]
|
||||
multi_fn = simd_shuffle4!, c:uint16x4_t, a, a, [4, 5, 6, 7]
|
||||
multi_fn = simd_shuffle4!, d:uint16x4_t, b, b, [4, 5, 6, 7]
|
||||
multi_fn = simd_cast, {vabd_u16, c, d}
|
||||
a = 1, 2, 3, 4, 8, 9, 11, 12
|
||||
b = 10, 10, 10, 10, 10, 10, 10, 10
|
||||
|
|
@ -220,8 +220,8 @@ generate uint16x8_t:uint16x8_t:uint32x4_t
|
|||
/// Unsigned Absolute difference Long
|
||||
name = vabdl_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle2, c:uint32x2_t, a, a, [2, 3]
|
||||
multi_fn = simd_shuffle2, d:uint32x2_t, b, b, [2, 3]
|
||||
multi_fn = simd_shuffle2!, c:uint32x2_t, a, a, [2, 3]
|
||||
multi_fn = simd_shuffle2!, d:uint32x2_t, b, b, [2, 3]
|
||||
multi_fn = simd_cast, {vabd_u32, c, d}
|
||||
a = 1, 2, 3, 4
|
||||
b = 10, 10, 10, 10
|
||||
|
|
@ -233,8 +233,8 @@ generate uint32x4_t:uint32x4_t:uint64x2_t
|
|||
/// Signed Absolute difference Long
|
||||
name = vabdl_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle8, c:int8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_shuffle8, d:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_shuffle8!, c:int8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_shuffle8!, d:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_cast, e:uint8x8_t, {vabd_s8, c, d}
|
||||
multi_fn = simd_cast, e
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
|
|
@ -247,8 +247,8 @@ generate int8x16_t:int8x16_t:int16x8_t
|
|||
/// Signed Absolute difference Long
|
||||
name = vabdl_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle4, c:int16x4_t, a, a, [4, 5, 6, 7]
|
||||
multi_fn = simd_shuffle4, d:int16x4_t, b, b, [4, 5, 6, 7]
|
||||
multi_fn = simd_shuffle4!, c:int16x4_t, a, a, [4, 5, 6, 7]
|
||||
multi_fn = simd_shuffle4!, d:int16x4_t, b, b, [4, 5, 6, 7]
|
||||
multi_fn = simd_cast, e:uint16x4_t, {vabd_s16, c, d}
|
||||
multi_fn = simd_cast, e
|
||||
a = 1, 2, 3, 4, 9, 10, 11, 12
|
||||
|
|
@ -261,8 +261,8 @@ generate int16x8_t:int16x8_t:int32x4_t
|
|||
/// Signed Absolute difference Long
|
||||
name = vabdl_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle2, c:int32x2_t, a, a, [2, 3]
|
||||
multi_fn = simd_shuffle2, d:int32x2_t, b, b, [2, 3]
|
||||
multi_fn = simd_shuffle2!, c:int32x2_t, a, a, [2, 3]
|
||||
multi_fn = simd_shuffle2!, d:int32x2_t, b, b, [2, 3]
|
||||
multi_fn = simd_cast, e:uint32x2_t, {vabd_s32, c, d}
|
||||
multi_fn = simd_cast, e
|
||||
a = 1, 2, 3, 4
|
||||
|
|
@ -727,7 +727,7 @@ lane-suffixes
|
|||
constn = LANE1:LANE2
|
||||
multi_fn = static_assert_imm-in0_exp_len-LANE1
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE2
|
||||
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in0_len-LANE2}
|
||||
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2}
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
n = 0:1
|
||||
|
|
@ -744,7 +744,7 @@ lane-suffixes
|
|||
constn = LANE1:LANE2
|
||||
multi_fn = static_assert_imm-in0_exp_len-LANE1
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE2
|
||||
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in0_len-LANE2}
|
||||
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2}
|
||||
a = 1., 2., 3., 4.
|
||||
b = 0., 0.5, 0., 0.
|
||||
n = 0:1
|
||||
|
|
@ -759,8 +759,8 @@ lane-suffixes
|
|||
constn = LANE1:LANE2
|
||||
multi_fn = static_assert_imm-in0_exp_len-LANE1
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE2
|
||||
multi_fn = simd_shuffle-in_len-noext, a:in_t, a, a, {asc-0-in_len}
|
||||
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in_len-LANE2}
|
||||
multi_fn = simd_shuffle-in_len-!, a:in_t, a, a, {asc-0-in_len}
|
||||
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in_len-LANE2}
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
n = 0:1
|
||||
|
|
@ -777,8 +777,8 @@ lane-suffixes
|
|||
constn = LANE1:LANE2
|
||||
multi_fn = static_assert_imm-in0_exp_len-LANE1
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE2
|
||||
multi_fn = simd_shuffle-in_len-noext, a:in_t, a, a, {asc-0-in_len}
|
||||
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in_len-LANE2}
|
||||
multi_fn = simd_shuffle-in_len-!, a:in_t, a, a, {asc-0-in_len}
|
||||
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in_len-LANE2}
|
||||
a = 1., 2., 3., 4.
|
||||
b = 0., 0.5, 0., 0.
|
||||
n = 0:1
|
||||
|
|
@ -793,8 +793,8 @@ lane-suffixes
|
|||
constn = LANE1:LANE2
|
||||
multi_fn = static_assert_imm-in0_exp_len-LANE1
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE2
|
||||
multi_fn = simd_shuffle-in0_len-noext, b:in_t0, b, b, {asc-0-in0_len}
|
||||
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in0_len-LANE2}
|
||||
multi_fn = simd_shuffle-in0_len-!, b:in_t0, b, b, {asc-0-in0_len}
|
||||
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2}
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
n = 0:1
|
||||
|
|
@ -811,8 +811,8 @@ lane-suffixes
|
|||
constn = LANE1:LANE2
|
||||
multi_fn = static_assert_imm-in0_exp_len-LANE1
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE2
|
||||
multi_fn = simd_shuffle-in0_len-noext, b:in_t0, b, b, {asc-0-in0_len}
|
||||
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in0_len-LANE2}
|
||||
multi_fn = simd_shuffle-in0_len-!, b:in_t0, b, b, {asc-0-in0_len}
|
||||
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2}
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
n = 1:0
|
||||
|
|
@ -827,8 +827,8 @@ lane-suffixes
|
|||
constn = LANE1:LANE2
|
||||
multi_fn = static_assert_imm-in0_exp_len-LANE1
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE2
|
||||
multi_fn = simd_shuffle-in0_len-noext, b:in_t0, b, b, {asc-0-in0_len}
|
||||
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in0_len-LANE2}
|
||||
multi_fn = simd_shuffle-in0_len-!, b:in_t0, b, b, {asc-0-in0_len}
|
||||
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2}
|
||||
a = 1., 2., 3., 4.
|
||||
b = 0.5, 0., 0., 0.
|
||||
n = 1:0
|
||||
|
|
@ -897,7 +897,7 @@ generate float32x2_t:float64x2_t
|
|||
/// Floating-point convert to higher precision long
|
||||
name = vcvt_high
|
||||
noq-double-suffixes
|
||||
multi_fn = simd_shuffle2, b:float32x2_t, a, a, [2, 3]
|
||||
multi_fn = simd_shuffle2!, b:float32x2_t, a, a, [2, 3]
|
||||
multi_fn = simd_cast, b
|
||||
a = -1.2, 1.2, 2.3, 3.4
|
||||
validate 2.3f32 as f64, 3.4f32 as f64
|
||||
|
|
@ -918,7 +918,7 @@ generate float64x2_t:float32x2_t
|
|||
/// Floating-point convert to lower precision narrow
|
||||
name = vcvt_high
|
||||
noq-double-suffixes
|
||||
multi_fn = simd_shuffle4, a, {simd_cast, b}, [0, 1, 2, 3]
|
||||
multi_fn = simd_shuffle4!, a, {simd_cast, b}, [0, 1, 2, 3]
|
||||
a = -1.2, 1.2
|
||||
b = -2.3, 3.4
|
||||
validate -1.2, 1.2, -2.3f64 as f32, 3.4f64 as f32
|
||||
|
|
@ -939,7 +939,7 @@ generate float64x2_t:float32x2_t
|
|||
/// Floating-point convert to lower precision narrow, rounding to odd
|
||||
name = vcvtx_high
|
||||
noq-double-suffixes
|
||||
multi_fn = simd_shuffle4, a, {vcvtx-noq_doubleself-noext, b}, [0, 1, 2, 3]
|
||||
multi_fn = simd_shuffle4!, a, {vcvtx-noq_doubleself-noext, b}, [0, 1, 2, 3]
|
||||
a = -1.0, 2.0
|
||||
b = -3.0, 4.0
|
||||
validate -1.0, 2.0, -3.0, 4.0
|
||||
|
|
@ -1162,7 +1162,7 @@ name = vdup
|
|||
lane-suffixes
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-in_exp_len-N
|
||||
multi_fn = simd_shuffle-out_len-noext, a, a, {dup-out_len-N as u32}
|
||||
multi_fn = simd_shuffle-out_len-!, a, a, {dup-out_len-N as u32}
|
||||
a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16
|
||||
n = HFLEN
|
||||
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
|
||||
|
|
@ -1188,7 +1188,7 @@ name = vdup
|
|||
lane-suffixes
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-in_exp_len-N
|
||||
multi_fn = simd_shuffle-out_len-noext, a, a, {dup-out_len-N as u32}
|
||||
multi_fn = simd_shuffle-out_len-!, a, a, {dup-out_len-N as u32}
|
||||
a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16
|
||||
n = HFLEN
|
||||
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
|
||||
|
|
@ -1202,7 +1202,7 @@ name = vdup
|
|||
lane-suffixes
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-in_exp_len-N
|
||||
multi_fn = simd_shuffle-out_len-noext, a, a, {dup-out_len-N as u32}
|
||||
multi_fn = simd_shuffle-out_len-!, a, a, {dup-out_len-N as u32}
|
||||
a = 1., 1., 1., 4.
|
||||
n = HFLEN
|
||||
validate 1., 1., 1., 1.
|
||||
|
|
@ -1303,7 +1303,7 @@ generate float32x2_t:f32, float32x4_t:f32, float64x1_t:f64, float64x2_t:f64
|
|||
name = vext
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-out_exp_len-N
|
||||
multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-noext, a, b, {asc-n-out_len}
|
||||
multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-!, a, b, {asc-n-out_len}
|
||||
a = 0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15
|
||||
b = 9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11
|
||||
n = HFLEN
|
||||
|
|
@ -1317,7 +1317,7 @@ generate int*_t, uint*_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t
|
|||
name = vext
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-out_exp_len-N
|
||||
multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-noext, a, b, {asc-n-out_len}
|
||||
multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-!, a, b, {asc-n-out_len}
|
||||
a = 0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15
|
||||
b = 9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11
|
||||
n = HFLEN
|
||||
|
|
@ -1333,7 +1333,7 @@ generate int64x2_t, uint64x2_t
|
|||
name = vext
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-out_exp_len-N
|
||||
multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-noext, a, b, {asc-n-out_len}
|
||||
multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-!, a, b, {asc-n-out_len}
|
||||
a = 0., 2., 2., 3.
|
||||
b = 3., 4., 5., 6.,
|
||||
n = HFLEN
|
||||
|
|
@ -1403,7 +1403,7 @@ name = vmla
|
|||
in2-lane-suffixes
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in2_exp_len-LANE
|
||||
multi_fn = vmla-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
|
||||
multi_fn = vmla-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
|
||||
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
|
|
@ -1422,7 +1422,7 @@ name = vmla
|
|||
in2-lane-suffixes
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in2_exp_len-LANE
|
||||
multi_fn = vmla-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
|
||||
multi_fn = vmla-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
|
||||
a = 0., 1., 2., 3.
|
||||
b = 2., 2., 2., 2.
|
||||
c = 0., 3., 0., 0.
|
||||
|
|
@ -1477,7 +1477,7 @@ name = vmlal_lane
|
|||
in2-suffix
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in2_exp_len-LANE
|
||||
multi_fn = vmlal-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
|
||||
multi_fn = vmlal-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
|
||||
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
|
|
@ -1495,8 +1495,8 @@ generate uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x2_t:uint
|
|||
/// Signed multiply-add long
|
||||
name = vmlal_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-noext, c:half, c, c, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-!, c:half, c, c, {fixed-half-right}
|
||||
multi_fn = vmlal-noqself-noext, a, b, c
|
||||
a = 8, 7, 6, 5, 4, 3, 2, 1
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
|
|
@ -1510,8 +1510,8 @@ generate int16x8_t:int8x16_t:int8x16_t:int16x8_t, int32x4_t:int16x8_t:int16x8_t:
|
|||
/// Unsigned multiply-add long
|
||||
name = vmlal_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-noext, c:half, c, c, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-!, c:half, c, c, {fixed-half-right}
|
||||
multi_fn = vmlal-noqself-noext, a, b, c
|
||||
a = 8, 7, 6, 5, 4, 3, 2, 1
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
|
|
@ -1541,7 +1541,7 @@ name = vmlal_high_lane
|
|||
in2-suffix
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in2_exp_len-LANE
|
||||
multi_fn = vmlal_high-noqself-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
|
||||
multi_fn = vmlal_high-noqself-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
|
||||
a = 8, 7, 6, 5, 4, 3, 2, 1
|
||||
b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
|
||||
c = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
|
|
@ -1613,7 +1613,7 @@ name = vmls
|
|||
in2-lane-suffixes
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in2_exp_len-LANE
|
||||
multi_fn = vmls-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
|
||||
multi_fn = vmls-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
|
||||
a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
|
|
@ -1632,7 +1632,7 @@ name = vmls
|
|||
in2-lane-suffixes
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in2_exp_len-LANE
|
||||
multi_fn = vmls-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
|
||||
multi_fn = vmls-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
|
||||
a = 6., 7., 8., 9.
|
||||
b = 2., 2., 2., 2.
|
||||
c = 0., 3., 0., 0.
|
||||
|
|
@ -1687,7 +1687,7 @@ name = vmlsl_lane
|
|||
in2-suffix
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in2_exp_len-LANE
|
||||
multi_fn = vmlsl-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
|
||||
multi_fn = vmlsl-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
|
||||
a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
|
|
@ -1705,8 +1705,8 @@ generate uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x2_t:uint
|
|||
/// Signed multiply-subtract long
|
||||
name = vmlsl_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-noext, c:half, c, c, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-!, c:half, c, c, {fixed-half-right}
|
||||
multi_fn = vmlsl-noqself-noext, a, b, c
|
||||
a = 14, 15, 16, 17, 18, 19, 20, 21
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
|
|
@ -1720,8 +1720,8 @@ generate int16x8_t:int8x16_t:int8x16_t:int16x8_t, int32x4_t:int16x8_t:int16x8_t:
|
|||
/// Unsigned multiply-subtract long
|
||||
name = vmlsl_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-noext, c:half, c, c, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-!, c:half, c, c, {fixed-half-right}
|
||||
multi_fn = vmlsl-noqself-noext, a, b, c
|
||||
a = 14, 15, 16, 17, 18, 19, 20, 21
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
|
|
@ -1751,7 +1751,7 @@ name = vmlsl_high_lane
|
|||
in2-suffix
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in2_exp_len-LANE
|
||||
multi_fn = vmlsl_high-noqself-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
|
||||
multi_fn = vmlsl_high-noqself-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
|
||||
a = 14, 15, 16, 17, 18, 19, 20, 21
|
||||
b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
|
||||
c = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
|
|
@ -1769,7 +1769,7 @@ generate uint64x2_t:uint32x4_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x4_t:uint
|
|||
name = vmovn_high
|
||||
no-q
|
||||
multi_fn = simd_cast, c:in_t0, b
|
||||
multi_fn = simd_shuffle-out_len-noext, a, c, {asc-0-out_len}
|
||||
multi_fn = simd_shuffle-out_len-!, a, c, {asc-0-out_len}
|
||||
a = 0, 1, 2, 3, 2, 3, 4, 5
|
||||
b = 2, 3, 4, 5, 12, 13, 14, 15
|
||||
validate 0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15
|
||||
|
|
@ -2070,7 +2070,7 @@ name = vmul
|
|||
lane-suffixes
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = simd_mul, a, {simd_shuffle-out_len-noext, b, b, {dup-out_len-LANE as u32}}
|
||||
multi_fn = simd_mul, a, {simd_shuffle-out_len-!, b, b, {dup-out_len-LANE as u32}}
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
n = 1
|
||||
|
|
@ -2102,7 +2102,7 @@ name = vmul
|
|||
lane-suffixes
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = simd_mul, a, {simd_shuffle-out_len-noext, b, b, {dup-out_len-LANE as u32}}
|
||||
multi_fn = simd_mul, a, {simd_shuffle-out_len-!, b, b, {dup-out_len-LANE as u32}}
|
||||
a = 1., 2., 3., 4.
|
||||
b = 2., 0., 0., 0.
|
||||
n = 0
|
||||
|
|
@ -2155,8 +2155,8 @@ generate int8x8_t:int8x8_t:int16x8_t, int16x4_t:int16x4_t:int32x4_t, int32x2_t:i
|
|||
/// Signed multiply long
|
||||
name = vmull_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle-out_len-noext, a:half, a, a, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-!, a:half, a, a, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = vmull-noqself-noext, a, b
|
||||
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
|
||||
|
|
@ -2181,8 +2181,8 @@ generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint3
|
|||
/// Unsigned multiply long
|
||||
name = vmull_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle-out_len-noext, a:half, a, a, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-!, a:half, a, a, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = vmull-noqself-noext, a, b
|
||||
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
|
||||
|
|
@ -2222,8 +2222,8 @@ link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t
|
|||
/// Polynomial multiply long
|
||||
name = vmull_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle-out_len-noext, a:half, a, a, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-!, a:half, a, a, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = vmull-noqself-noext, a, b
|
||||
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3
|
||||
|
|
@ -2263,7 +2263,7 @@ generate uint16x4_t:u16:uint32x4_t, uint32x2_t:u32:uint64x2_t
|
|||
name = vmull_lane
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = vmull-in0-noext, a, {simd_shuffle-in0_len-noext, b, b, {dup-in0_len-LANE as u32}}
|
||||
multi_fn = vmull-in0-noext, a, {simd_shuffle-in0_len-!, b, b, {dup-in0_len-LANE as u32}}
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
n = 1
|
||||
|
|
@ -2294,7 +2294,7 @@ generate uint16x8_t:u16:uint32x4_t, uint32x4_t:u32:uint64x2_t
|
|||
name = vmull_high_lane
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = vmull_high-noqself-noext, a, {simd_shuffle-in0_len-noext, b, b, {dup-in0_len-LANE as u32}}
|
||||
multi_fn = vmull_high-noqself-noext, a, {simd_shuffle-in0_len-!, b, b, {dup-in0_len-LANE as u32}}
|
||||
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
n = 1
|
||||
|
|
@ -2336,7 +2336,7 @@ name = vmulx
|
|||
lane-suffixes
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = vmulx-in0-noext, a, {simd_shuffle-in0_len-noext, b, b, {dup-in0_len-LANE as u32}}
|
||||
multi_fn = vmulx-in0-noext, a, {simd_shuffle-in0_len-!, b, b, {dup-in0_len-LANE as u32}}
|
||||
a = 1., 2., 3., 4.
|
||||
b = 2., 0., 0., 0.
|
||||
n = 0
|
||||
|
|
@ -2573,7 +2573,7 @@ generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
|
|||
name = vsubhn_high
|
||||
no-q
|
||||
multi_fn = vsubhn-noqself-noext, d:in_t0, b, c
|
||||
multi_fn = simd_shuffle-out_len-noext, a, d, {asc-0-out_len}
|
||||
multi_fn = simd_shuffle-out_len-!, a, d, {asc-0-out_len}
|
||||
a = MAX, 0, MAX, 0, MAX, 0, MAX, 0
|
||||
b = MAX, 1, MAX, 1, MAX, 1, MAX, 1
|
||||
c = 1, 0, 1, 0, 1, 0, 1, 0
|
||||
|
|
@ -2629,7 +2629,7 @@ generate uint16x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint32x4_t, uint
|
|||
/// Signed Subtract Wide
|
||||
name = vsubw_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle8, c:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_shuffle8!, c:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_sub, a, {simd_cast, c}
|
||||
a = 8, 9, 10, 12, 13, 14, 15, 16
|
||||
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
|
||||
|
|
@ -2641,7 +2641,7 @@ generate int16x8_t:int8x16_t:int16x8_t
|
|||
/// Signed Subtract Wide
|
||||
name = vsubw_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle4, c:int16x4_t, b, b, [4, 5, 6, 7]
|
||||
multi_fn = simd_shuffle4!, c:int16x4_t, b, b, [4, 5, 6, 7]
|
||||
multi_fn = simd_sub, a, {simd_cast, c}
|
||||
a = 8, 9, 10, 11
|
||||
b = 0, 1, 2, 3, 8, 9, 10, 11
|
||||
|
|
@ -2653,7 +2653,7 @@ generate int32x4_t:int16x8_t:int32x4_t
|
|||
/// Signed Subtract Wide
|
||||
name = vsubw_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle2, c:int32x2_t, b, b, [2, 3]
|
||||
multi_fn = simd_shuffle2!, c:int32x2_t, b, b, [2, 3]
|
||||
multi_fn = simd_sub, a, {simd_cast, c}
|
||||
a = 8, 9
|
||||
b = 6, 7, 8, 9
|
||||
|
|
@ -2665,7 +2665,7 @@ generate int64x2_t:int32x4_t:int64x2_t
|
|||
/// Unsigned Subtract Wide
|
||||
name = vsubw_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle8, c:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_shuffle8!, c:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_sub, a, {simd_cast, c}
|
||||
a = 8, 9, 10, 11, 12, 13, 14, 15
|
||||
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
|
|
@ -2677,7 +2677,7 @@ generate uint16x8_t:uint8x16_t:uint16x8_t
|
|||
/// Unsigned Subtract Wide
|
||||
name = vsubw_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle4, c:uint16x4_t, b, b, [4, 5, 6, 7]
|
||||
multi_fn = simd_shuffle4!, c:uint16x4_t, b, b, [4, 5, 6, 7]
|
||||
multi_fn = simd_sub, a, {simd_cast, c}
|
||||
a = 8, 9, 10, 11
|
||||
b = 0, 1, 2, 3, 8, 9, 10, 11
|
||||
|
|
@ -2689,7 +2689,7 @@ generate uint32x4_t:uint16x8_t:uint32x4_t
|
|||
/// Unsigned Subtract Wide
|
||||
name = vsubw_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle2, c:uint32x2_t, b, b, [2, 3]
|
||||
multi_fn = simd_shuffle2!, c:uint32x2_t, b, b, [2, 3]
|
||||
multi_fn = simd_sub, a, {simd_cast, c}
|
||||
a = 8, 9
|
||||
b = 6, 7, 8, 9
|
||||
|
|
@ -2731,9 +2731,9 @@ generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint3
|
|||
/// Signed Subtract Long
|
||||
name = vsubl_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle8, c:int8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_shuffle8!, c:int8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_cast, d:out_t, c
|
||||
multi_fn = simd_shuffle8, e:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_shuffle8!, e:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_cast, f:out_t, e
|
||||
multi_fn = simd_sub, d, f
|
||||
|
||||
|
|
@ -2747,9 +2747,9 @@ generate int8x16_t:int8x16_t:int16x8_t
|
|||
/// Signed Subtract Long
|
||||
name = vsubl_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle4, c:int16x4_t, a, a, [4, 5, 6, 7]
|
||||
multi_fn = simd_shuffle4!, c:int16x4_t, a, a, [4, 5, 6, 7]
|
||||
multi_fn = simd_cast, d:out_t, c
|
||||
multi_fn = simd_shuffle4, e:int16x4_t, b, b, [4, 5, 6, 7]
|
||||
multi_fn = simd_shuffle4!, e:int16x4_t, b, b, [4, 5, 6, 7]
|
||||
multi_fn = simd_cast, f:out_t, e
|
||||
multi_fn = simd_sub, d, f
|
||||
|
||||
|
|
@ -2763,9 +2763,9 @@ generate int16x8_t:int16x8_t:int32x4_t
|
|||
/// Signed Subtract Long
|
||||
name = vsubl_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle2, c:int32x2_t, a, a, [2, 3]
|
||||
multi_fn = simd_shuffle2!, c:int32x2_t, a, a, [2, 3]
|
||||
multi_fn = simd_cast, d:out_t, c
|
||||
multi_fn = simd_shuffle2, e:int32x2_t, b, b, [2, 3]
|
||||
multi_fn = simd_shuffle2!, e:int32x2_t, b, b, [2, 3]
|
||||
multi_fn = simd_cast, f:out_t, e
|
||||
multi_fn = simd_sub, d, f
|
||||
|
||||
|
|
@ -2779,9 +2779,9 @@ generate int32x4_t:int32x4_t:int64x2_t
|
|||
/// Unsigned Subtract Long
|
||||
name = vsubl_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle8, c:uint8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_shuffle8!, c:uint8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_cast, d:out_t, c
|
||||
multi_fn = simd_shuffle8, e:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_shuffle8!, e:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_cast, f:out_t, e
|
||||
multi_fn = simd_sub, d, f
|
||||
|
||||
|
|
@ -2795,9 +2795,9 @@ generate uint8x16_t:uint8x16_t:uint16x8_t
|
|||
/// Unsigned Subtract Long
|
||||
name = vsubl_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle4, c:uint16x4_t, a, a, [4, 5, 6, 7]
|
||||
multi_fn = simd_shuffle4!, c:uint16x4_t, a, a, [4, 5, 6, 7]
|
||||
multi_fn = simd_cast, d:out_t, c
|
||||
multi_fn = simd_shuffle4, e:uint16x4_t, b, b, [4, 5, 6, 7]
|
||||
multi_fn = simd_shuffle4!, e:uint16x4_t, b, b, [4, 5, 6, 7]
|
||||
multi_fn = simd_cast, f:out_t, e
|
||||
multi_fn = simd_sub, d, f
|
||||
|
||||
|
|
@ -2811,9 +2811,9 @@ generate uint16x8_t:uint16x8_t:uint32x4_t
|
|||
/// Unsigned Subtract Long
|
||||
name = vsubl_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle2, c:uint32x2_t, a, a, [2, 3]
|
||||
multi_fn = simd_shuffle2!, c:uint32x2_t, a, a, [2, 3]
|
||||
multi_fn = simd_cast, d:out_t, c
|
||||
multi_fn = simd_shuffle2, e:uint32x2_t, b, b, [2, 3]
|
||||
multi_fn = simd_shuffle2!, e:uint32x2_t, b, b, [2, 3]
|
||||
multi_fn = simd_cast, f:out_t, e
|
||||
multi_fn = simd_sub, d, f
|
||||
|
||||
|
|
@ -3011,8 +3011,8 @@ generate int16x4_t:i16:int32x4_t, int32x2_t:i32:int64x2_t
|
|||
/// Signed saturating doubling multiply long
|
||||
name = vqdmull_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle-out_len-noext, a:half, a, a, {asc-halflen-halflen}
|
||||
multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {asc-halflen-halflen}
|
||||
multi_fn = simd_shuffle-out_len-!, a:half, a, a, {asc-halflen-halflen}
|
||||
multi_fn = simd_shuffle-out_len-!, b:half, b, b, {asc-halflen-halflen}
|
||||
multi_fn = vqdmull-noqself-noext, a, b
|
||||
a = 0, 1, 4, 5, 4, 5, 6, 7
|
||||
b = 1, 2, 5, 6, 5, 6, 7, 8
|
||||
|
|
@ -3024,7 +3024,7 @@ generate int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t
|
|||
/// Signed saturating doubling multiply long
|
||||
name = vqdmull_high_n
|
||||
no-q
|
||||
multi_fn = simd_shuffle-out_len-noext, a:in_ntt, a, a, {asc-out_len-out_len}
|
||||
multi_fn = simd_shuffle-out_len-!, a:in_ntt, a, a, {asc-out_len-out_len}
|
||||
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
|
||||
multi_fn = vqdmull-in_ntt-noext, a, b
|
||||
a = 0, 2, 8, 10, 8, 10, 12, 14
|
||||
|
|
@ -3038,7 +3038,7 @@ generate int16x8_t:i16:int32x4_t, int32x4_t:i32:int64x2_t
|
|||
name = vqdmull_lane
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-in_exp_len-N
|
||||
multi_fn = simd_shuffle-out_len-noext, b:in_t0, b, b, {dup-out_len-N as u32}
|
||||
multi_fn = simd_shuffle-out_len-!, b:in_t0, b, b, {dup-out_len-N as u32}
|
||||
multi_fn = vqdmull-noqself-noext, a, b
|
||||
a = 1, 2, 3, 4
|
||||
b = 0, 2, 2, 0, 2, 0, 0, 0
|
||||
|
|
@ -3083,8 +3083,8 @@ generate i32:int32x2_t:i64, i32:int32x4_t:i64
|
|||
name = vqdmull_high_lane
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-in_exp_len-N
|
||||
multi_fn = simd_shuffle-out_len-noext, a:in_t, a, a, {asc-out_len-out_len}
|
||||
multi_fn = simd_shuffle-out_len-noext, b:in_t, b, b, {dup-out_len-N as u32}
|
||||
multi_fn = simd_shuffle-out_len-!, a:in_t, a, a, {asc-out_len-out_len}
|
||||
multi_fn = simd_shuffle-out_len-!, b:in_t, b, b, {dup-out_len-N as u32}
|
||||
multi_fn = vqdmull-self-noext, a, b
|
||||
a = 0, 1, 4, 5, 4, 5, 6, 7
|
||||
b = 0, 2, 2, 0, 2, 0, 0, 0
|
||||
|
|
@ -3098,8 +3098,8 @@ generate int16x8_t:int16x4_t:int32x4_t, int32x4_t:int32x2_t:int64x2_t
|
|||
name = vqdmull_high_lane
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-in_exp_len-N
|
||||
multi_fn = simd_shuffle-out_len-noext, a:half, a, a, {asc-out_len-out_len}
|
||||
multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {dup-out_len-N as u32}
|
||||
multi_fn = simd_shuffle-out_len-!, a:half, a, a, {asc-out_len-out_len}
|
||||
multi_fn = simd_shuffle-out_len-!, b:half, b, b, {dup-out_len-N as u32}
|
||||
multi_fn = vqdmull-noqself-noext, a, b
|
||||
a = 0, 1, 4, 5, 4, 5, 6, 7
|
||||
b = 0, 2, 2, 0, 2, 0, 0, 0
|
||||
|
|
@ -3390,7 +3390,7 @@ name = vqrdmulh
|
|||
lane-suffixes
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = simd_shuffle-out_len-noext, b:out_t, b, b, {dup-out_len-LANE as u32}
|
||||
multi_fn = simd_shuffle-out_len-!, b:out_t, b, b, {dup-out_len-LANE as u32}
|
||||
multi_fn = vqrdmulh-out-noext, a, b
|
||||
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
|
||||
b = 0, 2, 0, 0, 0, 0, 0, 0,
|
||||
|
|
@ -3616,7 +3616,7 @@ name = vqrshrn_high
|
|||
noq-n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-halfbits
|
||||
multi_fn = simd_shuffle-out_len-noext, a, {vqrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
|
||||
multi_fn = simd_shuffle-out_len-!, a, {vqrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
|
||||
a = 0, 1, 2, 3, 2, 3, 6, 7
|
||||
b = 8, 12, 24, 28, 48, 52, 56, 60
|
||||
n = 2
|
||||
|
|
@ -3662,7 +3662,7 @@ name = vqrshrn_high
|
|||
noq-n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-halfbits
|
||||
multi_fn = simd_shuffle-out_len-noext, a, {vqrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
|
||||
multi_fn = simd_shuffle-out_len-!, a, {vqrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
|
||||
a = 0, 1, 2, 3, 2, 3, 6, 7
|
||||
b = 8, 12, 24, 28, 48, 52, 56, 60
|
||||
n = 2
|
||||
|
|
@ -3708,7 +3708,7 @@ name = vqrshrun_high
|
|||
noq-n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-halfbits
|
||||
multi_fn = simd_shuffle-out_len-noext, a, {vqrshrun_n-noqself-::<N>, b}, {asc-0-out_len}
|
||||
multi_fn = simd_shuffle-out_len-!, a, {vqrshrun_n-noqself-::<N>, b}, {asc-0-out_len}
|
||||
a = 0, 1, 2, 3, 2, 3, 6, 7
|
||||
b = 8, 12, 24, 28, 48, 52, 56, 60
|
||||
n = 2
|
||||
|
|
@ -3858,7 +3858,7 @@ name = vqshrn_high
|
|||
noq-n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-halfbits
|
||||
multi_fn = simd_shuffle-out_len-noext, a, {vqshrn_n-noqself-::<N>, b}, {asc-0-out_len}
|
||||
multi_fn = simd_shuffle-out_len-!, a, {vqshrn_n-noqself-::<N>, b}, {asc-0-out_len}
|
||||
a = 0, 1, 8, 9, 8, 9, 10, 11
|
||||
b = 32, 36, 40, 44, 48, 52, 56, 60
|
||||
n = 2
|
||||
|
|
@ -3903,7 +3903,7 @@ name = vqshrn_high
|
|||
noq-n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-halfbits
|
||||
multi_fn = simd_shuffle-out_len-noext, a, {vqshrn_n-noqself-::<N>, b}, {asc-0-out_len}
|
||||
multi_fn = simd_shuffle-out_len-!, a, {vqshrn_n-noqself-::<N>, b}, {asc-0-out_len}
|
||||
a = 0, 1, 8, 9, 8, 9, 10, 11
|
||||
b = 32, 36, 40, 44, 48, 52, 56, 60
|
||||
n = 2
|
||||
|
|
@ -3948,7 +3948,7 @@ name = vqshrun_high
|
|||
noq-n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-halfbits
|
||||
multi_fn = simd_shuffle-out_len-noext, a, {vqshrun_n-noqself-::<N>, b}, {asc-0-out_len}
|
||||
multi_fn = simd_shuffle-out_len-!, a, {vqshrun_n-noqself-::<N>, b}, {asc-0-out_len}
|
||||
a = 0, 1, 8, 9, 8, 9, 10, 11
|
||||
b = 32, 36, 40, 44, 48, 52, 56, 60
|
||||
n = 2
|
||||
|
|
@ -4312,7 +4312,7 @@ name = vrshrn_high
|
|||
noq-n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-halfbits
|
||||
multi_fn = simd_shuffle-out_len-noext, a, {vrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
|
||||
multi_fn = simd_shuffle-out_len-!, a, {vrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
|
||||
a = 0, 1, 8, 9, 8, 9, 10, 11
|
||||
b = 32, 36, 40, 44, 48, 52, 56, 60
|
||||
n = 2
|
||||
|
|
@ -4542,7 +4542,7 @@ name = vshll_high_n
|
|||
no-q
|
||||
constn = N
|
||||
multi_fn = static_assert-N-0-bits
|
||||
multi_fn = simd_shuffle-out_len-noext, b:half, a, a, {asc-halflen-halflen}
|
||||
multi_fn = simd_shuffle-out_len-!, b:half, a, a, {asc-halflen-halflen}
|
||||
multi_fn = vshll_n-noqself-::<N>, b
|
||||
a = 0, 0, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8
|
||||
n = 2
|
||||
|
|
@ -4589,7 +4589,7 @@ name = vshrn_high_n
|
|||
no-q
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-halfbits
|
||||
multi_fn = simd_shuffle-out_len-noext, a, {vshrn_n-noqself-::<N>, b}, {asc-0-out_len}
|
||||
multi_fn = simd_shuffle-out_len-!, a, {vshrn_n-noqself-::<N>, b}, {asc-0-out_len}
|
||||
a = 1, 2, 5, 6, 5, 6, 7, 8
|
||||
b = 20, 24, 28, 32, 52, 56, 60, 64
|
||||
n = 2
|
||||
|
|
@ -4631,7 +4631,7 @@ generate uint*_t, uint64x*_t
|
|||
|
||||
/// Transpose vectors
|
||||
name = vtrn1
|
||||
multi_fn = simd_shuffle-in_len-noext, a, b, {transpose-1-in_len}
|
||||
multi_fn = simd_shuffle-in_len-!, a, b, {transpose-1-in_len}
|
||||
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
|
||||
b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
|
||||
validate 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
|
||||
|
|
@ -4644,7 +4644,7 @@ generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t
|
|||
|
||||
/// Transpose vectors
|
||||
name = vtrn1
|
||||
multi_fn = simd_shuffle-in_len-noext, a, b, {transpose-1-in_len}
|
||||
multi_fn = simd_shuffle-in_len-!, a, b, {transpose-1-in_len}
|
||||
a = 0., 2., 4., 6., 8., 10., 12., 14.
|
||||
b = 1., 3., 5., 7., 9., 11., 13., 15.
|
||||
validate 0., 1., 4., 5., 8., 9., 12., 13.
|
||||
|
|
@ -4657,7 +4657,7 @@ generate float32x2_t, float64x2_t
|
|||
|
||||
/// Transpose vectors
|
||||
name = vtrn2
|
||||
multi_fn = simd_shuffle-in_len-noext, a, b, {transpose-2-in_len}
|
||||
multi_fn = simd_shuffle-in_len-!, a, b, {transpose-2-in_len}
|
||||
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
|
||||
b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
|
||||
validate 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
|
||||
|
|
@ -4670,7 +4670,7 @@ generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t
|
|||
|
||||
/// Transpose vectors
|
||||
name = vtrn2
|
||||
multi_fn = simd_shuffle-in_len-noext, a, b, {transpose-2-in_len}
|
||||
multi_fn = simd_shuffle-in_len-!, a, b, {transpose-2-in_len}
|
||||
a = 0., 2., 4., 6., 8., 10., 12., 14.
|
||||
b = 1., 3., 5., 7., 9., 11., 13., 15.
|
||||
validate 2., 3., 6., 7., 10., 11., 14., 15.
|
||||
|
|
@ -4683,7 +4683,7 @@ generate float32x2_t, float64x2_t
|
|||
|
||||
/// Zip vectors
|
||||
name = vzip1
|
||||
multi_fn = simd_shuffle-in_len-noext, a, b, {zip-1-in_len}
|
||||
multi_fn = simd_shuffle-in_len-!, a, b, {zip-1-in_len}
|
||||
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
|
||||
b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
|
||||
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
|
|
@ -4693,7 +4693,7 @@ generate int*_t, int64x2_t, uint*_t, uint64x2_t, poly8x8_t, poly8x16_t, poly16x4
|
|||
|
||||
/// Zip vectors
|
||||
name = vzip1
|
||||
multi_fn = simd_shuffle-in_len-noext, a, b, {zip-1-in_len}
|
||||
multi_fn = simd_shuffle-in_len-!, a, b, {zip-1-in_len}
|
||||
a = 0., 2., 4., 6., 8., 10., 12., 14.
|
||||
b = 1., 3., 5., 7., 9., 11., 13., 15.
|
||||
validate 0., 1., 2., 3., 4., 5., 6., 7.
|
||||
|
|
@ -4703,7 +4703,7 @@ generate float32x2_t, float32x4_t, float64x2_t
|
|||
|
||||
/// Zip vectors
|
||||
name = vzip2
|
||||
multi_fn = simd_shuffle-in_len-noext, a, b, {zip-2-in_len}
|
||||
multi_fn = simd_shuffle-in_len-!, a, b, {zip-2-in_len}
|
||||
a = 0, 16, 16, 18, 16, 18, 20, 22, 16, 18, 20, 22, 24, 26, 28, 30
|
||||
b = 1, 17, 17, 19, 17, 19, 21, 23, 17, 19, 21, 23, 25, 27, 29, 31
|
||||
validate 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
|
||||
|
|
@ -4713,7 +4713,7 @@ generate int*_t, int64x2_t, uint*_t, uint64x2_t, poly8x8_t, poly8x16_t, poly16x4
|
|||
|
||||
/// Zip vectors
|
||||
name = vzip2
|
||||
multi_fn = simd_shuffle-in_len-noext, a, b, {zip-2-in_len}
|
||||
multi_fn = simd_shuffle-in_len-!, a, b, {zip-2-in_len}
|
||||
a = 0., 8., 8., 10., 8., 10., 12., 14.
|
||||
b = 1., 9., 9., 11., 9., 11., 13., 15.
|
||||
validate 8., 9., 10., 11., 12., 13., 14., 15.
|
||||
|
|
@ -4723,7 +4723,7 @@ generate float32x2_t, float32x4_t, float64x2_t
|
|||
|
||||
/// Unzip vectors
|
||||
name = vuzp1
|
||||
multi_fn = simd_shuffle-in_len-noext, a, b, {unzip-1-in_len}
|
||||
multi_fn = simd_shuffle-in_len-!, a, b, {unzip-1-in_len}
|
||||
a = 1, 0, 2, 0, 2, 0, 3, 0, 2, 0, 3, 0, 7, 0, 8, 0
|
||||
b = 2, 0, 3, 0, 7, 0, 8, 0, 13, 0, 14, 0, 15, 0, 16, 0
|
||||
validate 1, 2, 2, 3, 2, 3, 7, 8, 2, 3, 7, 8, 13, 14, 15, 16
|
||||
|
|
@ -4736,7 +4736,7 @@ generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t
|
|||
|
||||
/// Unzip vectors
|
||||
name = vuzp1
|
||||
multi_fn = simd_shuffle-in_len-noext, a, b, {unzip-1-in_len}
|
||||
multi_fn = simd_shuffle-in_len-!, a, b, {unzip-1-in_len}
|
||||
a = 0., 8., 1., 9., 4., 12., 5., 13.
|
||||
b = 1., 10., 3., 11., 6., 14., 7., 15.
|
||||
validate 0., 1., 1., 3., 4., 5., 6., 7.
|
||||
|
|
@ -4749,7 +4749,7 @@ generate float32x2_t, float64x2_t
|
|||
|
||||
/// Unzip vectors
|
||||
name = vuzp2
|
||||
multi_fn = simd_shuffle-in_len-noext, a, b, {unzip-2-in_len}
|
||||
multi_fn = simd_shuffle-in_len-!, a, b, {unzip-2-in_len}
|
||||
a = 0, 17, 0, 18, 0, 18, 0, 19, 0, 18, 0, 19, 0, 23, 0, 24
|
||||
b = 0, 18, 0, 19, 0, 23, 0, 24, 0, 29, 0, 30, 0, 31, 0, 32
|
||||
validate 17, 18, 18, 19, 18, 19, 23, 24, 18, 19, 23, 24, 29, 30, 31, 32
|
||||
|
|
@ -4762,7 +4762,7 @@ generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t
|
|||
|
||||
/// Unzip vectors
|
||||
name = vuzp2
|
||||
multi_fn = simd_shuffle-in_len-noext, a, b, {unzip-2-in_len}
|
||||
multi_fn = simd_shuffle-in_len-!, a, b, {unzip-2-in_len}
|
||||
a = 0., 8., 1., 9., 4., 12., 5., 13.
|
||||
b = 2., 9., 3., 11., 6., 14., 7., 15.
|
||||
validate 8., 9., 9., 11., 12., 13., 14., 15.
|
||||
|
|
@ -4793,8 +4793,8 @@ generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16
|
|||
/// Unsigned Absolute difference and Accumulate Long
|
||||
name = vabal_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle8, d:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_shuffle8, e:uint8x8_t, c, c, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_shuffle8!, d:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_shuffle8!, e:uint8x8_t, c, c, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = vabd_u8, d, e, f:uint8x8_t
|
||||
multi_fn = simd_add, a, {simd_cast, f}
|
||||
a = 9, 10, 11, 12, 13, 14, 15, 16
|
||||
|
|
@ -4808,8 +4808,8 @@ generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t
|
|||
/// Unsigned Absolute difference and Accumulate Long
|
||||
name = vabal_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle4, d:uint16x4_t, b, b, [4, 5, 6, 7]
|
||||
multi_fn = simd_shuffle4, e:uint16x4_t, c, c, [4, 5, 6, 7]
|
||||
multi_fn = simd_shuffle4!, d:uint16x4_t, b, b, [4, 5, 6, 7]
|
||||
multi_fn = simd_shuffle4!, e:uint16x4_t, c, c, [4, 5, 6, 7]
|
||||
multi_fn = vabd_u16, d, e, f:uint16x4_t
|
||||
multi_fn = simd_add, a, {simd_cast, f}
|
||||
a = 9, 10, 11, 12
|
||||
|
|
@ -4823,8 +4823,8 @@ generate uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t
|
|||
/// Unsigned Absolute difference and Accumulate Long
|
||||
name = vabal_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle2, d:uint32x2_t, b, b, [2, 3]
|
||||
multi_fn = simd_shuffle2, e:uint32x2_t, c, c, [2, 3]
|
||||
multi_fn = simd_shuffle2!, d:uint32x2_t, b, b, [2, 3]
|
||||
multi_fn = simd_shuffle2!, e:uint32x2_t, c, c, [2, 3]
|
||||
multi_fn = vabd_u32, d, e, f:uint32x2_t
|
||||
multi_fn = simd_add, a, {simd_cast, f}
|
||||
a = 15, 16
|
||||
|
|
@ -4884,8 +4884,8 @@ generate int64x2_t:int32x2_t:int32x2_t:int64x2_t
|
|||
/// Signed Absolute difference and Accumulate Long
|
||||
name = vabal_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle8, d:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_shuffle8, e:int8x8_t, c, c, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_shuffle8!, d:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = simd_shuffle8!, e:int8x8_t, c, c, [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
multi_fn = vabd_s8, d, e, f:int8x8_t
|
||||
multi_fn = simd_cast, f:uint8x8_t, f
|
||||
multi_fn = simd_add, a, {simd_cast, f}
|
||||
|
|
@ -4900,8 +4900,8 @@ generate int16x8_t:int8x16_t:int8x16_t:int16x8_t
|
|||
/// Signed Absolute difference and Accumulate Long
|
||||
name = vabal_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle4, d:int16x4_t, b, b, [4, 5, 6, 7]
|
||||
multi_fn = simd_shuffle4, e:int16x4_t, c, c, [4, 5, 6, 7]
|
||||
multi_fn = simd_shuffle4!, d:int16x4_t, b, b, [4, 5, 6, 7]
|
||||
multi_fn = simd_shuffle4!, e:int16x4_t, c, c, [4, 5, 6, 7]
|
||||
multi_fn = vabd_s16, d, e, f:int16x4_t
|
||||
multi_fn = simd_cast, f:uint16x4_t, f
|
||||
multi_fn = simd_add, a, {simd_cast, f}
|
||||
|
|
@ -4916,8 +4916,8 @@ generate int32x4_t:int16x8_t:int16x8_t:int32x4_t
|
|||
/// Signed Absolute difference and Accumulate Long
|
||||
name = vabal_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle2, d:int32x2_t, b, b, [2, 3]
|
||||
multi_fn = simd_shuffle2, e:int32x2_t, c, c, [2, 3]
|
||||
multi_fn = simd_shuffle2!, d:int32x2_t, b, b, [2, 3]
|
||||
multi_fn = simd_shuffle2!, e:int32x2_t, c, c, [2, 3]
|
||||
multi_fn = vabd_s32, d, e, f:int32x2_t
|
||||
multi_fn = simd_cast, f:uint32x2_t, f
|
||||
multi_fn = simd_add, a, {simd_cast, f}
|
||||
|
|
|
|||
|
|
@ -988,6 +988,17 @@ fn gen_aarch64(
|
|||
);
|
||||
}
|
||||
};
|
||||
let const_declare = if let Some(constn) = constn {
|
||||
if constn.contains(":") {
|
||||
let constns: Vec<_> = constn.split(':').map(|v| v.to_string()).collect();
|
||||
assert_eq!(constns.len(), 2);
|
||||
format!(r#"<const {}: i32, const {}: i32>"#, constns[0], constns[1])
|
||||
} else {
|
||||
format!(r#"<const {}: i32>"#, constn)
|
||||
}
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
let multi_calls = if !multi_fn.is_empty() {
|
||||
let mut calls = String::new();
|
||||
for i in 0..multi_fn.len() {
|
||||
|
|
@ -997,6 +1008,7 @@ fn gen_aarch64(
|
|||
calls.push_str(&get_call(
|
||||
&multi_fn[i],
|
||||
current_name,
|
||||
&const_declare,
|
||||
in_t,
|
||||
out_t,
|
||||
fixed,
|
||||
|
|
@ -1007,17 +1019,6 @@ fn gen_aarch64(
|
|||
} else {
|
||||
String::new()
|
||||
};
|
||||
let const_declare = if let Some(constn) = constn {
|
||||
if constn.contains(":") {
|
||||
let constns: Vec<_> = constn.split(':').map(|v| v.to_string()).collect();
|
||||
assert_eq!(constns.len(), 2);
|
||||
format!(r#"<const {}: i32, const {}: i32>"#, constns[0], constns[1])
|
||||
} else {
|
||||
format!(r#"<const {}: i32>"#, constn)
|
||||
}
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
let const_assert = if let Some(constn) = constn {
|
||||
if constn.contains(":") {
|
||||
let constns: Vec<_> = constn.split(':').map(|v| v.to_string()).collect();
|
||||
|
|
@ -1582,6 +1583,11 @@ fn gen_arm(
|
|||
));
|
||||
}
|
||||
};
|
||||
let const_declare = if let Some(constn) = constn {
|
||||
format!(r#"<const {}: i32>"#, constn)
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
let multi_calls = if !multi_fn.is_empty() {
|
||||
let mut calls = String::new();
|
||||
for i in 0..multi_fn.len() {
|
||||
|
|
@ -1591,6 +1597,7 @@ fn gen_arm(
|
|||
calls.push_str(&get_call(
|
||||
&multi_fn[i],
|
||||
current_name,
|
||||
&const_declare,
|
||||
in_t,
|
||||
out_t,
|
||||
fixed,
|
||||
|
|
@ -1601,11 +1608,6 @@ fn gen_arm(
|
|||
} else {
|
||||
String::new()
|
||||
};
|
||||
let const_declare = if let Some(constn) = constn {
|
||||
format!(r#"<const {}: i32>"#, constn)
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
let const_assert = if let Some(constn) = constn {
|
||||
format!(
|
||||
r#", {} = {}"#,
|
||||
|
|
@ -2003,6 +2005,7 @@ fn expand_intrinsic(intr: &str, t: &str) -> String {
|
|||
fn get_call(
|
||||
in_str: &str,
|
||||
current_name: &str,
|
||||
const_declare: &str,
|
||||
in_t: &[&str; 3],
|
||||
out_t: &str,
|
||||
fixed: &Vec<String>,
|
||||
|
|
@ -2041,7 +2044,7 @@ fn get_call(
|
|||
"halflen" => type_len(in_t[1]) / 2,
|
||||
_ => 0,
|
||||
};
|
||||
let mut s = String::from("[");
|
||||
let mut s = format!("{} [", const_declare);
|
||||
for i in 0..len {
|
||||
if i != 0 {
|
||||
s.push_str(", ");
|
||||
|
|
@ -2084,7 +2087,7 @@ fn get_call(
|
|||
"in0_len" => type_len(in_t[0]),
|
||||
_ => 0,
|
||||
};
|
||||
let mut s = String::from("[");
|
||||
let mut s = format!("{} [", const_declare);
|
||||
for i in 0..len {
|
||||
if i != 0 {
|
||||
s.push_str(", ");
|
||||
|
|
@ -2167,7 +2170,15 @@ fn get_call(
|
|||
let sub_match = format!(
|
||||
" {} => {},\n",
|
||||
i,
|
||||
get_call(&sub_call, current_name, in_t, out_t, fixed, Some(i as i32))
|
||||
get_call(
|
||||
&sub_call,
|
||||
current_name,
|
||||
const_declare,
|
||||
in_t,
|
||||
out_t,
|
||||
fixed,
|
||||
Some(i as i32)
|
||||
)
|
||||
);
|
||||
call.push_str(&sub_match);
|
||||
}
|
||||
|
|
@ -2210,6 +2221,7 @@ fn get_call(
|
|||
let sub_call = get_call(
|
||||
&sub_fn[1..sub_fn.len() - 1],
|
||||
current_name,
|
||||
const_declare,
|
||||
in_t,
|
||||
out_t,
|
||||
fixed,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue