Refactor some loops to avoid indexing
This commit is contained in:
parent
9f741c5986
commit
3b2a5e2866
5 changed files with 48 additions and 211 deletions
|
|
@ -5399,10 +5399,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_i32gather_epi32() {
|
||||
let mut arr = [0i32; 128];
|
||||
for i in 0..128i32 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
|
||||
// A multiplier of 4 is word-addressing
|
||||
let r = _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
|
||||
assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
|
||||
|
|
@ -5410,10 +5407,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_mask_i32gather_epi32() {
|
||||
let mut arr = [0i32; 128];
|
||||
for i in 0..128i32 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
|
||||
// A multiplier of 4 is word-addressing
|
||||
let r = _mm_mask_i32gather_epi32::<4>(
|
||||
_mm_set1_epi32(256),
|
||||
|
|
@ -5426,10 +5420,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_i32gather_epi32() {
|
||||
let mut arr = [0i32; 128];
|
||||
for i in 0..128i32 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
|
||||
// A multiplier of 4 is word-addressing
|
||||
let r =
|
||||
_mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
|
||||
|
|
@ -5438,10 +5429,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_mask_i32gather_epi32() {
|
||||
let mut arr = [0i32; 128];
|
||||
for i in 0..128i32 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
|
||||
// A multiplier of 4 is word-addressing
|
||||
let r = _mm256_mask_i32gather_epi32::<4>(
|
||||
_mm256_set1_epi32(256),
|
||||
|
|
@ -5454,12 +5442,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_i32gather_ps() {
|
||||
let mut arr = [0.0f32; 128];
|
||||
let mut j = 0.0;
|
||||
for i in 0..128usize {
|
||||
arr[i] = j;
|
||||
j += 1.0;
|
||||
}
|
||||
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
|
||||
// A multiplier of 4 is word-addressing for f32s
|
||||
let r = _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
|
||||
assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
|
||||
|
|
@ -5467,12 +5450,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_mask_i32gather_ps() {
|
||||
let mut arr = [0.0f32; 128];
|
||||
let mut j = 0.0;
|
||||
for i in 0..128usize {
|
||||
arr[i] = j;
|
||||
j += 1.0;
|
||||
}
|
||||
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
|
||||
// A multiplier of 4 is word-addressing for f32s
|
||||
let r = _mm_mask_i32gather_ps::<4>(
|
||||
_mm_set1_ps(256.0),
|
||||
|
|
@ -5485,12 +5463,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_i32gather_ps() {
|
||||
let mut arr = [0.0f32; 128];
|
||||
let mut j = 0.0;
|
||||
for i in 0..128usize {
|
||||
arr[i] = j;
|
||||
j += 1.0;
|
||||
}
|
||||
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
|
||||
// A multiplier of 4 is word-addressing for f32s
|
||||
let r =
|
||||
_mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
|
||||
|
|
@ -5499,12 +5472,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_mask_i32gather_ps() {
|
||||
let mut arr = [0.0f32; 128];
|
||||
let mut j = 0.0;
|
||||
for i in 0..128usize {
|
||||
arr[i] = j;
|
||||
j += 1.0;
|
||||
}
|
||||
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
|
||||
// A multiplier of 4 is word-addressing for f32s
|
||||
let r = _mm256_mask_i32gather_ps::<4>(
|
||||
_mm256_set1_ps(256.0),
|
||||
|
|
@ -5520,10 +5488,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_i32gather_epi64() {
|
||||
let mut arr = [0i64; 128];
|
||||
for i in 0..128i64 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
|
||||
// A multiplier of 8 is word-addressing for i64s
|
||||
let r = _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
|
||||
assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
|
||||
|
|
@ -5531,10 +5496,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_mask_i32gather_epi64() {
|
||||
let mut arr = [0i64; 128];
|
||||
for i in 0..128i64 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
|
||||
// A multiplier of 8 is word-addressing for i64s
|
||||
let r = _mm_mask_i32gather_epi64::<8>(
|
||||
_mm_set1_epi64x(256),
|
||||
|
|
@ -5547,10 +5509,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_i32gather_epi64() {
|
||||
let mut arr = [0i64; 128];
|
||||
for i in 0..128i64 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
|
||||
// A multiplier of 8 is word-addressing for i64s
|
||||
let r = _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
|
||||
assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
|
||||
|
|
@ -5558,10 +5517,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_mask_i32gather_epi64() {
|
||||
let mut arr = [0i64; 128];
|
||||
for i in 0..128i64 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
|
||||
// A multiplier of 8 is word-addressing for i64s
|
||||
let r = _mm256_mask_i32gather_epi64::<8>(
|
||||
_mm256_set1_epi64x(256),
|
||||
|
|
@ -5574,12 +5530,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_i32gather_pd() {
|
||||
let mut arr = [0.0f64; 128];
|
||||
let mut j = 0.0;
|
||||
for i in 0..128usize {
|
||||
arr[i] = j;
|
||||
j += 1.0;
|
||||
}
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
// A multiplier of 8 is word-addressing for f64s
|
||||
let r = _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
|
||||
assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
|
||||
|
|
@ -5587,12 +5538,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_mask_i32gather_pd() {
|
||||
let mut arr = [0.0f64; 128];
|
||||
let mut j = 0.0;
|
||||
for i in 0..128usize {
|
||||
arr[i] = j;
|
||||
j += 1.0;
|
||||
}
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
// A multiplier of 8 is word-addressing for f64s
|
||||
let r = _mm_mask_i32gather_pd::<8>(
|
||||
_mm_set1_pd(256.0),
|
||||
|
|
@ -5605,12 +5551,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_i32gather_pd() {
|
||||
let mut arr = [0.0f64; 128];
|
||||
let mut j = 0.0;
|
||||
for i in 0..128usize {
|
||||
arr[i] = j;
|
||||
j += 1.0;
|
||||
}
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
// A multiplier of 8 is word-addressing for f64s
|
||||
let r = _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
|
||||
assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
|
||||
|
|
@ -5618,12 +5559,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_mask_i32gather_pd() {
|
||||
let mut arr = [0.0f64; 128];
|
||||
let mut j = 0.0;
|
||||
for i in 0..128usize {
|
||||
arr[i] = j;
|
||||
j += 1.0;
|
||||
}
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
// A multiplier of 8 is word-addressing for f64s
|
||||
let r = _mm256_mask_i32gather_pd::<8>(
|
||||
_mm256_set1_pd(256.0),
|
||||
|
|
@ -5636,10 +5572,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_i64gather_epi32() {
|
||||
let mut arr = [0i32; 128];
|
||||
for i in 0..128i32 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
|
||||
// A multiplier of 4 is word-addressing
|
||||
let r = _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
|
||||
assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
|
||||
|
|
@ -5647,10 +5580,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_mask_i64gather_epi32() {
|
||||
let mut arr = [0i32; 128];
|
||||
for i in 0..128i32 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
|
||||
// A multiplier of 4 is word-addressing
|
||||
let r = _mm_mask_i64gather_epi32::<4>(
|
||||
_mm_set1_epi32(256),
|
||||
|
|
@ -5663,10 +5593,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_i64gather_epi32() {
|
||||
let mut arr = [0i32; 128];
|
||||
for i in 0..128i32 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
|
||||
// A multiplier of 4 is word-addressing
|
||||
let r = _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
|
||||
assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
|
||||
|
|
@ -5674,10 +5601,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_mask_i64gather_epi32() {
|
||||
let mut arr = [0i32; 128];
|
||||
for i in 0..128i32 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
|
||||
// A multiplier of 4 is word-addressing
|
||||
let r = _mm256_mask_i64gather_epi32::<4>(
|
||||
_mm_set1_epi32(256),
|
||||
|
|
@ -5690,12 +5614,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_i64gather_ps() {
|
||||
let mut arr = [0.0f32; 128];
|
||||
let mut j = 0.0;
|
||||
for i in 0..128usize {
|
||||
arr[i] = j;
|
||||
j += 1.0;
|
||||
}
|
||||
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
|
||||
// A multiplier of 4 is word-addressing for f32s
|
||||
let r = _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
|
||||
assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
|
||||
|
|
@ -5703,12 +5622,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_mask_i64gather_ps() {
|
||||
let mut arr = [0.0f32; 128];
|
||||
let mut j = 0.0;
|
||||
for i in 0..128usize {
|
||||
arr[i] = j;
|
||||
j += 1.0;
|
||||
}
|
||||
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
|
||||
// A multiplier of 4 is word-addressing for f32s
|
||||
let r = _mm_mask_i64gather_ps::<4>(
|
||||
_mm_set1_ps(256.0),
|
||||
|
|
@ -5721,12 +5635,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_i64gather_ps() {
|
||||
let mut arr = [0.0f32; 128];
|
||||
let mut j = 0.0;
|
||||
for i in 0..128usize {
|
||||
arr[i] = j;
|
||||
j += 1.0;
|
||||
}
|
||||
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
|
||||
// A multiplier of 4 is word-addressing for f32s
|
||||
let r = _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
|
||||
assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
|
||||
|
|
@ -5734,12 +5643,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_mask_i64gather_ps() {
|
||||
let mut arr = [0.0f32; 128];
|
||||
let mut j = 0.0;
|
||||
for i in 0..128usize {
|
||||
arr[i] = j;
|
||||
j += 1.0;
|
||||
}
|
||||
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
|
||||
// A multiplier of 4 is word-addressing for f32s
|
||||
let r = _mm256_mask_i64gather_ps::<4>(
|
||||
_mm_set1_ps(256.0),
|
||||
|
|
@ -5752,10 +5656,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_i64gather_epi64() {
|
||||
let mut arr = [0i64; 128];
|
||||
for i in 0..128i64 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
|
||||
// A multiplier of 8 is word-addressing for i64s
|
||||
let r = _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
|
||||
assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
|
||||
|
|
@ -5763,10 +5664,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_mask_i64gather_epi64() {
|
||||
let mut arr = [0i64; 128];
|
||||
for i in 0..128i64 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
|
||||
// A multiplier of 8 is word-addressing for i64s
|
||||
let r = _mm_mask_i64gather_epi64::<8>(
|
||||
_mm_set1_epi64x(256),
|
||||
|
|
@ -5779,10 +5677,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_i64gather_epi64() {
|
||||
let mut arr = [0i64; 128];
|
||||
for i in 0..128i64 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
|
||||
// A multiplier of 8 is word-addressing for i64s
|
||||
let r = _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
|
||||
assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
|
||||
|
|
@ -5790,10 +5685,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_mask_i64gather_epi64() {
|
||||
let mut arr = [0i64; 128];
|
||||
for i in 0..128i64 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
|
||||
// A multiplier of 8 is word-addressing for i64s
|
||||
let r = _mm256_mask_i64gather_epi64::<8>(
|
||||
_mm256_set1_epi64x(256),
|
||||
|
|
@ -5806,12 +5698,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_i64gather_pd() {
|
||||
let mut arr = [0.0f64; 128];
|
||||
let mut j = 0.0;
|
||||
for i in 0..128usize {
|
||||
arr[i] = j;
|
||||
j += 1.0;
|
||||
}
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
// A multiplier of 8 is word-addressing for f64s
|
||||
let r = _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
|
||||
assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
|
||||
|
|
@ -5819,12 +5706,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_mask_i64gather_pd() {
|
||||
let mut arr = [0.0f64; 128];
|
||||
let mut j = 0.0;
|
||||
for i in 0..128usize {
|
||||
arr[i] = j;
|
||||
j += 1.0;
|
||||
}
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
// A multiplier of 8 is word-addressing for f64s
|
||||
let r = _mm_mask_i64gather_pd::<8>(
|
||||
_mm_set1_pd(256.0),
|
||||
|
|
@ -5837,12 +5719,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_i64gather_pd() {
|
||||
let mut arr = [0.0f64; 128];
|
||||
let mut j = 0.0;
|
||||
for i in 0..128usize {
|
||||
arr[i] = j;
|
||||
j += 1.0;
|
||||
}
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
// A multiplier of 8 is word-addressing for f64s
|
||||
let r = _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
|
||||
assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
|
||||
|
|
@ -5850,12 +5727,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_mask_i64gather_pd() {
|
||||
let mut arr = [0.0f64; 128];
|
||||
let mut j = 0.0;
|
||||
for i in 0..128usize {
|
||||
arr[i] = j;
|
||||
j += 1.0;
|
||||
}
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
// A multiplier of 8 is word-addressing for f64s
|
||||
let r = _mm256_mask_i64gather_pd::<8>(
|
||||
_mm256_set1_pd(256.0),
|
||||
|
|
|
|||
|
|
@ -47792,10 +47792,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_i32gather_ps() {
|
||||
let mut arr = [0f32; 256];
|
||||
for i in 0..256 {
|
||||
arr[i] = i as f32;
|
||||
}
|
||||
let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
|
||||
// A multiplier of 4 is word-addressing
|
||||
#[rustfmt::skip]
|
||||
let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
|
||||
|
|
@ -47808,10 +47805,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_i32gather_ps() {
|
||||
let mut arr = [0f32; 256];
|
||||
for i in 0..256 {
|
||||
arr[i] = i as f32;
|
||||
}
|
||||
let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
|
||||
let src = _mm512_set1_ps(2.);
|
||||
let mask = 0b10101010_10101010;
|
||||
#[rustfmt::skip]
|
||||
|
|
@ -47826,10 +47820,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_i32gather_epi32() {
|
||||
let mut arr = [0i32; 256];
|
||||
for i in 0..256 {
|
||||
arr[i] = i as i32;
|
||||
}
|
||||
let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
|
||||
// A multiplier of 4 is word-addressing
|
||||
#[rustfmt::skip]
|
||||
let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
|
||||
|
|
@ -47842,10 +47833,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_i32gather_epi32() {
|
||||
let mut arr = [0i32; 256];
|
||||
for i in 0..256 {
|
||||
arr[i] = i as i32;
|
||||
}
|
||||
let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
|
||||
let src = _mm512_set1_epi32(2);
|
||||
let mask = 0b10101010_10101010;
|
||||
let index = _mm512_setr_epi32(
|
||||
|
|
|
|||
|
|
@ -2928,8 +2928,7 @@ mod tests {
|
|||
(NAN, i32::MIN),
|
||||
(2147483500.1, 2147483520),
|
||||
];
|
||||
for i in 0..inputs.len() {
|
||||
let (xi, e) = inputs[i];
|
||||
for (i, &(xi, e)) in inputs.iter().enumerate() {
|
||||
let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
|
||||
let r = _mm_cvttss_si32(x);
|
||||
assert_eq!(
|
||||
|
|
@ -2949,8 +2948,7 @@ mod tests {
|
|||
(-322223333, -322223330.0),
|
||||
];
|
||||
|
||||
for i in 0..inputs.len() {
|
||||
let (x, f) = inputs[i];
|
||||
for &(x, f) in inputs.iter() {
|
||||
let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
|
||||
let r = _mm_cvtsi32_ss(a, x);
|
||||
let e = _mm_setr_ps(f, 6.0, 7.0, 8.0);
|
||||
|
|
@ -3330,7 +3328,7 @@ mod tests {
|
|||
assert_eq_m128(r, exp);
|
||||
|
||||
let underflow = _MM_GET_EXCEPTION_STATE() & _MM_EXCEPT_UNDERFLOW != 0;
|
||||
assert_eq!(underflow, true);
|
||||
assert!(underflow);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
|
|
|
|||
|
|
@ -7293,10 +7293,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_i32gather_pd() {
|
||||
let mut arr = [0f64; 128];
|
||||
for i in 0..128 {
|
||||
arr[i] = i as f64;
|
||||
}
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
// A multiplier of 8 is word-addressing
|
||||
let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let r = _mm512_i32gather_pd::<8>(index, arr.as_ptr() as *const u8);
|
||||
|
|
@ -7305,10 +7302,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_i32gather_pd() {
|
||||
let mut arr = [0f64; 128];
|
||||
for i in 0..128 {
|
||||
arr[i] = i as f64;
|
||||
}
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
let src = _mm512_set1_pd(2.);
|
||||
let mask = 0b10101010;
|
||||
let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
|
|
@ -7319,10 +7313,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_i64gather_pd() {
|
||||
let mut arr = [0f64; 128];
|
||||
for i in 0..128 {
|
||||
arr[i] = i as f64;
|
||||
}
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
// A multiplier of 8 is word-addressing
|
||||
let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let r = _mm512_i64gather_pd::<8>(index, arr.as_ptr() as *const u8);
|
||||
|
|
@ -7331,10 +7322,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_i64gather_pd() {
|
||||
let mut arr = [0f64; 128];
|
||||
for i in 0..128 {
|
||||
arr[i] = i as f64;
|
||||
}
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
let src = _mm512_set1_pd(2.);
|
||||
let mask = 0b10101010;
|
||||
let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
|
|
@ -7345,10 +7333,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_i64gather_ps() {
|
||||
let mut arr = [0f32; 128];
|
||||
for i in 0..128 {
|
||||
arr[i] = i as f32;
|
||||
}
|
||||
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
|
||||
// A multiplier of 4 is word-addressing
|
||||
#[rustfmt::skip]
|
||||
let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
|
|
@ -7358,10 +7343,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_i64gather_ps() {
|
||||
let mut arr = [0f32; 128];
|
||||
for i in 0..128 {
|
||||
arr[i] = i as f32;
|
||||
}
|
||||
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
|
||||
let src = _mm256_set1_ps(2.);
|
||||
let mask = 0b10101010;
|
||||
#[rustfmt::skip]
|
||||
|
|
|
|||
|
|
@ -86,8 +86,7 @@ mod tests {
|
|||
(2147483500.1, 2147483520),
|
||||
(9.223371e18, 9223370937343148032),
|
||||
];
|
||||
for i in 0..inputs.len() {
|
||||
let (xi, e) = inputs[i];
|
||||
for (i, &(xi, e)) in inputs.iter().enumerate() {
|
||||
let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
|
||||
let r = _mm_cvtss_si64(x);
|
||||
assert_eq!(
|
||||
|
|
@ -114,8 +113,7 @@ mod tests {
|
|||
(9.223371e18, 9223370937343148032),
|
||||
(9.223372e18, i64::MIN),
|
||||
];
|
||||
for i in 0..inputs.len() {
|
||||
let (xi, e) = inputs[i];
|
||||
for (i, &(xi, e)) in inputs.iter().enumerate() {
|
||||
let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
|
||||
let r = _mm_cvttss_si64(x);
|
||||
assert_eq!(
|
||||
|
|
@ -137,8 +135,7 @@ mod tests {
|
|||
(-9223372036854775808, -9.223372e18),
|
||||
];
|
||||
|
||||
for i in 0..inputs.len() {
|
||||
let (x, f) = inputs[i];
|
||||
for &(x, f) in inputs {
|
||||
let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
|
||||
let r = _mm_cvtsi64_ss(a, x);
|
||||
let e = _mm_setr_ps(f, 6.0, 7.0, 8.0);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue