Replace some calls to pointer::offset with add and sub
This commit is contained in:
parent
39f73ac0b3
commit
1a2eac5986
4 changed files with 33 additions and 36 deletions
|
|
@ -829,21 +829,21 @@ mod tests {
|
|||
/// Loads the `word_index`-th 128-bit word of `data` into an `__m128i`.
///
/// `data` is treated as a run of consecutive 16-byte words, whatever the
/// element type `T` is. The load itself is unaligned (`_mm_loadu_si128`), so
/// `data` does not need 16-byte alignment.
///
/// # Safety
///
/// * The 16 bytes starting `word_index * 16` bytes into `data` must lie
///   entirely inside the slice.
/// * `size_of::<T>()` must be non-zero and divide 16 evenly; otherwise the
///   integer division below does not land on the requested word.
/// * The executing CPU must support SSE2.
#[target_feature(enable = "sse2")]
unsafe fn load_m128i_word<T>(data: &[T], word_index: usize) -> __m128i {
    // Debug-only guard: reading past the end of `data` would be undefined
    // behaviour, so surface a bad index as a panic in unoptimised builds.
    debug_assert!((word_index + 1) * 16 <= core::mem::size_of_val(data));

    // `pointer::add` steps in units of `T`, not bytes, so the byte position of
    // the word is converted into a count of `T` elements first.
    let element_offset = word_index * 16 / size_of::<T>();
    let pointer = data.as_ptr().add(element_offset) as *const __m128i;

    // The pointer is routed through `black_box` before the load, as in the
    // original helper.
    _mm_loadu_si128(black_box(pointer))
}
|
||||
|
||||
/// Loads the `word_index`-th 256-bit word of `data` into an `__m256i`.
///
/// `data` is treated as a run of consecutive 32-byte words, whatever the
/// element type `T` is. The load itself is unaligned (`_mm256_loadu_si256`),
/// so `data` does not need 32-byte alignment.
///
/// # Safety
///
/// * The 32 bytes starting `word_index * 32` bytes into `data` must lie
///   entirely inside the slice.
/// * `size_of::<T>()` must be non-zero and divide 32 evenly; otherwise the
///   integer division below does not land on the requested word.
/// * The executing CPU must support AVX.
#[target_feature(enable = "avx")]
unsafe fn load_m256i_word<T>(data: &[T], word_index: usize) -> __m256i {
    // Debug-only guard: reading past the end of `data` would be undefined
    // behaviour, so surface a bad index as a panic in unoptimised builds.
    debug_assert!((word_index + 1) * 32 <= core::mem::size_of_val(data));

    // `pointer::add` steps in units of `T`, not bytes, so the byte position of
    // the word is converted into a count of `T` elements first.
    let element_offset = word_index * 32 / size_of::<T>();
    let pointer = data.as_ptr().add(element_offset) as *const __m256i;

    // The pointer is routed through `black_box` before the load, as in the
    // original helper.
    _mm256_loadu_si256(black_box(pointer))
}
|
||||
|
||||
#[target_feature(enable = "avx512f")]
|
||||
unsafe fn load_m512i_word<T>(data: &[T], word_index: usize) -> __m512i {
|
||||
let byte_offset = word_index * 64 / size_of::<T>();
|
||||
let pointer = data.as_ptr().offset(byte_offset as isize) as *const i32;
|
||||
let pointer = data.as_ptr().add(byte_offset) as *const i32;
|
||||
_mm512_loadu_si512(black_box(pointer))
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1185,9 +1185,9 @@ pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 {
|
|||
///
|
||||
/// ```text
|
||||
/// let a0 = *p;
|
||||
/// let a1 = *p.offset(1);
|
||||
/// let a2 = *p.offset(2);
|
||||
/// let a3 = *p.offset(3);
|
||||
/// let a1 = *p.add(1);
|
||||
/// let a2 = *p.add(2);
|
||||
/// let a3 = *p.add(3);
|
||||
/// __m128::new(a3, a2, a1, a0)
|
||||
/// ```
|
||||
///
|
||||
|
|
@ -1241,9 +1241,9 @@ pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) {
|
|||
/// ```text
|
||||
/// let x = a.extract(0);
|
||||
/// *p = x;
|
||||
/// *p.offset(1) = x;
|
||||
/// *p.offset(2) = x;
|
||||
/// *p.offset(3) = x;
|
||||
/// *p.add(1) = x;
|
||||
/// *p.add(2) = x;
|
||||
/// *p.add(3) = x;
|
||||
/// ```
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_ps)
|
||||
|
|
@ -1317,9 +1317,9 @@ pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) {
|
|||
///
|
||||
/// ```text
|
||||
/// *p = a.extract(3);
|
||||
/// *p.offset(1) = a.extract(2);
|
||||
/// *p.offset(2) = a.extract(1);
|
||||
/// *p.offset(3) = a.extract(0);
|
||||
/// *p.add(1) = a.extract(2);
|
||||
/// *p.add(2) = a.extract(1);
|
||||
/// *p.add(3) = a.extract(0);
|
||||
/// ```
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_ps)
|
||||
|
|
@ -3006,9 +3006,9 @@ mod tests {
|
|||
|
||||
let unalignment = (p as usize) & 0xf;
|
||||
if unalignment != 0 {
|
||||
let delta = ((16 - unalignment) >> 2) as isize;
|
||||
let delta = (16 - unalignment) >> 2;
|
||||
fixup = delta as f32;
|
||||
p = p.offset(delta);
|
||||
p = p.add(delta);
|
||||
}
|
||||
|
||||
let r = _mm_load_ps(p);
|
||||
|
|
@ -3019,7 +3019,7 @@ mod tests {
|
|||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_loadu_ps() {
|
||||
let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
|
||||
let p = vals.as_ptr().offset(3);
|
||||
let p = vals.as_ptr().add(3);
|
||||
let r = _mm_loadu_ps(black_box(p));
|
||||
assert_eq_m128(r, _mm_setr_ps(4.0, 5.0, 6.0, 7.0));
|
||||
}
|
||||
|
|
@ -3036,9 +3036,9 @@ mod tests {
|
|||
|
||||
let unalignment = (p as usize) & 0xf;
|
||||
if unalignment != 0 {
|
||||
let delta = ((16 - unalignment) >> 2) as isize;
|
||||
let delta = (16 - unalignment) >> 2;
|
||||
fixup = delta as f32;
|
||||
p = p.offset(delta);
|
||||
p = p.add(delta);
|
||||
}
|
||||
|
||||
let r = _mm_loadr_ps(p);
|
||||
|
|
@ -3057,7 +3057,7 @@ mod tests {
|
|||
unsafe fn test_mm_store_ss() {
|
||||
let mut vals = [0.0f32; 8];
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
_mm_store_ss(vals.as_mut_ptr().offset(1), a);
|
||||
_mm_store_ss(vals.as_mut_ptr().add(1), a);
|
||||
|
||||
assert_eq!(vals[0], 0.0);
|
||||
assert_eq!(vals[1], 1.0);
|
||||
|
|
@ -3152,7 +3152,7 @@ mod tests {
|
|||
// Make sure p is **not** aligned to 16-byte boundary
|
||||
if (p as usize) & 0xf == 0 {
|
||||
ofs = 1;
|
||||
p = p.offset(1);
|
||||
p = p.add(1);
|
||||
}
|
||||
|
||||
_mm_storeu_ps(p, *black_box(&a));
|
||||
|
|
|
|||
|
|
@ -4518,7 +4518,7 @@ mod tests {
|
|||
// Make sure p is **not** aligned to 16-byte boundary
|
||||
if (p as usize) & 0xf == 0 {
|
||||
ofs = 1;
|
||||
p = p.offset(1);
|
||||
p = p.add(1);
|
||||
}
|
||||
|
||||
_mm_storeu_pd(p, *black_box(&a));
|
||||
|
|
@ -4606,7 +4606,7 @@ mod tests {
|
|||
let mut offset = 0;
|
||||
if (d as usize) & 0xf == 0 {
|
||||
offset = 1;
|
||||
d = d.offset(offset as isize);
|
||||
d = d.add(offset);
|
||||
}
|
||||
|
||||
let r = _mm_loadu_pd(d);
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ unsafe fn hex_encode_avx2<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a s
|
|||
let ascii_a = _mm256_set1_epi8((b'a' - 9 - 1) as i8);
|
||||
let and4bits = _mm256_set1_epi8(0xf);
|
||||
|
||||
let mut i = 0_isize;
|
||||
let mut i = 0_usize;
|
||||
while src.len() >= 32 {
|
||||
let invec = _mm256_loadu_si256(src.as_ptr() as *const _);
|
||||
|
||||
|
|
@ -96,18 +96,17 @@ unsafe fn hex_encode_avx2<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a s
|
|||
let res2 = _mm256_unpackhi_epi8(masked2, masked1);
|
||||
|
||||
// Store everything into the right destination now
|
||||
let base = dst.as_mut_ptr().offset(i * 2);
|
||||
let base1 = base.offset(0) as *mut _;
|
||||
let base2 = base.offset(16) as *mut _;
|
||||
let base3 = base.offset(32) as *mut _;
|
||||
let base4 = base.offset(48) as *mut _;
|
||||
let base = dst.as_mut_ptr().add(i * 2);
|
||||
let base1 = base.add(0) as *mut _;
|
||||
let base2 = base.add(16) as *mut _;
|
||||
let base3 = base.add(32) as *mut _;
|
||||
let base4 = base.add(48) as *mut _;
|
||||
_mm256_storeu2_m128i(base3, base1, res1);
|
||||
_mm256_storeu2_m128i(base4, base2, res2);
|
||||
src = &src[32..];
|
||||
i += 32;
|
||||
}
|
||||
|
||||
let i = i as usize;
|
||||
let _ = hex_encode_sse41(src, &mut dst[i * 2..]);
|
||||
|
||||
Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
|
||||
|
|
@ -122,7 +121,7 @@ unsafe fn hex_encode_sse41<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a
|
|||
let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8);
|
||||
let and4bits = _mm_set1_epi8(0xf);
|
||||
|
||||
let mut i = 0_isize;
|
||||
let mut i = 0_usize;
|
||||
while src.len() >= 16 {
|
||||
let invec = _mm_loadu_si128(src.as_ptr() as *const _);
|
||||
|
||||
|
|
@ -141,13 +140,12 @@ unsafe fn hex_encode_sse41<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a
|
|||
let res1 = _mm_unpacklo_epi8(masked2, masked1);
|
||||
let res2 = _mm_unpackhi_epi8(masked2, masked1);
|
||||
|
||||
_mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
|
||||
_mm_storeu_si128(dst.as_mut_ptr().offset(i * 2 + 16) as *mut _, res2);
|
||||
_mm_storeu_si128(dst.as_mut_ptr().add(i * 2) as *mut _, res1);
|
||||
_mm_storeu_si128(dst.as_mut_ptr().add(i * 2 + 16) as *mut _, res2);
|
||||
src = &src[16..];
|
||||
i += 16;
|
||||
}
|
||||
|
||||
let i = i as usize;
|
||||
let _ = hex_encode_fallback(src, &mut dst[i * 2..]);
|
||||
|
||||
Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
|
||||
|
|
@ -163,7 +161,7 @@ unsafe fn hex_encode_simd128<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'
|
|||
let ascii_a = u8x16_splat(b'a' - 9 - 1);
|
||||
let and4bits = u8x16_splat(0xf);
|
||||
|
||||
let mut i = 0_isize;
|
||||
let mut i = 0_usize;
|
||||
while src.len() >= 16 {
|
||||
let invec = v128_load(src.as_ptr() as *const _);
|
||||
|
||||
|
|
@ -189,13 +187,12 @@ unsafe fn hex_encode_simd128<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'
|
|||
masked2, masked1,
|
||||
);
|
||||
|
||||
v128_store(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
|
||||
v128_store(dst.as_mut_ptr().offset(i * 2 + 16) as *mut _, res2);
|
||||
v128_store(dst.as_mut_ptr().add(i * 2) as *mut _, res1);
|
||||
v128_store(dst.as_mut_ptr().add(i * 2 + 16) as *mut _, res2);
|
||||
src = &src[16..];
|
||||
i += 16;
|
||||
}
|
||||
|
||||
let i = i as usize;
|
||||
let _ = hex_encode_fallback(src, &mut dst[i * 2..]);
|
||||
|
||||
Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue