Specialize strlen for x86_64.
This commit is contained in:
parent
2ca64c2798
commit
a11bd1cfdb
3 changed files with 40 additions and 7 deletions
|
|
@ -279,3 +279,13 @@ pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> i32 {
|
|||
}
|
||||
0
|
||||
}
|
||||
|
||||
/// Returns the length of the C string at `s`: the number of bytes that
/// precede the first NUL byte. The terminator itself is not counted.
///
/// # Safety
///
/// `s` must be non-null and point to memory that is readable up to and
/// including a terminating NUL byte.
#[inline(always)]
pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize {
    let mut n: usize = 0;
    // Walk forward one byte at a time until the terminator is reached.
    while *s != 0 {
        n += 1;
        s = s.add(1);
    }
    n
}
|
||||
|
|
|
|||
|
|
@ -63,13 +63,7 @@ intrinsics! {
|
|||
#[mem_builtin]
|
||||
#[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")]
|
||||
pub unsafe extern "C" fn strlen(s: *const core::ffi::c_char) -> usize {
|
||||
let mut n = 0;
|
||||
let mut s = s;
|
||||
while *s != 0 {
|
||||
n += 1;
|
||||
s = s.offset(1);
|
||||
}
|
||||
n
|
||||
impls::c_string_length(s)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -173,6 +173,35 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 {
|
|||
c16(a.cast(), b.cast(), n)
|
||||
}
|
||||
|
||||
/// Returns the length of the C string at `s`: the number of bytes that
/// precede the first NUL byte. The terminator itself is not counted.
///
/// x86_64 specialization built on `repne scasb`.
///
/// # Safety
///
/// `s` must be non-null and point to memory that is readable up to and
/// including a terminating NUL byte.
#[inline(always)]
pub unsafe fn c_string_length(s: *const core::ffi::c_char) -> usize {
    let n: usize;

    // All register setup is expressed as operands so the compiler knows
    // exactly which registers are read and written:
    //   al  = 0          -> the byte value to search for
    //   rcx = usize::MAX -> effectively unbounded scan length
    //   rdi = s          -> scan cursor, advanced by the instruction
    // No `cld` is needed: Rust inline asm guarantees the direction flag is
    // clear on entry to the block.
    core::arch::asm!(
        // scan forward until a byte equal to `al` is found
        "repne scasb",
        // rcx was decremented once per scanned byte (terminator included),
        // so rcx == -(len + 2); `not` gives len + 1, `dec` gives len
        "not rcx",
        "dec rcx",
        inout("rdi") s => _,
        inout("rcx") usize::MAX => n,
        in("al") 0u8,
        // memory is only read, and the stack is not touched
        options(nostack, readonly),
    );

    n
}
|
||||
|
||||
/// Determine optimal parameters for a `rep` instruction.
|
||||
fn rep_param(dest: *mut u8, mut count: usize) -> (usize, usize, usize) {
|
||||
// Unaligned writes are still slow on modern processors, so align the destination address.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue