From a11bd1cfdb3642bc137b2e2bda3254c2a6a827f4 Mon Sep 17 00:00:00 2001
From: Tobias Decking
Date: Tue, 21 Feb 2023 23:13:02 +0100
Subject: [PATCH] Specialize `strlen` for `x86_64`.

Move the byte-counting loop out of `strlen` into `impls::c_string_length`
and add a `repne scasb` based `c_string_length` to `x86_64.rs`.

The assembly receives the search byte and the count through `asm!`
operands, so every register it touches is declared to the compiler, and
it uses `core::` paths like the other files touched by this patch.
---
 library/compiler-builtins/src/mem/impls.rs  | 15 +++++++++++++++
 library/compiler-builtins/src/mem/mod.rs    |  8 +-------
 library/compiler-builtins/src/mem/x86_64.rs | 30 ++++++++++++++++++++++++++++++
 3 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/library/compiler-builtins/src/mem/impls.rs b/library/compiler-builtins/src/mem/impls.rs
index 72003a5c472b..23c9d8d32753 100644
--- a/library/compiler-builtins/src/mem/impls.rs
+++ b/library/compiler-builtins/src/mem/impls.rs
@@ -279,3 +279,18 @@ pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> i32 {
     }
     0
 }
+
+/// Returns the number of bytes before the first NUL byte of the C string at `s`.
+///
+/// # Safety
+///
+/// `s` must point to a readable, NUL-terminated byte sequence.
+#[inline(always)]
+pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize {
+    let mut n = 0;
+    while *s != 0 {
+        n += 1;
+        s = s.add(1);
+    }
+    n
+}
diff --git a/library/compiler-builtins/src/mem/mod.rs b/library/compiler-builtins/src/mem/mod.rs
index c5b0ddc16ec3..be118778b1a4 100644
--- a/library/compiler-builtins/src/mem/mod.rs
+++ b/library/compiler-builtins/src/mem/mod.rs
@@ -63,13 +63,7 @@ intrinsics! {
     #[mem_builtin]
     #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")]
     pub unsafe extern "C" fn strlen(s: *const core::ffi::c_char) -> usize {
-        let mut n = 0;
-        let mut s = s;
-        while *s != 0 {
-            n += 1;
-            s = s.offset(1);
-        }
-        n
+        impls::c_string_length(s)
     }
 }
 
diff --git a/library/compiler-builtins/src/mem/x86_64.rs b/library/compiler-builtins/src/mem/x86_64.rs
index 17b461f79142..ea8f6d819de5 100644
--- a/library/compiler-builtins/src/mem/x86_64.rs
+++ b/library/compiler-builtins/src/mem/x86_64.rs
@@ -173,6 +173,36 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 {
     c16(a.cast(), b.cast(), n)
 }
 
+/// Returns the number of bytes before the first NUL byte of the C string at `s`.
+///
+/// # Safety
+///
+/// `s` must point to a readable, NUL-terminated byte sequence.
+#[inline(always)]
+pub unsafe fn c_string_length(s: *const core::ffi::c_char) -> usize {
+    let n: usize;
+
+    core::arch::asm!(
+        // Scan forward for the byte in `al`. `rcx` drops by one per byte examined
+        // (terminator included); the direction flag is already clear on entry.
+        "repne scasb",
+
+        // rcx == usize::MAX - (len + 1), so len == !rcx - 1.
+        "not rcx",
+        "dec rcx",
+
+        // Byte to search for. Passed as an operand so the compiler knows `al` is in use.
+        in("al") 0u8,
+        // Start address; `scasb` advances it and the final value is not needed.
+        inout("rdi") s => _,
+        // Effectively unbounded count in, length out.
+        inout("rcx") usize::MAX => n,
+        options(nostack),
+    );
+
+    n
+}
+
 /// Determine optimal parameters for a `rep` instruction.
 fn rep_param(dest: *mut u8, mut count: usize) -> (usize, usize, usize) {
     // Unaligned writes are still slow on modern processors, so align the destination address.