Merge pull request #4524 from Stypox/tracing-chrome-overhead-rdtsc

Account for time spent tracing, use RDTSC for faster time
2025-08-23 11:18:29 +00:00 · 2025-08-23 11:18:29 +00:00 · d70fea9149
commit d70fea9149
parent 069074ead4 804b41e949
3 changed files with 283 additions and 78 deletions
--- a/src/tools/miri/src/bin/log/mod.rs
+++ b/src/tools/miri/src/bin/log/mod.rs
@ -1,2 +1,3 @@
 pub mod setup;
 mod tracing_chrome;
+mod tracing_chrome_instant;
--- a/src/tools/miri/src/bin/log/tracing_chrome.rs
+++ b/src/tools/miri/src/bin/log/tracing_chrome.rs
@ -7,12 +7,15 @@
 //! (`git log -- path/to/tracing_chrome.rs`), but in summary:
 //! - the file attributes were changed and `extern crate` was added at the top
 //! - if a tracing span has a field called "tracing_separate_thread", it will be given a separate
-//! span ID even in [TraceStyle::Threaded] mode, to make it appear on a separate line when viewing
-//! the trace in <https://ui.perfetto.dev>. This is the syntax to trigger this behavior:
+//!   span ID even in [TraceStyle::Threaded] mode, to make it appear on a separate line when viewing
+//!   the trace in <https://ui.perfetto.dev>. This is the syntax to trigger this behavior:
 //!   ```rust
 //!   tracing::info_span!("my_span", tracing_separate_thread = tracing::field::Empty, /* ... */)
 //!   ```
-//! - use i64 instead of u64 for the "id" in [ChromeLayer::get_root_id] to be compatible with Perfetto
+//! - use i64 instead of u64 for the "id" in [ChromeLayer::get_root_id] to be compatible with
+//!   Perfetto
+//! - use [ChromeLayer::with_elapsed_micros_subtracting_tracing] to make time measurements faster on
+//!   Linux x86/x86_64 and to subtract time spent tracing from the timestamps in the trace file
 //!
 //! Depending on the tracing-chrome crate from crates.io is unfortunately not possible, since it
 //! depends on `tracing_core` which conflicts with rustc_private's `tracing_core` (meaning it would
@ -50,9 +53,22 @@ use std::{
    thread::JoinHandle,
 };

+use crate::log::tracing_chrome_instant::TracingChromeInstant;
+
+/// Contains thread-local data for threads that send tracing spans or events.
+struct ThreadData {
+    /// A unique ID for this thread, will populate "tid" field in the output trace file.
+    tid: usize,
+    /// A clone of [ChromeLayer::out] to avoid the expensive operation of accessing a mutex
+    /// every time. This is used to send [Message]s to the thread that saves trace data to file.
+    out: Sender<Message>,
+    /// The instant in time this thread was started. All events happening on this thread will be
+    /// saved to the trace file with a timestamp (the "ts" field) measured relative to this instant.
+    start: TracingChromeInstant,
+}
+
 thread_local! {
-    static OUT: RefCell<Option<Sender<Message>>> = const { RefCell::new(None) };
-    static TID: RefCell<Option<usize>> = const { RefCell::new(None) };
+    static THREAD_DATA: RefCell<Option<ThreadData>> = const { RefCell::new(None) };
 }

 type NameFn<S> = Box<dyn Fn(&EventOrSpan<'_, '_, S>) -> String + Send + Sync>;
@ -64,7 +80,6 @@ where
    S: Subscriber + for<'span> LookupSpan<'span> + Send + Sync,
 {
    out: Arc<Mutex<Sender<Message>>>,
-    start: std::time::Instant,
    max_tid: AtomicUsize,
    include_args: bool,
    include_locations: bool,
@ -323,7 +338,6 @@ where
 {
    fn new(mut builder: ChromeLayerBuilder<S>) -> (ChromeLayer<S>, FlushGuard) {
        let (tx, rx) = mpsc::channel();
-        OUT.with(|val| val.replace(Some(tx.clone())));

        let out_writer = builder
            .out_writer
@ -443,7 +457,6 @@ where
        };
        let layer = ChromeLayer {
            out: Arc::new(Mutex::new(tx)),
-            start: std::time::Instant::now(),
            max_tid: AtomicUsize::new(0),
            name_fn: builder.name_fn.take(),
            cat_fn: builder.cat_fn.take(),
@ -456,22 +469,7 @@ where
        (layer, guard)
    }

-    fn get_tid(&self) -> (usize, bool) {
-        TID.with(|value| {
-            let tid = *value.borrow();
-            match tid {
-                Some(tid) => (tid, false),
-                None => {
-                    let tid = self.max_tid.fetch_add(1, Ordering::SeqCst);
-                    value.replace(Some(tid));
-                    (tid, true)
-                }
-            }
-        })
-    }
-
-    fn get_callsite(&self, data: EventOrSpan<S>) -> Callsite {
-        let (tid, new_thread) = self.get_tid();
+    fn get_callsite(&self, data: EventOrSpan<S>, tid: usize) -> Callsite {
        let name = self.name_fn.as_ref().map(|name_fn| name_fn(&data));
        let target = self.cat_fn.as_ref().map(|cat_fn| cat_fn(&data));
        let meta = match data {
@ -502,14 +500,6 @@ where
            (None, None)
        };

-        if new_thread {
-            let name = match std::thread::current().name() {
-                Some(name) => name.to_owned(),
-                None => tid.to_string(),
-            };
-            self.send_message(Message::NewThread(tid, name));
-        }
-
        Callsite {
            tid,
            name,
@ -548,31 +538,55 @@ where
        }
    }

-    fn enter_span(&self, span: SpanRef<S>, ts: f64) {
-        let callsite = self.get_callsite(EventOrSpan::Span(&span));
+    fn enter_span(&self, span: SpanRef<S>, ts: f64, tid: usize, out: &Sender<Message>) {
+        let callsite = self.get_callsite(EventOrSpan::Span(&span), tid);
        let root_id = self.get_root_id(span);
-        self.send_message(Message::Enter(ts, callsite, root_id));
+        let _ignored = out.send(Message::Enter(ts, callsite, root_id));
    }

-    fn exit_span(&self, span: SpanRef<S>, ts: f64) {
-        let callsite = self.get_callsite(EventOrSpan::Span(&span));
+    fn exit_span(&self, span: SpanRef<S>, ts: f64, tid: usize, out: &Sender<Message>) {
+        let callsite = self.get_callsite(EventOrSpan::Span(&span), tid);
        let root_id = self.get_root_id(span);
-        self.send_message(Message::Exit(ts, callsite, root_id));
+        let _ignored = out.send(Message::Exit(ts, callsite, root_id));
    }

-    fn get_ts(&self) -> f64 {
-        self.start.elapsed().as_nanos() as f64 / 1000.0
-    }
+    /// Helper function that measures how much time is spent while executing `f` and accounts for it
+    /// in subsequent calls, with the aim to reduce biases in the data collected by `tracing_chrome`
+    /// by subtracting the time spent inside tracing functions from the timeline. This makes it so
+    /// that the time spent inside the `tracing_chrome` functions does not impact the timestamps
+    /// inside the trace file (i.e. `ts`), even if such functions are slow (e.g. because they need
+    /// to format arguments on the same thread those arguments are collected on, otherwise memory
+    /// safety would be broken).
+    ///
+    /// `f` is called with the microseconds elapsed since the current thread was started (**not**
+    /// since the program start!), with the current thread ID (i.e. `tid`), and with a [Sender] that
+    /// can be used to send a [Message] to the thread that collects [Message]s and saves them to the
+    /// trace file.
+    #[inline(always)]
+    fn with_elapsed_micros_subtracting_tracing(&self, f: impl Fn(f64, usize, &Sender<Message>)) {
+        THREAD_DATA.with(|value| {
+            let mut thread_data = value.borrow_mut();
+            let (ThreadData { tid, out, start }, new_thread) = match thread_data.as_mut() {
+                Some(thread_data) => (thread_data, false),
+                None => {
+                    let tid = self.max_tid.fetch_add(1, Ordering::SeqCst);
+                    let out = self.out.lock().unwrap().clone();
+                    let start = TracingChromeInstant::setup_for_thread_and_start(tid);
+                    *thread_data = Some(ThreadData { tid, out, start });
+                    (thread_data.as_mut().unwrap(), true)
+                }
+            };

-    fn send_message(&self, message: Message) {
-        OUT.with(move |val| {
-            if val.borrow().is_some() {
-                let _ignored = val.borrow().as_ref().unwrap().send(message);
-            } else {
-                let out = self.out.lock().unwrap().clone();
-                let _ignored = out.send(message);
-                val.replace(Some(out));
-            }
+            start.with_elapsed_micros_subtracting_tracing(|ts| {
+                if new_thread {
+                    let name = match std::thread::current().name() {
+                        Some(name) => name.to_owned(),
+                        None => tid.to_string(),
+                    };
+                    let _ignored = out.send(Message::NewThread(*tid, name));
+                }
+                f(ts, *tid, out);
+            });
        });
    }
 }
@ -586,52 +600,58 @@ where
            return;
        }

-        let ts = self.get_ts();
-        self.enter_span(ctx.span(id).expect("Span not found."), ts);
+        self.with_elapsed_micros_subtracting_tracing(|ts, tid, out| {
+            self.enter_span(ctx.span(id).expect("Span not found."), ts, tid, out);
+        });
    }

    fn on_record(&self, id: &span::Id, values: &span::Record<'_>, ctx: Context<'_, S>) {
        if self.include_args {
-            let span = ctx.span(id).unwrap();
-            let mut exts = span.extensions_mut();
+            self.with_elapsed_micros_subtracting_tracing(|_, _, _| {
+                let span = ctx.span(id).unwrap();
+                let mut exts = span.extensions_mut();

-            let args = exts.get_mut::<ArgsWrapper>();
+                let args = exts.get_mut::<ArgsWrapper>();

-            if let Some(args) = args {
-                let args = Arc::make_mut(&mut args.args);
-                values.record(&mut JsonVisitor { object: args });
-            }
+                if let Some(args) = args {
+                    let args = Arc::make_mut(&mut args.args);
+                    values.record(&mut JsonVisitor { object: args });
+                }
+            });
        }
    }

    fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
-        let ts = self.get_ts();
-        let callsite = self.get_callsite(EventOrSpan::Event(event));
-        self.send_message(Message::Event(ts, callsite));
+        self.with_elapsed_micros_subtracting_tracing(|ts, tid, out| {
+            let callsite = self.get_callsite(EventOrSpan::Event(event), tid);
+            let _ignored = out.send(Message::Event(ts, callsite));
+        });
    }

    fn on_exit(&self, id: &span::Id, ctx: Context<'_, S>) {
        if let TraceStyle::Async = self.trace_style {
            return;
        }
-        let ts = self.get_ts();
-        self.exit_span(ctx.span(id).expect("Span not found."), ts);
+        self.with_elapsed_micros_subtracting_tracing(|ts, tid, out| {
+            self.exit_span(ctx.span(id).expect("Span not found."), ts, tid, out);
+        });
    }

    fn on_new_span(&self, attrs: &span::Attributes<'_>, id: &span::Id, ctx: Context<'_, S>) {
-        if self.include_args {
-            let mut args = Object::new();
-            attrs.record(&mut JsonVisitor { object: &mut args });
-            ctx.span(id).unwrap().extensions_mut().insert(ArgsWrapper {
-                args: Arc::new(args),
-            });
-        }
-        if let TraceStyle::Threaded = self.trace_style {
-            return;
-        }
+        self.with_elapsed_micros_subtracting_tracing(|ts, tid, out| {
+            if self.include_args {
+                let mut args = Object::new();
+                attrs.record(&mut JsonVisitor { object: &mut args });
+                ctx.span(id).unwrap().extensions_mut().insert(ArgsWrapper {
+                    args: Arc::new(args),
+                });
+            }
+            if let TraceStyle::Threaded = self.trace_style {
+                return;
+            }

-        let ts = self.get_ts();
-        self.enter_span(ctx.span(id).expect("Span not found."), ts);
+            self.enter_span(ctx.span(id).expect("Span not found."), ts, tid, out);
+        });
    }

    fn on_close(&self, id: span::Id, ctx: Context<'_, S>) {
@ -639,8 +659,9 @@ where
            return;
        }

-        let ts = self.get_ts();
-        self.exit_span(ctx.span(&id).expect("Span not found."), ts);
+        self.with_elapsed_micros_subtracting_tracing(|ts, tid, out| {
+            self.exit_span(ctx.span(&id).expect("Span not found."), ts, tid, out);
+        });
    }
 }

--- a/src/tools/miri/src/bin/log/tracing_chrome_instant.rs
+++ b/src/tools/miri/src/bin/log/tracing_chrome_instant.rs
@ -0,0 +1,183 @@
+//! Code in this class was in part inspired by
+//! <https://github.com/tikv/minstant/blob/27c9ec5ec90b5b67113a748a4defee0d2519518c/src/tsc_now.rs>.
+//! A useful resource is also
+//! <https://www.pingcap.com/blog/how-we-trace-a-kv-database-with-less-than-5-percent-performance-impact/>,
+//! although this file does not implement TSC synchronization but insteads pins threads to CPUs,
+//! since the former is not reliable (i.e. it might lead to non-monotonic time measurements).
+//! Another useful resource for future improvements might be measureme's time measurement utils:
+//! <https://github.com/rust-lang/measureme/blob/master/measureme/src/counters.rs>.
+//! Documentation about how the Linux kernel chooses a clock source can be found here:
+//! <https://btorpey.github.io/blog/2014/02/18/clock-sources-in-linux/>.
+#![cfg(feature = "tracing")]
+
+/// This alternative `TracingChromeInstant` implementation was made entirely to suit the needs of
+/// [crate::log::tracing_chrome], and shouldn't be used for anything else. It featues two functions:
+/// - [TracingChromeInstant::setup_for_thread_and_start], which sets up the current thread to do
+///   proper time tracking and returns a point in time to use as "t=0", and
+/// - [TracingChromeInstant::with_elapsed_micros_subtracting_tracing], which allows
+///   obtaining how much time elapsed since [TracingChromeInstant::setup_for_thread_and_start] was
+///   called while accounting for (and subtracting) the time spent inside tracing-related functions.
+///
+/// This measures time using [std::time::Instant], except for x86/x86_64 Linux machines, where
+/// [std::time::Instant] is too slow (~1.5us) and thus `rdtsc` is used instead (~5ns).
+pub enum TracingChromeInstant {
+    WallTime {
+        /// The time at which this instant was created, shifted forward to account
+        /// for time spent in tracing functions as explained in
+        /// [TracingChromeInstant::with_elapsed_micros_subtracting_tracing]'s comments.
+        start_instant: std::time::Instant,
+    },
+    #[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))]
+    Tsc {
+        /// The value in the TSC counter when this instant was created, shifted forward to account
+        /// for time spent in tracing functions as explained in
+        /// [TracingChromeInstant::with_elapsed_micros_subtracting_tracing]'s comments.
+        start_tsc: u64,
+        /// The period of the TSC counter in microseconds.
+        tsc_to_microseconds: f64,
+    },
+}
+
+impl TracingChromeInstant {
+    /// Can be thought of as the same as [std::time::Instant::now()], but also does some setup to
+    /// make TSC stable in case TSC is available. This is supposed to be called (at most) once per
+    /// thread since the thread setup takes a few milliseconds.
+    ///
+    /// WARNING: If TSC is available, `incremental_thread_id` is used to pick to which CPU to pin
+    /// the current thread. Thread IDs should be assigned contiguously starting from 0. Be aware
+    /// that the current thread will be restricted to one CPU for the rest of the execution!
+    pub fn setup_for_thread_and_start(incremental_thread_id: usize) -> TracingChromeInstant {
+        #[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))]
+        if *tsc::IS_TSC_AVAILABLE.get_or_init(tsc::is_tsc_available) {
+            // We need to lock this thread to a specific CPU, because CPUs' TSC timers might be out
+            // of sync.
+            tsc::set_cpu_affinity(incremental_thread_id);
+
+            // Can only use tsc_to_microseconds() and rdtsc() after having set the CPU affinity!
+            // We compute tsc_to_microseconds anew for every new thread just in case some CPU core
+            // has a different TSC frequency.
+            let tsc_to_microseconds = tsc::tsc_to_microseconds();
+            let start_tsc = tsc::rdtsc();
+            return TracingChromeInstant::Tsc { start_tsc, tsc_to_microseconds };
+        }
+
+        let _ = incremental_thread_id; // otherwise we get a warning when the TSC branch is disabled
+        TracingChromeInstant::WallTime { start_instant: std::time::Instant::now() }
+    }
+
+    /// Calls `f` with the time elapsed in microseconds since this [TracingChromeInstant] was built
+    /// by [TracingChromeInstant::setup_for_thread_and_start], while subtracting all time previously
+    /// spent executing other `f`s passed to this function. This behavior allows subtracting time
+    /// spent in functions that log tracing data (which `f` is supposed to be) from the tracing time
+    /// measurements.
+    ///
+    /// Note: microseconds are used as the time unit since that's what Chrome trace files should
+    /// contain, see the definition of the "ts" field in
+    /// <https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview>.
+    #[inline(always)]
+    pub fn with_elapsed_micros_subtracting_tracing(&mut self, f: impl Fn(f64)) {
+        match self {
+            TracingChromeInstant::WallTime { start_instant } => {
+                // Obtain the current time (before executing `f`).
+                let instant_before_f = std::time::Instant::now();
+
+                // Using the current time (`instant_before_f`) and the `start_instant` stored in
+                // `self`, calculate the elapsed time (in microseconds) since this instant was
+                // instantiated, accounting for any time that was previously spent executing `f`.
+                // The "accounting" part is not computed in this line, but is rather done by
+                // shifting forward the `start_instant` down below.
+                let ts = (instant_before_f - *start_instant).as_nanos() as f64 / 1000.0;
+
+                // Run the function (supposedly a function internal to the tracing infrastructure).
+                f(ts);
+
+                // Measure how much time was spent executing `f` and shift `start_instant` forward
+                // by that amount. This "removes" that time from the trace.
+                *start_instant += std::time::Instant::now() - instant_before_f;
+            }
+
+            #[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))]
+            TracingChromeInstant::Tsc { start_tsc, tsc_to_microseconds } => {
+                // the comments above also apply here, since it's the same logic
+                let tsc_before_f = tsc::rdtsc();
+                let ts = ((tsc_before_f - *start_tsc) as f64) * (*tsc_to_microseconds);
+                f(ts);
+                *start_tsc += tsc::rdtsc() - tsc_before_f;
+            }
+        }
+    }
+}
+
+#[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))]
+mod tsc {
+
+    pub static IS_TSC_AVAILABLE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
+
+    /// Reads the timestamp-counter register. Will give monotonic answers only when called from the
+    /// same thread, because the TSC of different CPUs might be out of sync.
+    #[inline(always)]
+    pub(super) fn rdtsc() -> u64 {
+        #[cfg(target_arch = "x86")]
+        use core::arch::x86::_rdtsc;
+        #[cfg(target_arch = "x86_64")]
+        use core::arch::x86_64::_rdtsc;
+
+        unsafe { _rdtsc() }
+    }
+
+    /// Estimates the frequency of the TSC counter by waiting 10ms in a busy loop and
+    /// looking at how much the TSC increased in the meantime.
+    pub(super) fn tsc_to_microseconds() -> f64 {
+        const BUSY_WAIT: std::time::Duration = std::time::Duration::from_millis(10);
+        let tsc_start = rdtsc();
+        let instant_start = std::time::Instant::now();
+        while instant_start.elapsed() < BUSY_WAIT {
+            // `thread::sleep()` is not very precise at waking up the program at the right time,
+            // so use a busy loop instead.
+            core::hint::spin_loop();
+        }
+        let tsc_end = rdtsc();
+        (BUSY_WAIT.as_nanos() as f64) / 1000.0 / ((tsc_end - tsc_start) as f64)
+    }
+
+    /// Checks whether the TSC counter is available and runs at a constant rate independently
+    /// of CPU frequency even across different power states of the CPU (i.e. checks for the
+    /// `invariant_tsc` CPUID flag).
+    pub(super) fn is_tsc_available() -> bool {
+        #[cfg(target_arch = "x86")]
+        use core::arch::x86::__cpuid;
+        #[cfg(target_arch = "x86_64")]
+        use core::arch::x86_64::__cpuid;
+
+        // implemented like https://docs.rs/raw-cpuid/latest/src/raw_cpuid/extended.rs.html#965-967
+        const LEAF: u32 = 0x80000007; // this is the leaf for "advanced power management info"
+        let cpuid = unsafe { __cpuid(LEAF) };
+        (cpuid.edx & (1 << 8)) != 0 // EDX bit 8 indicates invariant TSC
+    }
+
+    /// Forces the current thread to run on a single CPU, which ensures the TSC counter is monotonic
+    /// (since TSCs of different CPUs might be out-of-sync). `incremental_thread_id` is used to pick
+    /// to which CPU to pin the current thread, and should be an incremental number that starts from
+    /// 0.
+    pub(super) fn set_cpu_affinity(incremental_thread_id: usize) {
+        let cpu_id = match std::thread::available_parallelism() {
+            Ok(available_parallelism) => incremental_thread_id % available_parallelism,
+            _ => panic!("Could not determine CPU count to properly set CPU affinity"),
+        };
+
+        let mut set = unsafe { std::mem::zeroed::<libc::cpu_set_t>() };
+        unsafe { libc::CPU_SET(cpu_id, &mut set) };
+
+        // Set the current thread's core affinity.
+        if unsafe {
+            libc::sched_setaffinity(
+                0, // Defaults to current thread
+                size_of::<libc::cpu_set_t>(),
+                &set as *const _,
+            )
+        } != 0
+        {
+            panic!("Could not set CPU affinity")
+        }
+    }
+}