Lock shards while collecting active jobs.

Co-authored-by: Zoxc <zoxc32@gmail.com>
This commit is contained in:
ywxt 2025-11-14 09:01:22 +08:00
parent 01867557cd
commit a4d0507af7
5 changed files with 47 additions and 25 deletions

View file

@ -252,7 +252,7 @@ pub(crate) fn run_in_thread_pool_with_globals<
let query_map = rustc_span::set_session_globals_then(unsafe { &*(session_globals as *const SessionGlobals) }, || {
// Ensure there was no errors collecting all active jobs.
// We need the complete map to ensure we find a cycle to break.
QueryCtxt::new(tcx).collect_active_jobs().expect("failed to collect active queries in deadlock handler")
QueryCtxt::new(tcx).collect_active_jobs(false).expect("failed to collect active queries in deadlock handler")
});
break_query_cycles(query_map, &registry);
})

View file

@ -88,17 +88,25 @@ impl<'tcx> QueryContext for QueryCtxt<'tcx> {
tls::with_related_context(self.tcx, |icx| icx.query)
}
/// Returns a query map representing active query jobs.
/// It returns an incomplete map as an error if it fails
/// to take locks.
/// Returns a map of currently active query jobs.
///
/// If `require_complete` is `true`, this function locks all shards of the
/// query results to produce a complete map, which always returns `Ok`.
/// Otherwise, it may return an incomplete map as an error if any shard
/// lock cannot be acquired.
///
/// Prefer passing `false` to `require_complete` to avoid potential deadlocks,
/// especially when called from within a deadlock handler, unless a
/// complete map is needed and no deadlock is possible at this call site.
fn collect_active_jobs(
self,
require_complete: bool,
) -> Result<QueryMap<QueryStackDeferred<'tcx>>, QueryMap<QueryStackDeferred<'tcx>>> {
let mut jobs = QueryMap::default();
let mut complete = true;
for collect in super::TRY_COLLECT_ACTIVE_JOBS.iter() {
if collect(self.tcx, &mut jobs).is_none() {
for collect in super::COLLECT_ACTIVE_JOBS.iter() {
if collect(self.tcx, &mut jobs, require_complete).is_none() {
complete = false;
}
}
@ -163,11 +171,7 @@ impl<'tcx> QueryContext for QueryCtxt<'tcx> {
}
fn depth_limit_error(self, job: QueryJobId) {
// FIXME: `collect_active_jobs` expects no locks to be held, which doesn't hold for this call.
let query_map = match self.collect_active_jobs() {
Ok(query_map) => query_map,
Err(query_map) => query_map,
};
let query_map = self.collect_active_jobs(true).expect("failed to collect active queries");
let (info, depth) = job.find_dep_kind_root(query_map);
let suggested_limit = match self.recursion_limit() {
@ -731,19 +735,21 @@ macro_rules! define_queries {
}
}
pub(crate) fn try_collect_active_jobs<'tcx>(
pub(crate) fn collect_active_jobs<'tcx>(
tcx: TyCtxt<'tcx>,
qmap: &mut QueryMap<QueryStackDeferred<'tcx>>,
require_complete: bool,
) -> Option<()> {
let make_query = |tcx, key| {
let kind = rustc_middle::dep_graph::dep_kinds::$name;
let name = stringify!($name);
$crate::plumbing::create_query_frame(tcx, rustc_middle::query::descs::$name, key, kind, name)
};
let res = tcx.query_system.states.$name.try_collect_active_jobs(
let res = tcx.query_system.states.$name.collect_active_jobs(
tcx,
make_query,
qmap,
require_complete,
);
// this can be called during unwinding, and the function has a `try_`-prefix, so
// don't `unwrap()` here, just manually check for `None` and do best-effort error
@ -814,10 +820,10 @@ macro_rules! define_queries {
// These arrays are used for iteration and can't be indexed by `DepKind`.
const TRY_COLLECT_ACTIVE_JOBS: &[
for<'tcx> fn(TyCtxt<'tcx>, &mut QueryMap<QueryStackDeferred<'tcx>>) -> Option<()>
const COLLECT_ACTIVE_JOBS: &[
for<'tcx> fn(TyCtxt<'tcx>, &mut QueryMap<QueryStackDeferred<'tcx>>, bool) -> Option<()>
] =
&[$(query_impl::$name::try_collect_active_jobs),*];
&[$(query_impl::$name::collect_active_jobs),*];
const ALLOC_SELF_PROFILE_QUERY_STRINGS: &[
for<'tcx> fn(TyCtxt<'tcx>, &mut QueryKeyStringCache)

View file

@ -616,7 +616,7 @@ pub fn print_query_stack<Qcx: QueryContext>(
let mut count_total = 0;
// Make use of a partial query map if we fail to take locks collecting active queries.
let query_map = match qcx.collect_active_jobs() {
let query_map = match qcx.collect_active_jobs(false) {
Ok(query_map) => query_map,
Err(query_map) => query_map,
};

View file

@ -161,7 +161,10 @@ pub trait QueryContext: HasDepContext {
/// Get the query information from the TLS context.
fn current_query_job(self) -> Option<QueryJobId>;
fn collect_active_jobs(self) -> Result<QueryMap<Self::QueryInfo>, QueryMap<Self::QueryInfo>>;
fn collect_active_jobs(
self,
require_complete: bool,
) -> Result<QueryMap<Self::QueryInfo>, QueryMap<Self::QueryInfo>>;
fn lift_query_info(self, info: &Self::QueryInfo) -> QueryStackFrameExtra;

View file

@ -7,10 +7,12 @@ use std::fmt::Debug;
use std::hash::Hash;
use std::mem;
use hashbrown::HashTable;
use hashbrown::hash_table::Entry;
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::sharded::{self, Sharded};
use rustc_data_structures::stack::ensure_sufficient_stack;
use rustc_data_structures::sync::LockGuard;
use rustc_data_structures::{outline, sync};
use rustc_errors::{Diag, FatalError, StashKey};
use rustc_span::{DUMMY_SP, Span};
@ -63,22 +65,33 @@ where
self.active.lock_shards().all(|shard| shard.is_empty())
}
pub fn try_collect_active_jobs<Qcx: Copy>(
pub fn collect_active_jobs<Qcx: Copy>(
&self,
qcx: Qcx,
make_query: fn(Qcx, K) -> QueryStackFrame<I>,
jobs: &mut QueryMap<I>,
require_complete: bool,
) -> Option<()> {
let mut active = Vec::new();
// We use try_lock_shards here since we are called from the
// deadlock handler, and this shouldn't be locked.
for shard in self.active.try_lock_shards() {
for (k, v) in shard?.iter() {
let mut collect = |iter: LockGuard<'_, HashTable<(K, QueryResult<I>)>>| {
for (k, v) in iter.iter() {
if let QueryResult::Started(ref job) = *v {
active.push((*k, (*job).clone()));
active.push((*k, job.clone()));
}
}
};
if require_complete {
for shard in self.active.lock_shards() {
collect(shard);
}
} else {
// We use try_lock_shards here since we are called from the
// deadlock handler, and this shouldn't be locked.
for shard in self.active.try_lock_shards() {
collect(shard?);
}
}
// Call `make_query` while we're not holding a `self.active` lock as `make_query` may call
@ -271,7 +284,7 @@ where
{
// Ensure there was no errors collecting all active jobs.
// We need the complete map to ensure we find a cycle to break.
let query_map = qcx.collect_active_jobs().ok().expect("failed to collect active queries");
let query_map = qcx.collect_active_jobs(false).ok().expect("failed to collect active queries");
let error = try_execute.find_cycle_in_stack(query_map, &qcx.current_query_job(), span);
(mk_cycle(query, qcx, error.lift(qcx)), None)