From 47114032296c82a9f9f1bd9ef5ded976a724a528 Mon Sep 17 00:00:00 2001 From: Igor Matuszewski Date: Mon, 16 Mar 2026 15:09:48 +0100 Subject: [PATCH] Add watchdog suppression API Add `Handle::suppress_watchdog()` which returns an RAII guard that prevents the watchdog from counting stalls toward the deadlock limit. Tests that create many simulated nodes can queue hundreds of ready tasks whose wall-clock drain time exceeds the default 5 s watchdog window without advancing simulated time. This causes false-positive deadlock aborts on slower or busier hosts. The guard resets the deadlock counter on each watchdog tick while active, and re-enables normal deadlock detection when dropped. Usage: let _guard = Handle::current().suppress_watchdog(); // ... heavy setup that may stall simulated time ... // guard dropped here, watchdog resumes --- msim/src/sim/runtime/mod.rs | 43 ++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/msim/src/sim/runtime/mod.rs b/msim/src/sim/runtime/mod.rs index 55548fc..7dfe091 100644 --- a/msim/src/sim/runtime/mod.rs +++ b/msim/src/sim/runtime/mod.rs @@ -13,7 +13,10 @@ use std::{ future::Future, io::Write, net::IpAddr, - sync::{Arc, Mutex, RwLock}, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, Mutex, RwLock, + }, time::Duration, }; use tokio::sync::oneshot; @@ -69,6 +72,7 @@ impl Runtime { task: task.handle().clone(), sims: Default::default(), config, + watchdog_suppressed: Arc::new(AtomicBool::new(false)), }; let rt = Runtime { rand, task, handle }; rt.add_simulator::(); @@ -255,6 +259,13 @@ fn start_watchdog_with( let _ = std::io::stdout().flush(); std::process::abort(); } + if rt.handle.is_watchdog_suppressed() { + // Reset the counter while suppressed so heavy setup phases + // don't accumulate stalls toward the deadlock limit. 
+ deadlock_count = 0; + prev_time = now; + continue; + } if now == prev_time { warn!("possible deadlock detected..."); deadlock_count += 1; @@ -284,6 +295,7 @@ pub struct Handle { pub(crate) task: task::TaskHandle, pub(crate) sims: Arc>>>, pub(crate) config: SimConfig, + watchdog_suppressed: Arc<AtomicBool>, } impl Handle { @@ -362,6 +374,35 @@ impl Handle { pub fn time(&self) -> &time::TimeHandle { &self.time } + + /// Suppress the watchdog for the duration of the returned guard. + /// + /// While the guard is alive, the watchdog will not count stalls against + /// the deadlock limit. This is useful during heavy setup phases where the + /// simulator may take a long time to drain ready tasks without advancing + /// simulated time. + pub fn suppress_watchdog(&self) -> WatchdogSuppressionGuard { + self.watchdog_suppressed.store(true, Ordering::Relaxed); + WatchdogSuppressionGuard { + flag: Arc::clone(&self.watchdog_suppressed), + } + } + + /// Check whether the watchdog is currently suppressed. + pub(crate) fn is_watchdog_suppressed(&self) -> bool { + self.watchdog_suppressed.load(Ordering::Relaxed) + } +} + +/// RAII guard that re-enables the watchdog when dropped. +pub struct WatchdogSuppressionGuard { + flag: Arc<AtomicBool>, +} + +impl Drop for WatchdogSuppressionGuard { + fn drop(&mut self) { + self.flag.store(false, Ordering::Relaxed); + } } /// Guard for entering handle