diff --git a/crates/xenia-app/src/main.rs b/crates/xenia-app/src/main.rs index 5a2fa01..e4765db 100644 --- a/crates/xenia-app/src/main.rs +++ b/crates/xenia-app/src/main.rs @@ -2138,7 +2138,7 @@ fn coord_pre_round( // is the guest-cycle timebase, not host_ns. This runs in `coord_pre_round` // which both the lockstep and parallel outer loops call every round. loop { - let now = kernel.scheduler.ctx(0).timebase; + let now = kernel.now_basis_at(0); let Some((r, reason)) = kernel.scheduler.advance_to_next_wake_if_due(now) else { break; @@ -3146,6 +3146,16 @@ fn run_execution_parallel( .and_then(|t| guard.scheduler.find_by_tid(t)) .unwrap_or(thread_ref); *guard.scheduler.ctx_mut_ref(target_ref) = ctx_taken; + // Advance the parallel-mode coherent clock by + // the instructions this block retired. This is + // the single authoritative "now" the kernel + // deadline-arithmetic reads in parallel mode + // (per-thread `ctx.timebase` is incoherent here + // because peers extract/zero their slots) — + // keeping it monotonic breaks the timebase- + // desync livelock where a woken thread re-armed + // the same constant deadline forever. + guard.scheduler.advance_global_clock(executed); // worker_epilogue's exit_current path // expects scheduler.current to be set // to the running thread. diff --git a/crates/xenia-cpu/src/scheduler.rs b/crates/xenia-cpu/src/scheduler.rs index 4cf926f..dfa9aa4 100644 --- a/crates/xenia-cpu/src/scheduler.rs +++ b/crates/xenia-cpu/src/scheduler.rs @@ -351,6 +351,19 @@ pub struct Scheduler { /// Sorted by deadline ascending. Scheduler wakes the first entry via /// `advance_to_next_wake` when a round finds nothing runnable. timed_waits: Vec<(u64, ThreadRef)>, + /// Parallel-mode coherent monotonic clock. 
In `--parallel`, workers + /// extract their `PpcContext` (leaving a zeroed timebase in the slot) + /// and step unlocked, so `ctx(hw_id).timebase` is NOT a coherent "now" + /// — a coordinator that reads it can see a stale/zero basis decoupled + /// from the deadline it just advanced to, re-arming the same constant + /// deadline forever (timebase-desync livelock). This field is the + /// single authoritative "now" the parallel coordinator and kernel + /// deadline-arithmetic read instead. Advanced by `advance_global_clock` + /// (per-block retired-instruction count) on each parallel writeback and + /// floored up by `advance_all_timebases_to`. LOCKSTEP never reads it + /// (gated by `KernelState::parallel_active`), so it has zero effect on + /// the deterministic lockstep trace. + global_clock: u64, /// Global count of TLS slots allocated — `spawn` pre-sizes new threads' /// `tls_values` to this. tls_slot_count: usize, @@ -389,6 +402,7 @@ impl Scheduler { order, rng_state, timed_waits: Vec::new(), + global_clock: 0, tls_slot_count: 0, non_empty_runnable: 0, rotation_cursor: 0, @@ -1114,6 +1128,29 @@ impl Scheduler { } } } + // Keep the parallel-mode coherent clock at least as far forward as + // any deadline we fast-forward to (idle/timer/wake advances). This + // only mutates the new `global_clock` field — lockstep never reads + // it — so it cannot perturb the deterministic lockstep trace. + self.global_clock = self.global_clock.max(deadline); + } + + /// Parallel-mode coherent "now" (see [`Self::global_clock`] field doc). + /// Read by the kernel deadline-arithmetic ONLY when + /// `KernelState::parallel_active`; lockstep keeps reading per-thread + /// `ctx(hw_id).timebase`. + #[inline] + pub fn global_clock(&self) -> u64 { + self.global_clock + } + + /// Advance the parallel-mode coherent clock by `n` retired instructions. + /// Called from the parallel worker writeback with the block's executed + /// count so "now" tracks aggregate guest progress. 
Never called in + /// lockstep (there the clock is unread; only `advance_all_timebases_to` moves it). + #[inline] + pub fn advance_global_clock(&mut self, n: u64) { + self.global_clock = self.global_clock.saturating_add(n); } /// Fast-forward the timebase to the earliest pending timed wait and diff --git a/crates/xenia-kernel/src/exports.rs b/crates/xenia-kernel/src/exports.rs index 5243688..392401e 100644 --- a/crates/xenia-kernel/src/exports.rs +++ b/crates/xenia-kernel/src/exports.rs @@ -2165,7 +2165,7 @@ fn nt_set_timer_ex(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelSt // timebase separately (immutable borrow) before any mutation of the // object to keep the borrow-checker happy. let hw_id = state.scheduler.current_hw_id().unwrap_or(0); - let now = state.scheduler.ctx(hw_id).timebase; + let now = state.now_basis_at(hw_id); // Read signed i64 due_time (big-endian hi/lo — same pattern as // parse_timeout). Negative = relative-from-now, positive = absolute @@ -3589,7 +3589,7 @@ pub(crate) fn parse_timeout(state: &KernelState, timeout_ptr: u32, mem: &GuestMe return Some(Some(0)); // poll } let hw_id = state.scheduler.current_hw_id().unwrap_or(0); - let now = state.scheduler.ctx(hw_id).timebase; + let now = state.now_basis_at(hw_id); // Negative = relative, positive = absolute wall-clock. Our timebase is a // plain instruction counter, so we treat all timeouts as "time-units // after now" regardless of sign, using the magnitude. diff --git a/crates/xenia-kernel/src/state.rs b/crates/xenia-kernel/src/state.rs index 290818c..9ccf0ed 100644 --- a/crates/xenia-kernel/src/state.rs +++ b/crates/xenia-kernel/src/state.rs @@ -1251,6 +1251,26 @@ impl KernelState { self.pending_timer_fires.first().map(|&(d, _)| d) } + /// Coherent "now" basis for deadline arithmetic, gated on execution mode. 
+ /// + /// In **lockstep** (`parallel_active == false`) this returns exactly the + /// pre-existing per-thread `ctx(hw_id).timebase` each call site read + /// before, so the deterministic lockstep trace is byte-identical (no + /// golden re-baseline). In **parallel** (`parallel_active == true`) the + /// per-thread timebases are incoherent (workers extract/zero their slots + /// while stepping unlocked), so we return the scheduler's single + /// monotonic `global_clock` instead — the basis that breaks the + /// timebase-desync livelock. Callers pass the `hw_id` they would have + /// used for the lockstep `ctx()` read (slot 0 for coordinator-side + /// drains, the current thread's slot for in-guest waits). + pub fn now_basis_at(&self, hw_id: u8) -> u64 { + if self.parallel_active { + self.scheduler.global_clock() + } else { + self.scheduler.ctx(hw_id).timebase + } + } + /// Fire every timer whose deadline is `<= now` (derived from slot 0's /// timebase, matching `parse_timeout`'s "current thread" fallback). /// For each fire: mark the timer `signaled=true`, clear its @@ -1259,7 +1279,7 @@ impl KernelState { /// fired — the caller uses this to decide whether the scheduler round /// needs a follow-up `advance_to_next_wake_if_due` step. pub fn fire_due_timers(&mut self) -> bool { - let now = self.scheduler.ctx(0).timebase; + let now = self.now_basis_at(0); let mut fired = false; loop { let Some(&(deadline, handle)) = self.pending_timer_fires.first() else {