diff --git a/crates/xenia-app/src/main.rs b/crates/xenia-app/src/main.rs index 1277528..685e5a9 100644 --- a/crates/xenia-app/src/main.rs +++ b/crates/xenia-app/src/main.rs @@ -2830,6 +2830,19 @@ fn run_execution( // Both calls are no-ops when `XENIA_SILPH_UI_AUTOSIGNAL_DELAY` // is unset (the pending queue stays empty). kernel.set_now_cycle_hint(stats.instruction_count); + // Drive the coherent monotonic "now" the kernel deadline-arithmetic + // reads (`KernelState::now_basis_at` -> `Scheduler::global_clock`) + // from the deterministic retired-instruction count. Floored up (never + // backwards). This is the LOCKSTEP analogue of the parallel writeback's + // `advance_global_clock`: a parked/poll thread computing a relative + // timeout via `parse_timeout` now reads a real, non-zero, monotone + // basis instead of `idle_ctx`'s timebase-0, so its deadline lands in + // the future and `coord_idle_advance` stops re-arming the constant + // past deadline forever (the timebase-desync livelock / render-gate + // root). Pure function of guest instructions -> bit-reproducible. + kernel + .scheduler + .advance_global_clock_to(stats.instruction_count); kernel.fire_due_silph_autosignals(stats.instruction_count); dispatch_graphics_interrupts( kernel, diff --git a/crates/xenia-app/tests/golden/sylpheed_n50m.json b/crates/xenia-app/tests/golden/sylpheed_n50m.json index 9a8662c..bde4037 100644 --- a/crates/xenia-app/tests/golden/sylpheed_n50m.json +++ b/crates/xenia-app/tests/golden/sylpheed_n50m.json @@ -1,6 +1,6 @@ { - "instructions": 50000004, - "imports": 1790936, + "instructions": 50000007, + "imports": 92317, "unimpl": 0, "draws": 0, "swaps": 1, diff --git a/crates/xenia-cpu/src/scheduler.rs b/crates/xenia-cpu/src/scheduler.rs index dfa9aa4..6a4b837 100644 --- a/crates/xenia-cpu/src/scheduler.rs +++ b/crates/xenia-cpu/src/scheduler.rs @@ -351,18 +351,27 @@ pub struct Scheduler { /// Sorted by deadline ascending. Scheduler wakes the first entry via /// `advance_to_next_wake` when a round finds nothing runnable. timed_waits: Vec<(u64, ThreadRef)>, - /// Parallel-mode coherent monotonic clock. In `--parallel`, workers - /// extract their `PpcContext` (leaving a zeroed timebase in the slot) - /// and step unlocked, so `ctx(hw_id).timebase` is NOT a coherent "now" - /// — a coordinator that reads it can see a stale/zero basis decoupled - /// from the deadline it just advanced to, re-arming the same constant - /// deadline forever (timebase-desync livelock). This field is the - /// single authoritative "now" the parallel coordinator and kernel - /// deadline-arithmetic read instead. Advanced by `advance_global_clock` - /// (per-block retired-instruction count) on each parallel writeback and - /// floored up by `advance_all_timebases_to`. LOCKSTEP never reads it - /// (gated by `KernelState::parallel_active`), so it has zero effect on - /// the deterministic lockstep trace. + /// Coherent monotonic "now" clock — the single authoritative basis the + /// kernel deadline-arithmetic (`KernelState::now_basis_at`) reads in + /// BOTH execution modes. Per-thread `ctx(hw_id).timebase` is NOT a + /// coherent "now": + /// * In `--parallel`, workers extract their `PpcContext` (leaving a + /// zeroed timebase in the slot) and step unlocked. + /// * In **lockstep**, a parked/poll thread has `running_idx == None`, + /// so `ctx()` returns `idle_ctx` (timebase 0); a `parse_timeout` + /// reading that basis registers `deadline = 0 + relative`, a value + /// permanently in the past, and `coord_idle_advance` re-arms that + /// same constant deadline forever (timebase-desync livelock — the + /// render-gate root: the submitter's 16ms re-wait never fires). + /// So a coordinator/parked thread reading per-thread timebase can see a + /// stale/zero basis decoupled from the deadline it just advanced to. + /// This field is that coherent basis instead. It is DETERMINISTIC: a + /// pure function of retired guest instructions (never wall-clock). + /// Advanced by `advance_global_clock` (per-block retired count on each + /// parallel writeback), `advance_global_clock_to` (floored up to the + /// deterministic per-round `stats.instruction_count` in lockstep), and + /// floored up by `advance_all_timebases_to`. Two cold lockstep runs + /// read identical values, so the lockstep trace stays bit-reproducible. global_clock: u64, /// Global count of TLS slots allocated — `spawn` pre-sizes new threads' /// `tls_values` to this. @@ -1146,13 +1155,26 @@ impl Scheduler { /// Advance the parallel-mode coherent clock by `n` retired instructions. /// Called from the parallel worker writeback with the block's executed - /// count so "now" tracks aggregate guest progress. Never called in - /// lockstep (the clock stays 0 and unread there). + /// count so "now" tracks aggregate guest progress. #[inline] pub fn advance_global_clock(&mut self, n: u64) { self.global_clock = self.global_clock.saturating_add(n); } + /// Floor the coherent clock up to `now` (monotonic; never goes + /// backwards). Used by the **lockstep** outer loop once per round to + /// track the deterministic retired-instruction count + /// (`stats.instruction_count`) as the single coherent "now". A plain + /// floor-up rather than `saturating_add` because the lockstep caller + /// passes an absolute monotonic counter (not a per-block delta), and + /// because `advance_all_timebases_to` may already have pushed + /// `global_clock` past the instruction count when fast-forwarding to a + /// future deadline — clamping with `max` keeps both sources monotone. + #[inline] + pub fn advance_global_clock_to(&mut self, now: u64) { + self.global_clock = self.global_clock.max(now); + } + /// Fast-forward the timebase to the earliest pending timed wait and /// wake that sleeper. Used when a round had no Ready threads and no /// timer fires closer than the earliest wait. Returns the woken diff --git a/crates/xenia-kernel/src/state.rs b/crates/xenia-kernel/src/state.rs index e34b065..83b37ab 100644 --- a/crates/xenia-kernel/src/state.rs +++ b/crates/xenia-kernel/src/state.rs @@ -1295,24 +1295,28 @@ impl KernelState { self.pending_timer_fires.first().map(|&(d, _)| d) } - /// Coherent "now" basis for deadline arithmetic, gated on execution mode. + /// Coherent "now" basis for deadline arithmetic — the scheduler's + /// single monotonic `global_clock`, in BOTH execution modes. /// - /// In **lockstep** (`parallel_active == false`) this returns exactly the - /// pre-existing per-thread `ctx(hw_id).timebase` each call site read - /// before, so the deterministic lockstep trace is byte-identical (no - /// golden re-baseline). In **parallel** (`parallel_active == true`) the - /// per-thread timebases are incoherent (workers extract/zero their slots - /// while stepping unlocked), so we return the scheduler's single - /// monotonic `global_clock` instead — the basis that breaks the - /// timebase-desync livelock. Callers pass the `hw_id` they would have - /// used for the lockstep `ctx()` read (slot 0 for coordinator-side - /// drains, the current thread's slot for in-guest waits). - pub fn now_basis_at(&self, hw_id: u8) -> u64 { - if self.parallel_active { - self.scheduler.global_clock() - } else { - self.scheduler.ctx(hw_id).timebase - } + /// Per-thread `ctx(hw_id).timebase` is NOT a sound "now" for deadline + /// arithmetic: in `--parallel` workers extract/zero their slots while + /// stepping unlocked, and in **lockstep** a parked/poll thread has + /// `running_idx == None` so `ctx()` returns `idle_ctx` (timebase 0). + /// Either way a `parse_timeout` reading the per-thread basis can see 0 + /// (or a stale value) and register `deadline = 0 + relative`, a value + /// permanently in the past, which `coord_idle_advance` then re-arms + /// forever (the timebase-desync livelock; the render-gate root). The + /// `global_clock` is a deterministic function of retired guest + /// instructions (per-round `stats.instruction_count` floor-ups in + /// lockstep, per-block retired counts in parallel), so it is coherent, + /// monotonic, never zero after boot, and bit-reproducible across two + /// cold lockstep runs. + /// + /// The `hw_id` argument is retained for call-site clarity (which slot a + /// caller would conceptually be "asking about") but is no longer read — + /// the basis is global. + pub fn now_basis_at(&self, _hw_id: u8) -> u64 { + self.scheduler.global_clock() } /// Fire every timer whose deadline is `<= now` (derived from slot 0's