diff --git a/crates/xenia-app/src/main.rs b/crates/xenia-app/src/main.rs index a31754d..12ce4c3 100644 --- a/crates/xenia-app/src/main.rs +++ b/crates/xenia-app/src/main.rs @@ -2451,6 +2451,23 @@ fn coord_pre_round( // restores the ~60 Hz rate at the cost of bit-exact run reproducibility, // which is acceptable under `--parallel` (M11 already documented // `--parallel` as non-deterministic by design). + // 2.AZ — lockstep v-sync clock source. + // + // CORRECTION to the 2.AX framing (this iterate, measured): the lockstep + // ticker's instruction-count clock does NOT freeze after the post-boot + // wedge. `stats.instruction_count` is monotone & global and climbs the + // whole run (reaches the full -n budget) because the "wedge" is not a + // true all-blocked stall — tids 7/8/9/10 stay `Ready` and spin, so + // instructions keep retiring and the ticker keeps crossing the 150k + // threshold (~3 333 crossings @ -n 500M). The measured ~73-v-sync/run + // cap on *delivered* interrupts is the INJECTOR throughput + // (INTERRUPT_QUEUE_CAP=4 + one drain/round in + // `try_inject_graphics_interrupt`), NOT the clock. And even a delivered + // r3==0 VSync ISR never signals Event 0x10e8 — it takes the opt_callback + // `+44` path, a confirmed structural dead-end (2.AV/2.AX). So the + // cadence clock is NOT the wedge gate; the original instruction-count + // source is retained (driving a timebase ticker off `max_timebase` + // PLATEAUS when the lead thread blocks and regresses delivery 73→13). let fired = if kernel.parallel_active { kernel.interrupts.tick_vsync_wallclock() } else { diff --git a/crates/xenia-cpu/src/scheduler.rs b/crates/xenia-cpu/src/scheduler.rs index aca2439..3b80bbf 100644 --- a/crates/xenia-cpu/src/scheduler.rs +++ b/crates/xenia-cpu/src/scheduler.rs @@ -1196,6 +1196,26 @@ impl Scheduler { } } + /// Maximum guest timebase across every thread in every slot's runqueue + /// (2.AZ).
This is the global guest-clock proxy: it advances both when + /// any thread executes (per-instruction `timebase += 1`) and when the + /// idle path jumps the timebase forward to a pending deadline + /// (`advance_all_timebases_to`). Unlike `ctx(hw_id).timebase` — which + /// reads only the *currently scheduled* thread on one slot and therefore + /// stalls whenever that slot's thread is Blocked — this scans every + /// runqueue entry. NOTE(review): a max over the threads present *now* + /// is not guaranteed monotone, and the 2.AZ measurement noted in + /// `coord_pre_round` found it PLATEAUS when the lead thread blocks. + /// Guest state only, no wall-clock. Returns 0 if all runqueues are empty. + pub fn max_timebase(&self) -> u64 { + self.slots + .iter() + .flat_map(|slot| slot.runqueue.iter()) + .map(|t| t.ctx.timebase) + .max() + .unwrap_or(0) + } + /// Fast-forward the timebase to the earliest pending timed wait and /// wake that sleeper. Used when a round had no Ready threads and no /// timer fires closer than the earliest wait. Returns the woken diff --git a/crates/xenia-kernel/src/interrupts.rs b/crates/xenia-kernel/src/interrupts.rs index 55f0e2f..2ecf0b5 100644 --- a/crates/xenia-kernel/src/interrupts.rs +++ b/crates/xenia-kernel/src/interrupts.rs @@ -165,6 +165,15 @@ pub struct InterruptState { /// ticker. `tick_vsync_instr` diffs against this to advance /// `vsync_accumulator`. pub last_instr_count: u64, + /// Last observed guest **timebase** for the timebase-driven v-sync + /// ticker (`tick_vsync_timebase`, 2.AZ). Only this baseline is separate + /// from `last_instr_count`: both tickers add into the one shared + /// `vsync_accumulator`, so drive a given state with a single ticker. + /// The guest timebase is described as advancing `+1` per executed + /// instruction *and* jumping forward in 1 µs units during idle + /// (`advance_all_timebases_to`). NOTE(review): the 2.AZ measurement + /// in `coord_pre_round` found the cadence clock is not the wedge gate.
+ pub last_timebase: u64, /// Wall-clock anchor for the production v-sync ticker. `None` until /// the first `tick_vsync_wallclock` call (lazy init so unit tests /// that never invoke that function don't construct an Instant). @@ -249,6 +258,52 @@ impl InterruptState { true } + /// **Lockstep (2.AZ)** — deterministic v-sync ticker driven off the + /// guest **timebase** instead of `stats.instruction_count`. + /// + /// Status: NOT the lockstep clock source. This was written on the 2.AX + /// hypothesis that `instruction_count` freezes once `tid=1` wedges on + /// Event 0x10e8, starving `tick_vsync_instr`. The 2.AZ measurement + /// recorded in `coord_pre_round` refuted that: instructions keep + /// retiring through the wedge, delivery is bounded by the injector + /// (`INTERRUPT_QUEUE_CAP` + one drain per round), and a ticker driven + /// off `max_timebase` regressed delivery (73→13 per run). + /// + /// Mechanism: adds `current_timebase - last_timebase` (saturating, so + /// a timebase that moves backwards contributes 0) to the shared + /// `vsync_accumulator` and queues one `INTERRUPT_SOURCE_VSYNC` per + /// whole `VSYNC_INSTR_PERIOD` accumulated, keeping the remainder. + /// `tick_vsync_instr` feeds the same accumulator, so drive a given + /// state with only one of the two tickers. The per-call burst is + /// limited to the FIFO's free slots; periods beyond that are + /// discarded rather than pushed into `queue_interrupt`. + /// + /// Returns `true` when at least one whole period had accumulated, + /// even if the FIFO was full and nothing could be queued. + /// + /// **Determinism**: the cadence derives only from the `current_timebase` + /// values passed in, never host wall-clock.
+ pub fn tick_vsync_timebase(&mut self, current_timebase: u64) -> bool { + let delta = current_timebase.saturating_sub(self.last_timebase); + self.last_timebase = current_timebase; + self.vsync_accumulator = self.vsync_accumulator.saturating_add(delta); + if self.vsync_accumulator < VSYNC_INSTR_PERIOD { + return false; + } + let periods = self.vsync_accumulator / VSYNC_INSTR_PERIOD; + self.vsync_accumulator %= VSYNC_INSTR_PERIOD; + // Cap the per-call burst at the FIFO's *free* slots: an idle round + // can jump the timebase forward by many periods at once, and + // interrupts may still be pending from earlier rounds. Capping at + // the full depth would let `queue_interrupt` drop the overflow + // whenever the FIFO is non-empty (assumes `pending` is the FIFO). + let free_slots = INTERRUPT_QUEUE_CAP.saturating_sub(self.pending.len()); + for _ in 0..periods.min(free_slots as u64) { + self.queue_interrupt(INTERRUPT_SOURCE_VSYNC); + } + true + } + /// **Production** — wall-clock v-sync ticker. Fires /// `floor(elapsed / VSYNC_PERIOD)` v-syncs since the last call and /// advances the anchor by that many full periods (so a long pause @@ -356,6 +411,60 @@ mod tests { assert_eq!(s.pending.len(), 3); } + #[test] + fn tick_vsync_timebase_fires_at_period_threshold() { + // 2.AZ — timebase-driven lockstep ticker mirrors the + // instruction-count one: a delta < period queues nothing, a delta + // == period queues exactly one v-sync. + let mut s = InterruptState::default(); + s.set_callback(0x1000, 0xAB); + assert!(!s.tick_vsync_timebase(VSYNC_INSTR_PERIOD - 1)); + assert!(s.pending.is_empty()); + assert!(s.tick_vsync_timebase(VSYNC_INSTR_PERIOD)); + assert_eq!(s.peek_next(), Some(INTERRUPT_SOURCE_VSYNC)); + } + + #[test] + fn tick_vsync_timebase_advances_while_guest_wedged() { + // The original 2.AZ premise: even with ZERO executed instructions, an idle + // round jumps the guest timebase forward (µs deadlines).
Diffing + // the timebase must still queue the due v-syncs so the ISR keeps + // firing during the wedge. Here the timebase jumps by 2 periods in + // a single call with no intervening "instruction" progress. + let mut s = InterruptState::default(); + s.set_callback(0x1000, 0xAB); + assert!(s.tick_vsync_timebase(VSYNC_INSTR_PERIOD * 2)); + assert_eq!(s.pending.len(), 2); + } + + #[test] + fn tick_vsync_timebase_caps_burst_at_queue_cap() { + // A far-off idle deadline can jump the timebase forward by many + // periods at once; the per-call burst is capped at the FIFO depth + // so the backlog doesn't silently overflow `queue_interrupt`. + let mut s = InterruptState::default(); + s.set_callback(0x1000, 0xAB); + let huge = VSYNC_INSTR_PERIOD * (INTERRUPT_QUEUE_CAP as u64 + 50); + assert!(s.tick_vsync_timebase(huge)); + assert_eq!(s.pending.len(), INTERRUPT_QUEUE_CAP); + assert_eq!(s.dropped, 0, "cap should pre-bound, not drop"); + } + + #[test] + fn tick_vsync_timebase_cap_counts_already_pending() { + // With one v-sync still queued, a huge jump may only fill the + // remaining free slots rather than overflow `queue_interrupt`; + // the cap has to account for what is already pending. + let mut s = InterruptState::default(); + s.set_callback(0x1000, 0xAB); + assert!(s.tick_vsync_timebase(VSYNC_INSTR_PERIOD)); + assert_eq!(s.pending.len(), 1); + let huge = VSYNC_INSTR_PERIOD * (INTERRUPT_QUEUE_CAP as u64 + 50); + assert!(s.tick_vsync_timebase(VSYNC_INSTR_PERIOD + huge)); + assert_eq!(s.pending.len(), INTERRUPT_QUEUE_CAP); + assert_eq!(s.dropped, 0, "free-slot cap should pre-bound, not drop"); + } + #[test] fn tick_vsync_wallclock_first_call_sets_anchor() { // First call seeds the anchor and never fires. KRNBUG-D08: