diff --git a/crates/xenia-app/src/main.rs b/crates/xenia-app/src/main.rs
index 5a41154..bcdba26 100644
--- a/crates/xenia-app/src/main.rs
+++ b/crates/xenia-app/src/main.rs
@@ -2151,7 +2151,13 @@ fn coord_pre_round(
     let fired = if kernel.parallel_active {
         kernel.interrupts.tick_vsync_wallclock()
     } else {
-        kernel.interrupts.tick_vsync_instr(stats.instruction_count)
+        // iterate-3AJ: present-anchored — pass the guest's live present
+        // (`VdSwap`) count so vsync tracks the real present rate once the
+        // guest is presenting (≈1 vblank/present), instead of firing a
+        // fixed instruction quantum that over-fires ~66× during one heavy
+        // splash asset-load frame and collapsed the logo fade-in.
+        let presents = kernel.gpu.swaps_seen();
+        kernel.interrupts.tick_vsync_instr(stats.instruction_count, presents)
     };
     if fired {
         use std::sync::atomic::Ordering;
diff --git a/crates/xenia-app/tests/golden/sylpheed_n50m.json b/crates/xenia-app/tests/golden/sylpheed_n50m.json
index 9687b99..a6e7f75 100644
--- a/crates/xenia-app/tests/golden/sylpheed_n50m.json
+++ b/crates/xenia-app/tests/golden/sylpheed_n50m.json
@@ -1,9 +1,9 @@
 {
-  "instructions": 50000014,
-  "imports": 352251,
+  "instructions": 50000007,
+  "imports": 333453,
   "unimpl": 0,
-  "draws": 718,
-  "swaps": 147,
+  "draws": 1274,
+  "swaps": 259,
   "unique_render_targets": 2,
   "shader_blobs_live": 6,
   "texture_cache_entries": 1
diff --git a/crates/xenia-gpu/src/handle.rs b/crates/xenia-gpu/src/handle.rs
index fa3db8a..467b156 100644
--- a/crates/xenia-gpu/src/handle.rs
+++ b/crates/xenia-gpu/src/handle.rs
@@ -444,6 +444,29 @@ impl GpuBackend {
         }
     }
 
+    /// Current guest present (`VdSwap`) count. Cheap single-field read used
+    /// by the present-anchored vsync ticker (iterate-3AJ) every scheduler
+    /// round. Inline mode reads the live counter directly; threaded mode
+    /// reads the last-published digest mirror under a brief lock (the
+    /// `--parallel` path uses the wall-clock vsync ticker anyway, so the
+    /// exact freshness here is not load-bearing).
+    ///
+    /// The count is monotonic and callers diff it against the previous
+    /// reading, so a poisoned digest lock must not collapse it to 0: the
+    /// last-published value is recovered from the poisoned guard instead.
+    pub fn swaps_seen(&self) -> u64 {
+        match self {
+            GpuBackend::Inline(s) => s.stats.swaps_seen,
+            GpuBackend::Threaded(h) => {
+                h.digest
+                    .lock()
+                    .unwrap_or_else(|poisoned| poisoned.into_inner())
+                    .stats
+                    .swaps_seen
+            }
+        }
+    }
+
     /// Forward [`GpuSystem::has_pending_interrupts`] under inline mode;
     /// under threaded mode peek the `int_rx` channel.
     pub fn has_pending_interrupts(&self) -> bool {
diff --git a/crates/xenia-kernel/src/interrupts.rs b/crates/xenia-kernel/src/interrupts.rs
index aa7cbf7..84ce584 100644
--- a/crates/xenia-kernel/src/interrupts.rs
+++ b/crates/xenia-kernel/src/interrupts.rs
@@ -183,6 +183,28 @@ pub struct InterruptState {
     /// ticker. `tick_vsync_instr` diffs against this to advance
     /// `vsync_accumulator`.
     pub last_instr_count: u64,
+    /// **iterate-3AJ — present-anchored vsync.** Set `true` once the guest
+    /// has presented at least one frame (a `VdSwap`). Before this, the
+    /// vsync ticker uses the legacy fixed instruction-quantum cadence so
+    /// the boot present-loop bootstrap (iterate-2W) still gets the vsyncs
+    /// it needs *before* the first present. After this, vsync is anchored
+    /// to the guest's real present rate (≈1 vblank per present, as on real
+    /// hardware where the title double-buffers at vblank), with only a
+    /// small capped instruction-quantum *fallback* for frames where the
+    /// guest genuinely stops presenting (heavy asset load). This stops the
+    /// proxy from firing ~66 vsyncs during one heavy load frame, which
+    /// collapsed the splash-logo intro fade-in (the guest's vsync counter
+    /// jumped 0→66 in one frame instead of ramping smoothly).
+    pub vsync_present_anchored: bool,
+    /// Last observed guest present (`VdSwap`) count. `tick_vsync_instr`
+    /// diffs the live count against this each call to emit one vblank per
+    /// new present once `vsync_present_anchored` is set.
+    pub last_present_count: u64,
+    /// How many *fallback* (non-present-driven) vsyncs have fired in the
+    /// current dry (no-present) window. Reset to 0 whenever a present
+    /// occurs. Capped at [`DRY_FALLBACK_CAP`] so one heavy non-presenting
+    /// frame cannot fire a long burst of vsyncs (the fade-in regression).
+    pub dry_fallback_fired: u32,
     /// Wall-clock anchor for the production v-sync ticker. `None` until
     /// the first `tick_vsync_wallclock` call (lazy init so unit tests
     /// that never invoke that function don't construct an Instant).
@@ -208,6 +230,21 @@ pub struct InterruptState {
 /// determinism.
 pub const VSYNC_INSTR_PERIOD: u64 = 150_000;
 
+/// **iterate-3AJ — present-anchored vsync fallback.**
+///
+/// Once the guest is in its present loop (`vsync_present_anchored`), each
+/// guest present emits exactly one vblank — vsync *is* the present cadence,
+/// as on real Xbox 360 hardware where the title double-buffers at vblank.
+/// For a frame where the guest stops presenting (e.g. the ~1.1 s splash
+/// asset-load), we still need *some* vsyncs to keep timers / the present
+/// loop alive, but firing one per [`VSYNC_INSTR_PERIOD`] would reproduce the
+/// ~66-vsync spike that collapsed the fade-in. So the fallback fires one
+/// vblank per `VSYNC_INSTR_PERIOD` of *non-presenting* instructions, but at
+/// most [`DRY_FALLBACK_CAP`] per dry window (the counter resets on each
+/// present). A heavy load frame therefore advances the guest vsync counter
+/// by ≤ `DRY_FALLBACK_CAP` (a small ramp like canary's 0/5/10/2/1…), not 66.
+pub const DRY_FALLBACK_CAP: u32 = 4;
+
 /// Wall-clock period for the **production** v-sync ticker. 16.667 ms
 /// targets exactly 60 Hz. KRNBUG-D08 — converting from the
 /// instruction-count proxy fixes the `--parallel` rate drop while
@@ -254,23 +291,83 @@ impl InterruptState {
         self.pending.pop_front().map(|(source, _)| source)
     }
 
-    /// **Legacy** — instruction-count v-sync ticker. Kept for unit tests
-    /// that need a deterministic clock source. Production code calls
-    /// `tick_vsync_wallclock` instead. Returns `true` if at least one
-    /// v-sync was queued.
-    pub fn tick_vsync_instr(&mut self, current_instr_count: u64) -> bool {
+    /// **Present-anchored** instruction-paced v-sync ticker (the lockstep
+    /// production path; also used by unit tests for a deterministic clock).
+    ///
+    /// `current_instr_count` is the running retired-instruction count.
+    /// `present_count` is the guest's running `VdSwap` count (monotonic).
+    ///
+    /// Two regimes:
+    ///
+    /// 1. **Bootstrap** (`!vsync_present_anchored`, i.e. before the guest's
+    ///    first present): legacy fixed-quantum cadence — one vsync per
+    ///    [`VSYNC_INSTR_PERIOD`] retired instructions. The boot present loop
+    ///    (iterate-2W) needs vsyncs delivered *before* it can present, so
+    ///    this regime is unchanged from the original ticker. The first
+    ///    observed present flips `vsync_present_anchored`.
+    ///
+    /// 2. **Present-anchored** (after the first present): one vblank per
+    ///    guest present (vsync *is* the present cadence on real hardware),
+    ///    plus a small capped instruction-quantum fallback ([`DRY_FALLBACK_CAP`]
+    ///    per dry window) so a frame where the guest stops presenting (heavy
+    ///    asset load) still ticks a *few* vsyncs — not ~66, which collapsed
+    ///    the splash fade-in.
+    ///
+    /// Returns `true` if at least one v-sync was queued.
+    pub fn tick_vsync_instr(&mut self, current_instr_count: u64, present_count: u64) -> bool {
         let delta = current_instr_count.saturating_sub(self.last_instr_count);
         self.last_instr_count = current_instr_count;
         self.vsync_accumulator = self.vsync_accumulator.saturating_add(delta);
-        if self.vsync_accumulator < VSYNC_INSTR_PERIOD {
-            return false;
+
+        let new_presents = present_count.saturating_sub(self.last_present_count);
+        self.last_present_count = present_count;
+        if new_presents > 0 {
+            self.vsync_present_anchored = true;
         }
-        let periods = self.vsync_accumulator / VSYNC_INSTR_PERIOD;
-        self.vsync_accumulator %= VSYNC_INSTR_PERIOD;
-        for _ in 0..periods {
+
+        // Regime 1 — bootstrap: legacy fixed instruction quantum. Preserves
+        // the iterate-2W present-loop bootstrap exactly (vsyncs must fire
+        // before the guest can present).
+        if !self.vsync_present_anchored {
+            if self.vsync_accumulator < VSYNC_INSTR_PERIOD {
+                return false;
+            }
+            let periods = self.vsync_accumulator / VSYNC_INSTR_PERIOD;
+            self.vsync_accumulator %= VSYNC_INSTR_PERIOD;
+            for _ in 0..periods {
+                self.queue_interrupt(INTERRUPT_SOURCE_VSYNC, VSYNC_TARGET_CPU);
+            }
+            return true;
+        }
+
+        // Regime 2 — present-anchored.
+        let mut queued = false;
+
+        if new_presents > 0 {
+            // One vblank per guest present. `queue_interrupt` caps the FIFO,
+            // so a burst of presents in one round can't flood. A fresh
+            // present resets the dry-window state.
+            for _ in 0..new_presents {
+                self.queue_interrupt(INTERRUPT_SOURCE_VSYNC, VSYNC_TARGET_CPU);
+            }
+            self.vsync_accumulator = 0;
+            self.dry_fallback_fired = 0;
+            queued = true;
+        } else if self.vsync_accumulator >= VSYNC_INSTR_PERIOD
+            && self.dry_fallback_fired < DRY_FALLBACK_CAP
+        {
+            // Dry tick (no new present): fire at most one fallback vsync per
+            // call, consuming one `VSYNC_INSTR_PERIOD` each, up to
+            // `DRY_FALLBACK_CAP` per dry window (avoids the ~66-vsync spike).
+            // NOTE(review): past the cap nothing fires until the next present;
+            // confirm no title blocks on vblank while it is not presenting.
+            self.vsync_accumulator -= VSYNC_INSTR_PERIOD;
+            self.dry_fallback_fired += 1;
             self.queue_interrupt(INTERRUPT_SOURCE_VSYNC, VSYNC_TARGET_CPU);
+            queued = true;
         }
-        true
+
+        queued
     }
 
     /// **Production** — wall-clock v-sync ticker. Fires
@@ -364,9 +461,10 @@ mod tests {
         let mut s = InterruptState::default();
         s.set_callback(0x1000, 0xAB);
         assert_eq!(VSYNC_INSTR_PERIOD, 150_000);
-        assert!(!s.tick_vsync_instr(VSYNC_INSTR_PERIOD - 1));
+        // present_count = 0 → bootstrap regime (legacy fixed quantum).
+        assert!(!s.tick_vsync_instr(VSYNC_INSTR_PERIOD - 1, 0));
         assert!(s.pending.is_empty());
-        assert!(s.tick_vsync_instr(VSYNC_INSTR_PERIOD));
+        assert!(s.tick_vsync_instr(VSYNC_INSTR_PERIOD, 0));
         assert_eq!(s.peek_next(), Some(INTERRUPT_SOURCE_VSYNC));
     }
 
@@ -376,10 +474,59 @@ mod tests {
         // be delivered, not lost.
         let mut s = InterruptState::default();
         s.set_callback(0x1000, 0xAB);
-        assert!(s.tick_vsync_instr(VSYNC_INSTR_PERIOD * 3 + 10));
+        // present_count = 0 → bootstrap regime drains all 3 periods at once.
+        assert!(s.tick_vsync_instr(VSYNC_INSTR_PERIOD * 3 + 10, 0));
         assert_eq!(s.pending.len(), 3);
     }
 
+    #[test]
+    fn tick_vsync_instr_present_anchors_after_first_present() {
+        // iterate-3AJ: once the guest presents, vsync tracks presents (one
+        // vblank per present), NOT the fixed instruction quantum.
+        let mut s = InterruptState::default();
+        s.set_callback(0x1000, 0xAB);
+        // Bootstrap: instruction quantum fires (present_count still 0).
+        assert!(s.tick_vsync_instr(VSYNC_INSTR_PERIOD, 0));
+        assert_eq!(s.pending.len(), 1);
+        let _ = s.take_next();
+        // First present flips to anchored: exactly one vblank for the present.
+        assert!(s.tick_vsync_instr(VSYNC_INSTR_PERIOD * 2, 1));
+        assert!(s.vsync_present_anchored);
+        assert_eq!(s.pending.len(), 1);
+        let _ = s.take_next();
+    }
+
+    #[test]
+    fn tick_vsync_instr_heavy_dry_frame_capped_not_spiking() {
+        // iterate-3AJ: the regression. A heavy non-presenting frame retires
+        // ~10M instructions; the OLD ticker fired ~66 vsyncs (10M/150k) in
+        // that single frame, jumping the guest vsync counter 0→66 and
+        // skipping the fade-in. The present-anchored ticker caps the dry
+        // window at DRY_FALLBACK_CAP.
+        let mut s = InterruptState::default();
+        s.set_callback(0x1000, 0xAB);
+        // Enter anchored mode via one present.
+        let mut instr: u64 = VSYNC_INSTR_PERIOD;
+        assert!(s.tick_vsync_instr(instr, 1));
+        while s.take_next().is_some() {}
+        // Simulate a 10M-instruction frame with NO new present, ticked in
+        // chunks (as coord_pre_round would). Count fallback vsyncs queued.
+        let mut fallback = 0usize;
+        for _ in 0..100 {
+            instr += 100_000; // 100 chunks × 100k = 10M instructions
+            if s.tick_vsync_instr(instr, 1) {
+                while s.take_next().is_some() {
+                    fallback += 1;
+                }
+            }
+        }
+        assert_eq!(
+            fallback, DRY_FALLBACK_CAP as usize,
+            "a heavy dry frame must cap fallback vsyncs at DRY_FALLBACK_CAP, \
+             not fire ~66"
+        );
+    }
+
     #[test]
     fn tick_vsync_wallclock_first_call_sets_anchor() {
         // First call seeds the anchor and never fires. KRNBUG-D08: