//! Graphics interrupt + synthetic v-sync bookkeeping (P6).
//!
//! The Xbox 360 graphics driver calls `VdSetGraphicsInterruptCallback` to
//! register a single per-process callback that the OS invokes on:
//!
//! 1. **V-sync** — at 60 Hz; source code 0 (`INTERRUPT_SOURCE_VSYNC`).
//! 2. **Command-processor interrupt** — when `PM4_INTERRUPT` fires from the
//!    guest-issued command stream; source code 1 (`INTERRUPT_SOURCE_CP`).
//!
//! Canary's [xboxkrnl_video.cc:303-310](xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc#L303-L310)
//! dispatches the callback on HW thread 0. We follow the same convention.
//!
//! The delivery model is cooperative: we inject the callback entry into HW
//! thread 0 at the top of a scheduler round when it's safe (not mid-export,
//! not already inside another interrupt). When the callback returns to
//! [`xenia_cpu::context::LR_HALT_SENTINEL`] the main loop restores the saved
//! [`PpcContext`] fields and the HW thread picks up where it left off.

use std::collections::VecDeque;
use std::time::{Duration, Instant};

use xenia_cpu::context::{CrField, PpcContext};
use xenia_cpu::ThreadRef;

/// Interrupt source code passed to the guest callback for a v-sync.
pub const INTERRUPT_SOURCE_VSYNC: u32 = 0;
/// Interrupt source code passed to the guest callback for a
/// command-processor (`PM4_INTERRUPT`) interrupt.
pub const INTERRUPT_SOURCE_CP: u32 = 1;

/// Guest-registered V-sync / graphics-interrupt callback (from
/// `VdSetGraphicsInterruptCallback`).
#[derive(Debug, Clone, Copy)]
pub struct GraphicsInterruptCallback {
    /// Guest address of the callback entry point.
    pub callback_pc: u32,
    /// Opaque guest value registered alongside the callback.
    pub user_data: u32,
}

/// Snapshot of the fields we mutate when diverting a HW thread into an
/// interrupt callback. Restored when the callback returns to
/// `LR_HALT_SENTINEL`.
///
/// We save **all PPC volatile registers** (r0, r2–r12) plus `r1` (SP),
/// `pc`, `lr`, `ctr`, and `cr`. Non-volatile regs (r13–r31) are preserved
/// by the callback's own `__savegprlr_N` prologue/epilogue per the PPC
/// ELF ABI, so they don't need stashing here.
///
/// **SP (`gpr[1]`) is included because the injector decrements it by
/// [`CALLBACK_STACK_PAD`] before the callback runs** — see that constant's
/// docs for why. Without this, the callback's `__savegprlr_N` prologue
/// overwrites the interrupted function's own stack-saved LR (which lives
/// at `[r1 - 8]`), and when the interrupted function later tries to
/// return, `bclr` jumps to `LR_HALT_SENTINEL` and the thread exits
/// prematurely.
///
/// NOTE(review): XER and the volatile floating-point / vector registers are
/// not part of this snapshot — confirm that is intentional for the callbacks
/// being injected.
#[derive(Debug, Clone, Copy)]
pub struct SavedCallbackCtx {
    /// Program counter of the interrupted code.
    pub pc: u32,
    /// Link register of the interrupted code.
    pub lr: u64,
    /// Count register of the interrupted code.
    pub ctr: u64,
    /// All PPC volatile GPRs (r0, r2–r12) plus r1 (SP) in index order.
    /// Index 0 = r0, 1 = r1, 2 = r2, …, 12 = r12. The array has exactly
    /// 13 slots; r13–r31 are not stored.
    pub gprs: [u64; 13],
    /// All eight condition-register fields.
    pub cr: [CrField; 8],
    /// Interrupt source code this snapshot was taken for.
    pub source: u32,
}

/// Bytes the injector reserves below the interrupted thread's SP before
/// running the ISR callback. Matches Canary's
/// [`Processor::Execute`](../../../../xenia-canary/src/xenia/cpu/processor.cc#L383)
/// which decrements `r[1]` by `64 + 112 = 176` before
/// `function->Call(...)` and restores afterwards. The pad must be larger
/// than any plausible sum of `__savegprlr_N`'s save-area (up to 64 B for
/// r25-r31 + 8 B for LR) plus the callback's own `stwu r1,-N(r1)` frame
/// (the Sylpheed vsync ISR uses 128 B).
///
/// Pre-fix: the ISR's `__savegprlr_25` stored the callback's saved LR
/// (= `LR_HALT_SENTINEL`, from injection) at `[r1 - 8]` — exactly where
/// the interrupted thread's current `bl`-saved LR lived. The
/// interrupted function's return site got stomped with `SENTINEL`, so
/// `__restgprlr_N -> bclr` jumped to the halt sentinel and the thread
/// exited through the wrong path. Manifested in Sylpheed as tid=5
/// (producer for the render queue) terminating at cycle 7.5M, starving
/// both `0x10fc` (main's completion wait) and the PKEVENT that tid=6
/// polls — no second `VdSwap`, no first pixel.
pub const CALLBACK_STACK_PAD: u32 = 64 + 112;

impl SavedCallbackCtx {
    /// Snapshot `pc`, `lr`, `ctr`, r0–r12 and `cr` from `ctx`, tagging the
    /// snapshot with the interrupt `source` it is being taken for.
    ///
    /// `ctx` is only read; the caller diverts the thread afterwards.
    pub fn capture(ctx: &PpcContext, source: u32) -> Self {
        let mut gprs = [0u64; 13];
        // Copy exactly as many leading GPRs as the snapshot holds (r0..=r12).
        let saved_len = gprs.len();
        gprs.copy_from_slice(&ctx.gpr[..saved_len]);
        Self {
            pc: ctx.pc,
            lr: ctx.lr,
            ctr: ctx.ctr,
            gprs,
            cr: ctx.cr,
            source,
        }
    }

    /// Write the snapshot back into `ctx`: `pc`, `lr`, `ctr`, r0–r12 and
    /// `cr`. Registers r13–r31 in `ctx` are left untouched. `source` is
    /// bookkeeping only and is not written anywhere.
    pub fn restore(self, ctx: &mut PpcContext) {
        ctx.pc = self.pc;
        ctx.lr = self.lr;
        ctx.ctr = self.ctr;
        let saved_len = self.gprs.len();
        ctx.gpr[..saved_len].copy_from_slice(&self.gprs);
        ctx.cr = self.cr;
    }
}

/// Maximum pending sources held in the FIFO queue before new ones are
/// dropped. Four is enough to absorb a short burst (a few v-syncs arriving
/// while HW 0 is mid-callback from a prior one) without letting runaway
/// delivery swamp the guest.
pub const INTERRUPT_QUEUE_CAP: usize = 4;

// Design notes for `InterruptState` (declared right after the
// `PendingLocalIrq` alias). Kept as a plain comment so rustdoc does not
// attach these notes to the alias.
//
// All interrupt bookkeeping — single field on `KernelState`.
//
// **First-Pixels M2 (2026-04-20)** — changed from a single-slot
// `pending_source: Option<u32>` coalesce to a bounded FIFO so bursts
// don't drop silently, and dropped `VSYNC_INSTR_PERIOD` from 500k to
// 150k so cadence approximates 60 Hz at the current ~10 MIPS interpreter
// throughput. Combined with the `HwState::ServicingIrq` variant added to
// `xenia-cpu::scheduler`, interrupts can now be delivered even when HW 0
// is `Blocked(WaitAny)` — the injector stashes the block into the new
// variant and the restore path re-blocks when the callback returns,
// unless a `wake()` during the callback resolved the wait.

/// M2.5 — per-slot pending-IRQ bitmask. Each `AtomicU8` holds one bit per
/// interrupt source (currently 2 sources: VSYNC=bit 0, CP=bit 1) destined
/// for that specific HW slot. Used by the M3 parallel path: T_main (or
/// the GPU thread) sets a bit Release on the target slot's atomic; the
/// target T_cpu_i checks the bit Acquire at its quantum boundary and
/// self-injects without taking another thread's slot lock.
///
/// The 6-element fixed-size array mirrors `xenia_cpu::scheduler::HW_THREAD_COUNT`.
pub type PendingLocalIrq = [std::sync::atomic::AtomicU8; xenia_cpu::scheduler::HW_THREAD_COUNT]; #[derive(Debug, Default)] pub struct InterruptState { /// Registered callback (set by `VdSetGraphicsInterruptCallback`). pub callback: Option, /// Bounded FIFO of pending interrupt sources awaiting injection. /// Push-back on queue, pop-front on inject. Over-cap pushes drop. pub pending: VecDeque, /// When `Some`, some HW thread is currently running a callback; on /// return-to-sentinel we restore this and clear the flag. pub saved: Option, /// Which guest thread the current callback was injected into. /// Required because we no longer anchor delivery to HW 0 — any /// non-Exited thread is a valid target. Meaningful only while /// `saved.is_some()`. Stored as a `ThreadRef` so per-slot /// runqueues don't get ambiguous addressing. pub injected_ref: Option, /// Monotonic count of delivered interrupts. pub delivered: u64, /// Dropped interrupts (callback unset, queue full, or thread /// exited/idle at inject time). pub dropped: u64, /// Instruction-count accumulator for the synthetic v-sync ticker /// (legacy path used by unit tests via `tick_vsync_instr`). Production /// uses `tick_vsync_wallclock` instead — see [`KRNBUG-D08`]. pub vsync_accumulator: u64, /// Last observed instruction count for the legacy instruction-count /// ticker. `tick_vsync_instr` diffs against this to advance /// `vsync_accumulator`. pub last_instr_count: u64, /// Wall-clock anchor for the production v-sync ticker. `None` until /// the first `tick_vsync_wallclock` call (lazy init so unit tests /// that never invoke that function don't construct an Instant). /// Each call fires `(elapsed / VSYNC_PERIOD)` v-syncs and advances /// the anchor by that many full periods. pub last_vsync_instant: Option, /// M2.5 — per-slot pending-IRQ bits. Set by the producer (M3's /// IRQ-routing logic on `T_main`) with `Release`; consumed by the /// target T_cpu_i with `Acquire` at quantum boundary. 
Unused under /// the lockstep path (M2's single-host-thread model still uses /// `pending` + `try_inject_graphics_interrupt`); the field is wired /// here so M3's per-HW-thread path is a flag flip, not a refactor. pub pending_local_irq: PendingLocalIrq, } /// How many guest instructions correspond to one synthetic v-sync. /// /// **Legacy** — drives `tick_vsync_instr` only. Production uses /// `tick_vsync_wallclock` with [`VSYNC_PERIOD`]. Kept because audit M11 /// observed this proxy drifts from 629 v-syncs/100M lockstep down to ~2 /// under `--parallel`, where the dispatcher executes more PPC instructions /// per tick call. Unit tests still drive the instruction-count ticker for /// determinism. pub const VSYNC_INSTR_PERIOD: u64 = 150_000; /// Wall-clock period for the **production** v-sync ticker. 16.667 ms /// targets exactly 60 Hz. KRNBUG-D08 — converting from the /// instruction-count proxy fixes the `--parallel` rate drop while /// keeping lockstep cadence stable (instruction-count was *also* an /// approximation; wall-clock is the canonical Xbox 360 v-sync source). pub const VSYNC_PERIOD: Duration = Duration::from_nanos(16_666_667); impl InterruptState { /// Record a new callback registration. pub fn set_callback(&mut self, callback_pc: u32, user_data: u32) { self.callback = Some(GraphicsInterruptCallback { callback_pc, user_data, }); } /// Queue an interrupt for the next safe injection point. pub fn queue_interrupt(&mut self, source: u32) { if self.callback.is_none() { self.dropped += 1; return; } if self.pending.len() >= INTERRUPT_QUEUE_CAP { self.dropped += 1; return; } self.pending.push_back(source); } /// Peek at the next pending source without removing it. pub fn peek_next(&self) -> Option { self.pending.front().copied() } /// Pop the next pending source (called by the injector after it has /// committed to dispatching it). pub fn take_next(&mut self) -> Option { self.pending.pop_front() } /// **Legacy** — instruction-count v-sync ticker. 
Kept for unit tests /// that need a deterministic clock source. Production code calls /// `tick_vsync_wallclock` instead. Returns `true` if at least one /// v-sync was queued. pub fn tick_vsync_instr(&mut self, current_instr_count: u64) -> bool { let delta = current_instr_count.saturating_sub(self.last_instr_count); self.last_instr_count = current_instr_count; self.vsync_accumulator = self.vsync_accumulator.saturating_add(delta); if self.vsync_accumulator < VSYNC_INSTR_PERIOD { return false; } let periods = self.vsync_accumulator / VSYNC_INSTR_PERIOD; self.vsync_accumulator %= VSYNC_INSTR_PERIOD; for _ in 0..periods { self.queue_interrupt(INTERRUPT_SOURCE_VSYNC); } true } /// **Production** — wall-clock v-sync ticker. Fires /// `floor(elapsed / VSYNC_PERIOD)` v-syncs since the last call and /// advances the anchor by that many full periods (so a long pause /// doesn't lose all the v-syncs it spans, and a quick succession of /// calls doesn't over-fire). KRNBUG-D08 — replaces the legacy /// instruction-count proxy that drifted under `--parallel`. /// Returns `true` if at least one v-sync was queued. pub fn tick_vsync_wallclock(&mut self) -> bool { let now = Instant::now(); let anchor = match self.last_vsync_instant { Some(t) => t, None => { self.last_vsync_instant = Some(now); return false; } }; let elapsed = now.saturating_duration_since(anchor); let period_ns = VSYNC_PERIOD.as_nanos() as u64; let elapsed_ns = elapsed.as_nanos() as u64; let periods = elapsed_ns / period_ns; if periods == 0 { return false; } // Advance the anchor by the number of full periods consumed, // not to `now`. That lets a long pause distribute its missed // v-syncs evenly without lazy-batching the entire backlog into // one tick (over-fire would interleave dozens of callback // injections back-to-back). Cap at INTERRUPT_QUEUE_CAP so a // clock that jumped forward (system suspend) doesn't try to // queue more than the FIFO can hold. 
let advance = Duration::from_nanos(periods * period_ns); self.last_vsync_instant = Some(anchor + advance); let to_queue = (periods as usize).min(INTERRUPT_QUEUE_CAP); for _ in 0..to_queue { self.queue_interrupt(INTERRUPT_SOURCE_VSYNC); } true } /// Is HW thread 0 currently in a callback? pub fn is_in_callback(&self) -> bool { self.saved.is_some() } } #[cfg(test)] mod tests { use super::*; #[test] fn queue_interrupt_drops_without_callback() { let mut s = InterruptState::default(); s.queue_interrupt(INTERRUPT_SOURCE_VSYNC); assert_eq!(s.dropped, 1); assert!(s.pending.is_empty()); } #[test] fn queue_interrupt_fifo_preserves_order() { let mut s = InterruptState::default(); s.set_callback(0x1000, 0xAB); s.queue_interrupt(INTERRUPT_SOURCE_VSYNC); s.queue_interrupt(INTERRUPT_SOURCE_CP); s.queue_interrupt(INTERRUPT_SOURCE_VSYNC); assert_eq!(s.dropped, 0); // FIFO: take_next hands them out in push order. assert_eq!(s.take_next(), Some(INTERRUPT_SOURCE_VSYNC)); assert_eq!(s.take_next(), Some(INTERRUPT_SOURCE_CP)); assert_eq!(s.take_next(), Some(INTERRUPT_SOURCE_VSYNC)); assert_eq!(s.take_next(), None); } #[test] fn queue_interrupt_caps_at_queue_size() { let mut s = InterruptState::default(); s.set_callback(0x1000, 0xAB); for _ in 0..INTERRUPT_QUEUE_CAP { s.queue_interrupt(INTERRUPT_SOURCE_VSYNC); } // Over-cap: drops rather than evicting the oldest. 
s.queue_interrupt(INTERRUPT_SOURCE_VSYNC); s.queue_interrupt(INTERRUPT_SOURCE_VSYNC); assert_eq!(s.dropped, 2); assert_eq!(s.pending.len(), INTERRUPT_QUEUE_CAP); } #[test] fn tick_vsync_instr_fires_at_new_150k_threshold() { let mut s = InterruptState::default(); s.set_callback(0x1000, 0xAB); assert_eq!(VSYNC_INSTR_PERIOD, 150_000); assert!(!s.tick_vsync_instr(VSYNC_INSTR_PERIOD - 1)); assert!(s.pending.is_empty()); assert!(s.tick_vsync_instr(VSYNC_INSTR_PERIOD)); assert_eq!(s.peek_next(), Some(INTERRUPT_SOURCE_VSYNC)); } #[test] fn tick_vsync_instr_drains_multiple_periods_in_one_call() { // Long kernel export → big instr delta → multiple v-syncs must // be delivered, not lost. let mut s = InterruptState::default(); s.set_callback(0x1000, 0xAB); assert!(s.tick_vsync_instr(VSYNC_INSTR_PERIOD * 3 + 10)); assert_eq!(s.pending.len(), 3); } #[test] fn tick_vsync_wallclock_first_call_sets_anchor() { // First call seeds the anchor and never fires. KRNBUG-D08: // initial wall-clock state has no prior reference, so we can't // know the elapsed delta yet. let mut s = InterruptState::default(); s.set_callback(0x1000, 0xAB); assert!(!s.tick_vsync_wallclock()); assert!(s.pending.is_empty()); assert!(s.last_vsync_instant.is_some()); } #[test] fn tick_vsync_wallclock_fires_after_period() { // Sleeps one full v-sync period (16.667 ms) and verifies a // single v-sync is queued. Sleep is fine in --release tests // (one-shot, ~17 ms cost). let mut s = InterruptState::default(); s.set_callback(0x1000, 0xAB); s.tick_vsync_wallclock(); // seed std::thread::sleep(VSYNC_PERIOD + Duration::from_millis(2)); assert!(s.tick_vsync_wallclock()); assert_eq!(s.pending.len(), 1); assert_eq!(s.peek_next(), Some(INTERRUPT_SOURCE_VSYNC)); } #[test] fn tick_vsync_wallclock_caps_burst_at_queue_cap() { // A multi-period elapsed window queues at most // INTERRUPT_QUEUE_CAP v-syncs (the FIFO can't hold more anyway). // Sleep 6 periods (~100 ms), expect INTERRUPT_QUEUE_CAP queued. 
let mut s = InterruptState::default(); s.set_callback(0x1000, 0xAB); s.tick_vsync_wallclock(); // seed std::thread::sleep(VSYNC_PERIOD * 6 + Duration::from_millis(2)); assert!(s.tick_vsync_wallclock()); assert_eq!(s.pending.len(), INTERRUPT_QUEUE_CAP); } /// Simulates what the main loop does: inject, execute guest code up /// to the sentinel, restore. Uses a single-instruction `bclr` callback /// — the interpreter sees `pc == callback_pc`, steps, and the blr /// instruction writes `lr` into `pc`, which equals `LR_HALT_SENTINEL` /// → main loop detects and triggers restore. #[test] fn inject_restore_roundtrip_smoke() { let mut ctx = PpcContext::new(); ctx.pc = 0x1000_0000; ctx.lr = 0xCAFE_BABE; ctx.gpr[3] = 0x1234; ctx.gpr[4] = 0x5678; let mut s = InterruptState::default(); s.set_callback(0x2000_0000, 0xDEAD); // Simulate main loop inject: save ctx fields, divert pc/lr/r3/r4. let saved = SavedCallbackCtx::capture(&ctx, INTERRUPT_SOURCE_VSYNC); s.saved = Some(saved); ctx.pc = 0x2000_0000; ctx.lr = xenia_cpu::context::LR_HALT_SENTINEL; ctx.gpr[3] = INTERRUPT_SOURCE_VSYNC as u64; ctx.gpr[4] = 0xDEAD; assert!(s.is_in_callback()); // Guest callback "runs" to the sentinel — simulate by writing // pc = lr (what `blr` would do). 
ctx.pc = ctx.lr as u32; // Main loop detects pc == LR_HALT_SENTINEL while in_callback: let saved = s.saved.take().unwrap(); saved.restore(&mut ctx); s.delivered += 1; assert_eq!(ctx.pc, 0x1000_0000); assert_eq!(ctx.lr, 0xCAFE_BABE); assert_eq!(ctx.gpr[3], 0x1234); assert_eq!(ctx.gpr[4], 0x5678); assert!(!s.is_in_callback()); assert_eq!(s.delivered, 1); } #[test] fn saved_ctx_roundtrip() { let mut ctx = PpcContext::new(); ctx.pc = 0x11223344; ctx.lr = 0xDEADBEEF; ctx.gpr[3] = 0xAAAA; ctx.gpr[4] = 0xBBBB; let saved = SavedCallbackCtx::capture(&ctx, INTERRUPT_SOURCE_VSYNC); ctx.pc = 0; ctx.lr = 0; ctx.gpr[3] = 0; ctx.gpr[4] = 0; saved.restore(&mut ctx); assert_eq!(ctx.pc, 0x11223344); assert_eq!(ctx.lr, 0xDEADBEEF); assert_eq!(ctx.gpr[3], 0xAAAA); assert_eq!(ctx.gpr[4], 0xBBBB); } /// Full volatile-GPR + SP roundtrip. Regression test for the /// 2026-04-24 IRQ-injection fix: the ISR callback's prologue clobbers /// `[r1 - 8]` on the interrupted thread's stack unless the injector /// pre-decrements SP by [`CALLBACK_STACK_PAD`] and the saved ctx puts /// SP (and the rest of the PPC volatile set) back on return. #[test] fn saved_ctx_covers_sp_and_all_volatile_gprs() { let mut ctx = PpcContext::new(); ctx.pc = 0xAAAA_BBBB; ctx.lr = 0x1111_2222; ctx.ctr = 0x3333_4444; for i in 0..13 { ctx.gpr[i] = 0x1000 + i as u64; } // r13..r31 are non-volatile and should survive the callback's own // save/restore — the saved ctx deliberately does NOT cover them. for i in 13..32 { ctx.gpr[i] = 0xDEAD_0000 + i as u64; } let saved = SavedCallbackCtx::capture(&ctx, INTERRUPT_SOURCE_VSYNC); // Simulate injector: flip pc/lr/r1/r3/r4 (what the real injector // actually does — see try_inject_graphics_interrupt in main.rs). 
ctx.pc = 0xCAFE; ctx.lr = xenia_cpu::context::LR_HALT_SENTINEL; ctx.gpr[1] = ctx.gpr[1].wrapping_sub(CALLBACK_STACK_PAD as u64); ctx.gpr[3] = INTERRUPT_SOURCE_VSYNC as u64; ctx.gpr[4] = 0xBEEF; // Simulate callback clobbering a few volatile regs that aren't // part of the "obviously diverted" set. ctx.gpr[0] = 0xFEED_FACE; ctx.gpr[7] = 0x9999; ctx.gpr[12] = 0xABCD; saved.restore(&mut ctx); // All volatile GPRs restored to pre-injection. for i in 0..13 { assert_eq!( ctx.gpr[i], 0x1000 + i as u64, "volatile r{} clobbered by callback was not restored", i ); } // SP specifically back to the pre-pad value. assert_eq!(ctx.gpr[1], 0x1001, "SP must be restored to pre-injection"); // Non-volatile regs were never captured; they stay as the callback // left them (here, untouched because we didn't modify 13..32). for i in 13..32 { assert_eq!(ctx.gpr[i], 0xDEAD_0000 + i as u64); } assert_eq!(ctx.pc, 0xAAAA_BBBB); assert_eq!(ctx.lr, 0x1111_2222); assert_eq!(ctx.ctr, 0x3333_4444); } }