//! Graphics interrupt + synthetic v-sync bookkeeping (P6). //! //! The Xbox 360 graphics driver calls `VdSetGraphicsInterruptCallback` to //! register a single per-process callback that the OS invokes on: //! //! 1. **V-sync** — at 60 Hz; source code 0 (`INTERRUPT_SOURCE_VSYNC`). //! 2. **Command-processor interrupt** — when `PM4_INTERRUPT` fires from the //! guest-issued command stream; source code 1 (`INTERRUPT_SOURCE_CP`). //! //! Canary's [xboxkrnl_video.cc:303-310](xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc#L303-L310) //! dispatches the callback on HW thread 0. We follow the same convention. //! //! The delivery model is cooperative: we inject the callback entry into HW //! thread 0 at the top of a scheduler round when it's safe (not mid-export, //! not already inside another interrupt). When the callback returns to //! [`LR_HALT_SENTINEL`] the main loop restores the saved [`PpcContext`] //! fields and the HW thread picks up where it left off. use std::collections::VecDeque; use xenia_cpu::context::{CrField, PpcContext}; use xenia_cpu::ThreadRef; pub const INTERRUPT_SOURCE_VSYNC: u32 = 0; pub const INTERRUPT_SOURCE_CP: u32 = 1; /// Guest-registered V-sync / graphics-interrupt callback (from /// `VdSetGraphicsInterruptCallback`). #[derive(Debug, Clone, Copy)] pub struct GraphicsInterruptCallback { pub callback_pc: u32, pub user_data: u32, } /// Snapshot of the fields we mutate when diverting a HW thread into an /// interrupt callback. Restored when the callback returns to /// `LR_HALT_SENTINEL`. /// /// We save **all PPC volatile registers** (r0, r2–r12) plus `r1` (SP), /// `pc`, `lr`, `ctr`, and `cr`. Non-volatile regs (r13–r31) are preserved /// by the callback's own `__savegprlr_N` prologue/epilogue per the PPC /// ELF ABI, so they don't need stashing here. /// /// **SP (`gpr[1]`) is included because the injector decrements it by /// [`CALLBACK_STACK_PAD`] before the callback runs** — see that constant's /// docs for why. Without this, the callback's `__savegprlr_N` prologue /// overwrites the interrupted function's own stack-saved LR (which lives /// at `[r1 - 8]`), and when the interrupted function later tries to /// return, `bclr` jumps to `LR_HALT_SENTINEL` and the thread exits /// prematurely. #[derive(Debug, Clone, Copy)] pub struct SavedCallbackCtx { pub pc: u32, pub lr: u64, pub ctr: u64, /// All PPC volatile GPRs (r0, r2–r12) plus r1 (SP) in index order. /// Index 0 = r0, 1 = r1, 2 = r2, …, 12 = r12. Index 13..32 unused. pub gprs: [u64; 13], pub cr: [CrField; 8], pub source: u32, } /// Bytes the injector reserves below the interrupted thread's SP before /// running the ISR callback. Matches Canary's /// [`Processor::Execute`](../../../../xenia-canary/src/xenia/cpu/processor.cc#L383) /// which decrements `r[1]` by `64 + 112 = 176` before /// `function->Call(...)` and restores afterwards. The pad must be larger /// than any plausible sum of `__savegprlr_N`'s save-area (up to 64 B for /// r25-r31 + 8 B for LR) plus the callback's own `stwu r1,-N(r1)` frame /// (the Sylpheed vsync ISR uses 128 B). /// /// Pre-fix: the ISR's `__savegprlr_25` stored the callback's saved LR /// (= `LR_HALT_SENTINEL`, from injection) at `[r1 - 8]` — exactly where /// the interrupted thread's current `bl`-saved LR lived. The /// interrupted function's return site got stomped with `SENTINEL`, so /// `__restgprlr_N -> bclr` jumped to the halt sentinel and the thread /// exited through the wrong path. Manifested in Sylpheed as tid=5 /// (producer for the render queue) terminating at cycle 7.5M, starving /// both `0x10fc` (main's completion wait) and the PKEVENT that tid=6 /// polls — no second `VdSwap`, no first pixel. pub const CALLBACK_STACK_PAD: u32 = 64 + 112; impl SavedCallbackCtx { pub fn capture(ctx: &PpcContext, source: u32) -> Self { let mut gprs = [0u64; 13]; for i in 0..13 { gprs[i] = ctx.gpr[i]; } Self { pc: ctx.pc, lr: ctx.lr, ctr: ctx.ctr, gprs, cr: ctx.cr, source, } } pub fn restore(self, ctx: &mut PpcContext) { ctx.pc = self.pc; ctx.lr = self.lr; ctx.ctr = self.ctr; for i in 0..13 { ctx.gpr[i] = self.gprs[i]; } ctx.cr = self.cr; } } /// Maximum pending sources held in the FIFO queue before new ones are /// dropped. Four is enough to absorb a short burst (a few v-syncs arriving /// while HW 0 is mid-callback from a prior one) without letting runaway /// delivery swamp the guest. pub const INTERRUPT_QUEUE_CAP: usize = 4; /// All interrupt bookkeeping — single field on `KernelState`. /// /// **First-Pixels M2 (2026-04-20)** — changed from a single-slot /// `pending_source: Option` coalesce to a bounded FIFO so bursts /// don't drop silently, and dropped `VSYNC_INSTR_PERIOD` from 500k to /// 150k so cadence approximates 60 Hz at the current ~10 MIPS interpreter /// throughput. Combined with the `HwState::ServicingIrq` variant added to /// `xenia-cpu::scheduler`, interrupts can now be delivered even when HW 0 /// is `Blocked(WaitAny)` — the injector stashes the block into the new /// variant and the restore path re-blocks when the callback returns, /// unless a `wake()` during the callback resolved the wait. /// M2.5 — per-slot pending-IRQ bitmask. Each `AtomicU8` holds one bit per /// interrupt source (currently 2 sources: VSYNC=bit 0, CP=bit 1) destined /// for that specific HW slot. Used by the M3 parallel path: T_main (or /// the GPU thread) sets a bit Release on the target slot's atomic; the /// target T_cpu_i checks the bit Acquire at its quantum boundary and /// self-injects without taking another thread's slot lock. /// /// The 6-element fixed-size array mirrors `xenia_cpu::scheduler::HW_THREAD_COUNT`. pub type PendingLocalIrq = [std::sync::atomic::AtomicU8; xenia_cpu::scheduler::HW_THREAD_COUNT]; #[derive(Debug, Default)] pub struct InterruptState { /// Registered callback (set by `VdSetGraphicsInterruptCallback`). pub callback: Option, /// Bounded FIFO of pending interrupt sources awaiting injection. /// Push-back on queue, pop-front on inject. Over-cap pushes drop. pub pending: VecDeque, /// When `Some`, some HW thread is currently running a callback; on /// return-to-sentinel we restore this and clear the flag. pub saved: Option, /// Which guest thread the current callback was injected into. /// Required because we no longer anchor delivery to HW 0 — any /// non-Exited thread is a valid target. Meaningful only while /// `saved.is_some()`. Stored as a `ThreadRef` so per-slot /// runqueues don't get ambiguous addressing. pub injected_ref: Option, /// Monotonic count of delivered interrupts. pub delivered: u64, /// Dropped interrupts (callback unset, queue full, or thread /// exited/idle at inject time). pub dropped: u64, /// Instruction-count accumulator for the synthetic v-sync ticker. At /// `VSYNC_INSTR_PERIOD` the main loop pushes an `INTERRUPT_SOURCE_VSYNC` /// onto `pending` and resets. pub vsync_accumulator: u64, /// Last observed instruction count — `tick_vsync` diffs against /// this to advance `vsync_accumulator`. pub last_instr_count: u64, /// M2.5 — per-slot pending-IRQ bits. Set by the producer (M3's /// IRQ-routing logic on `T_main`) with `Release`; consumed by the /// target T_cpu_i with `Acquire` at quantum boundary. Unused under /// the lockstep path (M2's single-host-thread model still uses /// `pending` + `try_inject_graphics_interrupt`); the field is wired /// here so M3's per-HW-thread path is a flag flip, not a refactor. pub pending_local_irq: PendingLocalIrq, } /// How many guest instructions correspond to one synthetic v-sync. /// /// Targets **~60 Hz at the post-Tier-3 interpreter throughput (~10 MIPS)**: /// 10e6 instr/s ÷ 60 Hz ≈ 167k — we use 150k to give a small cushion. /// Before M2 this was 500k (~20 Hz), which was enough for games that /// don't gate anything on v-sync but not enough for titles like Sylpheed /// whose main loop waits on the v-sync callback to signal an event every /// frame. pub const VSYNC_INSTR_PERIOD: u64 = 150_000; impl InterruptState { /// Record a new callback registration. pub fn set_callback(&mut self, callback_pc: u32, user_data: u32) { self.callback = Some(GraphicsInterruptCallback { callback_pc, user_data, }); } /// Queue an interrupt for the next safe injection point. pub fn queue_interrupt(&mut self, source: u32) { if self.callback.is_none() { self.dropped += 1; return; } if self.pending.len() >= INTERRUPT_QUEUE_CAP { self.dropped += 1; return; } self.pending.push_back(source); } /// Peek at the next pending source without removing it. pub fn peek_next(&self) -> Option { self.pending.front().copied() } /// Pop the next pending source (called by the injector after it has /// committed to dispatching it). pub fn take_next(&mut self) -> Option { self.pending.pop_front() } /// Advance the v-sync accumulator by the delta since the last call. /// Returns `true` if a new v-sync was queued. pub fn tick_vsync(&mut self, current_instr_count: u64) -> bool { let delta = current_instr_count.saturating_sub(self.last_instr_count); self.last_instr_count = current_instr_count; self.vsync_accumulator = self.vsync_accumulator.saturating_add(delta); if self.vsync_accumulator < VSYNC_INSTR_PERIOD { return false; } // Multiple periods may have elapsed in a single tick call if a // large instruction delta went by (e.g. a long export). Drain // the accumulator fully so we don't lag behind. let periods = self.vsync_accumulator / VSYNC_INSTR_PERIOD; self.vsync_accumulator %= VSYNC_INSTR_PERIOD; for _ in 0..periods { self.queue_interrupt(INTERRUPT_SOURCE_VSYNC); } true } /// Is HW thread 0 currently in a callback? pub fn is_in_callback(&self) -> bool { self.saved.is_some() } } #[cfg(test)] mod tests { use super::*; #[test] fn queue_interrupt_drops_without_callback() { let mut s = InterruptState::default(); s.queue_interrupt(INTERRUPT_SOURCE_VSYNC); assert_eq!(s.dropped, 1); assert!(s.pending.is_empty()); } #[test] fn queue_interrupt_fifo_preserves_order() { let mut s = InterruptState::default(); s.set_callback(0x1000, 0xAB); s.queue_interrupt(INTERRUPT_SOURCE_VSYNC); s.queue_interrupt(INTERRUPT_SOURCE_CP); s.queue_interrupt(INTERRUPT_SOURCE_VSYNC); assert_eq!(s.dropped, 0); // FIFO: take_next hands them out in push order. assert_eq!(s.take_next(), Some(INTERRUPT_SOURCE_VSYNC)); assert_eq!(s.take_next(), Some(INTERRUPT_SOURCE_CP)); assert_eq!(s.take_next(), Some(INTERRUPT_SOURCE_VSYNC)); assert_eq!(s.take_next(), None); } #[test] fn queue_interrupt_caps_at_queue_size() { let mut s = InterruptState::default(); s.set_callback(0x1000, 0xAB); for _ in 0..INTERRUPT_QUEUE_CAP { s.queue_interrupt(INTERRUPT_SOURCE_VSYNC); } // Over-cap: drops rather than evicting the oldest. s.queue_interrupt(INTERRUPT_SOURCE_VSYNC); s.queue_interrupt(INTERRUPT_SOURCE_VSYNC); assert_eq!(s.dropped, 2); assert_eq!(s.pending.len(), INTERRUPT_QUEUE_CAP); } #[test] fn tick_vsync_fires_at_new_150k_threshold() { let mut s = InterruptState::default(); s.set_callback(0x1000, 0xAB); assert_eq!(VSYNC_INSTR_PERIOD, 150_000); assert!(!s.tick_vsync(VSYNC_INSTR_PERIOD - 1)); assert!(s.pending.is_empty()); assert!(s.tick_vsync(VSYNC_INSTR_PERIOD)); assert_eq!(s.peek_next(), Some(INTERRUPT_SOURCE_VSYNC)); } #[test] fn tick_vsync_drains_multiple_periods_in_one_call() { // Long kernel export → big instr delta → multiple v-syncs must // be delivered, not lost. let mut s = InterruptState::default(); s.set_callback(0x1000, 0xAB); assert!(s.tick_vsync(VSYNC_INSTR_PERIOD * 3 + 10)); assert_eq!(s.pending.len(), 3); } /// Simulates what the main loop does: inject, execute guest code up /// to the sentinel, restore. Uses a single-instruction `bclr` callback /// — the interpreter sees `pc == callback_pc`, steps, and the blr /// instruction writes `lr` into `pc`, which equals `LR_HALT_SENTINEL` /// → main loop detects and triggers restore. #[test] fn inject_restore_roundtrip_smoke() { let mut ctx = PpcContext::new(); ctx.pc = 0x1000_0000; ctx.lr = 0xCAFE_BABE; ctx.gpr[3] = 0x1234; ctx.gpr[4] = 0x5678; let mut s = InterruptState::default(); s.set_callback(0x2000_0000, 0xDEAD); // Simulate main loop inject: save ctx fields, divert pc/lr/r3/r4. let saved = SavedCallbackCtx::capture(&ctx, INTERRUPT_SOURCE_VSYNC); s.saved = Some(saved); ctx.pc = 0x2000_0000; ctx.lr = xenia_cpu::context::LR_HALT_SENTINEL; ctx.gpr[3] = INTERRUPT_SOURCE_VSYNC as u64; ctx.gpr[4] = 0xDEAD; assert!(s.is_in_callback()); // Guest callback "runs" to the sentinel — simulate by writing // pc = lr (what `blr` would do). ctx.pc = ctx.lr as u32; // Main loop detects pc == LR_HALT_SENTINEL while in_callback: let saved = s.saved.take().unwrap(); saved.restore(&mut ctx); s.delivered += 1; assert_eq!(ctx.pc, 0x1000_0000); assert_eq!(ctx.lr, 0xCAFE_BABE); assert_eq!(ctx.gpr[3], 0x1234); assert_eq!(ctx.gpr[4], 0x5678); assert!(!s.is_in_callback()); assert_eq!(s.delivered, 1); } #[test] fn saved_ctx_roundtrip() { let mut ctx = PpcContext::new(); ctx.pc = 0x11223344; ctx.lr = 0xDEADBEEF; ctx.gpr[3] = 0xAAAA; ctx.gpr[4] = 0xBBBB; let saved = SavedCallbackCtx::capture(&ctx, INTERRUPT_SOURCE_VSYNC); ctx.pc = 0; ctx.lr = 0; ctx.gpr[3] = 0; ctx.gpr[4] = 0; saved.restore(&mut ctx); assert_eq!(ctx.pc, 0x11223344); assert_eq!(ctx.lr, 0xDEADBEEF); assert_eq!(ctx.gpr[3], 0xAAAA); assert_eq!(ctx.gpr[4], 0xBBBB); } /// Full volatile-GPR + SP roundtrip. Regression test for the /// 2026-04-24 IRQ-injection fix: the ISR callback's prologue clobbers /// `[r1 - 8]` on the interrupted thread's stack unless the injector /// pre-decrements SP by [`CALLBACK_STACK_PAD`] and the saved ctx puts /// SP (and the rest of the PPC volatile set) back on return. #[test] fn saved_ctx_covers_sp_and_all_volatile_gprs() { let mut ctx = PpcContext::new(); ctx.pc = 0xAAAA_BBBB; ctx.lr = 0x1111_2222; ctx.ctr = 0x3333_4444; for i in 0..13 { ctx.gpr[i] = 0x1000 + i as u64; } // r13..r31 are non-volatile and should survive the callback's own // save/restore — the saved ctx deliberately does NOT cover them. for i in 13..32 { ctx.gpr[i] = 0xDEAD_0000 + i as u64; } let saved = SavedCallbackCtx::capture(&ctx, INTERRUPT_SOURCE_VSYNC); // Simulate injector: flip pc/lr/r1/r3/r4 (what the real injector // actually does — see try_inject_graphics_interrupt in main.rs). ctx.pc = 0xCAFE; ctx.lr = xenia_cpu::context::LR_HALT_SENTINEL; ctx.gpr[1] = ctx.gpr[1].wrapping_sub(CALLBACK_STACK_PAD as u64); ctx.gpr[3] = INTERRUPT_SOURCE_VSYNC as u64; ctx.gpr[4] = 0xBEEF; // Simulate callback clobbering a few volatile regs that aren't // part of the "obviously diverted" set. ctx.gpr[0] = 0xFEED_FACE; ctx.gpr[7] = 0x9999; ctx.gpr[12] = 0xABCD; saved.restore(&mut ctx); // All volatile GPRs restored to pre-injection. for i in 0..13 { assert_eq!( ctx.gpr[i], 0x1000 + i as u64, "volatile r{} clobbered by callback was not restored", i ); } // SP specifically back to the pre-pad value. assert_eq!(ctx.gpr[1], 0x1001, "SP must be restored to pre-injection"); // Non-volatile regs were never captured; they stay as the callback // left them (here, untouched because we didn't modify 13..32). for i in 13..32 { assert_eq!(ctx.gpr[i], 0xDEAD_0000 + i as u64); } assert_eq!(ctx.pc, 0xAAAA_BBBB); assert_eq!(ctx.lr, 0x1111_2222); assert_eq!(ctx.ctr, 0x3333_4444); } }