Files
xenia-rs/crates/xenia-kernel/src/interrupts.rs
MechaCat02 5f0d6487ea xenia-kernel: HLE expansion, scheduler integration, audit + UI bridge
Major HLE buildout in exports.rs: KeInitializeSemaphore now seeds
count/limit, XexGet{Module,Procedure}Address use distinct
HMODULE_XBOXKRNL/HMODULE_XAM pseudo-handles with a reverse
(ModuleId,ordinal)→thunk_addr map, plus sweeping additions across
sync primitives, file I/O, semaphores, events, threads, and
allocator paths needed to advance Sylpheed past VdSwap=2.

New modules:
  - thread.rs   — ThreadRef + per-thread suspension/wake plumbing
  - interrupts.rs — IRQ delivery, pending-IRQ slots, IPI helpers
  - path.rs     — guest path normalization (D:\\, game:\\, etc.)
  - audit.rs    — --trace-handles harness backing the handle audit
  - ui_bridge.rs — kernel-side endpoint of the xenia-ui bridge
                   (input snapshots, framebuffer publish handles)

state.rs grows to own the HW-slot scheduler state, the new audit /
UI bridge handles, and the per-handle reverse maps. xam.rs and
objects.rs follow suit for the HLE additions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 16:29:00 +02:00

425 lines
17 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! Graphics interrupt + synthetic v-sync bookkeeping (P6).
//!
//! The Xbox 360 graphics driver calls `VdSetGraphicsInterruptCallback` to
//! register a single per-process callback that the OS invokes on:
//!
//! 1. **V-sync** — at 60 Hz; source code 0 (`INTERRUPT_SOURCE_VSYNC`).
//! 2. **Command-processor interrupt** — when `PM4_INTERRUPT` fires from the
//! guest-issued command stream; source code 1 (`INTERRUPT_SOURCE_CP`).
//!
//! Canary's [xboxkrnl_video.cc:303-310](xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc#L303-L310)
//! dispatches the callback on HW thread 0. We follow the same convention.
//!
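//! The delivery model is cooperative: we inject the callback entry into HW
//! thread 0 at the top of a scheduler round when it's safe (not mid-export,
//! not already inside another interrupt). When the callback returns to
//! [`LR_HALT_SENTINEL`] the main loop restores the saved [`PpcContext`]
//! fields and the HW thread picks up where it left off.
//!
//! NOTE(review): the "HW thread 0" wording above disagrees with the docs on
//! `InterruptState::injected_ref`, which say delivery is no longer anchored
//! to HW 0 and that any non-Exited thread is a valid target — confirm which
//! description is current and update the other.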
use std::collections::VecDeque;
use xenia_cpu::context::{CrField, PpcContext};
use xenia_cpu::ThreadRef;
pub const INTERRUPT_SOURCE_VSYNC: u32 = 0;
pub const INTERRUPT_SOURCE_CP: u32 = 1;
/// Guest-registered V-sync / graphics-interrupt callback (from
/// `VdSetGraphicsInterruptCallback`).
///
/// Stored in `InterruptState::callback` by `InterruptState::set_callback`.
#[derive(Debug, Clone, Copy)]
pub struct GraphicsInterruptCallback {
/// Guest address of the callback entry point, as registered.
pub callback_pc: u32,
/// Opaque value registered together with the callback.
/// NOTE(review): the unit tests in this file model the injector as loading
/// this into `r4` before running the callback — confirm against the real
/// injector.
pub user_data: u32,
}
/// Snapshot of the fields we mutate when diverting a HW thread into an
/// interrupt callback. Restored when the callback returns to
/// `LR_HALT_SENTINEL`.
///
/// We save **all PPC volatile registers** (r0, r2–r12) plus `r1` (SP),
/// `pc`, `lr`, `ctr`, and `cr`. Non-volatile regs (r13–r31) are preserved
/// by the callback's own `__savegprlr_N` prologue/epilogue per the PPC
/// ELF ABI, so they don't need stashing here.
///
/// **SP (`gpr[1]`) is included because the injector decrements it by
/// [`CALLBACK_STACK_PAD`] before the callback runs** — see that constant's
/// docs for why. Without this, the callback's `__savegprlr_N` prologue
/// overwrites the interrupted function's own stack-saved LR (which lives
/// at `[r1 - 8]`), and when the interrupted function later tries to
/// return, `bclr` jumps to `LR_HALT_SENTINEL` and the thread exits
/// prematurely.
#[derive(Debug, Clone, Copy)]
pub struct SavedCallbackCtx {
/// Program counter at capture time.
pub pc: u32,
/// Link register at capture time.
pub lr: u64,
/// Count register at capture time.
pub ctr: u64,
/// All PPC volatile GPRs (r0, r2–r12) plus r1 (SP) in index order.
/// Index 0 = r0, 1 = r1, 2 = r2, …, 12 = r12. r13–r31 are not stored:
/// the array has exactly 13 entries.
pub gprs: [u64; 13],
/// Condition-register fields at capture time, copied as a whole array.
pub cr: [CrField; 8],
/// Interrupt source passed to `capture`. `restore` does not write it
/// back into the context; it is carried for bookkeeping only.
pub source: u32,
}
/// Bytes the injector reserves below the interrupted thread's SP before
/// running the ISR callback. Matches Canary's
/// [`Processor::Execute`](../../../../xenia-canary/src/xenia/cpu/processor.cc#L383)
/// which decrements `r[1]` by `64 + 112 = 176` before
/// `function->Call(...)` and restores afterwards. The pad must be larger
/// than any plausible sum of `__savegprlr_N`'s save-area (up to 64 B for
/// r25-r31 + 8 B for LR) plus the callback's own `stwu r1,-N(r1)` frame
/// (the Sylpheed vsync ISR uses 128 B).
///
/// Pre-fix: the ISR's `__savegprlr_25` stored the callback's saved LR
/// (= `LR_HALT_SENTINEL`, from injection) at `[r1 - 8]` — exactly where
/// the interrupted thread's current `bl`-saved LR lived. The
/// interrupted function's return site got stomped with `SENTINEL`, so
/// `__restgprlr_N -> bclr` jumped to the halt sentinel and the thread
/// exited through the wrong path. Manifested in Sylpheed as tid=5
/// (producer for the render queue) terminating at cycle 7.5M, starving
/// both `0x10fc` (main's completion wait) and the PKEVENT that tid=6
/// polls — no second `VdSwap`, no first pixel.
///
/// The expression evaluates to 176. The unit tests in this file model the
/// injector as `r1 = r1.wrapping_sub(CALLBACK_STACK_PAD as u64)`, and
/// `SavedCallbackCtx::restore` puts the original `r1` back.
pub const CALLBACK_STACK_PAD: u32 = 64 + 112;
impl SavedCallbackCtx {
    /// Snapshots the parts of `ctx` that an injected callback may clobber.
    ///
    /// Copies `pc`, `lr`, `ctr`, the whole `cr` array and the first
    /// `gprs.len()` general-purpose registers (r0 through r12). `source` is
    /// stored verbatim alongside the snapshot.
    pub fn capture(ctx: &PpcContext, source: u32) -> Self {
        let mut gprs = [0u64; 13];
        // Bulk-copy the leading registers; the count comes from the array
        // itself so the prefix length cannot drift from the field's size.
        let saved_count = gprs.len();
        gprs.copy_from_slice(&ctx.gpr[..saved_count]);
        Self {
            pc: ctx.pc,
            lr: ctx.lr,
            ctr: ctx.ctr,
            gprs,
            cr: ctx.cr,
            source,
        }
    }

    /// Writes the snapshot back into `ctx`.
    ///
    /// Overwrites `pc`, `lr`, `ctr`, `cr` and r0 through r12. Registers from
    /// r13 upwards are left exactly as the callback left them, and `source`
    /// is not written anywhere.
    pub fn restore(self, ctx: &mut PpcContext) {
        ctx.pc = self.pc;
        ctx.lr = self.lr;
        ctx.ctr = self.ctr;
        ctx.gpr[..self.gprs.len()].copy_from_slice(&self.gprs);
        ctx.cr = self.cr;
    }
}
/// Maximum pending sources held in the FIFO queue before new ones are
/// dropped. Four is enough to absorb a short burst (a few v-syncs arriving
/// while HW 0 is mid-callback from a prior one) without letting runaway
/// delivery swamp the guest.
///
/// Enforced by `InterruptState::queue_interrupt`, which counts an over-cap
/// push in `dropped` instead of evicting an older entry.
pub const INTERRUPT_QUEUE_CAP: usize = 4;
/// M2.5 — per-slot pending-IRQ bitmask. Each `AtomicU8` holds one bit per
/// interrupt source (currently 2 sources: VSYNC=bit 0, CP=bit 1) destined
/// for that specific HW slot. Used by the M3 parallel path: T_main (or
/// the GPU thread) sets a bit Release on the target slot's atomic; the
/// target T_cpu_i checks the bit Acquire at its quantum boundary and
/// self-injects without taking another thread's slot lock.
///
/// The 6-element fixed-size array mirrors `xenia_cpu::scheduler::HW_THREAD_COUNT`.
pub type PendingLocalIrq = [std::sync::atomic::AtomicU8;
xenia_cpu::scheduler::HW_THREAD_COUNT];
/// All interrupt bookkeeping — single field on `KernelState`.
///
/// **First-Pixels M2 (2026-04-20)** — changed from a single-slot
/// `pending_source: Option<u32>` coalesce to a bounded FIFO so bursts
/// don't drop silently, and dropped `VSYNC_INSTR_PERIOD` from 500k to
/// 150k so cadence approximates 60 Hz at the current ~10 MIPS interpreter
/// throughput. Combined with the `HwState::ServicingIrq` variant added to
/// `xenia-cpu::scheduler`, interrupts can now be delivered even when HW 0
/// is `Blocked(WaitAny)` — the injector stashes the block into the new
/// variant and the restore path re-blocks when the callback returns,
/// unless a `wake()` during the callback resolved the wait.
#[derive(Debug, Default)]
pub struct InterruptState {
/// Registered callback (set by `VdSetGraphicsInterruptCallback`).
pub callback: Option<GraphicsInterruptCallback>,
/// Bounded FIFO of pending interrupt sources awaiting injection.
/// Push-back on queue, pop-front on inject. Over-cap pushes drop.
pub pending: VecDeque<u32>,
/// When `Some`, some HW thread is currently running a callback; on
/// return-to-sentinel we restore this and clear the flag.
pub saved: Option<SavedCallbackCtx>,
/// Which guest thread the current callback was injected into.
/// Required because we no longer anchor delivery to HW 0 — any
/// non-Exited thread is a valid target. Meaningful only while
/// `saved.is_some()`. Stored as a `ThreadRef` so per-slot
/// runqueues don't get ambiguous addressing.
pub injected_ref: Option<ThreadRef>,
/// Monotonic count of delivered interrupts. No method in this file
/// increments it; the unit tests model the main loop bumping it after a
/// successful restore.
pub delivered: u64,
/// Dropped interrupts (callback unset, queue full, or thread
/// exited/idle at inject time).
pub dropped: u64,
/// Instruction-count accumulator for the synthetic v-sync ticker. At
/// `VSYNC_INSTR_PERIOD` the main loop pushes an `INTERRUPT_SOURCE_VSYNC`
/// onto `pending` and resets.
pub vsync_accumulator: u64,
/// Last observed instruction count — `tick_vsync` diffs against
/// this to advance `vsync_accumulator`.
pub last_instr_count: u64,
/// M2.5 — per-slot pending-IRQ bits. Set by the producer (M3's
/// IRQ-routing logic on `T_main`) with `Release`; consumed by the
/// target T_cpu_i with `Acquire` at quantum boundary. Unused under
/// the lockstep path (M2's single-host-thread model still uses
/// `pending` + `try_inject_graphics_interrupt`); the field is wired
/// here so M3's per-HW-thread path is a flag flip, not a refactor.
pub pending_local_irq: PendingLocalIrq,
}
/// How many guest instructions correspond to one synthetic v-sync.
///
/// Targets **~60 Hz at the post-Tier-3 interpreter throughput (~10 MIPS)**:
/// 10e6 instr/s ÷ 60 Hz ≈ 167k — we use 150k to give a small cushion.
/// (At exactly 10 MIPS, 150k instructions is one v-sync every 15 ms.)
/// Before M2 this was 500k (~20 Hz), which was enough for games that
/// don't gate anything on v-sync but not enough for titles like Sylpheed
/// whose main loop waits on the v-sync callback to signal an event every
/// frame.
///
/// Consumed by `InterruptState::tick_vsync`.
pub const VSYNC_INSTR_PERIOD: u64 = 150_000;
impl InterruptState {
    /// Record a new callback registration, replacing any earlier one.
    pub fn set_callback(&mut self, callback_pc: u32, user_data: u32) {
        self.callback = Some(GraphicsInterruptCallback {
            callback_pc,
            user_data,
        });
    }

    /// Queue an interrupt for the next safe injection point.
    ///
    /// The request is counted in `dropped` and discarded when no callback
    /// is registered, or when `pending` already holds
    /// `INTERRUPT_QUEUE_CAP` entries (the oldest entry is never evicted).
    pub fn queue_interrupt(&mut self, source: u32) {
        if self.callback.is_none() {
            self.dropped += 1;
            return;
        }
        if self.pending.len() >= INTERRUPT_QUEUE_CAP {
            self.dropped += 1;
            return;
        }
        self.pending.push_back(source);
    }

    /// Peek at the next pending source without removing it.
    pub fn peek_next(&self) -> Option<u32> {
        self.pending.front().copied()
    }

    /// Pop the next pending source (called by the injector after it has
    /// committed to dispatching it).
    pub fn take_next(&mut self) -> Option<u32> {
        self.pending.pop_front()
    }

    /// Advance the v-sync accumulator by the delta since the last call.
    ///
    /// Returns `true` when at least one full `VSYNC_INSTR_PERIOD` elapsed.
    /// Each elapsed period requests one `INTERRUPT_SOURCE_VSYNC`; requests
    /// that cannot be queued (no callback registered, or the queue is at
    /// `INTERRUPT_QUEUE_CAP`) are counted in `dropped`, so a `true` return
    /// does not guarantee that `pending` grew.
    ///
    /// A `current_instr_count` lower than the previous one contributes a
    /// delta of zero.
    pub fn tick_vsync(&mut self, current_instr_count: u64) -> bool {
        let delta = current_instr_count.saturating_sub(self.last_instr_count);
        self.last_instr_count = current_instr_count;
        self.vsync_accumulator = self.vsync_accumulator.saturating_add(delta);
        if self.vsync_accumulator < VSYNC_INSTR_PERIOD {
            return false;
        }
        // Multiple periods may have elapsed in a single tick call if a
        // large instruction delta went by (e.g. a long export). Drain
        // the accumulator fully so we don't lag behind.
        let periods = self.vsync_accumulator / VSYNC_INSTR_PERIOD;
        self.vsync_accumulator %= VSYNC_INSTR_PERIOD;

        // Only as many pushes as the queue has room for can succeed, so the
        // loop is bounded by the queue capacity rather than by `periods`
        // (which is unbounded for a very large instruction delta). The
        // remainder is accounted as dropped in one step; the resulting
        // `pending` and `dropped` match queueing the periods one by one.
        let mut queued = 0u64;
        if self.callback.is_some() {
            while queued < periods && self.pending.len() < INTERRUPT_QUEUE_CAP {
                self.pending.push_back(INTERRUPT_SOURCE_VSYNC);
                queued += 1;
            }
        }
        self.dropped = self.dropped.saturating_add(periods - queued);
        true
    }

    /// Is some HW thread currently running an interrupt callback?
    ///
    /// True exactly while a saved context is held in `saved`.
    pub fn is_in_callback(&self) -> bool {
        self.saved.is_some()
    }
}
#[cfg(test)]
mod tests {
use super::*;
/// With no callback registered, a queued interrupt is counted as dropped
/// and nothing is enqueued.
#[test]
fn queue_interrupt_drops_without_callback() {
let mut s = InterruptState::default();
s.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
assert_eq!(s.dropped, 1);
assert!(s.pending.is_empty());
}
/// Queued sources come back from `take_next` in push order.
#[test]
fn queue_interrupt_fifo_preserves_order() {
let mut s = InterruptState::default();
s.set_callback(0x1000, 0xAB);
s.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
s.queue_interrupt(INTERRUPT_SOURCE_CP);
s.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
assert_eq!(s.dropped, 0);
// FIFO: take_next hands them out in push order.
assert_eq!(s.take_next(), Some(INTERRUPT_SOURCE_VSYNC));
assert_eq!(s.take_next(), Some(INTERRUPT_SOURCE_CP));
assert_eq!(s.take_next(), Some(INTERRUPT_SOURCE_VSYNC));
assert_eq!(s.take_next(), None);
}
/// Pushes beyond `INTERRUPT_QUEUE_CAP` are dropped and the queue length
/// stays at the cap.
#[test]
fn queue_interrupt_caps_at_queue_size() {
let mut s = InterruptState::default();
s.set_callback(0x1000, 0xAB);
for _ in 0..INTERRUPT_QUEUE_CAP {
s.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
}
// Over-cap: drops rather than evicting the oldest.
s.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
s.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
assert_eq!(s.dropped, 2);
assert_eq!(s.pending.len(), INTERRUPT_QUEUE_CAP);
}
/// One instruction short of the period queues nothing; reaching the
/// period queues a v-sync. Also pins the period at 150_000.
#[test]
fn tick_vsync_fires_at_new_150k_threshold() {
let mut s = InterruptState::default();
s.set_callback(0x1000, 0xAB);
assert_eq!(VSYNC_INSTR_PERIOD, 150_000);
assert!(!s.tick_vsync(VSYNC_INSTR_PERIOD - 1));
assert!(s.pending.is_empty());
assert!(s.tick_vsync(VSYNC_INSTR_PERIOD));
assert_eq!(s.peek_next(), Some(INTERRUPT_SOURCE_VSYNC));
}
/// A delta spanning three periods queues three v-syncs in one call.
#[test]
fn tick_vsync_drains_multiple_periods_in_one_call() {
// Long kernel export → big instr delta → multiple v-syncs must
// be delivered, not lost.
let mut s = InterruptState::default();
s.set_callback(0x1000, 0xAB);
assert!(s.tick_vsync(VSYNC_INSTR_PERIOD * 3 + 10));
assert_eq!(s.pending.len(), 3);
}
/// Simulates what the main loop does: inject, execute guest code up
/// to the sentinel, restore. Uses a single-instruction `bclr` callback
/// — the interpreter sees `pc == callback_pc`, steps, and the blr
/// instruction writes `lr` into `pc`, which equals `LR_HALT_SENTINEL`
/// → main loop detects and triggers restore.
///
/// No interpreter runs here: every step, including the `blr`, is
/// performed by hand on the context.
#[test]
fn inject_restore_roundtrip_smoke() {
let mut ctx = PpcContext::new();
ctx.pc = 0x1000_0000;
ctx.lr = 0xCAFE_BABE;
ctx.gpr[3] = 0x1234;
ctx.gpr[4] = 0x5678;
let mut s = InterruptState::default();
s.set_callback(0x2000_0000, 0xDEAD);
// Simulate main loop inject: save ctx fields, divert pc/lr/r3/r4.
let saved = SavedCallbackCtx::capture(&ctx, INTERRUPT_SOURCE_VSYNC);
s.saved = Some(saved);
ctx.pc = 0x2000_0000;
ctx.lr = xenia_cpu::context::LR_HALT_SENTINEL;
ctx.gpr[3] = INTERRUPT_SOURCE_VSYNC as u64;
ctx.gpr[4] = 0xDEAD;
assert!(s.is_in_callback());
// Guest callback "runs" to the sentinel — simulate by writing
// pc = lr (what `blr` would do).
ctx.pc = ctx.lr as u32;
// Main loop detects pc == LR_HALT_SENTINEL while in_callback:
let saved = s.saved.take().unwrap();
saved.restore(&mut ctx);
s.delivered += 1;
assert_eq!(ctx.pc, 0x1000_0000);
assert_eq!(ctx.lr, 0xCAFE_BABE);
assert_eq!(ctx.gpr[3], 0x1234);
assert_eq!(ctx.gpr[4], 0x5678);
assert!(!s.is_in_callback());
assert_eq!(s.delivered, 1);
}
/// `capture` followed by `restore` puts back pc, lr, r3 and r4 after
/// they were zeroed in between.
#[test]
fn saved_ctx_roundtrip() {
let mut ctx = PpcContext::new();
ctx.pc = 0x11223344;
ctx.lr = 0xDEADBEEF;
ctx.gpr[3] = 0xAAAA;
ctx.gpr[4] = 0xBBBB;
let saved = SavedCallbackCtx::capture(&ctx, INTERRUPT_SOURCE_VSYNC);
ctx.pc = 0;
ctx.lr = 0;
ctx.gpr[3] = 0;
ctx.gpr[4] = 0;
saved.restore(&mut ctx);
assert_eq!(ctx.pc, 0x11223344);
assert_eq!(ctx.lr, 0xDEADBEEF);
assert_eq!(ctx.gpr[3], 0xAAAA);
assert_eq!(ctx.gpr[4], 0xBBBB);
}
/// Full volatile-GPR + SP roundtrip. Regression test for the
/// 2026-04-24 IRQ-injection fix: the ISR callback's prologue clobbers
/// `[r1 - 8]` on the interrupted thread's stack unless the injector
/// pre-decrements SP by [`CALLBACK_STACK_PAD`] and the saved ctx puts
/// SP (and the rest of the PPC volatile set) back on return.
#[test]
fn saved_ctx_covers_sp_and_all_volatile_gprs() {
let mut ctx = PpcContext::new();
ctx.pc = 0xAAAA_BBBB;
ctx.lr = 0x1111_2222;
ctx.ctr = 0x3333_4444;
// Give r0..r12 distinct, recognisable values (0x1000 + index).
for i in 0..13 {
ctx.gpr[i] = 0x1000 + i as u64;
}
// r13..r31 are non-volatile and should survive the callback's own
// save/restore — the saved ctx deliberately does NOT cover them.
for i in 13..32 {
ctx.gpr[i] = 0xDEAD_0000 + i as u64;
}
let saved = SavedCallbackCtx::capture(&ctx, INTERRUPT_SOURCE_VSYNC);
// Simulate injector: flip pc/lr/r1/r3/r4 (what the real injector
// actually does — see try_inject_graphics_interrupt in main.rs).
ctx.pc = 0xCAFE;
ctx.lr = xenia_cpu::context::LR_HALT_SENTINEL;
ctx.gpr[1] = ctx.gpr[1].wrapping_sub(CALLBACK_STACK_PAD as u64);
ctx.gpr[3] = INTERRUPT_SOURCE_VSYNC as u64;
ctx.gpr[4] = 0xBEEF;
// Simulate callback clobbering a few volatile regs that aren't
// part of the "obviously diverted" set.
ctx.gpr[0] = 0xFEED_FACE;
ctx.gpr[7] = 0x9999;
ctx.gpr[12] = 0xABCD;
saved.restore(&mut ctx);
// All volatile GPRs restored to pre-injection.
for i in 0..13 {
assert_eq!(
ctx.gpr[i],
0x1000 + i as u64,
"volatile r{} clobbered by callback was not restored",
i
);
}
// SP specifically back to the pre-pad value.
assert_eq!(ctx.gpr[1], 0x1001, "SP must be restored to pre-injection");
// Non-volatile regs were never captured; they stay as the callback
// left them (here, untouched because we didn't modify 13..32).
for i in 13..32 {
assert_eq!(ctx.gpr[i], 0xDEAD_0000 + i as u64);
}
assert_eq!(ctx.pc, 0xAAAA_BBBB);
assert_eq!(ctx.lr, 0x1111_2222);
assert_eq!(ctx.ctr, 0x3333_4444);
}
}