Major HLE buildout in exports.rs: KeInitializeSemaphore now seeds
count/limit, XexGet{Module,Procedure}Address use distinct
HMODULE_XBOXKRNL/HMODULE_XAM pseudo-handles with a reverse
(ModuleId,ordinal)→thunk_addr map, plus sweeping additions across
sync primitives, file I/O, semaphores, events, threads, and
allocator paths needed to advance Sylpheed past VdSwap=2.
New modules:
- thread.rs — ThreadRef + per-thread suspension/wake plumbing
- interrupts.rs — IRQ delivery, pending-IRQ slots, IPI helpers
- path.rs — guest path normalization (D:\\, game:\\, etc.)
- audit.rs — --trace-handles harness backing the handle audit
- ui_bridge.rs — kernel-side endpoint of the xenia-ui bridge
(input snapshots, framebuffer publish handles)
state.rs grows to own the HW-slot scheduler state, the new audit /
UI bridge handles, and the per-handle reverse maps. xam.rs and
objects.rs follow suit for the HLE additions.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
425 lines
17 KiB
Rust
425 lines
17 KiB
Rust
//! Graphics interrupt + synthetic v-sync bookkeeping (P6).
|
||
//!
|
||
//! The Xbox 360 graphics driver calls `VdSetGraphicsInterruptCallback` to
|
||
//! register a single per-process callback that the OS invokes on:
|
||
//!
|
||
//! 1. **V-sync** — at 60 Hz; source code 0 (`INTERRUPT_SOURCE_VSYNC`).
|
||
//! 2. **Command-processor interrupt** — when `PM4_INTERRUPT` fires from the
|
||
//! guest-issued command stream; source code 1 (`INTERRUPT_SOURCE_CP`).
|
||
//!
|
||
//! Canary's [xboxkrnl_video.cc:303-310](xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc#L303-L310)
|
||
//! dispatches the callback on HW thread 0. We follow the same convention.
|
||
//!
|
||
//! The delivery model is cooperative: we inject the callback entry into HW
|
||
//! thread 0 at the top of a scheduler round when it's safe (not mid-export,
|
||
//! not already inside another interrupt). When the callback returns to
|
||
//! [`LR_HALT_SENTINEL`] the main loop restores the saved [`PpcContext`]
|
||
//! fields and the HW thread picks up where it left off.
|
||
|
||
use std::collections::VecDeque;
|
||
|
||
use xenia_cpu::context::{CrField, PpcContext};
|
||
use xenia_cpu::ThreadRef;
|
||
|
||
/// Interrupt source code for v-sync (source code 0 in the module docs).
/// This is the source that [`InterruptState::tick_vsync`] queues.
pub const INTERRUPT_SOURCE_VSYNC: u32 = 0;
|
||
/// Interrupt source code for a command-processor interrupt, i.e. a
/// `PM4_INTERRUPT` in the guest-issued command stream (source code 1 in the
/// module docs).
pub const INTERRUPT_SOURCE_CP: u32 = 1;
|
||
|
||
/// The v-sync / graphics-interrupt callback a guest registers through
/// `VdSetGraphicsInterruptCallback`.
///
/// Plain `Copy` data; [`InterruptState::set_callback`] stores one of these.
#[derive(Clone, Copy, Debug)]
pub struct GraphicsInterruptCallback {
    /// Guest address of the callback's entry point.
    pub callback_pc: u32,
    /// Value registered together with the callback.
    pub user_data: u32,
}
|
||
|
||
/// Snapshot of the fields we mutate when diverting a HW thread into an
|
||
/// interrupt callback. Restored when the callback returns to
|
||
/// `LR_HALT_SENTINEL`.
|
||
///
|
||
/// We save **all PPC volatile registers** (r0, r2–r12) plus `r1` (SP),
|
||
/// `pc`, `lr`, `ctr`, and `cr`. Non-volatile regs (r13–r31) are preserved
|
||
/// by the callback's own `__savegprlr_N` prologue/epilogue per the PPC
|
||
/// ELF ABI, so they don't need stashing here.
|
||
///
|
||
/// **SP (`gpr[1]`) is included because the injector decrements it by
|
||
/// [`CALLBACK_STACK_PAD`] before the callback runs** — see that constant's
|
||
/// docs for why. Without this, the callback's `__savegprlr_N` prologue
|
||
/// overwrites the interrupted function's own stack-saved LR (which lives
|
||
/// at `[r1 - 8]`), and when the interrupted function later tries to
|
||
/// return, `bclr` jumps to `LR_HALT_SENTINEL` and the thread exits
|
||
/// prematurely.
|
||
#[derive(Debug, Clone, Copy)]
|
||
pub struct SavedCallbackCtx {
|
||
pub pc: u32,
|
||
pub lr: u64,
|
||
pub ctr: u64,
|
||
/// All PPC volatile GPRs (r0, r2–r12) plus r1 (SP) in index order.
|
||
/// Index 0 = r0, 1 = r1, 2 = r2, …, 12 = r12. Index 13..32 unused.
|
||
pub gprs: [u64; 13],
|
||
pub cr: [CrField; 8],
|
||
pub source: u32,
|
||
}
|
||
|
||
/// Bytes the injector reserves below the interrupted thread's SP before
/// running the ISR callback. Matches Canary's
/// [`Processor::Execute`](../../../../xenia-canary/src/xenia/cpu/processor.cc#L383),
/// which decrements `r[1]` by `64 + 112 = 176` before
/// `function->Call(...)` and restores it afterwards. The pad must be larger
/// than any plausible sum of `__savegprlr_N`'s save area (up to 64 B for
/// r25–r31 + 8 B for LR) plus the callback's own `stwu r1,-N(r1)` frame
/// (the Sylpheed v-sync ISR uses 128 B).
///
/// NOTE(review): taken literally, 64 + 8 + 128 = 200 B exceeds this 176 B
/// pad; presumably the `stwu` frame already contains the save area —
/// confirm the sizing rationale.
///
/// Before the pad existed, the ISR's `__savegprlr_25` stored the
/// callback's saved LR (= `LR_HALT_SENTINEL`, from injection) at
/// `[r1 - 8]` — exactly where the interrupted thread's current `bl`-saved
/// LR lived. The interrupted function's return site got stomped with
/// `SENTINEL`, so `__restgprlr_N -> bclr` jumped to the halt sentinel and
/// the thread exited through the wrong path. This manifested in Sylpheed
/// as tid=5 (producer for the render queue) terminating at cycle 7.5M,
/// starving both `0x10fc` (main's completion wait) and the PKEVENT that
/// tid=6 polls — no second `VdSwap`, no first pixel.
pub const CALLBACK_STACK_PAD: u32 = 64 + 112;
|
||
|
||
impl SavedCallbackCtx {
|
||
pub fn capture(ctx: &PpcContext, source: u32) -> Self {
|
||
let mut gprs = [0u64; 13];
|
||
for i in 0..13 {
|
||
gprs[i] = ctx.gpr[i];
|
||
}
|
||
Self {
|
||
pc: ctx.pc,
|
||
lr: ctx.lr,
|
||
ctr: ctx.ctr,
|
||
gprs,
|
||
cr: ctx.cr,
|
||
source,
|
||
}
|
||
}
|
||
|
||
pub fn restore(self, ctx: &mut PpcContext) {
|
||
ctx.pc = self.pc;
|
||
ctx.lr = self.lr;
|
||
ctx.ctr = self.ctr;
|
||
for i in 0..13 {
|
||
ctx.gpr[i] = self.gprs[i];
|
||
}
|
||
ctx.cr = self.cr;
|
||
}
|
||
}
|
||
|
||
/// Maximum number of pending sources held in the FIFO queue; once the queue
/// is this long, `InterruptState::queue_interrupt` drops new sources and
/// counts them in `dropped`. Four is enough to absorb a short burst (a few
/// v-syncs arriving while HW 0 is mid-callback from a prior one) without
/// letting runaway delivery swamp the guest.
pub const INTERRUPT_QUEUE_CAP: usize = 4;
|
||
|
||
/// All interrupt bookkeeping — single field on `KernelState`.
|
||
///
|
||
/// **First-Pixels M2 (2026-04-20)** — changed from a single-slot
|
||
/// `pending_source: Option<u32>` coalesce to a bounded FIFO so bursts
|
||
/// don't drop silently, and dropped `VSYNC_INSTR_PERIOD` from 500k to
|
||
/// 150k so cadence approximates 60 Hz at the current ~10 MIPS interpreter
|
||
/// throughput. Combined with the `HwState::ServicingIrq` variant added to
|
||
/// `xenia-cpu::scheduler`, interrupts can now be delivered even when HW 0
|
||
/// is `Blocked(WaitAny)` — the injector stashes the block into the new
|
||
/// variant and the restore path re-blocks when the callback returns,
|
||
/// unless a `wake()` during the callback resolved the wait.
|
||
/// M2.5 — per-slot pending-IRQ bitmask. Each `AtomicU8` holds one bit per
|
||
/// interrupt source (currently 2 sources: VSYNC=bit 0, CP=bit 1) destined
|
||
/// for that specific HW slot. Used by the M3 parallel path: T_main (or
|
||
/// the GPU thread) sets a bit Release on the target slot's atomic; the
|
||
/// target T_cpu_i checks the bit Acquire at its quantum boundary and
|
||
/// self-injects without taking another thread's slot lock.
|
||
///
|
||
/// The 6-element fixed-size array mirrors `xenia_cpu::scheduler::HW_THREAD_COUNT`.
|
||
pub type PendingLocalIrq = [std::sync::atomic::AtomicU8;
|
||
xenia_cpu::scheduler::HW_THREAD_COUNT];
|
||
|
||
#[derive(Debug, Default)]
|
||
pub struct InterruptState {
|
||
/// Registered callback (set by `VdSetGraphicsInterruptCallback`).
|
||
pub callback: Option<GraphicsInterruptCallback>,
|
||
/// Bounded FIFO of pending interrupt sources awaiting injection.
|
||
/// Push-back on queue, pop-front on inject. Over-cap pushes drop.
|
||
pub pending: VecDeque<u32>,
|
||
/// When `Some`, some HW thread is currently running a callback; on
|
||
/// return-to-sentinel we restore this and clear the flag.
|
||
pub saved: Option<SavedCallbackCtx>,
|
||
/// Which guest thread the current callback was injected into.
|
||
/// Required because we no longer anchor delivery to HW 0 — any
|
||
/// non-Exited thread is a valid target. Meaningful only while
|
||
/// `saved.is_some()`. Stored as a `ThreadRef` so per-slot
|
||
/// runqueues don't get ambiguous addressing.
|
||
pub injected_ref: Option<ThreadRef>,
|
||
/// Monotonic count of delivered interrupts.
|
||
pub delivered: u64,
|
||
/// Dropped interrupts (callback unset, queue full, or thread
|
||
/// exited/idle at inject time).
|
||
pub dropped: u64,
|
||
/// Instruction-count accumulator for the synthetic v-sync ticker. At
|
||
/// `VSYNC_INSTR_PERIOD` the main loop pushes an `INTERRUPT_SOURCE_VSYNC`
|
||
/// onto `pending` and resets.
|
||
pub vsync_accumulator: u64,
|
||
/// Last observed instruction count — `tick_vsync` diffs against
|
||
/// this to advance `vsync_accumulator`.
|
||
pub last_instr_count: u64,
|
||
/// M2.5 — per-slot pending-IRQ bits. Set by the producer (M3's
|
||
/// IRQ-routing logic on `T_main`) with `Release`; consumed by the
|
||
/// target T_cpu_i with `Acquire` at quantum boundary. Unused under
|
||
/// the lockstep path (M2's single-host-thread model still uses
|
||
/// `pending` + `try_inject_graphics_interrupt`); the field is wired
|
||
/// here so M3's per-HW-thread path is a flag flip, not a refactor.
|
||
pub pending_local_irq: PendingLocalIrq,
|
||
}
|
||
|
||
/// How many guest instructions correspond to one synthetic v-sync.
///
/// Targets **~60 Hz at the post-Tier-3 interpreter throughput (~10 MIPS)**:
/// 10e6 instr/s ÷ 60 Hz ≈ 167k — we use 150k to give a small cushion.
/// Before M2 this was 500k (~20 Hz), which was enough for games that
/// don't gate anything on v-sync but not enough for titles like Sylpheed,
/// whose main loop waits on the v-sync callback to signal an event every
/// frame.
pub const VSYNC_INSTR_PERIOD: u64 = 150_000;
|
||
|
||
impl InterruptState {
|
||
/// Record a new callback registration.
|
||
pub fn set_callback(&mut self, callback_pc: u32, user_data: u32) {
|
||
self.callback = Some(GraphicsInterruptCallback {
|
||
callback_pc,
|
||
user_data,
|
||
});
|
||
}
|
||
|
||
/// Queue an interrupt for the next safe injection point.
|
||
pub fn queue_interrupt(&mut self, source: u32) {
|
||
if self.callback.is_none() {
|
||
self.dropped += 1;
|
||
return;
|
||
}
|
||
if self.pending.len() >= INTERRUPT_QUEUE_CAP {
|
||
self.dropped += 1;
|
||
return;
|
||
}
|
||
self.pending.push_back(source);
|
||
}
|
||
|
||
/// Peek at the next pending source without removing it.
|
||
pub fn peek_next(&self) -> Option<u32> {
|
||
self.pending.front().copied()
|
||
}
|
||
|
||
/// Pop the next pending source (called by the injector after it has
|
||
/// committed to dispatching it).
|
||
pub fn take_next(&mut self) -> Option<u32> {
|
||
self.pending.pop_front()
|
||
}
|
||
|
||
/// Advance the v-sync accumulator by the delta since the last call.
|
||
/// Returns `true` if a new v-sync was queued.
|
||
pub fn tick_vsync(&mut self, current_instr_count: u64) -> bool {
|
||
let delta = current_instr_count.saturating_sub(self.last_instr_count);
|
||
self.last_instr_count = current_instr_count;
|
||
self.vsync_accumulator = self.vsync_accumulator.saturating_add(delta);
|
||
if self.vsync_accumulator < VSYNC_INSTR_PERIOD {
|
||
return false;
|
||
}
|
||
// Multiple periods may have elapsed in a single tick call if a
|
||
// large instruction delta went by (e.g. a long export). Drain
|
||
// the accumulator fully so we don't lag behind.
|
||
let periods = self.vsync_accumulator / VSYNC_INSTR_PERIOD;
|
||
self.vsync_accumulator %= VSYNC_INSTR_PERIOD;
|
||
for _ in 0..periods {
|
||
self.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
|
||
}
|
||
true
|
||
}
|
||
|
||
/// Is HW thread 0 currently in a callback?
|
||
pub fn is_in_callback(&self) -> bool {
|
||
self.saved.is_some()
|
||
}
|
||
}
|
||
|
||
#[cfg(test)]
mod tests {
    use super::*;
    use xenia_cpu::context::LR_HALT_SENTINEL;

    /// Fresh state with a callback registered, so `queue_interrupt` accepts
    /// work instead of dropping it.
    fn state_with_callback() -> InterruptState {
        let mut state = InterruptState::default();
        state.set_callback(0x1000, 0xAB);
        state
    }

    #[test]
    fn queue_interrupt_drops_without_callback() {
        let mut state = InterruptState::default();
        state.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
        assert_eq!(state.dropped, 1);
        assert!(state.pending.is_empty());
    }

    #[test]
    fn queue_interrupt_fifo_preserves_order() {
        let order = [
            INTERRUPT_SOURCE_VSYNC,
            INTERRUPT_SOURCE_CP,
            INTERRUPT_SOURCE_VSYNC,
        ];
        let mut state = state_with_callback();
        for source in order {
            state.queue_interrupt(source);
        }
        assert_eq!(state.dropped, 0);

        // FIFO: take_next hands the sources back in push order.
        for source in order {
            assert_eq!(state.take_next(), Some(source));
        }
        assert_eq!(state.take_next(), None);
    }

    #[test]
    fn queue_interrupt_caps_at_queue_size() {
        let mut state = state_with_callback();
        // Fill the queue, then push two more: the extras are dropped rather
        // than evicting the oldest entries.
        for _ in 0..INTERRUPT_QUEUE_CAP + 2 {
            state.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
        }
        assert_eq!(state.dropped, 2);
        assert_eq!(state.pending.len(), INTERRUPT_QUEUE_CAP);
    }

    #[test]
    fn tick_vsync_fires_at_new_150k_threshold() {
        let mut state = state_with_callback();
        assert_eq!(VSYNC_INSTR_PERIOD, 150_000);

        // One instruction short of a period: nothing fires.
        assert!(!state.tick_vsync(VSYNC_INSTR_PERIOD - 1));
        assert!(state.pending.is_empty());

        // Reaching the period queues exactly a v-sync.
        assert!(state.tick_vsync(VSYNC_INSTR_PERIOD));
        assert_eq!(state.peek_next(), Some(INTERRUPT_SOURCE_VSYNC));
    }

    #[test]
    fn tick_vsync_drains_multiple_periods_in_one_call() {
        // Long kernel export → big instr delta → multiple v-syncs must be
        // queued, not lost.
        let mut state = state_with_callback();
        assert!(state.tick_vsync(VSYNC_INSTR_PERIOD * 3 + 10));
        assert_eq!(state.pending.len(), 3);
    }

    /// Walks through the steps the main loop performs — inject, run the
    /// callback to the sentinel, restore — without running an interpreter:
    /// the callback's `blr` is simulated by copying `lr` into `pc`, which
    /// lands on `LR_HALT_SENTINEL` and is the cue to restore.
    #[test]
    fn inject_restore_roundtrip_smoke() {
        let mut ctx = PpcContext::new();
        ctx.pc = 0x1000_0000;
        ctx.lr = 0xCAFE_BABE;
        ctx.gpr[3] = 0x1234;
        ctx.gpr[4] = 0x5678;

        let mut state = InterruptState::default();
        state.set_callback(0x2000_0000, 0xDEAD);

        // Inject: stash the context, then divert pc/lr/r3/r4.
        state.saved = Some(SavedCallbackCtx::capture(&ctx, INTERRUPT_SOURCE_VSYNC));
        ctx.pc = 0x2000_0000;
        ctx.lr = LR_HALT_SENTINEL;
        ctx.gpr[3] = INTERRUPT_SOURCE_VSYNC as u64;
        ctx.gpr[4] = 0xDEAD;
        assert!(state.is_in_callback());

        // The guest callback "returns": pc = lr, as `blr` would do.
        ctx.pc = ctx.lr as u32;

        // pc == LR_HALT_SENTINEL while in a callback → restore.
        let snapshot = state.saved.take().unwrap();
        snapshot.restore(&mut ctx);
        state.delivered += 1;

        assert_eq!(ctx.pc, 0x1000_0000);
        assert_eq!(ctx.lr, 0xCAFE_BABE);
        assert_eq!(ctx.gpr[3], 0x1234);
        assert_eq!(ctx.gpr[4], 0x5678);
        assert!(!state.is_in_callback());
        assert_eq!(state.delivered, 1);
    }

    #[test]
    fn saved_ctx_roundtrip() {
        let mut ctx = PpcContext::new();
        ctx.pc = 0x11223344;
        ctx.lr = 0xDEADBEEF;
        ctx.gpr[3] = 0xAAAA;
        ctx.gpr[4] = 0xBBBB;

        let snapshot = SavedCallbackCtx::capture(&ctx, INTERRUPT_SOURCE_VSYNC);

        // Wipe everything the snapshot is expected to bring back.
        ctx.pc = 0;
        ctx.lr = 0;
        ctx.gpr[3] = 0;
        ctx.gpr[4] = 0;

        snapshot.restore(&mut ctx);
        assert_eq!(ctx.pc, 0x11223344);
        assert_eq!(ctx.lr, 0xDEADBEEF);
        assert_eq!(ctx.gpr[3], 0xAAAA);
        assert_eq!(ctx.gpr[4], 0xBBBB);
    }

    /// Full volatile-GPR + SP roundtrip. Regression test for the
    /// 2026-04-24 IRQ-injection fix: the ISR callback's prologue clobbers
    /// `[r1 - 8]` on the interrupted thread's stack unless the injector
    /// pre-decrements SP by [`CALLBACK_STACK_PAD`] and the saved ctx puts
    /// SP (and the rest of the PPC volatile set) back on return.
    #[test]
    fn saved_ctx_covers_sp_and_all_volatile_gprs() {
        // Distinct, recognisable values for the captured (r0..=r12) and
        // the uncaptured (r13..=r31) register ranges.
        let captured_value = |reg: usize| 0x1000 + reg as u64;
        let uncaptured_value = |reg: usize| 0xDEAD_0000 + reg as u64;

        let mut ctx = PpcContext::new();
        ctx.pc = 0xAAAA_BBBB;
        ctx.lr = 0x1111_2222;
        ctx.ctr = 0x3333_4444;
        for reg in 0..13 {
            ctx.gpr[reg] = captured_value(reg);
        }
        // r13..r31 are non-volatile and should survive the callback's own
        // save/restore — the saved ctx deliberately does NOT cover them.
        for reg in 13..32 {
            ctx.gpr[reg] = uncaptured_value(reg);
        }

        let snapshot = SavedCallbackCtx::capture(&ctx, INTERRUPT_SOURCE_VSYNC);

        // Simulate the injector: flip pc/lr/r1/r3/r4 (what the real injector
        // actually does — see try_inject_graphics_interrupt in main.rs).
        ctx.pc = 0xCAFE;
        ctx.lr = LR_HALT_SENTINEL;
        ctx.gpr[1] = ctx.gpr[1].wrapping_sub(CALLBACK_STACK_PAD as u64);
        ctx.gpr[3] = INTERRUPT_SOURCE_VSYNC as u64;
        ctx.gpr[4] = 0xBEEF;
        // Simulate the callback clobbering a few volatile regs that aren't
        // part of the "obviously diverted" set.
        ctx.gpr[0] = 0xFEED_FACE;
        ctx.gpr[7] = 0x9999;
        ctx.gpr[12] = 0xABCD;

        snapshot.restore(&mut ctx);

        // Every captured GPR is back at its pre-injection value.
        for reg in 0..13 {
            assert_eq!(
                ctx.gpr[reg],
                captured_value(reg),
                "volatile r{} clobbered by callback was not restored",
                reg
            );
        }
        // SP specifically is back at the pre-pad value.
        assert_eq!(ctx.gpr[1], 0x1001, "SP must be restored to pre-injection");
        // Uncaptured regs stay as the callback left them (untouched here,
        // because nothing above modified 13..32).
        for reg in 13..32 {
            assert_eq!(ctx.gpr[reg], uncaptured_value(reg));
        }
        assert_eq!(ctx.pc, 0xAAAA_BBBB);
        assert_eq!(ctx.lr, 0x1111_2222);
        assert_eq!(ctx.ctr, 0x3333_4444);
    }
}
|