xenia-kernel: HLE expansion, scheduler integration, audit + UI bridge

Major HLE buildout in exports.rs: KeInitializeSemaphore now seeds
count/limit, XexGet{Module,Procedure}Address use distinct
HMODULE_XBOXKRNL/HMODULE_XAM pseudo-handles with a reverse
(ModuleId,ordinal)→thunk_addr map, plus sweeping additions across
sync primitives, file I/O, semaphores, events, threads, and
allocator paths needed to advance Sylpheed past VdSwap=2.

New modules:
  - thread.rs   — ThreadRef + per-thread suspension/wake plumbing
  - interrupts.rs — IRQ delivery, pending-IRQ slots, IPI helpers
  - path.rs     — guest path normalization (D:\\, game:\\, etc.)
  - audit.rs    — --trace-handles harness backing the handle audit
  - ui_bridge.rs — kernel-side endpoint of the xenia-ui bridge
                   (input snapshots, framebuffer publish handles)

state.rs grows to own the HW-slot scheduler state, the new audit /
UI bridge handles, and the per-handle reverse maps. xam.rs and
objects.rs follow suit for the HLE additions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-01 16:29:00 +02:00
parent f1fadb5398
commit 5f0d6487ea
11 changed files with 6369 additions and 270 deletions

View File

@@ -0,0 +1,424 @@
//! Graphics interrupt + synthetic v-sync bookkeeping (P6).
//!
//! The Xbox 360 graphics driver calls `VdSetGraphicsInterruptCallback` to
//! register a single per-process callback that the OS invokes on:
//!
//! 1. **V-sync** — at 60 Hz; source code 0 (`INTERRUPT_SOURCE_VSYNC`).
//! 2. **Command-processor interrupt** — when `PM4_INTERRUPT` fires from the
//! guest-issued command stream; source code 1 (`INTERRUPT_SOURCE_CP`).
//!
//! Canary's [xboxkrnl_video.cc:303-310](xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc#L303-L310)
//! dispatches the callback on HW thread 0. We follow the same convention.
//!
//! The delivery model is cooperative: we inject the callback entry into HW
//! thread 0 at the top of a scheduler round when it's safe (not mid-export,
//! not already inside another interrupt). When the callback returns to
//! [`LR_HALT_SENTINEL`] the main loop restores the saved [`PpcContext`]
//! fields and the HW thread picks up where it left off.
use std::collections::VecDeque;
use xenia_cpu::context::{CrField, PpcContext};
use xenia_cpu::ThreadRef;
/// Interrupt source code for a v-sync (the 60 Hz source described in the
/// module docs).
pub const INTERRUPT_SOURCE_VSYNC: u32 = 0;
/// Interrupt source code for a command-processor interrupt (raised when
/// `PM4_INTERRUPT` fires from the guest-issued command stream).
pub const INTERRUPT_SOURCE_CP: u32 = 1;
/// Guest-registered V-sync / graphics-interrupt callback (from
/// `VdSetGraphicsInterruptCallback`).
///
/// Stored by `InterruptState::set_callback`; while no callback is
/// registered, `InterruptState::queue_interrupt` drops every source.
#[derive(Debug, Clone, Copy)]
pub struct GraphicsInterruptCallback {
/// Guest address of the callback's entry point.
pub callback_pc: u32,
/// Opaque value supplied alongside the callback at registration.
/// NOTE(review): presumably handed back to the callback as its second
/// argument (r4), as the inject simulation in this file's tests does —
/// confirm against the real injector.
pub user_data: u32,
}
/// Snapshot of the fields we mutate when diverting a HW thread into an
/// interrupt callback. Restored when the callback returns to
/// `LR_HALT_SENTINEL`.
///
/// We save **all PPC volatile registers** (r0, r2–r12) plus `r1` (SP),
/// `pc`, `lr`, `ctr`, and `cr`. Non-volatile regs (r13–r31) are preserved
/// by the callback's own `__savegprlr_N` prologue/epilogue per the PPC
/// ELF ABI, so they don't need stashing here.
///
/// **SP (`gpr[1]`) is included because the injector decrements it by
/// [`CALLBACK_STACK_PAD`] before the callback runs** — see that constant's
/// docs for why. Without that pad, the callback's `__savegprlr_N` prologue
/// overwrites the interrupted function's own stack-saved LR (which lives
/// at `[r1 - 8]`), and when the interrupted function later tries to
/// return, `bclr` jumps to `LR_HALT_SENTINEL` and the thread exits
/// prematurely.
#[derive(Debug, Clone, Copy)]
pub struct SavedCallbackCtx {
/// Program counter of the interrupted thread at capture time.
pub pc: u32,
/// Link register at capture time.
pub lr: u64,
/// Count register at capture time.
pub ctr: u64,
/// All PPC volatile GPRs (r0, r2–r12) plus r1 (SP) in index order.
/// Index 0 = r0, 1 = r1, 2 = r2, …, 12 = r12. The array has exactly 13
/// entries; r13–r31 are not stored at all.
pub gprs: [u64; 13],
/// All eight condition-register fields at capture time.
pub cr: [CrField; 8],
/// Interrupt source this snapshot was taken for (the `source` argument
/// of `capture`). Bookkeeping only: `restore` does not write it back
/// into the `PpcContext`.
pub source: u32,
}
/// Bytes the injector reserves below the interrupted thread's SP before
/// running the ISR callback (`64 + 112 = 176`). Matches Canary's
/// [`Processor::Execute`](../../../../xenia-canary/src/xenia/cpu/processor.cc#L383)
/// which decrements `r[1]` by `64 + 112 = 176` before
/// `function->Call(...)` and restores afterwards. The pad must be larger
/// than any plausible sum of `__savegprlr_N`'s save-area (up to 64 B for
/// r25-r31 + 8 B for LR) plus the callback's own `stwu r1,-N(r1)` frame
/// (the Sylpheed vsync ISR uses 128 B).
///
/// History — before the pad was introduced: the ISR's `__savegprlr_25`
/// stored the callback's saved LR (= `LR_HALT_SENTINEL`, from injection)
/// at `[r1 - 8]` — exactly where the interrupted thread's current
/// `bl`-saved LR lived. The interrupted function's return site got
/// stomped with `SENTINEL`, so `__restgprlr_N -> bclr` jumped to the halt
/// sentinel and the thread exited through the wrong path. Manifested in
/// Sylpheed as tid=5 (producer for the render queue) terminating at cycle
/// 7.5M, starving both `0x10fc` (main's completion wait) and the PKEVENT
/// that tid=6 polls — no second `VdSwap`, no first pixel.
pub const CALLBACK_STACK_PAD: u32 = 64 + 112;
impl SavedCallbackCtx {
    /// Snapshots the parts of `ctx` that an injected interrupt callback may
    /// disturb: `pc`, `lr`, `ctr`, the condition register, and GPRs r0–r12
    /// (which includes the stack pointer, r1).
    ///
    /// `source` is recorded alongside the registers so the snapshot remembers
    /// which interrupt it was taken for.
    pub fn capture(ctx: &PpcContext, source: u32) -> Self {
        Self {
            pc: ctx.pc,
            lr: ctx.lr,
            ctr: ctx.ctr,
            // The array length of the `gprs` field (13) bounds how many
            // registers are copied; r13 and above are never read.
            gprs: std::array::from_fn(|reg| ctx.gpr[reg]),
            cr: ctx.cr,
            source,
        }
    }

    /// Writes the snapshot back into `ctx`, undoing the diversion.
    ///
    /// Only the captured fields are touched: GPRs r13–r31 keep whatever
    /// value they hold when this is called, and the recorded `source` is
    /// not written anywhere.
    pub fn restore(self, ctx: &mut PpcContext) {
        let Self {
            pc,
            lr,
            ctr,
            gprs,
            cr,
            source: _,
        } = self;

        for (reg, value) in gprs.iter().enumerate() {
            ctx.gpr[reg] = *value;
        }
        ctx.cr = cr;
        ctx.ctr = ctr;
        ctx.lr = lr;
        ctx.pc = pc;
    }
}
/// Maximum pending sources held in the FIFO queue before new ones are
/// dropped. Four is enough to absorb a short burst (a few v-syncs arriving
/// while HW 0 is mid-callback from a prior one) without letting runaway
/// delivery swamp the guest.
///
/// Enforced by `InterruptState::queue_interrupt`: once `pending` holds
/// this many entries, a further push is discarded and counted in
/// `dropped`; nothing already queued is evicted.
pub const INTERRUPT_QUEUE_CAP: usize = 4;
/// M2.5 — per-slot pending-IRQ bitmask. Each `AtomicU8` holds one bit per
/// interrupt source (currently 2 sources: VSYNC=bit 0, CP=bit 1) destined
/// for that specific HW slot. Used by the M3 parallel path: T_main (or
/// the GPU thread) sets a bit Release on the target slot's atomic; the
/// target T_cpu_i checks the bit Acquire at its quantum boundary and
/// self-injects without taking another thread's slot lock.
///
/// One entry per HW slot: the array length is
/// `xenia_cpu::scheduler::HW_THREAD_COUNT` (6 per the original note).
pub type PendingLocalIrq = [std::sync::atomic::AtomicU8;
xenia_cpu::scheduler::HW_THREAD_COUNT];
/// All interrupt bookkeeping — single field on `KernelState`.
///
/// **First-Pixels M2 (2026-04-20)** — changed from a single-slot
/// `pending_source: Option<u32>` coalesce to a bounded FIFO so bursts
/// don't drop silently, and dropped `VSYNC_INSTR_PERIOD` from 500k to
/// 150k so cadence approximates 60 Hz at the current ~10 MIPS interpreter
/// throughput. Combined with the `HwState::ServicingIrq` variant added to
/// `xenia-cpu::scheduler`, interrupts can now be delivered even when HW 0
/// is `Blocked(WaitAny)` — the injector stashes the block into the new
/// variant and the restore path re-blocks when the callback returns,
/// unless a `wake()` during the callback resolved the wait.
#[derive(Debug, Default)]
pub struct InterruptState {
/// Registered callback (set by `VdSetGraphicsInterruptCallback`).
/// While this is `None`, `queue_interrupt` drops every source.
pub callback: Option<GraphicsInterruptCallback>,
/// Bounded FIFO of pending interrupt sources awaiting injection.
/// Push-back on queue, pop-front on inject. Over-cap pushes drop
/// (see `INTERRUPT_QUEUE_CAP`).
pub pending: VecDeque<u32>,
/// When `Some`, some HW thread is currently running a callback; on
/// return-to-sentinel we restore this and clear the flag.
pub saved: Option<SavedCallbackCtx>,
/// Which guest thread the current callback was injected into.
/// Required because we no longer anchor delivery to HW 0 — any
/// non-Exited thread is a valid target. Meaningful only while
/// `saved.is_some()`. Stored as a `ThreadRef` so per-slot
/// runqueues don't get ambiguous addressing.
pub injected_ref: Option<ThreadRef>,
/// Monotonic count of delivered interrupts. Not updated by any method
/// in this file's `impl`; maintained by the delivery/restore path.
pub delivered: u64,
/// Dropped interrupts (callback unset, queue full, or thread
/// exited/idle at inject time).
pub dropped: u64,
/// Instruction-count accumulator for the synthetic v-sync ticker.
/// `tick_vsync` queues one `INTERRUPT_SOURCE_VSYNC` per whole
/// `VSYNC_INSTR_PERIOD` accumulated and keeps the remainder here (it is
/// not reset to zero).
pub vsync_accumulator: u64,
/// Last observed instruction count — `tick_vsync` diffs against
/// this to advance `vsync_accumulator`.
pub last_instr_count: u64,
/// M2.5 — per-slot pending-IRQ bits. Set by the producer (M3's
/// IRQ-routing logic on `T_main`) with `Release`; consumed by the
/// target T_cpu_i with `Acquire` at quantum boundary. Unused under
/// the lockstep path (M2's single-host-thread model still uses
/// `pending` + `try_inject_graphics_interrupt`); the field is wired
/// here so M3's per-HW-thread path is a flag flip, not a refactor.
pub pending_local_irq: PendingLocalIrq,
}
/// How many guest instructions correspond to one synthetic v-sync.
///
/// Targets **~60 Hz at the post-Tier-3 interpreter throughput (~10 MIPS)**:
/// 10e6 instr/s ÷ 60 Hz ≈ 167k — we use 150k to give a small cushion.
/// Before M2 this was 500k (~20 Hz), which was enough for games that
/// don't gate anything on v-sync but not enough for titles like Sylpheed
/// whose main loop waits on the v-sync callback to signal an event every
/// frame.
///
/// Consumed by `InterruptState::tick_vsync`, which queues one
/// `INTERRUPT_SOURCE_VSYNC` for every whole period of instructions that
/// has accumulated.
pub const VSYNC_INSTR_PERIOD: u64 = 150_000;
impl InterruptState {
    /// Registers the guest callback, replacing any previous registration.
    pub fn set_callback(&mut self, callback_pc: u32, user_data: u32) {
        let registration = GraphicsInterruptCallback {
            callback_pc,
            user_data,
        };
        self.callback = Some(registration);
    }

    /// Appends `source` to the pending FIFO for a later injection point.
    ///
    /// The source is discarded (and `dropped` incremented) when no callback
    /// is registered or when the queue already holds
    /// `INTERRUPT_QUEUE_CAP` entries; queued entries are never evicted.
    pub fn queue_interrupt(&mut self, source: u32) {
        let has_callback = self.callback.is_some();
        let has_room = self.pending.len() < INTERRUPT_QUEUE_CAP;
        if has_callback && has_room {
            self.pending.push_back(source);
        } else {
            self.dropped += 1;
        }
    }

    /// Returns the oldest pending source without dequeuing it.
    pub fn peek_next(&self) -> Option<u32> {
        self.pending.front().copied()
    }

    /// Dequeues the oldest pending source (called by the injector after it
    /// has committed to dispatching it).
    pub fn take_next(&mut self) -> Option<u32> {
        self.pending.pop_front()
    }

    /// Feeds the instruction-count delta since the previous call into the
    /// v-sync accumulator and queues one `INTERRUPT_SOURCE_VSYNC` per whole
    /// `VSYNC_INSTR_PERIOD` that has built up.
    ///
    /// A count lower than the previous one contributes a delta of zero.
    /// Several periods can elapse in one call (e.g. after a long export);
    /// each is queued individually and the sub-period remainder is kept.
    ///
    /// Returns `true` when at least one period elapsed. The v-syncs go
    /// through `queue_interrupt`, so they may still have been dropped
    /// (no callback, queue full) even when this returns `true`.
    pub fn tick_vsync(&mut self, current_instr_count: u64) -> bool {
        let elapsed = current_instr_count.saturating_sub(self.last_instr_count);
        self.last_instr_count = current_instr_count;

        let total = self.vsync_accumulator.saturating_add(elapsed);
        let whole_periods = total / VSYNC_INSTR_PERIOD;
        // Carry the leftover forward so cadence does not drift.
        self.vsync_accumulator = total % VSYNC_INSTR_PERIOD;

        for _ in 0..whole_periods {
            self.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
        }
        whole_periods > 0
    }

    /// Is a callback currently in flight, i.e. is there a saved context
    /// waiting to be restored? Not specific to HW thread 0.
    pub fn is_in_callback(&self) -> bool {
        self.saved.is_some()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Fresh state with a dummy callback registered, so queueing is enabled.
    fn state_with_callback() -> InterruptState {
        let mut state = InterruptState::default();
        state.set_callback(0x1000, 0xAB);
        state
    }

    /// With no callback registered, a queued source is counted as dropped
    /// and never reaches the FIFO.
    #[test]
    fn queue_interrupt_drops_without_callback() {
        let mut state = InterruptState::default();
        state.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
        assert_eq!(state.dropped, 1);
        assert!(state.pending.is_empty());
    }

    /// Sources come back out of `take_next` in the order they were queued.
    #[test]
    fn queue_interrupt_fifo_preserves_order() {
        let mut state = state_with_callback();
        let pushed = [
            INTERRUPT_SOURCE_VSYNC,
            INTERRUPT_SOURCE_CP,
            INTERRUPT_SOURCE_VSYNC,
        ];
        for source in pushed.iter().copied() {
            state.queue_interrupt(source);
        }
        assert_eq!(state.dropped, 0);

        for expected in pushed.iter().copied() {
            assert_eq!(state.take_next(), Some(expected));
        }
        assert_eq!(state.take_next(), None);
    }

    /// Pushes beyond `INTERRUPT_QUEUE_CAP` are dropped; the oldest entries
    /// are not evicted to make room.
    #[test]
    fn queue_interrupt_caps_at_queue_size() {
        const OVERFLOW: usize = 2;
        let mut state = state_with_callback();
        for _ in 0..INTERRUPT_QUEUE_CAP + OVERFLOW {
            state.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
        }
        assert_eq!(state.dropped, OVERFLOW as u64);
        assert_eq!(state.pending.len(), INTERRUPT_QUEUE_CAP);
    }

    /// One instruction short of the period does nothing; reaching the
    /// period queues a v-sync.
    #[test]
    fn tick_vsync_fires_at_new_150k_threshold() {
        assert_eq!(VSYNC_INSTR_PERIOD, 150_000);
        let mut state = state_with_callback();

        let fired_early = state.tick_vsync(VSYNC_INSTR_PERIOD - 1);
        assert!(!fired_early);
        assert!(state.pending.is_empty());

        let fired_on_time = state.tick_vsync(VSYNC_INSTR_PERIOD);
        assert!(fired_on_time);
        assert_eq!(state.peek_next(), Some(INTERRUPT_SOURCE_VSYNC));
    }

    /// A large instruction delta (e.g. a long kernel export) spanning
    /// several periods must queue one v-sync per period, not just one.
    #[test]
    fn tick_vsync_drains_multiple_periods_in_one_call() {
        let mut state = state_with_callback();
        let fired = state.tick_vsync(VSYNC_INSTR_PERIOD * 3 + 10);
        assert!(fired);
        assert_eq!(state.pending.len(), 3);
    }

    /// Walks through the inject → run-to-sentinel → restore sequence by
    /// hand. No guest code is executed: the callback's `blr` is modelled by
    /// copying `lr` (set to `LR_HALT_SENTINEL` at injection) into `pc`.
    #[test]
    fn inject_restore_roundtrip_smoke() {
        const CALLBACK_PC: u32 = 0x2000_0000;
        const USER_DATA: u32 = 0xDEAD;

        let mut ctx = PpcContext::new();
        ctx.pc = 0x1000_0000;
        ctx.lr = 0xCAFE_BABE;
        ctx.gpr[3] = 0x1234;
        ctx.gpr[4] = 0x5678;

        let mut state = InterruptState::default();
        state.set_callback(CALLBACK_PC, USER_DATA);

        // Inject: stash the context, then divert pc/lr/r3/r4.
        state.saved = Some(SavedCallbackCtx::capture(&ctx, INTERRUPT_SOURCE_VSYNC));
        ctx.pc = CALLBACK_PC;
        ctx.lr = xenia_cpu::context::LR_HALT_SENTINEL;
        ctx.gpr[3] = u64::from(INTERRUPT_SOURCE_VSYNC);
        ctx.gpr[4] = u64::from(USER_DATA);
        assert!(state.is_in_callback());

        // Callback "returns": pc takes the value of lr, i.e. the sentinel.
        ctx.pc = ctx.lr as u32;

        // Restore path, as run when the sentinel is seen mid-callback.
        let snapshot = state.saved.take().unwrap();
        snapshot.restore(&mut ctx);
        state.delivered += 1;

        assert_eq!(ctx.pc, 0x1000_0000);
        assert_eq!(ctx.lr, 0xCAFE_BABE);
        assert_eq!(ctx.gpr[3], 0x1234);
        assert_eq!(ctx.gpr[4], 0x5678);
        assert!(!state.is_in_callback());
        assert_eq!(state.delivered, 1);
    }

    /// `capture` followed by `restore` puts back pc, lr, r3 and r4 after
    /// they have been zeroed.
    #[test]
    fn saved_ctx_roundtrip() {
        let pc: u32 = 0x1122_3344;
        let lr: u64 = 0xDEAD_BEEF;
        let (r3, r4): (u64, u64) = (0xAAAA, 0xBBBB);

        let mut ctx = PpcContext::new();
        ctx.pc = pc;
        ctx.lr = lr;
        ctx.gpr[3] = r3;
        ctx.gpr[4] = r4;
        let snapshot = SavedCallbackCtx::capture(&ctx, INTERRUPT_SOURCE_VSYNC);

        ctx.pc = 0;
        ctx.lr = 0;
        ctx.gpr[3] = 0;
        ctx.gpr[4] = 0;
        snapshot.restore(&mut ctx);

        assert_eq!(ctx.pc, pc);
        assert_eq!(ctx.lr, lr);
        assert_eq!(ctx.gpr[3], r3);
        assert_eq!(ctx.gpr[4], r4);
    }

    /// Full volatile-GPR + SP roundtrip. Regression test for the
    /// 2026-04-24 IRQ-injection fix: the ISR callback's prologue clobbers
    /// `[r1 - 8]` on the interrupted thread's stack unless the injector
    /// pre-decrements SP by [`CALLBACK_STACK_PAD`] and the saved ctx puts
    /// SP (and the rest of the PPC volatile set) back on return.
    #[test]
    fn saved_ctx_covers_sp_and_all_volatile_gprs() {
        // r0..=r12 are captured; r13..=r31 are deliberately not.
        const CAPTURED: std::ops::Range<usize> = 0..13;
        const NOT_CAPTURED: std::ops::Range<usize> = 13..32;
        let captured_seed = |reg: usize| 0x1000 + reg as u64;
        let uncaptured_seed = |reg: usize| 0xDEAD_0000 + reg as u64;

        let mut ctx = PpcContext::new();
        ctx.pc = 0xAAAA_BBBB;
        ctx.lr = 0x1111_2222;
        ctx.ctr = 0x3333_4444;
        for reg in CAPTURED {
            ctx.gpr[reg] = captured_seed(reg);
        }
        for reg in NOT_CAPTURED {
            ctx.gpr[reg] = uncaptured_seed(reg);
        }
        let snapshot = SavedCallbackCtx::capture(&ctx, INTERRUPT_SOURCE_VSYNC);

        // Mimic the injector flipping pc/lr/r1/r3/r4 (per the original
        // note, see try_inject_graphics_interrupt in main.rs).
        ctx.pc = 0xCAFE;
        ctx.lr = xenia_cpu::context::LR_HALT_SENTINEL;
        ctx.gpr[1] = ctx.gpr[1].wrapping_sub(u64::from(CALLBACK_STACK_PAD));
        ctx.gpr[3] = u64::from(INTERRUPT_SOURCE_VSYNC);
        ctx.gpr[4] = 0xBEEF;
        // Mimic the callback trashing volatile registers outside the
        // obviously-diverted set.
        ctx.gpr[0] = 0xFEED_FACE;
        ctx.gpr[7] = 0x9999;
        ctx.gpr[12] = 0xABCD;

        snapshot.restore(&mut ctx);

        // Every captured GPR is back at its pre-injection value.
        for reg in CAPTURED {
            assert_eq!(
                ctx.gpr[reg],
                captured_seed(reg),
                "volatile r{} clobbered by callback was not restored",
                reg
            );
        }
        // SP in particular is back at the pre-pad value.
        assert_eq!(ctx.gpr[1], 0x1001, "SP must be restored to pre-injection");
        // Uncaptured registers are left exactly as they were (this test
        // never modified them after seeding).
        for reg in NOT_CAPTURED {
            assert_eq!(ctx.gpr[reg], uncaptured_seed(reg));
        }
        assert_eq!(ctx.pc, 0xAAAA_BBBB);
        assert_eq!(ctx.lr, 0x1111_2222);
        assert_eq!(ctx.ctr, 0x3333_4444);
    }
}