Compare commits
6 Commits
iterate-2A
...
audit-2BF/
| Author | SHA1 | Date | |
|---|---|---|---|
| | 9340ff4592 | | |
| | bcd018659b | | |
| | 09e59e09b7 | | |
| | 5a8fe21ad5 | | |
| | 51489e34db | | |
| | 9a93152981 | | |
@@ -242,6 +242,44 @@ enum Commands {
|
|||||||
/// line). Stdout when omitted.
|
/// line). Stdout when omitted.
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
lr_trace_out: Option<String>,
|
lr_trace_out: Option<String>,
|
||||||
|
/// AUDIT-2BF — comma-separated list of guest PCs (hex, no `0x`
|
||||||
|
/// prefix required) to capture as one-line `AUDIT-PC-PROBE`
|
||||||
|
/// records on every fire. Designed for the silph init chain
|
||||||
|
/// virtual-dispatch site at `sub_82172BA0+0x1E8` (PC
|
||||||
|
/// `0x82172D88`, a `bctrl` after a 3-deep vtable-slot-6 load).
|
||||||
|
/// Each record carries (pc, tid, hw, cycle, lr, r3, r11) plus
|
||||||
|
/// four guest-memory dereferences off r3: `[r3+0]` (vtable),
|
||||||
|
/// `[[r3+0]+24]` (slot 6 method = bctrl target), `[r3+0x0C]`
|
||||||
|
/// (auxiliary handle), `[r3+0x30]` (embedded sub-object vtable).
|
||||||
|
/// Compares directly against canary's round-9 capture:
|
||||||
|
/// r3=0xBCCC52C0, [r3+0]=0x820A3644, slot6=sub_821B55D8,
|
||||||
|
/// [r3+0xC]=0xF80000D8, [r3+0x30]=0x820A1870. Read-only;
|
||||||
|
/// lockstep digest unaffected. Settable via
|
||||||
|
/// `XENIA_AUDIT_PC_PROBE`. Example:
|
||||||
|
/// `--audit-pc-probe-hex=82172D88,82172D80`.
|
||||||
|
#[arg(long)]
|
||||||
|
audit_pc_probe_hex: Option<String>,
|
||||||
|
/// AUDIT-2BF round 14 — guest VA (hex, optional `0x` prefix) to
|
||||||
|
/// dereference 3 deep on every `--audit-pc-probe-hex` fire.
|
||||||
|
/// Emits a paired `AUDIT-MEM-READ` line with the singleton value,
|
||||||
|
/// vtable, vtable[0] (= first virtual method, the bctrl target
|
||||||
|
/// at `0x822F1B4C`), and vtable[24] (= slot 6 = canary's silph
|
||||||
|
/// chain target `sub_821B55D8`). Compare ours vs canary to
|
||||||
|
/// determine whether the bctrl dispatches to the same function
|
||||||
|
/// or a different one. Read-only; lockstep digest unaffected.
|
||||||
|
/// Settable via `XENIA_AUDIT_MEM_READ`. Example:
|
||||||
|
/// `--audit-mem-read-hex=828E1F08`.
|
||||||
|
#[arg(long)]
|
||||||
|
audit_mem_read_hex: Option<String>,
|
||||||
|
/// AUDIT-052 — number of bytes (4-byte aligned, max 256) to
|
||||||
|
/// dump from `r3` on every `--audit-pc-probe-hex` fire. Emits a
|
||||||
|
/// paired `AUDIT-R3-DUMP` line with the u32 lanes. Designed for
|
||||||
|
/// the 80-byte stack-local struct at `sub_82452DC0` (`r31+96`)
|
||||||
|
/// when probing `sub_8245B000` entry — where `r3` IS the struct
|
||||||
|
/// pointer. Read-only; lockstep digest unaffected. Settable via
|
||||||
|
/// `XENIA_AUDIT_R3_DUMP_BYTES`. Example: `--audit-r3-dump-bytes=80`.
|
||||||
|
#[arg(long)]
|
||||||
|
audit_r3_dump_bytes: Option<u32>,
|
||||||
},
|
},
|
||||||
/// Browse XISO disc image contents
|
/// Browse XISO disc image contents
|
||||||
Browse {
|
Browse {
|
||||||
@@ -405,6 +443,9 @@ fn main() -> Result<()> {
|
|||||||
probe_db,
|
probe_db,
|
||||||
lr_trace,
|
lr_trace,
|
||||||
lr_trace_out,
|
lr_trace_out,
|
||||||
|
audit_pc_probe_hex,
|
||||||
|
audit_mem_read_hex,
|
||||||
|
audit_r3_dump_bytes,
|
||||||
} => cmd_exec(
|
} => cmd_exec(
|
||||||
&path,
|
&path,
|
||||||
max_instructions,
|
max_instructions,
|
||||||
@@ -431,6 +472,9 @@ fn main() -> Result<()> {
|
|||||||
probe_db.as_deref(),
|
probe_db.as_deref(),
|
||||||
lr_trace.as_deref(),
|
lr_trace.as_deref(),
|
||||||
lr_trace_out.as_deref(),
|
lr_trace_out.as_deref(),
|
||||||
|
audit_pc_probe_hex.as_deref(),
|
||||||
|
audit_mem_read_hex.as_deref(),
|
||||||
|
audit_r3_dump_bytes,
|
||||||
),
|
),
|
||||||
Commands::Browse { path } => cmd_browse(&path),
|
Commands::Browse { path } => cmd_browse(&path),
|
||||||
Commands::Info { path } => cmd_info(&path),
|
Commands::Info { path } => cmd_info(&path),
|
||||||
@@ -662,6 +706,9 @@ fn cmd_exec(
|
|||||||
probe_db: Option<&str>,
|
probe_db: Option<&str>,
|
||||||
lr_trace: Option<&str>,
|
lr_trace: Option<&str>,
|
||||||
lr_trace_out: Option<&str>,
|
lr_trace_out: Option<&str>,
|
||||||
|
audit_pc_probe_hex: Option<&str>,
|
||||||
|
audit_mem_read_hex: Option<&str>,
|
||||||
|
audit_r3_dump_bytes: Option<u32>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
cmd_exec_inner(
|
cmd_exec_inner(
|
||||||
path,
|
path,
|
||||||
@@ -689,6 +736,9 @@ fn cmd_exec(
|
|||||||
probe_db,
|
probe_db,
|
||||||
lr_trace,
|
lr_trace,
|
||||||
lr_trace_out,
|
lr_trace_out,
|
||||||
|
audit_pc_probe_hex,
|
||||||
|
audit_mem_read_hex,
|
||||||
|
audit_r3_dump_bytes,
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
false,
|
false,
|
||||||
@@ -735,6 +785,9 @@ fn cmd_check(
|
|||||||
None, // probe_db — same
|
None, // probe_db — same
|
||||||
None, // lr_trace — same
|
None, // lr_trace — same
|
||||||
None, // lr_trace_out — same
|
None, // lr_trace_out — same
|
||||||
|
None, // audit_pc_probe_hex — diagnostic, never wanted on goldens
|
||||||
|
None, // audit_mem_read_hex — same
|
||||||
|
None, // audit_r3_dump_bytes — same
|
||||||
out,
|
out,
|
||||||
expect,
|
expect,
|
||||||
stable_digest,
|
stable_digest,
|
||||||
@@ -767,6 +820,9 @@ fn cmd_exec_inner(
|
|||||||
probe_db: Option<&str>,
|
probe_db: Option<&str>,
|
||||||
lr_trace: Option<&str>,
|
lr_trace: Option<&str>,
|
||||||
lr_trace_out: Option<&str>,
|
lr_trace_out: Option<&str>,
|
||||||
|
audit_pc_probe_hex: Option<&str>,
|
||||||
|
audit_mem_read_hex: Option<&str>,
|
||||||
|
audit_r3_dump_bytes: Option<u32>,
|
||||||
digest_out: Option<&str>,
|
digest_out: Option<&str>,
|
||||||
digest_expect: Option<&str>,
|
digest_expect: Option<&str>,
|
||||||
stable_digest: bool,
|
stable_digest: bool,
|
||||||
@@ -1167,6 +1223,84 @@ fn cmd_exec_inner(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AUDIT-2BF — `--audit-pc-probe-hex=82172D88,...`. Bare-hex tokens
|
||||||
|
// (with or without `0x` prefix). Parses every comma-separated entry
|
||||||
|
// as a u32 PC and inserts into `kernel.audit_pc_probe_pcs`. Empty
|
||||||
|
// set is the hot-path no-op (single is_empty() check).
|
||||||
|
let audit_pc_probe_combined: Option<String> = match (
|
||||||
|
audit_pc_probe_hex, std::env::var("XENIA_AUDIT_PC_PROBE").ok(),
|
||||||
|
) {
|
||||||
|
(Some(s), _) => Some(s.to_string()),
|
||||||
|
(None, Some(s)) if !s.is_empty() => Some(s),
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
|
if let Some(list) = audit_pc_probe_combined {
|
||||||
|
for token in list.split(',').map(str::trim).filter(|s| !s.is_empty()) {
|
||||||
|
let hex = token.strip_prefix("0x").or_else(|| token.strip_prefix("0X")).unwrap_or(token);
|
||||||
|
let pc = u32::from_str_radix(hex, 16)
|
||||||
|
.map_err(|e| anyhow::anyhow!("--audit-pc-probe-hex {token:?}: {e}"))?;
|
||||||
|
kernel.audit_pc_probe_pcs.insert(pc);
|
||||||
|
}
|
||||||
|
if !quiet && !kernel.audit_pc_probe_pcs.is_empty() {
|
||||||
|
let mut pcs: Vec<u32> = kernel.audit_pc_probe_pcs.iter().copied().collect();
|
||||||
|
pcs.sort_unstable();
|
||||||
|
let strs: Vec<String> = pcs.iter().map(|p| format!("{p:#010x}")).collect();
|
||||||
|
tracing::info!(
|
||||||
|
"audit-pc-probe armed: {} ({})",
|
||||||
|
kernel.audit_pc_probe_pcs.len(),
|
||||||
|
strs.join(", "),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// AUDIT-2BF round 14 — `--audit-mem-read-hex=828E1F08`. Single
|
||||||
|
// hex VA (optional `0x` prefix). Stored on `kernel.audit_mem_read_addr`.
|
||||||
|
// Paired with `audit_pc_probe_pcs`: on every probe fire, the kernel
|
||||||
|
// emits a second `AUDIT-MEM-READ` line dereferencing 3 deep so we can
|
||||||
|
// resolve vtable[0] / vtable[24] at the singleton.
|
||||||
|
let audit_mem_read_combined: Option<String> = match (
|
||||||
|
audit_mem_read_hex, std::env::var("XENIA_AUDIT_MEM_READ").ok(),
|
||||||
|
) {
|
||||||
|
(Some(s), _) => Some(s.to_string()),
|
||||||
|
(None, Some(s)) if !s.is_empty() => Some(s),
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
|
if let Some(tok) = audit_mem_read_combined {
|
||||||
|
let tok = tok.trim();
|
||||||
|
if !tok.is_empty() {
|
||||||
|
let hex = tok.strip_prefix("0x").or_else(|| tok.strip_prefix("0X")).unwrap_or(tok);
|
||||||
|
let addr = u32::from_str_radix(hex, 16)
|
||||||
|
.map_err(|e| anyhow::anyhow!("--audit-mem-read-hex {tok:?}: {e}"))?;
|
||||||
|
kernel.audit_mem_read_addr = Some(addr);
|
||||||
|
if !quiet {
|
||||||
|
tracing::info!("audit-mem-read armed: {:#010x}", addr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// AUDIT-052 — `--audit-r3-dump-bytes=80`. When set, every
|
||||||
|
// `--audit-pc-probe-hex` fire emits a paired `AUDIT-R3-DUMP` line
|
||||||
|
// with N bytes from `r3` (4-byte aligned, capped at 256). Sized for
|
||||||
|
// the 80-byte stack-local struct at `sub_82452DC0`'s `r31+96` —
|
||||||
|
// probe `sub_8245B000` entry where `r3 == parent's r31+96`.
|
||||||
|
let audit_r3_dump_combined: Option<u32> = match (
|
||||||
|
audit_r3_dump_bytes, std::env::var("XENIA_AUDIT_R3_DUMP_BYTES").ok(),
|
||||||
|
) {
|
||||||
|
(Some(n), _) => Some(n),
|
||||||
|
(None, Some(s)) if !s.is_empty() => Some(
|
||||||
|
s.parse::<u32>().map_err(|e| anyhow::anyhow!("--audit-r3-dump-bytes {s:?}: {e}"))?,
|
||||||
|
),
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
|
if let Some(n) = audit_r3_dump_combined {
|
||||||
|
if n > 0 {
|
||||||
|
kernel.audit_r3_dump_bytes = Some(n);
|
||||||
|
if !quiet {
|
||||||
|
tracing::info!("audit-r3-dump armed: {} bytes", n);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Diagnostic. Parse `--dump-addr=0x828F3D08,...` (or
|
// Diagnostic. Parse `--dump-addr=0x828F3D08,...` (or
|
||||||
// `XENIA_DUMP_ADDR=...`) into `kernel.dump_addrs`. The contents
|
// `XENIA_DUMP_ADDR=...`) into `kernel.dump_addrs`. The contents
|
||||||
// are dumped at end-of-run by `dump_thread_diagnostic`. Pure
|
// are dumped at end-of-run by `dump_thread_diagnostic`. Pure
|
||||||
@@ -1990,7 +2124,13 @@ fn coord_pre_round(
|
|||||||
}
|
}
|
||||||
|
|
||||||
kernel.fire_due_timers();
|
kernel.fire_due_timers();
|
||||||
try_inject_graphics_interrupt(kernel);
|
// Graphics-interrupt delivery is no longer done here — see
|
||||||
|
// `dispatch_graphics_interrupts`, called from the outer loop with
|
||||||
|
// `mem` and `&mut stats` in scope. The audio path still uses the
|
||||||
|
// asynchronous LR-sentinel inject because each XAudio client has a
|
||||||
|
// dedicated worker thread (audit-048 Plan B) that the callback
|
||||||
|
// runs on; we just queue the source and the worker_prologue's
|
||||||
|
// halt-sentinel restore path closes the loop.
|
||||||
if kernel.xaudio_tick_enabled {
|
if kernel.xaudio_tick_enabled {
|
||||||
try_inject_audio_callback(kernel);
|
try_inject_audio_callback(kernel);
|
||||||
}
|
}
|
||||||
@@ -2010,6 +2150,24 @@ fn coord_idle_advance(
|
|||||||
shutdown: &Option<std::sync::Arc<std::sync::atomic::AtomicBool>>,
|
shutdown: &Option<std::sync::Arc<std::sync::atomic::AtomicBool>>,
|
||||||
stats: &ExecStats,
|
stats: &ExecStats,
|
||||||
) -> RoundCtl {
|
) -> RoundCtl {
|
||||||
|
// Path β (iterate-2.BE follow-up): when the scheduler has no Ready
|
||||||
|
// threads, `coord_pre_round`'s instruction-count vsync ticker stops
|
||||||
|
// advancing (instruction_count is frozen). That starves the
|
||||||
|
// host-driven graphics ISR dispatcher: queue stays empty, no
|
||||||
|
// deliveries occur, and the very stall we're trying to break out of
|
||||||
|
// gets worse. Tick vsync from wallclock here unconditionally — it's
|
||||||
|
// a host-clock read, independent of instruction count, and the
|
||||||
|
// dispatcher in the outer loop will drain whatever we queue on the
|
||||||
|
// next pass. Mirrors the `--parallel` ticker choice in
|
||||||
|
// `coord_pre_round` (`tick_vsync_wallclock` branch).
|
||||||
|
if kernel.interrupts.tick_vsync_wallclock() {
|
||||||
|
use std::sync::atomic::Ordering;
|
||||||
|
let mmio = kernel.gpu.mmio();
|
||||||
|
let prev = mmio.d1mode_vblank_vline_status.load(Ordering::Relaxed);
|
||||||
|
mmio.d1mode_vblank_vline_status
|
||||||
|
.store(prev | 0x1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
let next_timer = kernel.earliest_timer_deadline();
|
let next_timer = kernel.earliest_timer_deadline();
|
||||||
let next_wait = kernel.scheduler.earliest_wait_deadline();
|
let next_wait = kernel.scheduler.earliest_wait_deadline();
|
||||||
let target = match (next_timer, next_wait) {
|
let target = match (next_timer, next_wait) {
|
||||||
@@ -2218,6 +2376,7 @@ fn worker_prologue(
|
|||||||
// the helper, no overhead on the hot path.
|
// the helper, no overhead on the hot path.
|
||||||
kernel.fire_ctor_probe_if_match(hw_id, mem);
|
kernel.fire_ctor_probe_if_match(hw_id, mem);
|
||||||
kernel.fire_branch_probe_if_match(hw_id);
|
kernel.fire_branch_probe_if_match(hw_id);
|
||||||
|
kernel.fire_audit_pc_probe_if_match(hw_id, mem);
|
||||||
kernel.fire_lr_trace_if_match(hw_id);
|
kernel.fire_lr_trace_if_match(hw_id);
|
||||||
|
|
||||||
if mem.has_mem_watch() {
|
if mem.has_mem_watch() {
|
||||||
@@ -2595,12 +2754,21 @@ fn run_execution(
|
|||||||
let mut workers: [WorkerCtx; xenia_cpu::scheduler::HW_THREAD_COUNT] =
|
let mut workers: [WorkerCtx; xenia_cpu::scheduler::HW_THREAD_COUNT] =
|
||||||
std::array::from_fn(|i| WorkerCtx::new(i as u8, force_per_instr));
|
std::array::from_fn(|i| WorkerCtx::new(i as u8, force_per_instr));
|
||||||
|
|
||||||
|
// Iterate-2.BE — decode cache used by the synchronous ISR
|
||||||
|
// dispatcher. ISRs are short (~40 PPC instructions) but fire
|
||||||
|
// every ~16.7 ms, so persisting the cache across calls avoids
|
||||||
|
// re-decoding the same handful of pages 60×/s.
|
||||||
|
let mut isr_decode_cache = xenia_cpu::decoder::DecodeCache::new();
|
||||||
|
|
||||||
'outer: loop {
|
'outer: loop {
|
||||||
// Per-round prologue: budget / shutdown / heartbeat / vsync /
|
// Per-round prologue: budget / shutdown / heartbeat / vsync /
|
||||||
// timers / graphics-interrupt injection. Carved into
|
// timers / audio-interrupt injection. Carved into
|
||||||
// `coord_pre_round` so the parallel scheduler (Step 03+) can
|
// `coord_pre_round` so the parallel scheduler (Step 03+) can
|
||||||
// call the same coordination logic between phaser barriers
|
// call the same coordination logic between phaser barriers
|
||||||
// without duplicating it from the lockstep path.
|
// without duplicating it from the lockstep path. The
|
||||||
|
// graphics-interrupt dispatch is hoisted out — it runs
|
||||||
|
// *synchronously* (host-driven, iterate-2.BE) and needs `mem`
|
||||||
|
// + `&mut stats` which aren't in `coord_pre_round`'s scope.
|
||||||
match coord_pre_round(
|
match coord_pre_round(
|
||||||
kernel,
|
kernel,
|
||||||
&stats,
|
&stats,
|
||||||
@@ -2612,6 +2780,13 @@ fn run_execution(
|
|||||||
RoundCtl::BreakOuter => break,
|
RoundCtl::BreakOuter => break,
|
||||||
RoundCtl::Continue => {}
|
RoundCtl::Continue => {}
|
||||||
}
|
}
|
||||||
|
dispatch_graphics_interrupts(
|
||||||
|
kernel,
|
||||||
|
mem,
|
||||||
|
&mut stats,
|
||||||
|
&mut isr_decode_cache,
|
||||||
|
thunk_map,
|
||||||
|
);
|
||||||
|
|
||||||
// Snapshot round schedule. `round_schedule` also advances rng state
|
// Snapshot round schedule. `round_schedule` also advances rng state
|
||||||
// when seeded; mutation is intentional.
|
// when seeded; mutation is intentional.
|
||||||
@@ -2789,6 +2964,10 @@ fn run_execution_parallel(
|
|||||||
|
|
||||||
let throttle_start = Instant::now();
|
let throttle_start = Instant::now();
|
||||||
|
|
||||||
|
// Iterate-2.BE — decode cache for the synchronous ISR dispatcher.
|
||||||
|
// Lives on the coordinator (this) thread; workers never touch it.
|
||||||
|
let mut isr_decode_cache = xenia_cpu::decoder::DecodeCache::new();
|
||||||
|
|
||||||
const COORD_ID: u8 = xenia_cpu::scheduler::HW_THREAD_COUNT as u8; // = 6
|
const COORD_ID: u8 = xenia_cpu::scheduler::HW_THREAD_COUNT as u8; // = 6
|
||||||
const PARTY_COUNT: u32 = xenia_cpu::scheduler::HW_THREAD_COUNT as u32 + 1;
|
const PARTY_COUNT: u32 = xenia_cpu::scheduler::HW_THREAD_COUNT as u32 + 1;
|
||||||
|
|
||||||
@@ -3025,6 +3204,22 @@ fn run_execution_parallel(
|
|||||||
}
|
}
|
||||||
let mut guard = pre_outcome.1;
|
let mut guard = pre_outcome.1;
|
||||||
|
|
||||||
|
// Iterate-2.BE — host-driven synchronous ISR dispatch.
|
||||||
|
// Runs under the kernel lock while workers are still parked
|
||||||
|
// at the phaser B2 barrier (the coordinator hasn't published
|
||||||
|
// the runnable mask or arrived at the phaser yet), so no
|
||||||
|
// contention with worker steps.
|
||||||
|
{
|
||||||
|
let mut s = stats_mtx.lock().expect("stats mutex poisoned");
|
||||||
|
dispatch_graphics_interrupts(
|
||||||
|
&mut *guard,
|
||||||
|
mem,
|
||||||
|
&mut *s,
|
||||||
|
&mut isr_decode_cache,
|
||||||
|
thunk_map,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
guard.scheduler.begin_round();
|
guard.scheduler.begin_round();
|
||||||
let order = guard.scheduler.round_schedule();
|
let order = guard.scheduler.round_schedule();
|
||||||
|
|
||||||
@@ -3140,121 +3335,161 @@ fn run_execution_parallel(
|
|||||||
stats_mtx.into_inner().expect("stats mutex poisoned")
|
stats_mtx.into_inner().expect("stats mutex poisoned")
|
||||||
}
|
}
|
||||||
|
|
||||||
/// First-Pixels M2 — inject a queued graphics interrupt into HW thread 0
|
/// Iterate-2.BE — host-driven synchronous dispatch of all queued
|
||||||
/// when it's safe to do so (callback registered, no interrupt already
|
/// graphics interrupts. Mirrors canary's
|
||||||
/// running). Called at the top of each scheduler round.
|
/// [`EmulateCPInterruptDPC`](../../../../xenia-canary/src/xenia/kernel/kernel_state.cc#L1370)
|
||||||
|
/// → [`Processor::Execute`](../../../../xenia-canary/src/xenia/cpu/processor.cc#L413)
|
||||||
|
/// path: pick a guest thread, borrow its `PpcContext`, jam the ISR
|
||||||
|
/// PC + args into it, and **run the interpreter inline on the host
|
||||||
|
/// thread** until the ISR returns to `LR_HALT_SENTINEL`. Then restore
|
||||||
|
/// the borrowed context and continue.
|
||||||
///
|
///
|
||||||
/// Unlike the earlier P6 version which only delivered when HW 0 was
|
/// Drains the full pending FIFO each call — canary's frame-limiter
|
||||||
/// `Ready`, this one also delivers when HW 0 is `Blocked`: the injector
|
/// runs at its own cadence and our queue can already hold up to
|
||||||
/// stashes the block reason into the new `HwState::ServicingIrq(reason)`
|
/// `INTERRUPT_QUEUE_CAP` coalesced v-sync events.
|
||||||
/// variant, flips the thread to that state so `round_schedule` runs it,
|
|
||||||
/// and — on callback return to `LR_HALT_SENTINEL` — the restore path
|
|
||||||
/// re-creates `Blocked(reason)`, unless a `wake()` during the callback
|
|
||||||
/// (e.g. `KeSetEvent` → `wake_eligible_waiters`) flipped it to `Ready`,
|
|
||||||
/// in which case the wait was resolved and we leave it.
|
|
||||||
///
|
///
|
||||||
/// This is the fix that unblocks games (like Sylpheed) which gate their
|
/// Why this replaces the prior victim-mutate-then-wait scheme: with
|
||||||
/// main loop on a v-sync callback signaling an event the main thread
|
/// the old asynchronous injection, when every guest thread idled (post
|
||||||
/// waits on. The earlier "only-when-Ready" policy dropped 397 of 399
|
/// boot, when Sylpheed's main thread reaches its WAIT_FOREVER on the
|
||||||
/// observed v-syncs on a 1 B-instruction Sylpheed probe; now they
|
/// vsync-driven PKEVENT and all worker threads are likewise Blocked),
|
||||||
/// actually get delivered.
|
/// the next scheduler round had no `Ready` victim and `Blocked` ones
|
||||||
fn try_inject_graphics_interrupt(kernel: &mut xenia_kernel::KernelState) {
|
/// still required at least one round of execution to reach the
|
||||||
|
/// callback. Audit-059 measured `gpu.interrupt.delivered = 54` over
|
||||||
|
/// 3.9 s vs canary's 4712 — an 87× shortfall. Host-driven dispatch
|
||||||
|
/// makes delivery rate a function of wall clock, not guest-thread
|
||||||
|
/// readiness.
|
||||||
|
///
|
||||||
|
/// Victim selection still mirrors the canary precedent: prefer Ready
|
||||||
|
/// (no state mangling), else any Blocked thread (we temporarily flip
|
||||||
|
/// to `ServicingIrq(reason)` for the duration of the inline run so
|
||||||
|
/// `call_export` etc. see a coherent thread state, and restore the
|
||||||
|
/// `Blocked(reason)` on the way out unless the ISR itself signaled a
|
||||||
|
/// wake). Idle / Exited / already-ServicingIrq slots are skipped — if
|
||||||
|
/// nothing remains the source is dropped (still the right behavior;
|
||||||
|
/// canary's `XThread::GetCurrentThread()` would assert).
|
||||||
|
///
|
||||||
|
/// All execution while in-flight runs against the borrowed thread's
|
||||||
|
/// `ctx`. We set `scheduler.current = Some(target_ref)` so kernel
|
||||||
|
/// imports (`KeSetEvent`, `KeReleaseSemaphore`, etc.) reach the right
|
||||||
|
/// context, then restore the previous `current` on the way out. The
|
||||||
|
/// dispatch is single-threaded — under `--parallel` it runs on the
|
||||||
|
/// coordinator with workers parked at the phaser barrier, so there is
|
||||||
|
/// no contention.
|
||||||
|
fn dispatch_graphics_interrupts(
|
||||||
|
kernel: &mut xenia_kernel::KernelState,
|
||||||
|
mem: &xenia_memory::GuestMemory,
|
||||||
|
stats: &mut ExecStats,
|
||||||
|
decode_cache: &mut xenia_cpu::decoder::DecodeCache,
|
||||||
|
thunk_map: &HashMap<u32, (ModuleId, u16, String)>,
|
||||||
|
) {
|
||||||
|
use xenia_cpu::interpreter::{step_cached, StepResult};
|
||||||
use xenia_cpu::scheduler::HwState;
|
use xenia_cpu::scheduler::HwState;
|
||||||
|
const LR_HALT: u32 = xenia_cpu::context::LR_HALT_SENTINEL as u32;
|
||||||
|
/// Defensive cap so a runaway ISR can't lock the coordinator on
|
||||||
|
/// the per-tick dispatch. Real Sylpheed vsync ISR is ~40 PPC
|
||||||
|
/// instructions; canary's `Processor::Execute` has no analogous
|
||||||
|
/// cap because it runs on a dedicated host thread, but we run
|
||||||
|
/// inline on the coordinator so a budget is prudent.
|
||||||
|
const MAX_INSTRS_PER_ISR: u64 = 1_000_000;
|
||||||
|
|
||||||
if kernel.interrupts.is_in_callback() {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
let Some(cb) = kernel.interrupts.callback else {
|
let Some(cb) = kernel.interrupts.callback else {
|
||||||
// No callback registered; drain any pending entries (they
|
|
||||||
// wouldn't have made it into the queue per `queue_interrupt`'s
|
|
||||||
// own `callback.is_none()` guard, but be defensive).
|
|
||||||
kernel.interrupts.pending.clear();
|
kernel.interrupts.pending.clear();
|
||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
|
|
||||||
let Some(source) = kernel.interrupts.peek_next() else {
|
// Iterate-2.BF.γ: graphics dispatch is fully synchronous (host-driven,
|
||||||
return;
|
// iterate-2.BE) — it borrows a guest thread, runs the ISR to
|
||||||
|
// LR_HALT_SENTINEL, and restores everything within that same call. So it
|
||||||
|
// CAN safely coexist with an audio callback mid-flight, *as long as we
|
||||||
|
// pick a different victim thread* than the one audio borrowed. The old
|
||||||
|
// blanket `is_in_callback()` gate caused 5.85M skipped dispatches in
|
||||||
|
// lockstep boot (vs 55 with-pending dispatches) — audio is essentially
|
||||||
|
// always mid-flight on its dedicated worker, which choked vsync
|
||||||
|
// delivery at ~54. Exclude only audio's borrowed thread; the queue
|
||||||
|
// drains synchronously and graphics ISR completion does not touch
|
||||||
|
// `interrupts.saved` (used exclusively by the async audio path).
|
||||||
|
let audio_borrowed = if kernel.interrupts.is_in_callback() {
|
||||||
|
kernel.interrupts.injected_ref
|
||||||
|
} else {
|
||||||
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
// Canary's `EmulateCPInterruptDPC` (kernel_state.cc:1373) dispatches on
|
while let Some(source) = kernel.interrupts.peek_next() {
|
||||||
// whatever the current thread happens to be — real hardware fires the
|
// Victim selection: Ready first, then Blocked (canary's
|
||||||
// interrupt on CPU 2 and the kernel impersonates a DPC on top of
|
// `XThread::GetCurrentThread()` analog — any live thread will
|
||||||
// whichever thread is active. Hard-anchoring to HW 0 breaks the moment
|
// do for borrowing context). Skip Idle/Exited/ServicingIrq.
|
||||||
// `main()` returns: Sylpheed's main thread exits right after init, the
|
// Skip the audio-borrowed thread (if any) to avoid clobbering
|
||||||
// render worker spins on a `PKEVENT` inside the interrupt callback's
|
// its `SavedCallbackCtx` mid-flight.
|
||||||
// user_data struct (`user_data + 0x5C`), and because HW 0 is now
|
let excluded = audio_borrowed;
|
||||||
// `Exited(_)` our injector drops every subsequent vsync — the PKEVENT
|
|
||||||
// is never signaled and the worker polls forever.
|
|
||||||
//
|
|
||||||
// Pick the first HW thread we can plausibly run the callback on:
|
|
||||||
// 1. Prefer `Ready` (no state-mangling needed)
|
|
||||||
// 2. Else take a `Blocked(reason)` thread and swap to
|
|
||||||
// `ServicingIrq(reason)` so the round scheduler runs it; the
|
|
||||||
// LR-sentinel restore path reinstates the block on callback return
|
|
||||||
// 3. Skip `Idle`, `Exited`, or already-`ServicingIrq` slots
|
|
||||||
//
|
|
||||||
// The callback itself just signals a game-side event and returns — it
|
|
||||||
// doesn't care which HW thread it ran on.
|
|
||||||
// Pass 1: find any Ready thread across all slots.
|
|
||||||
let mut victim: Option<xenia_cpu::ThreadRef> = None;
|
let mut victim: Option<xenia_cpu::ThreadRef> = None;
|
||||||
'outer_ready: for (hw_id, slot) in kernel.scheduler.slots.iter().enumerate() {
|
'outer_ready: for (hw_id, slot) in kernel.scheduler.slots.iter().enumerate() {
|
||||||
for (idx, t) in slot.runqueue.iter().enumerate() {
|
for (idx, t) in slot.runqueue.iter().enumerate() {
|
||||||
|
let r = xenia_cpu::ThreadRef::new(hw_id as u8, idx as u16);
|
||||||
|
if excluded == Some(r) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if matches!(t.state, HwState::Ready) {
|
if matches!(t.state, HwState::Ready) {
|
||||||
victim = Some(xenia_cpu::ThreadRef::new(hw_id as u8, idx as u16));
|
victim = Some(r);
|
||||||
break 'outer_ready;
|
break 'outer_ready;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Pass 2: any Blocked thread (we'll flip it to ServicingIrq).
|
|
||||||
if victim.is_none() {
|
if victim.is_none() {
|
||||||
'outer_blocked: for (hw_id, slot) in kernel.scheduler.slots.iter().enumerate() {
|
'outer_blocked: for (hw_id, slot) in kernel.scheduler.slots.iter().enumerate() {
|
||||||
for (idx, t) in slot.runqueue.iter().enumerate() {
|
for (idx, t) in slot.runqueue.iter().enumerate() {
|
||||||
|
let r = xenia_cpu::ThreadRef::new(hw_id as u8, idx as u16);
|
||||||
|
if excluded == Some(r) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if matches!(t.state, HwState::Blocked(_)) {
|
if matches!(t.state, HwState::Blocked(_)) {
|
||||||
victim = Some(xenia_cpu::ThreadRef::new(hw_id as u8, idx as u16));
|
victim = Some(r);
|
||||||
break 'outer_blocked;
|
break 'outer_blocked;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let Some(target_ref) = victim else {
|
let Some(target_ref) = victim else {
|
||||||
// All threads Idle/Exited/already servicing — nothing to inject on.
|
// No donor at all — drop and exit (no point looping if the
|
||||||
|
// next source has the same problem).
|
||||||
kernel.interrupts.take_next();
|
kernel.interrupts.take_next();
|
||||||
kernel.interrupts.dropped += 1;
|
kernel.interrupts.dropped += 1;
|
||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
|
|
||||||
let t = kernel.scheduler.thread_mut(target_ref);
|
// Commit: pop the queue, flag temporary state.
|
||||||
let prev_state = t.state.clone();
|
let _ = kernel.interrupts.take_next();
|
||||||
match prev_state {
|
let prev_state = kernel.scheduler.thread(target_ref).state.clone();
|
||||||
HwState::Ready => {}
|
let was_blocked = matches!(prev_state, HwState::Blocked(_));
|
||||||
HwState::Blocked(reason) => {
|
if let HwState::Blocked(reason) = prev_state.clone() {
|
||||||
t.state = HwState::ServicingIrq(reason);
|
kernel.scheduler.thread_mut(target_ref).state =
|
||||||
}
|
HwState::ServicingIrq(reason);
|
||||||
_ => unreachable!("victim selection above filtered out other variants"),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let _ = kernel.interrupts.take_next();
|
// Save the borrowed ctx fields the ISR will clobber. Matches
|
||||||
|
// canary's processor.cc:387-394 (save prev lr, run, restore).
|
||||||
|
let saved = {
|
||||||
let t = kernel.scheduler.thread_mut(target_ref);
|
let t = kernel.scheduler.thread_mut(target_ref);
|
||||||
let saved = xenia_kernel::SavedCallbackCtx::capture(&t.ctx, source);
|
let saved = xenia_kernel::SavedCallbackCtx::capture(&t.ctx, source);
|
||||||
kernel.interrupts.injected_ref = Some(target_ref);
|
|
||||||
t.ctx.pc = cb.callback_pc;
|
t.ctx.pc = cb.callback_pc;
|
||||||
t.ctx.lr = xenia_cpu::context::LR_HALT_SENTINEL;
|
t.ctx.lr = xenia_cpu::context::LR_HALT_SENTINEL;
|
||||||
// Canary `Processor::Execute` decrements the guest SP by 176 before
|
// Canary processor.cc:383 — pad SP so the callback's
|
||||||
// running the callback and restores on return (see Canary
|
// __savegprlr_N prologue doesn't stomp the interrupted
|
||||||
// processor.cc:383). Without this pad the callback's
|
// function's saved LR at [r1-8].
|
||||||
// `__savegprlr_N` prologue stomps the interrupted function's
|
|
||||||
// already-saved LR at [r1-8], so when the interrupted function
|
|
||||||
// later returns via `__restgprlr_N -> bclr` it jumps to
|
|
||||||
// `LR_HALT_SENTINEL` and the thread exits prematurely. Matching
|
|
||||||
// restore lives in `SavedCallbackCtx::restore` (which now also
|
|
||||||
// restores r1).
|
|
||||||
t.ctx.gpr[1] = t
|
t.ctx.gpr[1] = t
|
||||||
.ctx
|
.ctx
|
||||||
.gpr[1]
|
.gpr[1]
|
||||||
.wrapping_sub(xenia_kernel::interrupts::CALLBACK_STACK_PAD as u64);
|
.wrapping_sub(xenia_kernel::interrupts::CALLBACK_STACK_PAD as u64);
|
||||||
t.ctx.gpr[3] = source as u64;
|
t.ctx.gpr[3] = source as u64;
|
||||||
t.ctx.gpr[4] = cb.user_data as u64;
|
t.ctx.gpr[4] = cb.user_data as u64;
|
||||||
kernel.interrupts.saved = Some(saved);
|
saved
|
||||||
|
};
|
||||||
|
|
||||||
|
// Stash the previous `scheduler.current` (call_export reaches
|
||||||
|
// it; imports the ISR calls must dispatch on the borrowed
|
||||||
|
// thread). Restore on the way out.
|
||||||
|
let prev_current = kernel.scheduler.current;
|
||||||
|
kernel.scheduler.current = Some(target_ref);
|
||||||
|
|
||||||
metrics::counter!("gpu.interrupt.delivered", "source" => format!("{source}"))
|
metrics::counter!("gpu.interrupt.delivered", "source" => format!("{source}"))
|
||||||
.increment(1);
|
.increment(1);
|
||||||
tracing::debug!(
|
tracing::debug!(
|
||||||
@@ -3262,24 +3497,113 @@ fn try_inject_graphics_interrupt(kernel: &mut xenia_kernel::KernelState) {
|
|||||||
hw_id = target_ref.hw_id,
|
hw_id = target_ref.hw_id,
|
||||||
idx = target_ref.idx,
|
idx = target_ref.idx,
|
||||||
callback = format_args!("{:#010x}", cb.callback_pc),
|
callback = format_args!("{:#010x}", cb.callback_pc),
|
||||||
"graphics interrupt: injecting"
|
"graphics interrupt: dispatching synchronously (iterate-2.BE)"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Inline interpreter loop on the borrowed context until the
|
||||||
|
// ISR returns to LR_HALT_SENTINEL (its `blr` writes
|
||||||
|
// `lr → pc`). Per-instruction step handles imports via
|
||||||
|
// thunk_map (the ISR typically just calls `KeSetEvent`).
|
||||||
|
let mut isr_instrs: u64 = 0;
|
||||||
|
loop {
|
||||||
|
let pc = kernel.scheduler.ctx_mut_ref(target_ref).pc;
|
||||||
|
if pc == LR_HALT {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if isr_instrs >= MAX_INSTRS_PER_ISR {
|
||||||
|
tracing::warn!(
|
||||||
|
pc = format_args!("{:#010x}", pc),
|
||||||
|
isr_instrs,
|
||||||
|
"graphics ISR exceeded MAX_INSTRS_PER_ISR; aborting"
|
||||||
|
);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Import-thunk intercept: same shape as worker_prologue's
|
||||||
|
// step 2 (line ~2287).
|
||||||
|
if let Some((module, ordinal, _name)) = thunk_map.get(&pc) {
|
||||||
|
let module = *module;
|
||||||
|
let ordinal_u32 = *ordinal as u32;
|
||||||
|
kernel.call_export(module, ordinal_u32, mem);
|
||||||
|
let post_ref = kernel.scheduler.current;
|
||||||
|
let c = match post_ref {
|
||||||
|
Some(r) => kernel.scheduler.ctx_mut_ref(r),
|
||||||
|
None => kernel.scheduler.ctx_mut_ref(target_ref),
|
||||||
|
};
|
||||||
|
c.pc = c.lr as u32;
|
||||||
|
c.cycle_count += 1;
|
||||||
|
c.timebase += 1;
|
||||||
|
stats.instruction_count += 1;
|
||||||
|
stats.import_count += 1;
|
||||||
|
isr_instrs += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if !mem.is_mapped(pc) {
|
||||||
|
tracing::error!(
|
||||||
|
pc = format_args!("{:#010x}", pc),
|
||||||
|
isr_instrs,
|
||||||
|
"graphics ISR hit unmapped PC; aborting"
|
||||||
|
);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
let ctx = kernel.scheduler.ctx_mut_ref(target_ref);
|
||||||
|
let page_ver = mem.page_version(ctx.pc);
|
||||||
|
let r = step_cached(ctx, mem, decode_cache, page_ver);
|
||||||
|
stats.instruction_count += 1;
|
||||||
|
isr_instrs += 1;
|
||||||
|
match r {
|
||||||
|
StepResult::Continue => {}
|
||||||
|
StepResult::SystemCall => {
|
||||||
|
tracing::warn!("graphics ISR hit `sc` instruction; aborting");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
StepResult::Trap => {
|
||||||
|
tracing::warn!("graphics ISR hit trap; aborting");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
StepResult::Halted => break,
|
||||||
|
StepResult::Unimplemented(op) => {
|
||||||
|
tracing::warn!(?op, "graphics ISR hit unimplemented opcode; aborting");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Restore the borrowed context.
|
||||||
|
saved.restore(kernel.scheduler.ctx_mut_ref(target_ref));
|
||||||
|
kernel.scheduler.current = prev_current;
|
||||||
|
kernel.interrupts.delivered += 1;
|
||||||
|
|
||||||
|
// Restore thread state. If the ISR signaled a wake on the
|
||||||
|
// borrowed thread (e.g. canary `KeSetEvent` → scheduler wake)
|
||||||
|
// the state may already be Ready; only re-block if still
|
||||||
|
// ServicingIrq.
|
||||||
|
if was_blocked {
|
||||||
|
let t = kernel.scheduler.thread_mut(target_ref);
|
||||||
|
if let HwState::ServicingIrq(reason) = t.state.clone() {
|
||||||
|
t.state = HwState::Blocked(reason);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// AUDIT-032 Plan B — inject a pending XAudio buffer-complete callback
|
/// AUDIT-032 Plan B — inject a pending XAudio buffer-complete callback
|
||||||
/// into the **dedicated audio worker** registered for the head-of-queue
|
/// into the **dedicated audio worker** registered for the head-of-queue
|
||||||
/// client. Mirrors
|
/// client. Uses the asynchronous LR-sentinel injection mechanism (same
|
||||||
/// [`try_inject_graphics_interrupt`] (same SP-pad, same saved-context
|
/// SP-pad, same `SavedCallbackCtx` restore-on-sentinel as the pre-iterate-2.BE
|
||||||
/// restore-on-sentinel) but the target thread is fixed at registration
|
/// graphics path) but the target thread is fixed at registration time
|
||||||
/// time instead of selected via the random-victim policy. The pre-fix
|
/// instead of selected via the random-victim policy. The pre-fix
|
||||||
/// random-victim path corrupted unrelated thread state
|
/// random-victim path corrupted unrelated thread state
|
||||||
/// (APUBUG-PRODUCER-001 "HW-thread hijack"); per-client workers eliminate
|
/// (APUBUG-PRODUCER-001 "HW-thread hijack"); per-client workers eliminate
|
||||||
/// that whole class of regression.
|
/// that whole class of regression.
|
||||||
///
|
///
|
||||||
/// Mutual exclusion with the graphics path is via the shared
|
/// Mutual exclusion with the graphics path (which is now synchronous —
|
||||||
/// `interrupts.saved` slot — if a graphics callback is already in flight,
|
/// see `dispatch_graphics_interrupts`) is via the shared
|
||||||
/// `is_in_callback()` returns true and we bail until it returns to the
|
/// `interrupts.saved` slot — if an audio callback is already in flight,
|
||||||
/// `LR_HALT_SENTINEL`.
|
/// `is_in_callback()` returns true and `dispatch_graphics_interrupts`
|
||||||
|
/// defers until it returns to the `LR_HALT_SENTINEL`.
|
||||||
fn try_inject_audio_callback(kernel: &mut xenia_kernel::KernelState) {
|
fn try_inject_audio_callback(kernel: &mut xenia_kernel::KernelState) {
|
||||||
use xenia_cpu::scheduler::HwState;
|
use xenia_cpu::scheduler::HwState;
|
||||||
|
|
||||||
|
|||||||
@@ -8,13 +8,18 @@
|
|||||||
//! guest-issued command stream; source code 1 (`INTERRUPT_SOURCE_CP`).
|
//! guest-issued command stream; source code 1 (`INTERRUPT_SOURCE_CP`).
|
||||||
//!
|
//!
|
||||||
//! Canary's [xboxkrnl_video.cc:303-310](xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc#L303-L310)
|
//! Canary's [xboxkrnl_video.cc:303-310](xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc#L303-L310)
|
||||||
//! dispatches the callback on HW thread 0. We follow the same convention.
|
//! dispatches the callback on HW thread 0. We follow the same convention
|
||||||
|
//! for picking a *context donor*, but as of iterate-2.BE the dispatch
|
||||||
|
//! itself is **synchronous and host-driven**: the main loop runs the ISR
|
||||||
|
//! inline on the borrowed guest context, mirroring canary's
|
||||||
|
//! `EmulateCPInterruptDPC → Processor::Execute` path
|
||||||
|
//! ([kernel_state.cc:1370](../../../../xenia-canary/src/xenia/kernel/kernel_state.cc#L1370),
|
||||||
|
//! [processor.cc:413](../../../../xenia-canary/src/xenia/cpu/processor.cc#L413)).
|
||||||
|
//! Independent of whether the donor guest thread was Ready or Blocked.
|
||||||
//!
|
//!
|
||||||
//! The delivery model is cooperative: we inject the callback entry into HW
|
//! The audio callback path (audit-048) still uses asynchronous LR-sentinel
|
||||||
//! thread 0 at the top of a scheduler round when it's safe (not mid-export,
|
//! injection on a dedicated per-client worker thread; the
|
||||||
//! not already inside another interrupt). When the callback returns to
|
//! [`SavedCallbackCtx`] machinery below remains in use there.
|
||||||
//! [`LR_HALT_SENTINEL`] the main loop restores the saved [`PpcContext`]
|
|
||||||
//! fields and the HW thread picks up where it left off.
|
|
||||||
|
|
||||||
use std::collections::VecDeque;
|
use std::collections::VecDeque;
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
|
|||||||
@@ -244,6 +244,41 @@ pub struct KernelState {
|
|||||||
/// Distinct from `ctor_probe_pcs` because that helper emits 8
|
/// Distinct from `ctor_probe_pcs` because that helper emits 8
|
||||||
/// frames of back-chain per hit — too noisy for branch tracing.
|
/// frames of back-chain per hit — too noisy for branch tracing.
|
||||||
pub branch_probe_pcs: std::collections::HashSet<u32>,
|
pub branch_probe_pcs: std::collections::HashSet<u32>,
|
||||||
|
/// AUDIT-2BF — diagnostic. PCs at which to emit a structured one-line
|
||||||
|
/// `AUDIT-PC-PROBE` record on every fire, designed for the silph init
|
||||||
|
/// chain virtual-dispatch site at `sub_82172BA0+0x1E8` (PC
|
||||||
|
/// `0x82172D88`, a `bctrl` after a 3-deep load of vtable slot 6). The
|
||||||
|
/// emitted line carries (pc, tid, hw, cycle, lr, r3, r11) plus four
|
||||||
|
/// guest-memory dereferences off `r3`: `[r3+0]` (vtable), `[[r3+0]+24]`
|
||||||
|
/// (slot 6 method pointer = the bctrl target), `[r3+0x0C]` (audit-059
|
||||||
|
/// round-9 canary-known auxiliary handle `0xF80000D8`), and `[r3+0x30]`
|
||||||
|
/// (canary-known embedded sub-object vtable `0x820A1870`). Distinct
|
||||||
|
/// from `branch_probe_pcs` because that helper only logs registers (no
|
||||||
|
/// memory) and from `lr_trace_pcs` because that emits JSON intended
|
||||||
|
/// for canary diffing, not the four hard-coded indirect dereferences
|
||||||
|
/// needed here. Read-only — no guest state mutation. Lockstep
|
||||||
|
/// digest unaffected. Settable via `--audit-pc-probe-hex` /
|
||||||
|
/// `XENIA_AUDIT_PC_PROBE`.
|
||||||
|
pub audit_pc_probe_pcs: std::collections::HashSet<u32>,
|
||||||
|
/// AUDIT-2BF round 14 — diagnostic. Optional guest VA. When set, each
|
||||||
|
/// `AUDIT-PC-PROBE` fire emits a paired `AUDIT-MEM-READ` line with
|
||||||
|
/// `addr`, `*addr` (singleton value), `**addr` (vtable), `*(**addr+0)`
|
||||||
|
/// (vtable[0] = first virtual method), and `*(**addr+24)` (vtable[6]
|
||||||
|
/// in 4-byte stride = slot 6 = silph chain bctrl target). Three-deep
|
||||||
|
/// dereference to resolve the vtable[0] target at the bctrl site
|
||||||
|
/// `0x822F1B4C` inside `sub_822F1AA8`. Read-only; lockstep digest
|
||||||
|
/// unaffected. Settable via `--audit-mem-read-hex` /
|
||||||
|
/// `XENIA_AUDIT_MEM_READ`.
|
||||||
|
pub audit_mem_read_addr: Option<u32>,
|
||||||
|
/// AUDIT-052 — diagnostic. When set, each `AUDIT-PC-PROBE` fire
|
||||||
|
/// additionally emits an `AUDIT-R3-DUMP` line with N bytes of guest
|
||||||
|
/// memory dumped from `r3` as `u32` lanes (4-byte aligned only).
|
||||||
|
/// Sized for audit-051's 80-byte stack-local struct at `r31+96`
|
||||||
|
/// inside `sub_82452DC0` (probe `sub_8245B000` entry where
|
||||||
|
/// `r3 == parent's r31+96`). Read-only; lockstep digest unaffected.
|
||||||
|
/// Settable via `--audit-r3-dump-bytes` /
|
||||||
|
/// `XENIA_AUDIT_R3_DUMP_BYTES`.
|
||||||
|
pub audit_r3_dump_bytes: Option<u32>,
|
||||||
/// M12 — diagnostic. PCs at which to emit a structured JSONL record
|
/// M12 — diagnostic. PCs at which to emit a structured JSONL record
|
||||||
/// per fire, designed for diffing against xenia-canary's
|
/// per fire, designed for diffing against xenia-canary's
|
||||||
/// `--log_lr_on_pc` patch output. Each line carries
|
/// `--log_lr_on_pc` patch output. Each line carries
|
||||||
@@ -327,6 +362,9 @@ impl KernelState {
|
|||||||
ctor_probe_pcs: std::collections::HashSet::new(),
|
ctor_probe_pcs: std::collections::HashSet::new(),
|
||||||
pc_probe_consumers: HashMap::new(),
|
pc_probe_consumers: HashMap::new(),
|
||||||
branch_probe_pcs: std::collections::HashSet::new(),
|
branch_probe_pcs: std::collections::HashSet::new(),
|
||||||
|
audit_pc_probe_pcs: std::collections::HashSet::new(),
|
||||||
|
audit_mem_read_addr: None,
|
||||||
|
audit_r3_dump_bytes: None,
|
||||||
lr_trace_pcs: std::collections::HashSet::new(),
|
lr_trace_pcs: std::collections::HashSet::new(),
|
||||||
lr_trace_writer: None,
|
lr_trace_writer: None,
|
||||||
dump_addrs: Vec::new(),
|
dump_addrs: Vec::new(),
|
||||||
@@ -797,6 +835,91 @@ impl KernelState {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// AUDIT-2BF — diagnostic. If the live PC for HW slot `hw_id` is in
|
||||||
|
/// `self.audit_pc_probe_pcs`, emit a single one-line
|
||||||
|
/// `AUDIT-PC-PROBE` record with (pc, tid, hw, cycle, lr, r3, r11)
|
||||||
|
/// plus four guest-memory dereferences off r3: `[r3+0]` (vtable),
|
||||||
|
/// `[[r3+0]+24]` (slot 6 method = bctrl target), `[r3+0x0C]`
|
||||||
|
/// (auxiliary handle field), `[r3+0x30]` (embedded sub-object
|
||||||
|
/// vtable field). Tuned for the silph init chain virtual-dispatch
|
||||||
|
/// site at `sub_82172BA0+0x1E8` (PC `0x82172D88`).
|
||||||
|
///
|
||||||
|
/// Read-only. No guest-state mutation; lockstep digest unaffected.
|
||||||
|
/// Empty set is the common case → single `is_empty()` test on the
|
||||||
|
/// hot path.
|
||||||
|
pub fn fire_audit_pc_probe_if_match(&self, hw_id: u8, mem: &GuestMemory) {
|
||||||
|
if self.audit_pc_probe_pcs.is_empty() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let ctx = self.scheduler.ctx(hw_id);
|
||||||
|
let pc = ctx.pc;
|
||||||
|
if !self.audit_pc_probe_pcs.contains(&pc) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let tid = self.scheduler.tid(hw_id).unwrap_or(0);
|
||||||
|
let r3 = ctx.gpr[3] as u32;
|
||||||
|
let r11 = ctx.gpr[11] as u32;
|
||||||
|
let lr = ctx.lr as u32;
|
||||||
|
let cycle = ctx.cycle_count;
|
||||||
|
// Memory dereferences. Guest pointers may be unmapped/garbage;
|
||||||
|
// `read_u32` returns 0 for unmapped pages (heap.rs:510 returns
|
||||||
|
// a default), so an all-zero block in the output reliably
|
||||||
|
// indicates an invalid `r3`.
|
||||||
|
let vtable = mem.read_u32(r3);
|
||||||
|
let slot6_method = if vtable != 0 {
|
||||||
|
mem.read_u32(vtable.wrapping_add(24))
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
let aux_handle = mem.read_u32(r3.wrapping_add(0x0C));
|
||||||
|
let sub_vt = mem.read_u32(r3.wrapping_add(0x30));
|
||||||
|
println!(
|
||||||
|
"AUDIT-PC-PROBE pc={:#010x} tid={} hw={} cycle={} lr={:#010x} r3={:#010x} r11={:#010x} \
|
||||||
|
[r3+0]={:#010x} [[r3+0]+24]={:#010x} [r3+0x0C]={:#010x} [r3+0x30]={:#010x}",
|
||||||
|
pc, tid, hw_id, cycle, lr, r3, r11,
|
||||||
|
vtable, slot6_method, aux_handle, sub_vt,
|
||||||
|
);
|
||||||
|
// AUDIT-2BF round 14 — paired memory-read. When
|
||||||
|
// `audit_mem_read_addr` is set, dereference 3 deep: singleton
|
||||||
|
// pointer → vtable → vtable[0] / vtable[24]. Defensively
|
||||||
|
// null-checks each level. `read_u32` returns 0 for unmapped
|
||||||
|
// pages so all-zero output is the unmapped/uninitialized
|
||||||
|
// signature.
|
||||||
|
if let Some(addr) = self.audit_mem_read_addr {
|
||||||
|
let val = mem.read_u32(addr);
|
||||||
|
let vt = if val != 0 { mem.read_u32(val) } else { 0 };
|
||||||
|
let m0 = if vt != 0 { mem.read_u32(vt) } else { 0 };
|
||||||
|
let m6 = if vt != 0 { mem.read_u32(vt.wrapping_add(24)) } else { 0 };
|
||||||
|
println!(
|
||||||
|
"AUDIT-MEM-READ addr={:#010x} val={:#010x} vtable={:#010x} \
|
||||||
|
vtable[0]={:#010x} vtable[24]={:#010x} pc={:#010x} tid={} cycle={}",
|
||||||
|
addr, val, vt, m0, m6, pc, tid, cycle,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// AUDIT-052 — dump N bytes of guest memory from r3 as u32 lanes
|
||||||
|
// when `audit_r3_dump_bytes` is set. Sized for the 80-byte
|
||||||
|
// stack-local struct at sub_82452DC0's `r31+96` (probe is
|
||||||
|
// sub_8245B000 entry where r3 IS the struct ptr). Output
|
||||||
|
// format: `AUDIT-R3-DUMP pc=… r3=… +0x00=… +0x04=… …`.
|
||||||
|
if let Some(n) = self.audit_r3_dump_bytes {
|
||||||
|
let n = n.min(256) & !3u32; // cap 256B, 4-byte align
|
||||||
|
let mut out = String::with_capacity(64 + (n as usize) * 16);
|
||||||
|
use std::fmt::Write as _;
|
||||||
|
let _ = write!(
|
||||||
|
&mut out,
|
||||||
|
"AUDIT-R3-DUMP pc={:#010x} tid={} cycle={} r3={:#010x}",
|
||||||
|
pc, tid, cycle, r3,
|
||||||
|
);
|
||||||
|
let mut off: u32 = 0;
|
||||||
|
while off < n {
|
||||||
|
let v = mem.read_u32(r3.wrapping_add(off));
|
||||||
|
let _ = write!(&mut out, " +0x{:02x}={:#010x}", off, v);
|
||||||
|
off = off.wrapping_add(4);
|
||||||
|
}
|
||||||
|
println!("{}", out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// M12 — diagnostic. If the live PC for HW slot `hw_id` is in
|
/// M12 — diagnostic. If the live PC for HW slot `hw_id` is in
|
||||||
/// `self.lr_trace_pcs`, emit one JSONL record. Format mirrors what
|
/// `self.lr_trace_pcs`, emit one JSONL record. Format mirrors what
|
||||||
/// xenia-canary's `--log_lr_on_pc` patch emits, plus the cycle
|
/// xenia-canary's `--log_lr_on_pc` patch emits, plus the cycle
|
||||||
|
|||||||
Reference in New Issue
Block a user