From c51f51f9cb1ba7e947bdc3b5416c5f2fecffe17c Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Mon, 4 May 2026 21:35:10 +0200 Subject: [PATCH] =?UTF-8?q?feat(kernel):=20KRNBUG-AUDIT-007=20=E2=80=94=20?= =?UTF-8?q?--branch-probe=20instrumentation;=20sub=5F824A9710=20exit=20gat?= =?UTF-8?q?e=20identified?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sister to --pc-probe / --ctor-probe but emits a single compact one-line BRANCH-PROBE record per fire (pc, tid, hw, cycle, r3, lr, cr0/cr6 flags) with no back-chain. Designed for tracing every conditional-branch fire inside a candidate-gate function so the last PC reached before the function epilogue identifies the exit branch. Runtime trace at audit-runs/audit-007/sub_824A9710-trace.log decisively identifies the priv-11 gate: - Exit branch: 0x824a9944 (post bl sub_824ABD88 first call) - Responsible kernel call: NtDeviceIoControlFile, FsCtlCode=0x74004 (registered as stub_success at exports.rs:90) - Mechanical chain: stub returns 0/SUCCESS without writing OUT, game reads [out_buf+8], finds zero, assigns hardcoded 0xC0000034 (STATUS_OBJECT_NAME_NOT_FOUND) at sub_824ABD88:0x824abea8-ac, exits via 0x824a9944's lt branch before priv-11 site at 0x824a99a0. 592→592 tests; lockstep instructions=100000010, swaps=2, draws=0 deterministic across reruns. Read-only diagnostic — no fix this session. Next session: KRNBUG-IO-003 (real NtDeviceIoControlFile per canary NullDevice::IoControl for FsCtlCodes 0x70000 + 0x74004). Co-Authored-By: Claude Opus 4.7 (1M context) --- audit-findings.md | 140 +++++++++++++++++++++++++++++++ crates/xenia-app/src/main.rs | 46 ++++++++++ crates/xenia-kernel/src/state.rs | 43 ++++++++++ 3 files changed, 229 insertions(+) diff --git a/audit-findings.md b/audit-findings.md index 1b41add..abf8fbe 100644 --- a/audit-findings.md +++ b/audit-findings.md @@ -5079,3 +5079,143 @@ the priv-11 gate**. 
- `audit-runs/post-IO-002/canary_only.txt` — set-difference output (the 7-entry list) - `audit-runs/post-IO-002/canary_exports.txt`, `ours_exports.txt` — sorted unique export names + +--- + +## KRNBUG-AUDIT-007 — branch-probe instrumentation + sub_824A9710 exit-branch identification (2026-05-04) + +### Outcome + +**`--branch-probe` instrumentation landed (read-only diagnostic). Runtime trace decisively identified the priv-11 gate.** +- 592→592 tests; lockstep `instructions=100000010, swaps=2, draws=0` deterministic across reruns + (`audit-runs/audit-007/lock_post_branchprobe.json` ≡ `lock_post_branchprobe_run2.json` + ≡ `audit-runs/post-IO-002/lock_n100m_run1.json`). +- Branch: `investigate-sub-824a9710/p0-branch-probe` — kept (instrumentation is reusable). + +### Decisive runtime evidence + +`audit-runs/audit-007/sub_824A9710-trace.log`: +``` +BRANCH-PROBE pc=0x824a9710 tid=1 hw=0 cycle=5363003 r3=0x00000000 lr=0x824a9acc +BRANCH-PROBE pc=0x824a97e0 tid=1 hw=0 cycle=5369559 r3=0xc0000034 lr=0x824a9940 +BRANCH-PROBE pc=0x824a9a98 tid=1 hw=0 cycle=5369562 r3=0x00000002 lr=0x824a97e4 +``` + +The probe at `0x824a97e0` (the failure landing pad) captured `r3=0xC0000034`, `lr=0x824a9940` (= the +`cmpi 0,r3,0` PC after `bl sub_824ABD88` at `0x824a993c`). This pinpoints: +- **Exit branch**: `0x824a9944` (`bc 12, lt, 0x824A97E0`) — taken because r3 was 0xC0000034 < 0. +- **Responsible bl**: `0x824a993c` → `sub_824ABD88` first call. +- **Status code**: `0xC0000034` = `STATUS_OBJECT_NAME_NOT_FOUND`. + +### Root-cause chain through sub_824ABD88 + +The function-detector's `end_address=0x824abe3c` for sub_824ABD88 was a truncation artifact; +the function actually runs to `0x824ac184`. 
Within that range the `0xC0000034` is **HARDCODED** +at `0x824abea8-0x824abeac`: +``` +0x824abe90 bl NtDeviceIoControlFile (FsCtlCode=0x74004, out_buf=r1+160, out_len=16) +0x824abe94 cmpi 0, r3, 0 +0x824abe98 bc 12, lt, 0x824abeb8 # if r3 < 0 → failure cleanup (NOT taken; stub returned 0 = success) +0x824abe9c ld r10, 168(r1) # load doubleword from [out_buf+8] +0x824abea0 cmpi cr6, 1, r10, 0 # 64-bit cmp r10 == 0 +0x824abea4 bc 4, 4*cr6+eq, 0x824abeb0 # if NOT eq, skip the assignment +0x824abea8 addis r3, r0, 0xC000 # r3 = 0xC0000000 +0x824abeac ori r3, r3, 0x34 # r3 = 0xC0000034 (STATUS_OBJECT_NAME_NOT_FOUND) +0x824abeb0 cmpi cr6, 0, r3, 0 +0x824abeb4 bc 4, 4*cr6+lt, 0x824abecc # if NOT lt → success path; r3 < 0 → NOT taken +0x824abeb8 or r28, r3, r3 # save 0xC0000034 +0x824abebc lwz r3, 96(r1) +0x824abec0 bl NtClose +0x824abec4 or r3, r28, r28 # restore failure status +0x824abec8 b 0x824abe34 # epilogue → return 0xC0000034 +``` + +The game expects the IOCTL response's upper 8 bytes to be non-zero. Our +`NtDeviceIoControlFile` is registered as `stub_success` at +`crates/xenia-kernel/src/exports.rs:90` — returns 0 (SUCCESS) but writes nothing +into the OUT buffer. The fresh stack frame has zero at `[r1+168]`, so the check +at `0x824abea4` falls through to the hardcoded failure assignment. + +### Canary reference + +`audit-runs/post-IO-002/canary.log` lines 1196-1209 show canary calls +`NtDeviceIoControlFile(handle, ..., FsCtlCode=0x74004, ..., out_buf, out_len=16)`, +gets a populated 16-byte response (whose upper 8 bytes are non-zero), then proceeds +through 17× NtWriteFile zero-fill, NtClose, NtCreateFile (Cache0\), NtQueryVolumeInformationFile +class=3, NtClose, and finally **`XexCheckExecutablePrivilege(0x0000000B)`** — the +priv-11 site that has never fired in our run. Immediately followed by +**`XamTaskSchedule(824A93C8, 828A28F0, ...)`** — the canary-only export hunt's +gate-pivot call. 
+ +The IOCTL implementation in canary lives in `xenia-canary/src/xenia/vfs/devices/null_device.{h,cc}` +(`NullDevice::IoControl`) — the device's `IoControl` writes the structured payload +that the game-side check consumes. + +### Next session: KRNBUG-IO-003 + +**Where:** `crates/xenia-kernel/src/exports.rs:90` — replace the +`stub_success` registration with a real `nt_device_io_control_file`. + +**Minimum viable fix:** for FsCtlCode=0x74004, write any non-zero u64 at +`[out_buf+8]`. That alone clears the gate. + +**Canary-faithful fix:** mirror `NullDevice::IoControl` for FsCtlCodes +`0x70000` (8-byte response, consumed at `sub_824ABD88:0x824abe3c` for a +log2/shift count) and `0x74004` (16-byte response, partition geometry). +Fall through to `STATUS_NOT_IMPLEMENTED` for unrecognized codes so future +divergences surface. + +**Falsifiable cascade prediction:** +- `XexCheckExecutablePrivilege` count: **1 → 2** (priv=0xA + priv=0xB). +- `XamTaskSchedule` count: **0 → 1**. +- canary-only export count: **7 → ≤ 3**. +- Worker thread spawn at `ExCreateThread(entry=0x82181830, ctx=0x828F3D08)` — + the parked-handle 0x100c producer fires. +- `swaps=2 draws=0` plateau persists (renderer is multi-causal). + +**Failure modes to watch for:** +- (α) Re-running `--branch-probe` should show a NEW exit branch in + `sub_824A9710` (one of `0x824a996c`, `0x824a9998`, `0x824a9a18`) if a downstream + helper has its own unimplemented dependency. +- (β) sub_824ABA98's analogous failure path (called at 0x824a9950, 0x824a9990) + may surface if its own kernel-call dependencies are stubs. +- (γ) `nt_write_file` against the synth empty-file Cache0 path needs to handle + the 17× zero-fill loop; if our implementation rejects writes to a zero-byte + file, the cascade stalls just past the IOCTL fix. 
+ +### Files added / modified (instrumentation only) + +- `crates/xenia-kernel/src/state.rs` — added `branch_probe_pcs: HashSet<u32>` + field + `fire_branch_probe_if_match(hw_id)` method emitting a single compact + `BRANCH-PROBE` line per fire (pc, tid, hw, cycle, r3, lr, cr0/cr6). Sister to + `fire_ctor_probe_if_match`; no back-chain walk. ~40 LOC. +- `crates/xenia-app/src/main.rs` — `--branch-probe` CLI flag (env var + `XENIA_BRANCH_PROBE`), parser, and call in `worker_prologue`. ~30 LOC. + +### Probe-machinery limitation + +The probe fires only when the **block head** at the matched PC is dispatched — +mid-block PCs in the request set don't trigger because the prologue runs once +per block, not once per instruction. In this trace: function entries, failure +landing pads (`0x824a97e0`), and external-call return PCs (`0x824a9a98`) all +hit. Internal `bc` PCs (`0x824a9944`, `0x824a9958`, ...) were silent. The data +captured was sufficient — the failure landing PC + LR pair uniquely identified +the upstream branch — but if a future audit needs every-branch coverage, the +helper call would need to move from `worker_prologue` into the per-instruction +step loop (or a custom block-scan that flags branches matching the request +list). + +### Trace artifacts (re-runnable) + +- `audit-runs/audit-007/sub_824A9710-trace.log` — 5 BRANCH-PROBE lines + thread diagnostics. +- `audit-runs/audit-007/sub_824A9710-trace.err` — full kernel-call trace + counter dump. +- `audit-runs/audit-007/lock_post_branchprobe.json`, `lock_post_branchprobe_run2.json` — lockstep digests.
+ +Re-run command: +``` +PROBE_LIST="0x824a9aa0,0x824a9128,0x824a9710,0x824a9778,0x824a9788,0x824a9790,0x824a97dc,0x824a97e0,0x824a9824,0x824a9828,0x824a9840,0x824a9850,0x824a985c,0x824a9870,0x824a9880,0x824a9888,0x824a9918,0x824a9944,0x824a9958,0x824a996c,0x824a9998,0x824a999c,0x824a99a0,0x824a99a8,0x824a9a10,0x824a9a18,0x824a9a60,0x824a9a78,0x824a9a98" +./target/release/xenia-rs exec sylpheed.iso --halt-on-deadlock \ + --branch-probe="$PROBE_LIST" -n 500_000_000 \ + > audit-runs/audit-007/sub_824A9710-trace.log \ + 2> audit-runs/audit-007/sub_824A9710-trace.err +``` diff --git a/crates/xenia-app/src/main.rs b/crates/xenia-app/src/main.rs index 65ed5ca..751d9d2 100644 --- a/crates/xenia-app/src/main.rs +++ b/crates/xenia-app/src/main.rs @@ -201,6 +201,18 @@ enum Commands { /// runs — typically when `--halt-on-deadlock` triggers. #[arg(long)] dump_addr: Option<String>, + /// Diagnostic. Comma-separated list of guest PCs that emit a + /// one-line `BRANCH-PROBE` record on each fire (pc, tid, hw, + /// cycle, r3, lr, cr0/cr6 flags). Sister to `--pc-probe` but + /// without the 8-frame back-chain, suited for tracing every + /// conditional branch inside a candidate-gate function so the + /// last PC reached before the function epilogue identifies the + /// exit branch. Example: + /// `--branch-probe=0x824a9710,0x824a9778,0x824a97dc,...`. + /// Read-only; lockstep digest unaffected. Settable via + /// `XENIA_BRANCH_PROBE`.
+ #[arg(long)] + branch_probe: Option<String>, }, /// Browse XISO disc image contents Browse { @@ -358,6 +370,7 @@ fn main() -> Result<()> { xaudio_tick, ctor_probe, dump_addr, + branch_probe, } => cmd_exec( &path, max_instructions, @@ -378,6 +391,7 @@ fn main() -> Result<()> { xaudio_tick, ctor_probe.as_deref(), dump_addr.as_deref(), + branch_probe.as_deref(), ), Commands::Browse { path } => cmd_browse(&path), Commands::Info { path } => cmd_info(&path), @@ -581,6 +595,7 @@ fn cmd_exec( xaudio_tick: bool, ctor_probe: Option<&str>, dump_addr: Option<&str>, + branch_probe: Option<&str>, ) -> Result<()> { cmd_exec_inner( path, @@ -602,6 +617,7 @@ fn cmd_exec( xaudio_tick, ctor_probe, dump_addr, + branch_probe, None, None, false, @@ -642,6 +658,7 @@ fn cmd_check( xaudio_tick, None, // ctor_probe — diagnostic, never wanted on goldens None, // dump_addr — same + None, // branch_probe — diagnostic, never wanted on goldens out, expect, stable_digest, @@ -668,6 +685,7 @@ fn cmd_exec_inner( xaudio_tick: bool, ctor_probe: Option<&str>, dump_addr: Option<&str>, + branch_probe: Option<&str>, digest_out: Option<&str>, digest_expect: Option<&str>, stable_digest: bool, @@ -972,6 +990,33 @@ fn cmd_exec_inner( } } + let branch_probe_combined: Option<String> = match ( + branch_probe, + std::env::var("XENIA_BRANCH_PROBE").ok(), + ) { + (Some(s), _) => Some(s.to_string()), + (None, Some(s)) if !s.is_empty() => Some(s), + _ => None, + }; + if let Some(list) = branch_probe_combined { + for token in list.split(',').map(str::trim).filter(|s| !s.is_empty()) { + let pc = parse_hex_u32(token).map_err(|e| { + anyhow::anyhow!("invalid PC in --branch-probe: {token:?}: {e}") + })?; + kernel.branch_probe_pcs.insert(pc); + } + if !quiet && !kernel.branch_probe_pcs.is_empty() { + let mut pcs: Vec<u32> = kernel.branch_probe_pcs.iter().copied().collect(); + pcs.sort_unstable(); + let strs: Vec<String> = pcs.iter().map(|p| format!("{p:#010x}")).collect(); + tracing::info!( + "branch probes armed: {} ({})", +
kernel.branch_probe_pcs.len(), + strs.join(", "), + ); + } + } + // Diagnostic. Parse `--dump-addr=0x828F3D08,...` (or // `XENIA_DUMP_ADDR=...`) into `kernel.dump_addrs`. The contents // are dumped at end-of-run by `dump_thread_diagnostic`. Pure @@ -1952,6 +1997,7 @@ fn worker_prologue( // Empty set is the common case → single `is_empty()` test inside // the helper, no overhead on the hot path. kernel.fire_ctor_probe_if_match(hw_id, mem); + kernel.fire_branch_probe_if_match(hw_id); // 1) Halt-sentinel check (per HW thread). if pc == LR_HALT { diff --git a/crates/xenia-kernel/src/state.rs b/crates/xenia-kernel/src/state.rs index d127276..6f4b962 100644 --- a/crates/xenia-kernel/src/state.rs +++ b/crates/xenia-kernel/src/state.rs @@ -215,6 +215,15 @@ pub struct KernelState { /// extended syntax of `--pc-probe` / `--ctor-probe`. Read-only /// load — does not mutate guest state. pub pc_probe_consumers: HashMap, + /// Diagnostic. Comma-separated set of guest PCs that, when reached, + /// emit a single compact one-line `BRANCH-PROBE` record. The line + /// includes (pc, tid, hw, cycle, r3, lr, cr0.{lt,gt,eq}, cr6.{lt,gt,eq}) + /// — designed for tracing every conditional-branch fire inside a + /// candidate-gate function (sub_824A9710 etc.) so the LAST PC + /// reached before function epilogue identifies the exit branch. + /// Distinct from `ctor_probe_pcs` because that helper emits 8 + /// frames of back-chain per hit — too noisy for branch tracing. + pub branch_probe_pcs: std::collections::HashSet<u32>, /// Diagnostic. Guest addresses to dump (64 bytes each, hex + u32 /// lanes) at end-of-run. Populated from `--dump-addr=0x828F3D08, /// 0x828F4070`.
Used to inspect static dispatcher / job-queue / @@ -277,6 +286,7 @@ impl KernelState { parallel_active: false, ctor_probe_pcs: std::collections::HashSet::new(), pc_probe_consumers: HashMap::new(), + branch_probe_pcs: std::collections::HashSet::new(), dump_addrs: Vec::new(), }; crate::exports::register_exports(&mut state); @@ -620,6 +630,39 @@ impl KernelState { } } + /// Diagnostic. If the live PC for HW slot `hw_id` is in + /// `self.branch_probe_pcs`, emit one compact `BRANCH-PROBE` line + /// with (pc, tid, hw, cycle, r3, lr, cr0.{lt,gt,eq}, cr6.{lt,gt,eq}). + /// No back-chain walk — designed for tracing every conditional + /// branch fire inside a candidate-gate function. Read-only. + /// Lockstep digest unaffected. Caveat: invoked from `worker_prologue`, which runs once per block, so only block-head PCs fire; mid-block PCs in the set stay silent. + pub fn fire_branch_probe_if_match(&self, hw_id: u8) { + if self.branch_probe_pcs.is_empty() { + return; + } + let ctx = self.scheduler.ctx(hw_id); + let pc = ctx.pc; + if !self.branch_probe_pcs.contains(&pc) { + return; + } + let tid = self.scheduler.tid(hw_id).unwrap_or(0); + let r3 = ctx.gpr[3] as u32; + let lr = ctx.lr as u32; + let cycle = ctx.cycle_count; + let cr0 = &ctx.cr[0]; + let cr6 = &ctx.cr[6]; + println!( + "BRANCH-PROBE pc={:#010x} tid={} hw={} cycle={} r3={:#010x} lr={:#010x} cr0={}{}{} cr6={}{}{}", + pc, tid, hw_id, cycle, r3, lr, + if cr0.lt { 'L' } else { '.' }, + if cr0.gt { 'G' } else { '.' }, + if cr0.eq { 'E' } else { '.' }, + if cr6.lt { 'L' } else { '.' }, + if cr6.gt { 'G' } else { '.' }, + if cr6.eq { 'E' } else { '.' }, + ); + } + /// Read a TLS slot for the currently running HW thread. pub fn tls_get(&self, index: u32) -> u64 { self.scheduler.tls_get(index)