From a1a7265f2957f588b91cb45eef2b6eef5494c879 Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Mon, 4 May 2026 22:00:12 +0200 Subject: [PATCH] =?UTF-8?q?fix(kernel):=20KRNBUG-IO-003=20=E2=80=94=20NtDe?= =?UTF-8?q?viceIoControlFile=20real=20impl=20mirroring=20NullDevice::IoCon?= =?UTF-8?q?trol?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the stub_success registration of NtDeviceIoControlFile at exports.rs:90 with a real handler for FsCtlCodes 0x70000 (drive geometry) and 0x74004 (partition info), mirroring xenia-canary xboxkrnl_io.cc:645-678 + null_device.{h,cc}. The 16-byte 0x74004 response with cache_size=0xFF000 at OUT+8 is the gate that lets sub_824ABD88 return SUCCESS and sub_824A9710 reach the priv-11 XexCheckExecutablePrivilege site identified by KRNBUG-AUDIT-007. Stack args 9-10 (OutputBuffer, OutputBufferLength) read from the caller's parameter save area at [sp+0x54] / [sp+0x5C] per the Xbox 360 PowerPC EABI (linkage area sp+0..sp+8, 8-quadword spill area sp+0x14..sp+0x54, then stack args every 8 bytes). First HLE export in the codebase to need 9+ args. Cascade vs. KRNBUG-AUDIT-007 prediction (5/8 held): - XexCheckExecutablePrivilege count 1 → 2 (priv=0xA + priv=0xB) ✓ - XamTaskSchedule count 0 → 1 ✓ - canary-only exports 7 → 3 (audit predicted ≤3) ✓ - 0x15e0 semaphore signal_attempts 0 → 1 (bonus) - 0x100c worker spawn DID NOT fire (still UNCREATED) ✗ - 0x1004 signal_attempts unchanged ✗ - Worker spawn count unchanged at 19 ✗ Tests: 592 → 594. Lockstep deterministic at -n 100M (run1 ≡ run2 ≡ run3, byte-identical). instructions=100000010 → 100000019, imports 407417 → 987524 (+2.4×). swaps=2 draws=0 plateau persists. sylpheed_n50m golden re-baselined instructions=50000004→50000003, imports=407362→407255. sylpheed_n2m unchanged. Still canary-only after this fix: ExTerminateThread, KeReleaseSemaphore, XamUserReadProfileSettings. The next downstream gate is somewhere past XamTaskSchedule's completion path. Co-Authored-By: Claude Opus 4.7 (1M context) --- audit-findings.md | 46 +++ audit-runs/audit-006/canary_export_queue.md | 273 ++++++++++++++++++ .../xenia-app/tests/golden/sylpheed_n50m.json | 4 +- crates/xenia-kernel/src/exports.rs | 109 ++++++- 4 files changed, 429 insertions(+), 3 deletions(-) create mode 100644 audit-runs/audit-006/canary_export_queue.md diff --git a/audit-findings.md b/audit-findings.md index abf8fbe..0217cbe 100644 --- a/audit-findings.md +++ b/audit-findings.md @@ -5219,3 +5219,49 @@ PROBE_LIST="0x824a9aa0,0x824a9128,0x824a9710,0x824a9778,0x824a9788,0x824a9790,0x > audit-runs/audit-007/sub_824A9710-trace.log \ 2> audit-runs/audit-007/sub_824A9710-trace.err ``` + +## KRNBUG-IO-003 — `NtDeviceIoControlFile` real implementation (LANDED 2026-05-04) + +### Outcome + +**Replaced `stub_success` registration with a real `nt_device_io_control_file` mirroring canary `NullDevice::IoControl` for FsCtlCodes 0x70000 + 0x74004.** +- 592 → 594 tests; lockstep `instructions=100000019 imports=987524 swaps=2 draws=0` deterministic across run1/run2/run3 (`audit-runs/post-IO-003/lock_n100m_run{1,2,3}.json`, all byte-identical). +- Branch: `xboxkrnl-ioctl/p0-fsctl-mountinfo` (no-ff merge). +- `sylpheed_n50m` golden re-baselined `instructions=50000004→50000003`, `imports=407362→407255`. `sylpheed_n2m` unchanged. + +### Audit-007 prediction scorecard + +| # | Prediction | Pre | Post | Held? | +|---|---|---|---|---| +| (a) | `cargo test --workspace --release` green | 592 | 594 | ✓ | +| (b) | Lockstep determinism preserved | bit-identical | bit-identical (run1≡run2≡run3) | ✓ | +| (c) | `XexCheckExecutablePrivilege` count: 1 → ≥2 | 1 | 2 | ✓ | +| (c) | `XamTaskSchedule` count: 0 → ≥1 | 0 | 1 | ✓ | +| (e) | canary-only exports: 7 → ≤3 | 7 | 3 | ✓ | +| (d) | 0x100c worker spawn (handle goes from UNCREATED to created+signaled) | UNCREATED | UNCREATED | **✗** | +| (d) | 0x1004 signal_attempts > 0 | 0 | 0 | **✗** | +| (d) | 0x15e0 signal_attempts > 0 | 0 | 1 (primary=1, "not stuck") | ✓ (new) | +| (f) | Worker thread spawn count: 19 → higher | 19 | 19 | **✗** | + +5/8 predictions held; cascade fired but not as far as audit-007 expected. Specifically: +- The priv-11 query DOES fire → flows into `XamTaskSchedule` → 0x15e0 semaphore-signal pump now runs. +- The audit-006 `canary_only.txt` 7 entries reduce by 4 (`KeResetEvent`, `ObCreateSymbolicLink`, `XamTaskCloseHandle`, `XamTaskSchedule`). Still missing: `ExTerminateThread`, `KeReleaseSemaphore`, `XamUserReadProfileSettings`. +- `XeCryptSha` (1) and `XeKeysConsolePrivateKeySign` (1) also now fire (were 0). +- Per-handle 0x100c stays UNCREATED — the producer chain that should spawn the 0x100c worker is gated downstream of where IO-003 unblocks. The 7→3 canary-only entries that remain (`ExTerminateThread`, `KeReleaseSemaphore`, `XamUserReadProfileSettings`) are the next clue: any of those could be the next gate. + +### Files modified + +- `crates/xenia-kernel/src/exports.rs:90` — `stub_success` → `nt_device_io_control_file`. +- `crates/xenia-kernel/src/exports.rs` (new function) — body mirrors `xboxkrnl_io.cc:645-678` + `null_device.cc` (canary). Stack args 9-10 read from `[sp+0x54]` / `[sp+0x5C]` per Xbox 360 PowerPC ABI (parameter save area at sp+0x14 + 64 bytes spill = sp+0x54, confirmed by disasm of caller `sub_824ABD88` at `0x824abe04-0x824abe10` and `0x824abe78-0x824abe70`). +- `crates/xenia-kernel/src/exports.rs` (new tests) — `nt_device_io_control_file_drive_geometry` (FsCtlCode 0x70000) + `nt_device_io_control_file_partition_info_unblocks_gate` (FsCtlCode 0x74004 — asserts OUT+8 ≠ 0, the gate condition). +- `crates/xenia-app/tests/golden/sylpheed_n50m.json` — re-baselined. + +### Trace artifacts + +- `audit-runs/post-IO-003/lock_n100m_run{1,2,3}.json` — three byte-identical 100M lockstep runs. +- `audit-runs/post-IO-003/lock_n500m.json` — 500M lockstep digest (`instructions=500000010 imports=5629676`). +- `audit-runs/post-IO-003/exec_trace_focus_500m.log` — `--trace-handles-focus=0x1004,0x100c,0x15e0` at 500M. + +### Next session candidates + +The 0x100c worker still doesn't spawn. Three of audit-006's canary-only entries (`ExTerminateThread`, `KeReleaseSemaphore`, `XamUserReadProfileSettings`) remain canary-only — any of them may be the next downstream gate. Re-running `--branch-probe` against `sub_824A9710` would now show a new exit branch (the priv-11 site fires, so the failure mode has shifted). diff --git a/audit-runs/audit-006/canary_export_queue.md b/audit-runs/audit-006/canary_export_queue.md new file mode 100644 index 0000000..bae655f --- /dev/null +++ b/audit-runs/audit-006/canary_export_queue.md @@ -0,0 +1,273 @@ +# Canary-Only Export Fix Queue (audit-006) + +- Status: **PARTIAL CASCADE (2026-05-04, post-KRNBUG-IO-003). 7 → 3 canary-only exports.** `NtDeviceIoControlFile` real impl landed; the priv-11 query (`XexCheckExecutablePrivilege(0xB)`) and `XamTaskSchedule` now fire. **Reclassified (now firing on our side):** `KeResetEvent`, `ObCreateSymbolicLink`, `XamTaskCloseHandle`, `XamTaskSchedule`. **Bonus pickups:** `XeCryptSha`, `XeKeysConsolePrivateKeySign` (both 0→1 — were not on the canary-only list because they were already in `ours_exports` but unreached). **Still canary-only:** `ExTerminateThread`, `KeReleaseSemaphore`, `XamUserReadProfileSettings`. Worker thread spawn count unchanged at 19; handle 0x100c remains UNCREATED. See KRNBUG-IO-003 in `audit-findings.md` and `project_xenia_rs_io_003_ioctl_2026_05_04.md`. + +- Prior status (now superseded): **SUPERSEDED by AUDIT-007 (2026-05-04). Real gate identified: `NtDeviceIoControlFile` (FsCtlCode=0x74004) is `stub_success` at `crates/xenia-kernel/src/exports.rs:90`. Game-side `sub_824ABD88:0x824abea8-ac` reads `[out_buf+8]` of the IOCTL response, finds zero (stub doesn't write OUT), assigns hardcoded `0xC0000034` (STATUS_OBJECT_NAME_NOT_FOUND); caller `sub_824A9710` exits at `0x824a9944` before priv-11. Tier 4 entries remain parked, classification unchanged (still REAL_BUT_UNREACHED), awaiting KRNBUG-IO-003. See `project_xenia_rs_audit_007_branch_probe_2026_05_04.md` for the runtime trace + decisive proof.** + +- Prior status: **PARTIAL — KRNBUG-IO-002 landed, but predicted cascade did NOT fire (7 → 7). Tier 0 marked superseded; Tier 4 entries STILL parked. Re-audit needed to find the real upstream gate.** +- Pre-state: master HEAD `556a8c3`, exports diff captured 2026-05-04 +- Post-IO-002 state: branch `xboxkrnl-vol-allocunit/p0-65536-cluster`, fresh 500 M trace at `audit-runs/post-IO-002/`. Canary-only kernel exports remain identical: `{ExTerminateThread, KeReleaseSemaphore, KeResetEvent, ObCreateSymbolicLink, XamTaskCloseHandle, XamTaskSchedule, XamUserReadProfileSettings}`. +- Inputs: + - `canary.log` (348720 B, identical to audit-005 oracle, canary build `9467c77f0`) + - `ours.log` (692 MB, 5.6 M trace lines, run at 17:20–17:21 today, post-IO-001) +- Tooling: `diff.py` + plain `comm -23` set-difference on extracted call names + +## Headline finding + +**7/7 canary-only entries classify as REAL_BUT_UNREACHED or STUB_BUT_UNREACHED.** +Per the audit-006 spec stop condition ("if two-thirds of entries are +REAL_BUT_UNREACHED, the problem isn't stubs — it's an upstream gate"), +the next session should **NOT** pull a Tier-1 entry from this queue. +Instead, it should fix the gate. + +The gate is **KRNBUG-IO-002**: our `nt_query_volume_information_file` +class-3 (FileFsSizeInformation) returns alloc_unit = 1 × 2048 = 2048, +but Sylpheed's `main(1, 0x10000, 0xFF000)` expects alloc_unit = 65536 +(see `project_xenia_rs_io_nullfile_2026_05_04.md`). +Sylpheed's verifier `sub_824ABA98` rejects 2048, propagates failure to +`sub_824A9710`, which exits early before its `XexCheckExecutablePrivilege(0xB)` +call site. Canary fires the priv-11 query *and* the entire downstream +cluster (`XamTaskSchedule` → Cache0 callback thread → 0x100c worker spawn +→ display-init pump → profile-settings cascade); we fire none of it. + +Direct evidence (telemetry): +- Our `XexCheckExecutablePrivilege` count = **1** (priv=0xA only). + Canary count = **2** (priv=0xA + priv=0xB). +- All 7 canary-only entries have ours-side count = **0** at -n 500M. +- Our trace ends with main thread (hw=0) parked on `XNotifyGetNext + + NtWaitForSingleObjectEx(0x10f4, lr=0x824ac578)` and hw=1 parked on + `NtWaitForMultipleObjectsEx(lr=0x824ab214) + cs=0x828f3e70` — + classic post-cache-recreate spin. +- The 44 `NtWriteFile` calls in ours.log (cache zero-fill) are followed by + more NtClose / NtCreateFile cycles, but `XexCheckExecutablePrivilege(0xB)` + never fires → priv-11 site in `sub_824A9710` is unreached. +- Memory's predicted `0xC000014F` does not yet appear in ours.log; first + cache-related error is `0xC0000034` (OBJECT_NAME_NOT_FOUND) from + `lr=0x824a97e4`. This still fits the gate hypothesis: the recreate path + is reached, completes its writes, re-opens, queries volume info, and + the *game-side* verifier rejects our reply silently (no kernel error). + +--- + +## Tier 0 — upstream gate (SUPERSEDED 2026-05-04 — fix landed but cascade did NOT fire) + +### KRNBUG-IO-002 — `nt_query_volume_information_file` block size — **LANDED, gate hypothesis FALSIFIED** + +**Outcome:** the block-size literals at `exports.rs:1255-1256` were corrected +to canary's NullDevice values (`sectors_per_unit=0x80, bytes_per_sector=0x200`, +product `0x10000`). 591 → 592 tests, lockstep `instructions=100000010, swaps=2, +draws=0` deterministic across two reruns (`audit-runs/post-IO-002/lock_n100m_run{1,2}.json`). +sylpheed_n50m oracle still matches its existing golden (no observable change at -n 50M). + +**However, the predicted cascade DID NOT fire.** Set-difference on a fresh +500 M trace (`audit-runs/post-IO-002/ours.log`) produces the **identical** +seven-entry canary-only set audit-006 captured pre-fix: + +``` +ExTerminateThread, KeReleaseSemaphore, KeResetEvent, +ObCreateSymbolicLink, XamTaskCloseHandle, XamTaskSchedule, +XamUserReadProfileSettings +``` + +`XexCheckExecutablePrivilege` count remains **1** (priv=0xA only, priv=0xB +unreached). `XamTaskSchedule` count remains **0**. Worker thread spawns +fell from 19 → 18 (within noise — single thread variance per call-site +breakdown: `lr=0x824ac5f0×15 + 0x824cd984×1 + 0x824d2e68×2`). The 16 +NtQueryVolumeInformationFile call sites in `ours.log` all originate from +a single LR `0x82611f38` — meaning the `audit-006` premise that +`sub_824ABA98`/`sub_824A9710` consume the volume-info reply at the +priv-11 gate may be **incorrect**, or the gate consumes a *different* +information class entirely. + +**Stop-condition triggered.** Per the IO-002 task brief, this session does +not pivot to a second fix. The fix is correct (it makes our reply +byte-identical to canary's NullDevice and survives every test we have); +it is just not load-bearing for the priv-11 gate. The branch landed as a +strict no-op at our current boot horizon — kept because it's correct and +unblocks no regression. + +**Next-session next gate hypothesis (untested):** +- The audit-005 disasm of `sub_824ABA98` may have mis-attributed the consumer + of bytes_per_sector. The IO-001 trace decisively located the failure at + the `NtReadFile` inside `sub_824A9710`, not at any volume-info site. + Re-read the `sub_824A9710` disasm with that in mind. +- Volume-info LR `0x82611f38` is far downstream of the priv-10/priv-11 + cluster (the calls *complete* successfully — they don't gate anything + visible). The actual gate may be `nt_query_information_file`, + `nt_query_full_attributes_file`, an FsCtl IOCTL, or a different + alloc-unit query path. +- Per AUDIT-005 instrumentation, the priv-11 site at `sub_824A9710` PC + cluster has **never fired** in any session. Probe `sub_824A9710` entry + with `--pc-probe` and trace which conditional exits the function before + the priv-11 query — that's the real gate. + +--- + +### KRNBUG-IO-002 — `nt_query_volume_information_file` block size (original spec, kept for archaeology) + +- **Where in our code:** `crates/xenia-kernel/src/exports.rs:1241-1269` (function + `nt_query_volume_information_file`). +- **Classification:** `REAL_BUT_BUGGY`. Registered at exports.rs:100, called + 16× in ours.log (16× in canary.log too — call counts match), returns + `STATUS_SUCCESS`, but the FileFsSizeInformation payload is wrong. +- **Bug:** class=3 branch writes `(total=0x100000, free=0, + sectors_per_unit=1, bytes_per_sector=2048)`. Product = 2048 bytes per + cluster. +- **Canary reference:** + - Entry function `NtQueryVolumeInformationFile_entry` at + `xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_io_info.cc:323` (case + `XFileFsSizeInformation` at lines 355–365). Canary delegates to per-device + methods on `file->device()`. + - `NullDevice` (the device backing `\Device\Harddisk0\Cache0`) returns + `sectors_per_allocation_unit() = 0x80` and `bytes_per_sector() = 0x200` + at `xenia-canary/src/xenia/vfs/devices/null_device.h:38-46`. Product = + 65536, matching Sylpheed's expectation. + - Other device backings (HostPath, DiscImage, DiscZArchive) all return + `(1, 0x200)` = 512. Sylpheed's first volume query at this site is + against Cache0 (NullDevice), so the relevant value is 65536. +- **Fix sketch (minimum):** in the class=3 branch, change the two writes to + `mem.write_u32(info+16, 128); mem.write_u32(info+20, 512);` (and reduce + TotalAllocationUnits accordingly so the disk size remains plausible — + e.g. 0x10 units × 128 sectors × 512 bytes ≈ 1 MB, matching NullDevice). + Total diff ≤ 4 lines. +- **Fix sketch (proper, deferred until the cluster fires reliably):** + introduce a per-handle device-info lookup so HostPath / DiscImage paths + return their canary-correct values too. Skipped for now because Sylpheed + only queries Cache0 at this gate. +- **Expected observable post-fix:** + - `XexCheckExecutablePrivilege` count: 1 → 2. + - `XamTaskSchedule` count: 0 → 1 (callback=0x824A93C8, message=0x828A28F0). + - `kernel.calls{XamTaskSchedule}` finally non-zero — closes the + APUBUG-PRODUCER-001 / XAMBUG-PRODUCER-001 producer hunt that + falsified XAudio + XamTaskSchedule producer hypotheses. + - Spawn of the Cache0 callback thread (XThread::Execute thid 7 in + canary, our equivalent to come). + - Inside that thread: `StfsCreateDevice` (still undefined extern in + canary too — does not block) + `ObCreateSymbolicLink` + + `ExRegisterTitleTerminateNotification`. + - Back on main: `KeResetEvent(0x8287094C)`, `NtCreateEvent`, + `ExCreateThread(entry=0x82181830, ctx=0x828F3D08)` — and `0x82181830` + is the worker entry for **dispatcher 0x100c**, one of the four + parked-handle producers (per + `project_xenia_rs_producer_stack_trace_2026_05_03.md`). Spawning + that worker should advance handle 0x100c's `signal_attempts` + counter off zero. + - Eventually (further into the boot): `XamUserGetXUID`, + `XamUserReadProfileSettings`, `XamContentCreateEnumerator`, + `KeReleaseSemaphore` display-pump (268+ calls in canary at this + horizon). +- **Risk:** low. Two-line value change. NullDevice is the only device + Sylpheed asks about at this gate; other devices are not yet hit. +- **Effort:** trivial. +- **Dependencies:** none. Land directly. +- **Verification chain:** `cargo test -p xenia-kernel`, + then `cargo run --release -p xenia-app -- exec sylpheed.iso -n 500_000_000` + with kernel-call tracing on, then re-run audit-006's set-difference; + expect canary-only count to drop from 7 toward 0 as the cluster fires. + +--- + +## Tier 4 — REAL_BUT_UNREACHED / STUB_BUT_UNREACHED — do not fix yet + +These are downstream of Tier 0. Reachability is blocked on KRNBUG-IO-002 +landing. After IO-002 lands, re-derive this list — most entries should +have moved off, and any survivors will be classifiable on real evidence. + +| # | Export | Ordinal | Library | Our state | Canary impl | Canary calls (at horizon) | Cascade rank | +|---|--------|---------|---------|-----------|-------------|---------------------------|--------------| +| 1 | `XamTaskSchedule` | 0x01AF | xam | REAL_BUT_UNREACHED (`xam_task_schedule`, xam.rs:213) | `xam_task.cc:43-80` | 1 (gate-pivot call) | upstream-of-cluster — fires the entire post-IO-002 cascade | +| 2 | `XamTaskCloseHandle` | 0x01B1 | xam | STUB_BUT_UNREACHED (`stub_success`, xam.rs:33) | `xam_task.cc:83-93` (one-liner: `NtClose` + last-error) | 1 | low (cleanup after #1) | +| 3 | `KeResetEvent` | 0x8F | xboxkrnl | REAL_BUT_UNREACHED (`ke_reset_event`, exports.rs:3172) | `xboxkrnl_threading.cc:566` | 1 | medium — clears 0x8287094C right before ExCreateThread(0x82181830) on main | +| 4 | `ObCreateSymbolicLink` | 0x0103 | xboxkrnl | STUB_BUT_UNREACHED (`stub_success`, exports.rs:121) | `xboxkrnl_ob.cc:351` | 1 | low — Cache0-symlink registration; cosmetic for Sylpheed boot | +| 5 | `KeReleaseSemaphore` | 0x88 | xboxkrnl | REAL_BUT_UNREACHED (`ke_release_semaphore`, exports.rs:3280) | `xboxkrnl_threading.cc:724` | 268 | high (in volume) — display-init pump on the post-cluster main loop | +| 6 | `ExTerminateThread` | 0x19 | xboxkrnl | REAL_BUT_UNREACHED (`ex_terminate_thread`, exports.rs:312) | `xboxkrnl_threading.cc:173` | 2 | low — thread cleanup on Cache0 / profile threads | +| 7 | `XamUserReadProfileSettings` | 0x0219 | xam | REAL_BUT_UNREACHED (`xam_user_read_profile_settings`, xam.rs:327) | `xam_user.cc:329` | 2 | medium — gates the `XamUserGetXUID → profile load` flow far downstream | + +**Why every entry above is Tier 4 (not Tier 1):** + +- Each entry's first call in `canary.log` falls **after** line 1210 + (`XamTaskSchedule(824A93C8, ...)`), which is the gate-pivot call. +- Our trace contains zero of any of the seven, despite running 500 M + instructions and reaching the post-cache-recreate horizon. +- Six of the seven are already real implementations. The two stubs + (`XamTaskCloseHandle`, `ObCreateSymbolicLink`) are minor cleanups; even + upgrading them would not move boot progress until #1 (`XamTaskSchedule`) + fires. +- Therefore: fixing any of these in isolation is wasted effort. They + should be re-classified after KRNBUG-IO-002 lands and the priv-11 / + Cache0 callback chain runs. + +--- + +## Tier 1 / 2 / 3 — empty for this audit + +No entry qualifies as Tier 1 or Tier 2 in the current state. The single +high-cascade fix worth pulling next is the Tier-0 gate (KRNBUG-IO-002), +which is **not itself a canary-only export** — it's a wrong-value bug in +an export both sides call, so the diff.py based set-difference doesn't +surface it. That is exactly why audit-006 was scoped this way: to confirm +the gate hypothesis from `project_xenia_rs_io_nullfile_2026_05_04.md` +before another implementation session is started. + +--- + +## Cross-check vs IO-001 snapshot + +IO-001 memory recorded these 7 still-canary-only exports: +> ExTerminateThread, KeReleaseSemaphore, KeResetEvent, ObCreateSymbolicLink, +> XamTaskCloseHandle, XamTaskSchedule, XamUserReadProfileSettings. + +Audit-006 set-difference produces the **identical** 7, in 1:1 +correspondence. No new canary-only export has appeared since IO-001 +landed; no entry has moved off. Cascade is still parked at the same gate. + +The `XeCryptSha`, `XeKeysConsolePrivateKeySign`, and `NtDeviceIoControlFile` +entries that IO-001 was credited with unblocking are confirmed: ours +calls them 1, 1, 2 times respectively (canary calls them 1, 1, 2 — exact +match). They are correctly off the canary-only list. + +--- + +## Methodology notes + +1. **"Cascade rank" definition:** estimated by where the export's first + canary call falls in the boot sequence and how many downstream code + paths depend on it. "high" = upstream-of-cluster (XamTaskSchedule). + "medium" = intermediate (KeResetEvent, profile cascade). + "low" = leaf cleanup or cosmetic (XamTaskCloseHandle, ObCreateSymbolicLink). + Rank only matters once Tier 0 is landed; until then everything is parked. +2. **Reachability oracle:** binary `grep -c "call=NAME"` against ours.log + at -n 500M. Zero counts are conclusive for "unreached" because tracing + is unconditional. +3. **Canary log freshness:** the log is from 17:34 (3 h before this + audit) but is byte-identical to audit-005's input — canary's behavior + is deterministic given the same ROM and the canary build header + (`canary_experimental@9467c77f0 on May 2 2026`) hasn't changed. + Re-running through Lutris is unnecessary. +4. **Gate confirmation:** memory predicted block-size mismatch as the + IO-002 blocker; this audit confirmed it by eliminating the alternative + (no Tier-1-eligible canary-only export exists in the current 7-entry + list). The 0xC000014F status memory predicted is not yet visible in + ours.log because the recreate path completes the writes — the + verifier inside `sub_824ABA98` rejects the volume-info reply at the + game level (no kernel error logged). +5. **What this queue is *not*:** a list of fixes to land. The audit-006 + discipline was scoping; the discipline of subsequent sessions is to + re-run audit-006's diff after IO-002, then either close audit-006 (if + the cluster fires through and all 7 entries drop) or open audit-007 + on whatever new canary-only set surfaces. + +--- + +## Recommended next session + +**KRNBUG-IO-002 (block-size fix), one-shot.** Two-line edit at +`crates/xenia-kernel/src/exports.rs:1255-1256`. Verify the cluster fires +by re-running audit-006's set-difference; expect 7 → 0 (or close to 0) +canary-only entries. If new entries surface in either direction, that's +audit-007's input. + +**Do not** open this queue's Tier 4 entries before IO-002 closes. Their +classification is pending; their fix sketches will look very different +once they're observably called and their actual return values can be +compared to canary. diff --git a/crates/xenia-app/tests/golden/sylpheed_n50m.json b/crates/xenia-app/tests/golden/sylpheed_n50m.json index 4591f81..8691377 100644 --- a/crates/xenia-app/tests/golden/sylpheed_n50m.json +++ b/crates/xenia-app/tests/golden/sylpheed_n50m.json @@ -1,6 +1,6 @@ { - "instructions": 50000000, - "imports": 407362, + "instructions": 50000003, + "imports": 407255, "unimpl": 0, "draws": 0, "swaps": 2, diff --git a/crates/xenia-kernel/src/exports.rs b/crates/xenia-kernel/src/exports.rs index 90ff2c7..c759a26 100644 --- a/crates/xenia-kernel/src/exports.rs +++ b/crates/xenia-kernel/src/exports.rs @@ -87,7 +87,7 @@ pub fn register_exports(state: &mut KernelState) { state.register_export(Xboxkrnl, 0xD2, "NtCreateFile", nt_create_file); state.register_export(Xboxkrnl, 0xD5, "NtCreateSemaphore", nt_create_semaphore); state.register_export(Xboxkrnl, 0xD7, "NtCreateTimer", nt_create_timer); - state.register_export(Xboxkrnl, 0xD9, "NtDeviceIoControlFile", stub_success); + state.register_export(Xboxkrnl, 0xD9, "NtDeviceIoControlFile", nt_device_io_control_file); state.register_export(Xboxkrnl, 0xDA, "NtDuplicateObject", nt_duplicate_object); state.register_export(Xboxkrnl, 0xDB, "NtFlushBuffersFile", stub_success); state.register_export(Xboxkrnl, 0xDC, "NtFreeVirtualMemory", stub_success); @@ -1001,6 +1001,64 @@ fn nt_write_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelStat signal_io_completion_event(state, event_handle); } +/// Mirrors canary `NullDevice::IoControl` via +/// [xboxkrnl_io.cc:645-678](xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc). +/// Used by `XMountUtilityDrive` cache-mount probes; Sylpheed issues both +/// `0x70000` (drive geometry) and `0x74004` (partition info) inside +/// `sub_824ABD88`. The OUT-buffer fields it writes are the gate that +/// keeps `sub_824A9710` from synthesizing `STATUS_OBJECT_NAME_NOT_FOUND`. +fn nt_device_io_control_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + const X_IOCTL_DISK_GET_DRIVE_GEOMETRY: u32 = 0x70000; + const X_IOCTL_DISK_GET_PARTITION_INFO: u32 = 0x74004; + const STATUS_BUFFER_TOO_SMALL: u64 = 0xC000_0023; + const STATUS_INVALID_PARAMETER: u64 = 0xC000_000D; + const CACHE_SIZE: u64 = 0xFF000; + + let event_handle = ctx.gpr[4] as u32; + let io_status_block = ctx.gpr[7] as u32; + let io_control_code = ctx.gpr[8] as u32; + let sp = ctx.gpr[1] as u32; + let output_buffer = mem.read_u32(sp + 0x54); + let output_buffer_len = mem.read_u32(sp + 0x5C); + + let status: u64 = match io_control_code { + X_IOCTL_DISK_GET_DRIVE_GEOMETRY => { + if output_buffer_len < 0x8 { + STATUS_BUFFER_TOO_SMALL + } else { + mem.write_u32(output_buffer, (CACHE_SIZE / 512) as u32); + mem.write_u32(output_buffer + 4, 512); + STATUS_SUCCESS + } + } + X_IOCTL_DISK_GET_PARTITION_INFO => { + if output_buffer_len < 0x10 { + STATUS_BUFFER_TOO_SMALL + } else { + mem.write_u64(output_buffer, 0); + mem.write_u64(output_buffer + 8, CACHE_SIZE); + STATUS_SUCCESS + } + } + _ => { + tracing::warn!( + io_control_code = format!("0x{:X}", io_control_code), + "NtDeviceIoControlFile: unhandled IOCTL" + ); + STATUS_INVALID_PARAMETER + } + }; + + let info = if status == STATUS_SUCCESS { + output_buffer_len + } else { + 0 + }; + write_io_status_block(mem, io_status_block, status as u32, info); + ctx.gpr[3] = status; + signal_io_completion_event(state, event_handle); +} + /// Minimal `NtQueryInformationFile`. The only classes Sylpheed (and most /// games) use are `FileStandardInformation` (5) and `FilePositionInformation` /// (14). Anything else gets zeros + success. @@ -5461,6 +5519,55 @@ mod tests { assert!(state.xaudio.get(index).is_none()); } + /// FsCtlCode 0x70000 (drive geometry): canary writes + /// `cache_size/512` at OUT+0 and `512` at OUT+4 (both u32 BE). + #[test] + fn nt_device_io_control_file_drive_geometry() { + let (mut ctx, mem, mut state) = fresh(); + let sp = SCRATCH_BASE + 0x800; + let iosb = SCRATCH_BASE + 0x100; + let out_buf = SCRATCH_BASE + 0x200; + ctx.gpr[1] = sp as u64; + ctx.gpr[3] = 0xF800_0010; + ctx.gpr[4] = 0; + ctx.gpr[7] = iosb as u64; + ctx.gpr[8] = 0x70000; + mem.write_u32(sp + 0x54, out_buf); + mem.write_u32(sp + 0x5C, 0x8); + nt_device_io_control_file(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + assert_eq!(mem.read_u32(out_buf), 0xFF000 / 512); + assert_eq!(mem.read_u32(out_buf + 4), 512); + assert_eq!(mem.read_u32(iosb), STATUS_SUCCESS as u32); + assert_eq!(mem.read_u32(iosb + 4), 0x8); + } + + /// FsCtlCode 0x74004 (partition info): canary writes `0` at OUT+0 and + /// `cache_size = 0xFF000` at OUT+8 (both u64 BE). Sub_824ABD88 at + /// `0x824abe9c` reads OUT+8 and synthesizes `0xC0000034` if zero — a + /// non-zero value at OUT+8 is the entire fix. + #[test] + fn nt_device_io_control_file_partition_info_unblocks_gate() { + let (mut ctx, mem, mut state) = fresh(); + let sp = SCRATCH_BASE + 0x800; + let iosb = SCRATCH_BASE + 0x100; + let out_buf = SCRATCH_BASE + 0x200; + ctx.gpr[1] = sp as u64; + ctx.gpr[3] = 0xF800_0010; + ctx.gpr[4] = 0; + ctx.gpr[7] = iosb as u64; + ctx.gpr[8] = 0x74004; + mem.write_u32(sp + 0x54, out_buf); + mem.write_u32(sp + 0x5C, 0x10); + nt_device_io_control_file(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + assert_eq!(mem.read_u64(out_buf), 0); + assert_eq!(mem.read_u64(out_buf + 8), 0xFF000); + assert_ne!(mem.read_u64(out_buf + 8), 0, "OUT+8 must be non-zero"); + assert_eq!(mem.read_u32(iosb), STATUS_SUCCESS as u32); + assert_eq!(mem.read_u32(iosb + 4), 0x10); + } + /// Once a client is registered, the lockstep ticker eventually queues /// a fire — proves the producer pipeline is wired end-to-end through /// the kernel state.