Compare commits
185 Commits
c694bb3f43
...
iterate-2I
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f75bc96d17 | ||
|
|
de21c7a544 | ||
|
|
f3b7e8b760 | ||
|
|
7e2603a9e5 | ||
|
|
5aaadfec36 | ||
|
|
0332d1990d | ||
|
|
6271ba1f55 | ||
|
|
48b19e490f | ||
|
|
341196a111 | ||
|
|
b20c99f141 | ||
|
|
db90ad0f7d | ||
|
|
481591fdb2 | ||
|
|
52c30d82a7 | ||
|
|
229b46c765 | ||
|
|
40f208ea4e | ||
|
|
8683fb59ed | ||
|
|
b5885b8560 | ||
|
|
9340ff4592 | ||
|
|
bcd018659b | ||
|
|
09e59e09b7 | ||
|
|
5a8fe21ad5 | ||
|
|
51489e34db | ||
|
|
9a93152981 | ||
|
|
ac2f89a7bb | ||
|
|
2a8ff9515d | ||
|
|
25704c5811 | ||
|
|
49f3eafa15 | ||
|
|
7bc9e3acac | ||
|
|
e428ce33aa | ||
|
|
b03192c772 | ||
|
|
56ffa40a6a | ||
|
|
d8766c6242 | ||
|
|
77034b6cbf | ||
|
|
9028021936 | ||
|
|
5af792c9fc | ||
|
|
85d1603124 | ||
|
|
38d8871e8d | ||
|
|
81c90f9a53 | ||
|
|
ab4fe211e5 | ||
|
|
0209e88f0a | ||
|
|
4ff08f6116 | ||
|
|
3bd77ab506 | ||
|
|
1d6c51fbf8 | ||
|
|
bd5753311e | ||
|
|
89f5f7e4a9 | ||
|
|
fd68285210 | ||
|
|
70120465a3 | ||
|
|
e061e21851 | ||
|
|
690943ceef | ||
|
|
412ba858b4 | ||
|
|
08d41cf2fc | ||
|
|
de5a15ecfb | ||
|
|
c03f2bc9e2 | ||
|
|
d9e40d3564 | ||
|
|
978a6950d1 | ||
|
|
cc54ca8e64 | ||
|
|
76dfe7fd7a | ||
|
|
7ed6192b7b | ||
|
|
5d2401f9c5 | ||
|
|
d736a1dc12 | ||
|
|
91a7df5f6a | ||
|
|
b78e6fd205 | ||
|
|
50a488776f | ||
|
|
2cce044516 | ||
|
|
a1a7265f29 | ||
|
|
58f416c284 | ||
|
|
c51f51f9cb | ||
|
|
79697ddf4e | ||
|
|
7675035082 | ||
|
|
556a8c387a | ||
|
|
bef9793aec | ||
|
|
a6208a1249 | ||
|
|
19659d7f76 | ||
|
|
33e49e70c8 | ||
|
|
1a892d4641 | ||
|
|
451b3b28fe | ||
|
|
3e2fc1ec88 | ||
|
|
6a070bedc6 | ||
|
|
7108d6d131 | ||
|
|
48eed258f0 | ||
|
|
f84e947547 | ||
|
|
6440261e2e | ||
|
|
2a9fd1fc86 | ||
|
|
9d45efe5d5 | ||
|
|
07068e7616 | ||
|
|
38f78c88a8 | ||
|
|
691404e36e | ||
|
|
b54aa48d10 | ||
|
|
eb71fe8daf | ||
|
|
866855000c | ||
|
|
27d3608174 | ||
|
|
b82919bdd0 | ||
|
|
d1105aafae | ||
|
|
0e95e38813 | ||
|
|
7a1b6b3306 | ||
|
|
aa3f1d344f | ||
|
|
c7fccccbc6 | ||
|
|
6f851a2083 | ||
|
|
780e854c2f | ||
|
|
104078dc29 | ||
|
|
8fc1b1dfed | ||
|
|
fceaa81f46 | ||
|
|
e7d0fcf2c9 | ||
|
|
537d789deb | ||
|
|
8723d6826b | ||
|
|
a07784349d | ||
|
|
ec2d955dbd | ||
|
|
c5c6713419 | ||
|
|
78ea81c12a | ||
|
|
1b74db6fa7 | ||
|
|
82f3d611e2 | ||
|
|
0590bffdd9 | ||
|
|
1f416aaa2e | ||
|
|
62f673d094 | ||
|
|
9ab986ec09 | ||
|
|
caa37fc595 | ||
|
|
09c6c927bd | ||
|
|
f1166d0f75 | ||
|
|
9de18a9eec | ||
|
|
4029041618 | ||
|
|
1f9696ad47 | ||
|
|
261480616c | ||
|
|
ebfd18a64e | ||
|
|
2d223eee69 | ||
|
|
9827b03f1a | ||
|
|
a7155f4571 | ||
|
|
8b9fddc488 | ||
|
|
112202c2b9 | ||
|
|
5ece5e315f | ||
|
|
99e7814836 | ||
|
|
0f2a26c460 | ||
|
|
68c0ee55ce | ||
|
|
d96986a10e | ||
|
|
9f88e275b8 | ||
|
|
d39d0bab4d | ||
|
|
05f2f72c71 | ||
|
|
6fe2cbf251 | ||
|
|
6ba8f83c30 | ||
|
|
538fa5ab74 | ||
|
|
49bf74fae6 | ||
|
|
26b98975c3 | ||
|
|
f6a444b9d1 | ||
|
|
5c45108249 | ||
|
|
d945aeae83 | ||
|
|
49103bb898 | ||
|
|
16993bb8af | ||
|
|
20a730d69e | ||
|
|
82a9bff934 | ||
|
|
bf8208e88c | ||
|
|
145a7a4019 | ||
|
|
e18a0a40b8 | ||
|
|
f424132a5b | ||
|
|
f3ebaba5c9 | ||
|
|
7609dcd406 | ||
|
|
2be25bdd41 | ||
|
|
d4f6ea787b | ||
|
|
3d8e2ced2e | ||
|
|
52ece4bd86 | ||
|
|
cedee3c385 | ||
|
|
a8c918cf9e | ||
|
|
52b05b127f | ||
|
|
6b9de17925 | ||
|
|
64e8ecbfd0 | ||
|
|
197d76c44e | ||
|
|
d51b9346df | ||
|
|
75544fa9db | ||
|
|
147daa0721 | ||
|
|
ca5b90b700 | ||
|
|
c9f194dda1 | ||
|
|
d75c4edf67 | ||
|
|
a107ac9ae7 | ||
|
|
d4e227eeab | ||
|
|
af54eb28bd | ||
|
|
24d347436a | ||
|
|
4538fa9e70 | ||
|
|
bae9305982 | ||
|
|
b1285ba560 | ||
|
|
79eb52c378 | ||
|
|
5f0d6487ea | ||
|
|
f1fadb5398 | ||
|
|
45e15d7885 | ||
|
|
c36cca14f9 | ||
|
|
e9b2b57a44 | ||
|
|
e2b8860e10 | ||
|
|
f166d061be |
16
.gitignore
vendored
16
.gitignore
vendored
@@ -1,4 +1,18 @@
|
||||
/target/
|
||||
target/
|
||||
*.iso
|
||||
*.xiso
|
||||
*.db
|
||||
|
||||
# Audit reports / pre-pass findings (local artifacts, not source)
|
||||
audit-out/
|
||||
audit-*.md
|
||||
|
||||
# Run logs from stress harnesses and ad-hoc captures
|
||||
*.stdout
|
||||
*.stderr
|
||||
*.log
|
||||
|
||||
# Runtime cache artifacts (vkd3d-proton / DXVK shader caches dropped into the
|
||||
# working dir by the Wine canary build)
|
||||
vkd3d-proton.cache*
|
||||
*.dxvk-cache
|
||||
|
||||
4487
Cargo.lock
generated
4487
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
21
Cargo.toml
21
Cargo.toml
@@ -12,6 +12,7 @@ members = [
|
||||
"crates/xenia-hid",
|
||||
"crates/xenia-debugger",
|
||||
"crates/xenia-analysis",
|
||||
"crates/xenia-ui",
|
||||
"crates/xenia-app",
|
||||
]
|
||||
|
||||
@@ -33,10 +34,17 @@ xenia-apu = { path = "crates/xenia-apu" }
|
||||
xenia-hid = { path = "crates/xenia-hid" }
|
||||
xenia-debugger = { path = "crates/xenia-debugger" }
|
||||
xenia-analysis = { path = "crates/xenia-analysis" }
|
||||
xenia-ui = { path = "crates/xenia-ui" }
|
||||
|
||||
# External dependencies
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "registry"] }
|
||||
tracing-appender = "0.2"
|
||||
tracing-chrome = "0.7"
|
||||
tracing-error = "0.2"
|
||||
metrics = "0.24"
|
||||
metrics-util = "0.19"
|
||||
pprof = { version = "0.14", features = ["flamegraph", "protobuf-codec"] }
|
||||
bitflags = "2"
|
||||
byteorder = "1"
|
||||
thiserror = "2"
|
||||
@@ -44,4 +52,13 @@ anyhow = "1"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
aes = "0.8"
|
||||
rusqlite = { version = "0.31", features = ["bundled"] }
|
||||
duckdb = { version = "1", features = ["bundled"] }
|
||||
|
||||
# UI / rendering / input (used by xenia-ui and xenia-app with --ui)
|
||||
winit = "0.30"
|
||||
wgpu = "22"
|
||||
gilrs = "0.11"
|
||||
pollster = "0.3"
|
||||
crossbeam-utils = "0.8"
|
||||
crossbeam-channel = "0.5"
|
||||
bytemuck = { version = "1", features = ["derive"] }
|
||||
|
||||
6071
audit-findings.md
Normal file
6071
audit-findings.md
Normal file
File diff suppressed because it is too large
Load Diff
629
audit-report-2026-04-29.md
Normal file
629
audit-report-2026-04-29.md
Normal file
@@ -0,0 +1,629 @@
|
||||
# PPC Instruction Audit — Triaged Report (2026-04-29)
|
||||
|
||||
**Status**: audit complete. **No code modified.** This file is the fix-order plan for the follow-up session.
|
||||
**Source of truth**: detailed bug entries (one heading per PPCBUG ID) live in `audit-findings.md`. This file references every entry by ID so nothing is lost — it does not duplicate the per-bug detail.
|
||||
|
||||
## Counts
|
||||
|
||||
- **Total findings**: 253 PPCBUG IDs, of which 5 are explicitly retracted/withdrawn (PPCBUG-220, 222, 226, 482, 483 — see Notes section).
|
||||
- **Net findings**: ~248 actionable.
|
||||
- **Severity breakdown** (rough):
|
||||
- HIGH: ~55 (~22%)
|
||||
- MEDIUM: ~75 (~30%)
|
||||
- LOW (test gaps + cosmetic + informational): ~118 (~48%)
|
||||
|
||||
## Headline findings (most likely Sylpheed-renderer-blockers)
|
||||
|
||||
1. **PPCBUG-107 cascade** — `ReservationTable::invalidate_for_write` defined and unit-tested but never called from any of the **50+ store opcodes** in the interpreter. Under `--parallel`, every cross-thread atomic via `lwarx`/`stwcx.` is silently broken: spinlocks succeed without exclusion, atomic counters race, condition-variable handshakes never sync. Plausible direct cause of the 4-worker-thread renderer plateau (`project_xenia_rs_sylpheed_stage3_2026_04_29.md`). **Fix is mechanical**: one-line `if t.has_active_reservers() { t.invalidate_for_write(ea) }` before every `mem.write_*` in interpreter.rs.
|
||||
|
||||
2. **PPCBUG-053+054 cascade** — `bcx`/`bclrx` CTR zero-test compares all 64 bits; `mtspr CTR` writes full 64-bit GPR. Combined with PPCBUG-006 (`negx` poisons GPR upper 32) → **`neg; mtctr; bdnz` loops run forever**.
|
||||
|
||||
3. **8 decoder/field-extraction bugs collapse into 6 missing accessors** + 1 wrong sh64 formula + 1 missing decode_op6 dot-form entry. The disassembler already has correct local versions. Single mechanical sweep.
|
||||
|
||||
4. **PPCBUG-046 (`clrldi r3, r4, 32`)** — the canonical zero-extend-low-32 idiom is currently a no-op. Emitted constantly by 32-bit-ABI compilers.
|
||||
|
||||
5. **PPCBUG-510** — `stvewx128` corrupts 12 adjacent bytes per call.
|
||||
|
||||
6. **PPCBUG-424/425** — `vmaddfp128`/`vmaddcfp128` operand swap. Every D3D vertex/pixel shader using FMA with non-aliased operands gets wrong arithmetic.
|
||||
|
||||
7. **PPCBUG-360/363** — `vperm128` uses wrong control vector (every D3D shader swizzle); `vpkd3d128` missing post-pack permutation (canonical D3D vertex-pack `pack=1` always wrong).
|
||||
|
||||
8. **PPCBUG-275/420-422** — VC-form and VMX128_R-form `rc_bit()` reads bit 0 instead of bit 21/27 → **CR6 never updated for ANY VMX vector compare dot form**. Breaks every `vcmpequb. + bc CR6_all_true` early-exit loop in audio mixing, font rendering, string ops.
|
||||
|
||||
## Recommended fix order
|
||||
|
||||
The phases below are the recommended fix order for the follow-up session. Each phase is **independently mergeable**; an earlier phase may turn out to have already resolved symptoms attributed to a later phase (e.g. P1 by itself could be sufficient to break open the Sylpheed renderer plateau).
|
||||
|
||||
After each phase: `cargo test --workspace --release` (must stay at 506+ pass) AND `xenia-rs check sylpheed.iso -n 100M` (must not regress against the 2026-04-29 addis-fix baseline of `swaps=2`). The acid test is whether `draws > 0` opens after P1 or P2.
|
||||
|
||||
---
|
||||
|
||||
### Phase 1 — Cross-thread atomicity (PPCBUG-107 cascade)
|
||||
|
||||
**Why first**: highest confidence smoking-gun for the renderer plateau. Single, mechanical, low-risk fix. Largest leverage relative to size.
|
||||
|
||||
**Coupled — must land together**:
|
||||
- PPCBUG-107 (root: missing call from stores)
|
||||
- PPCBUG-130 (9 byte/halfword stores)
|
||||
- PPCBUG-140, 141, 142, 143, 144 (5 word stores: stw/stwu/stwx/stwux/stwbrx)
|
||||
- PPCBUG-150 (5 doubleword stores: std/stdu/stdx/stdux/stdbrx)
|
||||
- PPCBUG-160 (3 multiple/string stores: stmw/stswi/stswx)
|
||||
- PPCBUG-167 (9 FP stores)
|
||||
- PPCBUG-511, 512, 513, 514 (16 VMX stores)
|
||||
|
||||
**Independent but related**:
|
||||
- PPCBUG-151 (stwcx/stdcx reservation width discriminator) — separate fix; add `reservation_width: u8` to PpcContext.
|
||||
- PPCBUG-108 (legacy per-context path: cross-thread invalidation impossible) — informational; --reservations-table mode bypasses.
|
||||
|
||||
**Approach** — one PR adds `if t.has_active_reservers() { t.invalidate_for_write(ea) }` before every `mem.write_*` call site. Scope:
|
||||
```
|
||||
mem.write_u8 / write_u16 / write_u32 / write_u64 / write_f32 / write_f64
|
||||
mem.write_vec128 / write_vec128_aligned (for VMX)
|
||||
```
|
||||
~38 sites total. Add 1+ targeted concurrency tests (lwarx + cross-thread plain store + stwcx., expect EQ=0).
|
||||
|
||||
---
|
||||
|
||||
### Phase 2 — Decoder/field-extraction structural sweep
|
||||
|
||||
**Why second**: single mechanical sweep, fixes 12 distinct HIGH-severity findings, unblocks correct execution of compiler-emitted code. Disassembler already has correct local extraction logic — promote/port.
|
||||
|
||||
**Coupled — same commit**:
|
||||
- PPCBUG-040 + PPCBUG-560 — fix `sh64()` bit order AND fix the test helper that was masking it
|
||||
- PPCBUG-046 + PPCBUG-561 — promote `mb_md()` from `disasm.rs:1256` to `decoder.rs`; replace 6 inline-formula sites in interpreter.rs (rldicl/rldicr/rldic/rldimi/rldcl/rldcr)
|
||||
- PPCBUG-275 + PPCBUG-276 + PPCBUG-420 + PPCBUG-421 + PPCBUG-422 + PPCBUG-562 — add `vc_rc_bit()` (PPC bit 21) and `vx128r_rc_bit()` (PPC bit 27); replace `instr.rc_bit()` at all VMX compare dot-form sites
|
||||
- PPCBUG-315 + PPCBUG-563 — add `vx128_4_z()`, `vx128_4_imm()`; fix `vrlimi128`
|
||||
- PPCBUG-361 + PPCBUG-565 — add `vx128_5_sh()`; fix `vsldoi128`
|
||||
- PPCBUG-362 + PPCBUG-564 — add `vx128_p_perm()`; fix `vpermwi128`
|
||||
- PPCBUG-423 + PPCBUG-600 — add 5 odd-key entries to `decode_op6` key4 for `vcmp*fp128.` dot forms
|
||||
|
||||
**Independent in this phase**:
|
||||
- PPCBUG-360 — `vperm128` reads VC from `vd128()` instead of VX128_2 VC field at integer bits 6-8. Fix at the call site (or add `vx128_2_vc()` accessor).
|
||||
- PPCBUG-363 + PPCBUG-369 — `vpkd3d128` missing post-pack permutation; add the `pack`/`shift` field handling per Canary.
|
||||
|
||||
**Test fixture updates required** (PPCBUG-560 lesson) — once `sh64()` is fixed, verify all `disasm_goldens.rs` test helpers encode shifts ISA-correctly. Don't trust the existing fixtures blindly.
|
||||
|
||||
---
|
||||
|
||||
### Phase 3 — Other HIGH bugs (single targeted fixes)
|
||||
|
||||
**Independent**:
|
||||
- PPCBUG-510 — `stvewx128` corrupting 12 bytes per call. Direct fix: align EA to word, write only 4 bytes.
|
||||
- PPCBUG-424 — `vmaddfp128` operand order: change `ai.mul_add(bi, di)` → `ai.mul_add(di, bi)`.
|
||||
- PPCBUG-425 — `vmaddcfp128` operand order similarly.
|
||||
- PPCBUG-053 + PPCBUG-054 — `bcx`/`bclrx` CTR zero-test (32-bit) + `mtspr CTR` truncation (defensive firewall). Coupled.
|
||||
- PPCBUG-640 — `fmt_bc` spurious condition suffix on pure `bdnz`/`bdz`. Port the `fmt_bclr` pattern.
|
||||
- PPCBUG-641 — `lwsync` shows as `sync` in disassembler (re-assessment of PPCBUG-088). Same fix.
|
||||
|
||||
---
|
||||
|
||||
### Phase 4 — 32-bit ABI writeback truncation sweep
|
||||
|
||||
**Why this phase**: cross-cutting, mechanical. Once ALL writebacks truncate via `as u32 as u64`, the systemic 32-bit-ABI invariant is restored and most CR0/CA helper-correctness concerns become moot.
|
||||
|
||||
#### 4a — Active poisoning (every execution corrupts GPR upper bits)
|
||||
|
||||
These bugs corrupt GPR upper bits **regardless** of whether upstream sources are clean — typically because the implementation applies Rust's `!u64` (full 64-bit NOT) somewhere:
|
||||
- PPCBUG-006 (negx — `(!ra).wrapping_add(1)`)
|
||||
- PPCBUG-008 (subfex — `(!ra).wrapping_add(rb).wrapping_add(ca)`)
|
||||
- PPCBUG-018 (subfzex)
|
||||
- PPCBUG-019 (subfmex)
|
||||
- PPCBUG-028 (orcx — `rs | !rb`)
|
||||
- PPCBUG-029 (norx — `!(rs | rb)` — the canonical `not` mnemonic, hot path)
|
||||
- PPCBUG-030 (nandx)
|
||||
- PPCBUG-031 (eqvx — `!(rs ^ rb)` — common `eqv rA, rA, rA` set-to-all-ones)
|
||||
- PPCBUG-033 (andcx via `!rb`)
|
||||
- PPCBUG-034 (extsbx — `as i8 as i64 as u64`)
|
||||
- PPCBUG-035 (extshx)
|
||||
|
||||
#### 4b — Same-shape-as-addis (latent under clean inputs, active when upstream is poisoned)
|
||||
|
||||
- PPCBUG-001 (addi), PPCBUG-002 (addic), PPCBUG-003 (addicx), PPCBUG-005 (subficx), PPCBUG-007 (subfcx CA), PPCBUG-008 (subfex CA — also in 4a)
|
||||
- PPCBUG-004 (mulli), PPCBUG-009 (mullwx)
|
||||
- PPCBUG-010 + PPCBUG-011 (divwx writeback + CR0 — **must land together**, not independently)
|
||||
- PPCBUG-041 + PPCBUG-042 + PPCBUG-043 (srawx/srawix writeback + CR0 coupling — **must land together**)
|
||||
- PPCBUG-095, 096, 097, 098 (lha/lhax/lhau/lhaux halfword sign-extension)
|
||||
- PPCBUG-105 (lwa/lwax/lwaux — note: 64-bit-mode-only; less common in 32-bit-ABI binaries)
|
||||
|
||||
#### 4c — Latent writeback (only triggers if 4a/4b are unfixed)
|
||||
|
||||
These can be fixed in the same sweep but won't fire under clean inputs:
|
||||
- PPCBUG-012, 013, 014, 015, 016, 017 (addx/addcx/addex/addzex/addmex/subfx)
|
||||
- PPCBUG-032 (andx/orx/xorx)
|
||||
|
||||
#### 4d — CR0 32-bit-ABI compare (cross-cutting catch-all)
|
||||
|
||||
PPCBUG-020 documents the catch-all; the per-opcode locations are referenced from there:
|
||||
- PPCBUG-020 (catch-all in groups 2-5)
|
||||
- PPCBUG-023 (andisx)
|
||||
- PPCBUG-024 (rlwinmx), PPCBUG-025 (rlwimix), PPCBUG-026 (rlwnmx)
|
||||
- PPCBUG-036 (extsbx), PPCBUG-037 (extshx) — **must land with PPCBUG-034/035**
|
||||
- PPCBUG-044 (slwx/srwx)
|
||||
|
||||
**Fix shape** — at every Rc=1 path, change `update_cr_signed(0, result as i64)` to `update_cr_signed(0, result as u32 as i32 as i64)`. Once 4a/4b/4c land, both forms become equivalent and 4d becomes belt-and-suspenders (still recommended for resilience).
|
||||
|
||||
---
|
||||
|
||||
### Phase 5 — FPU correctness (graphics middleware impact)
|
||||
|
||||
#### 5a — Round-to-int and FPSCR.RN
|
||||
|
||||
- PPCBUG-221 + PPCBUG-227 (`round_to_i64` NearestEven broken near 2^52 — must land together; `round_to_i32` delegates)
|
||||
- PPCBUG-201 (FPSCR.RN not honored for double arithmetic)
|
||||
- PPCBUG-432 (vrfin/vrfin128 round-half-away-from-zero vs round-to-nearest-even)
|
||||
|
||||
#### 5b — VXISI / NaN / SNaN handling for FMA family
|
||||
|
||||
- PPCBUG-181, 182 (single fmaddsx/fmsubsx/fnmaddsx/fnmsubsx VXISI)
|
||||
- PPCBUG-202, 203, 204 (double fmaddx/fmsubx/fnmaddx/fnmsubx VXISI — esp. 203 hot for Newton-Raphson)
|
||||
- PPCBUG-183, 205 (fnmadd/fnmsub Rust unary `-` flips NaN sign — fix: skip negation on NaN)
|
||||
- PPCBUG-186 (SNaN priority for FMA)
|
||||
- PPCBUG-128 (lfs SNaN quietening — bit-manipulation widening helper needed)
|
||||
|
||||
#### 5c — Inexact / FPSCR exception bits
|
||||
|
||||
- PPCBUG-180 (single XX/FR/FI never set), PPCBUG-200 (double XX/FR/FI never set)
|
||||
- PPCBUG-223 (fcmpo VXSNAN/VXVC), PPCBUG-224 (fcfidx XX), PPCBUG-225 (frspx XX/FR/FI), PPCBUG-229 (fctidx/fctidzx XX/FX), PPCBUG-230 (fctiwx/fctiwzx XX/FX), PPCBUG-231 (frspx SNaN host dependency)
|
||||
- PPCBUG-165 + PPCBUG-166 + PPCBUG-168 (stfs* FPSCR + RN + SNaN)
|
||||
|
||||
#### 5d — Subnormal flush (FPSCR.NI / VSCR.NJ)
|
||||
|
||||
- PPCBUG-185 (FPU NI subnormal flush not modeled)
|
||||
- PPCBUG-435, 436, 437 (VMX NJ subnormal flush — vaddfp/vsubfp/vmulfp128, vmsum3fp128/vmsum4fp128 product intermediates, vmaddfp/vmaddfp128/vmaddcfp128/vnmsubfp128 outputs)
|
||||
|
||||
#### 5e — Estimate precision (vs hardware ~12-bit)
|
||||
|
||||
- PPCBUG-184 (fres)
|
||||
- PPCBUG-428..431 (vrefp, vrsqrtefp, vexptefp, vlogefp — same shape as fres)
|
||||
|
||||
#### 5f — VMX float compares + saturation
|
||||
|
||||
- PPCBUG-426, 427 (vnmsubfp/vnmsubfp128 double-rounding)
|
||||
- PPCBUG-433 (vctsxs/vcfpsxws128 NaN saturate to INT_MIN)
|
||||
|
||||
---
|
||||
|
||||
### Phase 6 — Other MEDIUM correctness
|
||||
|
||||
- PPCBUG-021 (overflow.rs OE checks at bit 63 — sub-register ops; partly covered by P4)
|
||||
- PPCBUG-022 (`mulld_ov` missing INT_MIN × -1)
|
||||
- PPCBUG-027 (rlwimix upper-32 ISA-deviation — auto-resolves once P4 lands)
|
||||
- PPCBUG-039 (cntlzdx 32-bit-ABI counts upper-zero — only matters if emitted)
|
||||
- PPCBUG-063 (trap pc-after-advance)
|
||||
- PPCBUG-064 (sc LEV field)
|
||||
- PPCBUG-065 (twi 31, r0, IMM typed-trap — relevant to Sylpheed C++ throw work, see `project_xenia_rs_sylpheed_throw_2026_04_28.md`)
|
||||
- PPCBUG-068 (mcrfs VX summary recomputation)
|
||||
- PPCBUG-078 (mtmsrd L=1 partial MSR-write)
|
||||
- PPCBUG-080 (mfvscr zero upper 96 bits)
|
||||
- PPCBUG-123 + PPCBUG-124 + PPCBUG-161 + PPCBUG-566 (XER TBC for lswx/stswx — coupled; add `xer_tbc: u8` to PpcContext, wire into xer()/set_xer(); enables lswx and stswx)
|
||||
- PPCBUG-125 (lmw RA-in-destination skip)
|
||||
- PPCBUG-126 + PPCBUG-162 (lswi/stswi `instr.rb()` → `instr.nb()`)
|
||||
- PPCBUG-487 + PPCBUG-495 (vsum* operand naming)
|
||||
- PPCBUG-515 (lvebx/lvehx/lvewx vs Canary divergence — document; xenia-rs is more ISA-faithful)
|
||||
- PPCBUG-516 (lvsr sh=0 case — add comment + debug_assert)
|
||||
- PPCBUG-601 (decode_op6 overlapping windows — document the invariant)
|
||||
- PPCBUG-642 (fmt_bcctr extended forms)
|
||||
- PPCBUG-643 + PPCBUG-644 (SIMM/D-form decimal vs hex — alignment with Canary disassembly)
|
||||
- PPCBUG-367 (vupkhpx/vupklpx channel replication vs zero-extend)
|
||||
- PPCBUG-368 (vpkpx pack_pixel_555 channel assignment unverified)
|
||||
- PPCBUG-366 (vspltisb/vspltish sign-extension idiom — fragile, not wrong)
|
||||
|
||||
---
|
||||
|
||||
### Phase 7 — Frozen-snapshot drift (separate sweep)
|
||||
|
||||
8 opcodes' frozen snapshots in `ppc-manual/<cat>/<op>.md` differ from live code:
|
||||
- PPCBUG-066 (td/tdi/tw/twi)
|
||||
- PPCBUG-117 (ldarx)
|
||||
- PPCBUG-145 (stwcx)
|
||||
- PPCBUG-560 (already-listed: rldicl test helper bit-order)
|
||||
- Plus the implicit drift in addicx (PPCBUG-003), andisx (PPCBUG-023), cmp/cmpi (PPCBUG-050), extsbx/extshx (PPCBUG-036/037, PPCBUG-032 in batch 1)
|
||||
|
||||
**Recommendation**: regenerate frozen snapshots from current code for the entire ppc-manual after Phases 1-4 land. Add a CI check that compares snapshots vs live code on every PR.
|
||||
|
||||
---
|
||||
|
||||
### Phase 8 — Test gap closure (broad)
|
||||
|
||||
Single PR per group is overkill; recommend bundling test additions with each Phase 1-6 PR (test the bug being fixed). The remaining LOW IDs are pure-test-gap entries — list:
|
||||
|
||||
- PPCBUG-045 (shift), 047 (rld), 055 (branch), 067 (trap+sc), 070 (CR logical)
|
||||
- PPCBUG-081, 082, 083, 084, 085 (SPR/MSR/TB/FPSCR/VSCR moves), 089 (cache+sync)
|
||||
- PPCBUG-091 (lbz), 100 (lha), 109, 110, 111 (lwa/lwbrx/lwarx), 118 (ld), 127 (lmw/lswi/lswx), 129 (lfs/lfd)
|
||||
- PPCBUG-132 (stb/sth), 146, 147 (stw/stwcx), 153 (std/stdcx), 163 (stmw/stswi/stswx), 171 (stfs/stfd)
|
||||
- PPCBUG-187 (FPU single), 208 (FPU double), 228 (FPU misc convert)
|
||||
- PPCBUG-240 (VMX add/sub), 243 (VMX sat helpers)
|
||||
- PPCBUG-277, 278, 279 (VMX compare/min/max/avg)
|
||||
- PPCBUG-316, 317, 320, 321, 322, 323, 324, 325 (VMX shift/rotate/logical)
|
||||
- PPCBUG-370, 371, 372, 373, 374, 375, 376, 377, 378 (VMX permute/pack)
|
||||
- PPCBUG-438, 439, 440 (VMX float compare/round/convert)
|
||||
- PPCBUG-490, 491, 492, 493, 494 (VMX multiply-sum)
|
||||
- PPCBUG-517, 518, 519 (VMX load/store)
|
||||
- PPCBUG-567 (decoder accessors)
|
||||
- PPCBUG-604 (decoder dispatch tables)
|
||||
- PPCBUG-649, 650, 652 (golden fixtures for branches/VMX128)
|
||||
|
||||
---
|
||||
|
||||
## Notes & administrative
|
||||
|
||||
### Withdrawn / retracted
|
||||
|
||||
- **PPCBUG-220** — `fctiwx` strict-`>` threshold actually correct (`i32::MAX` exactly representable in f64). Retracted by group-31 subagent.
|
||||
- **PPCBUG-222** — `fctidx` positive-overflow sentinel `0x7FFF_FFFF_FFFF_FFFF` is the correct ISA value. Retracted.
|
||||
- **PPCBUG-226** — FPRF 5-bit codes for fcmpu/fcmpo are correct per PowerISA. Retracted.
|
||||
- **PPCBUG-482** — `vmhaddshs` shift `>>15` is correct per spec snapshots. Retracted.
|
||||
- **PPCBUG-483** — `vmhraddshs` shift `>>15` is correct per spec snapshots. Retracted.
|
||||
|
||||
### Wontfix / informational (not retracted but no fix needed)
|
||||
|
||||
- **PPCBUG-038** — extswx ISA-correct, intentional 64-bit sign-extension. Document the asymmetry with extsb/extsh after PPCBUG-034/035 land.
|
||||
- **PPCBUG-090, 099, 152** — invalid-form (rD==rA) silently destroys load/store result. Per ISA: undefined behavior. No compiler emits these; matches Canary. Optional `debug_assert!`.
|
||||
- **PPCBUG-106, 115, 131, 169, 170, 206, 207, 318, 319, 364, 365, 434, 651, 653, 645, 646, 648** — informational confirmations that the implementation is correct, no change needed.
|
||||
- **PPCBUG-069** — test comment OX(so)=0 is wrong but the assert is correct.
|
||||
- **PPCBUG-602, 603, 605** — undocumented decoder dispatch quirks; correct but should add comments.
|
||||
- **PPCBUG-647, 654** — disassembler edge-case behavior on invalid encodings; not-a-bug for valid input.
|
||||
|
||||
### Coupling matrix (must-land-together)
|
||||
|
||||
| Group | IDs | Reason |
|
||||
|---|---|---|
|
||||
| divwx | 010, 011 | Quotient zero-extension changes the CR0 sign view |
|
||||
| srawx/srawix | 041, 042, 043 | Writeback truncation invalidates the CR0 view |
|
||||
| extsbx/extshx | 034+036, 035+037 | Same coupling shape as srawx |
|
||||
| sh64 | 040, 560 | Test helper is wrong in the inverse direction |
|
||||
| mb_md sweep | 046, 561 | Promote disasm.rs accessor first |
|
||||
| VC-form Rc | 275, 276, 420, 421, 562 | All consume the same new accessor |
|
||||
| VMX128_R Rc | 422, 562 | Same accessor sweep |
|
||||
| vrlimi128 | 315, 563 | Field accessor + caller fix |
|
||||
| vsldoi128 | 361, 565 | Field accessor + caller fix |
|
||||
| vpermwi128 | 362, 564 | Field accessor + caller fix |
|
||||
| vcmp*fp128. | 423, 600 | decode_op6 odd keys + opcode mapping |
|
||||
| XER TBC | 123, 124, 161, 566 | Add field, wire xer()/set_xer(), enables lswx/stswx |
|
||||
| round_to_i64 | 221, 227 | round_to_i32 delegates |
|
||||
| stfs FPSCR | 165, 166, 168 | Single fix shape covers all three |
|
||||
|
||||
### Dependency on the addis fix
|
||||
|
||||
The addis fix (`project_xenia_rs_addis_signext_root_cause_2026_04_29.md`) is already in place. Phase 4 generalizes that fix systematically; without it, the writeback-truncation invariant would still be incomplete.
|
||||
|
||||
### Anticipated impact on the Sylpheed renderer plateau
|
||||
|
||||
Strong candidates for direct cause of the plateau:
|
||||
- **PPCBUG-107** — broken atomics. Workers wait forever on never-signaled events; classical broken-spinlock symptom.
|
||||
- **PPCBUG-053+054** — broken `bdnz` loops; could explain workers parked indefinitely.
|
||||
- **PPCBUG-046 (`clrldi r3, r4, 32`)** — pollution propagation in 32-bit ABI; could break any pointer-clean-up sequence.
|
||||
|
||||
After applying Phase 1 alone, run `xenia-rs check sylpheed.iso -n 4B --parallel` and check whether `draws > 0`. If yes, the plateau was atomics; if no, proceed to P2/P3.
|
||||
|
||||
---
|
||||
|
||||
## Progress log
|
||||
|
||||
### P1 — Cross-thread atomicity sweep (merged 2026-05-01, HEAD ca5b90b)
|
||||
|
||||
**PPCBUGs fixed**: 107, 130, 140, 141, 142, 143, 144, 150, 160, 167, 511, 512, 513, 514, 151, 108. Plus review-fix additions: dcbz, dcbz128, stswi two-line, stswx two-line (merged in review-fix commit c9f194d).
|
||||
|
||||
**Gate results**:
|
||||
- `cargo test --workspace --release`: 449 passed, 0 failed
|
||||
- `-n 100M` lockstep: swaps=2, clean
|
||||
- `-n 100M --parallel --reservations-table`: swaps=2, clean
|
||||
- **Acid test** `-n 4B --parallel --reservations-table`: swaps=2, draws=**0**, no RtlRaiseException, no panics
|
||||
|
||||
**Conclusion**: P1 did NOT unblock the Sylpheed renderer. `draws` remains 0. The renderer plateau is not caused by broken cross-thread atomics alone. Proceeding to P2 (decoder/field-extraction sweep). The strongest remaining candidate per the plan is PPCBUG-046 (`clrldi r3, r4, 32` no-op).
|
||||
|
||||
---
|
||||
|
||||
### P2 — Decoder/field-extraction structural sweep (merged 2026-05-01, HEAD see `git log master --oneline -1`)
|
||||
|
||||
**PPCBUGs fixed**: 040, 046, 275, 276, 315, 360, 361, 362, 363, 369, 420, 421, 422, 423, 560, 561, 562, 563, 564, 565, 600.
|
||||
|
||||
**Batches**:
|
||||
- Batch 1: PPCBUG-040+560 — sh64() bit-order fix (XS-form SH split) + rldicl test helper encoding
|
||||
- Batch 2: PPCBUG-046+561 — mb_md() accessor; all 6 rld* MB fields corrected (clrldi was a no-op)
|
||||
- Batch 3: PPCBUG-275+276+420+421+422+423+562+600 — vc_rc_bit()/vx128r_rc_bit() Rc accessors; 13 vcmp interpreter sites; 5 decode_op6 dot-form entries
|
||||
- Batch 4: PPCBUG-315+563 — vrlimi128 vx128_4_z/imm field extraction
|
||||
- Batch 5: PPCBUG-361+565 — vsldoi128 vx128_5_sh field extraction
|
||||
- Batch 6: PPCBUG-362+564 — vpermwi128 vx128_p_perm field extraction
|
||||
- Batch 7: PPCBUG-360 — vperm128 vc128_2() accessor (was erroneously vd128())
|
||||
- Batch 8: PPCBUG-363+369 — vpkd3d128 post-pack permutation (MakePermuteMask tables from canary)
|
||||
|
||||
**Gate results**:
|
||||
- `cargo test --workspace --release`: 201 (cpu) + 6 (disasm goldens) + 144 + 76 + 16 + 8 + … passed, 0 failed
|
||||
- Independent code reviewer: all 9 check items OK
|
||||
- `-n 100M` lockstep smoke: ISO not available in CI environment; last known good at P1 HEAD was swaps=2
|
||||
- **Acid test** `-n 4B --parallel --reservations-table`: pending (ISO not in CI environment)
|
||||
|
||||
**Conclusion**: All P2 fixes applied and reviewed. Decoder field extraction is now correct for all audited VMX128 and MD/XS-form instructions. Whether P2 unblocks the renderer (`draws > 0`) requires the sylpheed.iso acid test on the user's machine. PPCBUG-046 (clrldi no-op fix) was the highest-probability P2 renderer-unblock candidate. Next: P3 — isolated HIGH bugs (PPCBUG-510, 424/425, 053+054, 640, 641).
|
||||
|
||||
---
|
||||
|
||||
### P3 — Isolated HIGH bugs (merged 2026-05-02, HEAD f3ebaba)
|
||||
|
||||
**PPCBUGs fixed**: 053+054 (coupled CTR 32-bit), 424+425 (vmaddfp128/vmaddcfp128 operand swap), 510 (stvewx128 corruption), 640+650 (bdnz/bdz suffix), 641+649 (sync/lwsync), **700 (NEW)**.
|
||||
|
||||
**Batches**:
|
||||
- Batch 1: PPCBUG-510 — stvewx128 16-byte write (corrupting 12 adjacent bytes) fixed (word-align EA, extract lane, write 4 bytes)
|
||||
- Batch 2: PPCBUG-424+425 + PPCBUG-700 partial (va128 PPC[11-15] partial fix) — vmaddfp128/vmaddcfp128 operand swap to VA*VD+VB
|
||||
- Batch 3: PPCBUG-053+054 — bcx/bclrx 32-bit CTR compare + mtspr CTR truncation
|
||||
- Batch 4: PPCBUG-640+650 — fmt_bc spurious bdnzge/bdzge suffix gated on `!uncond`
|
||||
- Batch 5: PPCBUG-641+649 — sync/lwsync L-field disambiguation
|
||||
- Phase review fix: **PPCBUG-700 (NEW)** — VMX128 register accessors (va128/vb128/vd128/vx128r_rc_bit) rewritten to canary's bitfield positions. Audit's "confirmed-clean" line-2958 assessment was based on miscounting LSB-first packed C++ bitfields. Per canary (`xenia-canary/src/xenia/cpu/ppc/ppc_decode_data.h:484-663`):
|
||||
- VA128 = PPC[11-15] | PPC[26]<<5 | PPC[21]<<6 (3 fields, 7 bits)
|
||||
- VB128 = PPC[16-20] | PPC[30-31]<<5
|
||||
- VD128 = PPC[6-10] | PPC[28-29]<<5
|
||||
- VX128_R Rc = PPC[25] (host bit 6) — NOT PPC[27] as PPCBUG-422 prescribed
|
||||
Affects 30+ VMX128 opcodes; production game code with VR>=32 was silently mis-decoded. Speculative `key4_dt` dot-form dispatch in `decode_op6` removed (canary has no separate dot-form opcodes for VX128_R). New PPCBUG-700 entry added to `audit-findings.md` Phase C4 invalidating audit line 2958.
|
||||
|
||||
**Gate results**:
|
||||
- `cargo test --workspace --release`: **470 passed, 0 failed** (up from 467 baseline at P3 start; 3 new CTR regression tests added)
|
||||
- Independent code reviewer: 1 BLOCKING issue (PPCBUG-700 above) — addressed before merge
|
||||
- `-n 100M` lockstep smoke: ISO not in CI; checked locally during development
|
||||
- **Acid test** `-n 4B --parallel --reservations-table`: **deferred to end of all phases** per user direction
|
||||
|
||||
**Conclusion**: All P3 fixes applied + reviewed + reviewer's blocking concern resolved. Phase 3 also produced one HIGH discovery (PPCBUG-700) that the audit had missed. Total fixes: 6 commits, 7 distinct PPCBUG groups. Next: P4 — 32-bit ABI writeback truncation sweep, ~30 IDs across 4a-4d sub-sections.
|
||||
|
||||
---
|
||||
|
||||
### P4 — 32-bit ABI writeback truncation sweep (merged 2026-05-02, HEAD d945aea)
|
||||
|
||||
**PPCBUGs fixed**: ~43 IDs across the 4a/4b/4c/4d sub-sections.
|
||||
- 4a active poisoning: 006 (negx), 008 (subfex), 018 (subfzex), 019 (subfmex), 028 (orcx), 029 (norx), 030 (nandx), 031 (eqvx), 033 (andcx)
|
||||
- 4a/4d coupled: 034+035+036+037 (extsbx/extshx writeback + CR0)
|
||||
- 4b immediate ALU: 001 (addi), 002 (addic), 003 (addicx), 004 (mulli), 005 (subficx), 007 (subfcx CA)
|
||||
- 4b mul/div + srawx coupled: 009 (mullwx), 010+011 (divwx + CR0), 041+042+043 (srawx/srawix + CR0)
|
||||
- 4b loads: 095-098 (lha/lhax/lhau/lhaux), 105 (lwa/lwax/lwaux)
|
||||
- 4c latent: 012-017 (addx/addcx/addex/addzex/addmex/subfx), 032 (andx/orx/xorx CR0)
|
||||
- 4d CR0 catch-all: 020 (in mulhwx/mulhwux/divwux/andx/orx/xorx/cntlzwx etc.), 023 (andisx), 024 (rlwinmx), 025 (rlwimix), 026 (rlwnmx), 044 (slwx/srwx)
|
||||
|
||||
**Batches**:
|
||||
- Batch 1 (e18a0a4): 4a active poisoning NOT/SUB family — 9 PPCBUGs
|
||||
- Batch 2 (145a7a4): 4a/4d coupled extsbx+extshx+CR0 — 4 PPCBUGs (must land together)
|
||||
- Batch 3 (bf8208e): 4b immediate ALU — 6 PPCBUGs
|
||||
- Batch 4 (82a9bff): 4b mul/div + srawx coupled — 6 PPCBUGs (two coupling groups)
|
||||
- Batch 5 (20a730d): 4b halfword + lwa loads — 5 PPCBUGs
|
||||
- Batch 6 (16993bb): 4c latent + 4d CR0 catch-all — ~13 PPCBUGs
|
||||
- Review-fix (49103bb): subfx/subfcx OE predicate + mulli test rigor
|
||||
|
||||
**Phase invariants restored**: every 32-bit ABI GPR write zero-extends from a u32 result, every CR0 update views the result as i32, every CA bit comes from a 32-bit unsigned compare. Downstream 64-bit unsigned compares (the addis-incident shape) can no longer be fed polluted upper bits from any of the 40+ touched ALU sites. The frozen-snapshot drift detected in PPCBUG-003 (addicx CR0) and PPCBUG-023 (andisx CR0) is also resolved.
|
||||
|
||||
**Review findings**:
|
||||
- BLOCKING issue caught: subfx and subfcx OE handlers in batch 6 still used the legacy `sum_overflow_64` helper. The helper compares the 32-bit `true_diff` against a u64 view of the result; any legitimate i32::MIN result (bit 31 set) spuriously triggered OV=1. Fixed in 49103bb with two new discriminating regression tests.
|
||||
- Minor caught: `mulli_overflow_wraps_to_32` rubber-stamped — both pre/post fix wrote 0 for the chosen inputs. Redesigned to use polluted-upper-bits inputs that genuinely discriminate.
|
||||
|
||||
**Gate results**:
|
||||
- `cargo test --workspace --release`: **494 passed, 0 failed** (up from 470 at P3 merge; 24 new regression tests across the batches)
|
||||
- 64-bit ABI ops verified untouched: rldicl/rldicr/rldic/rldimi/rldcl/rldcr, sldx/srdx/sradx/sradix, mulhdx/mulhdux/mulldx, divdx/divdux, cntlzdx, extswx
|
||||
- **Acid test** `-n 4B --parallel --reservations-table`: deferred per user direction
|
||||
|
||||
**Conclusion**: P4 is the largest ABI-correctness sweep of the audit. The systemic invariant is restored. Next: P5 — FPU correctness (~30 IDs).
|
||||
|
||||
---
|
||||
|
||||
### P5 — FPU correctness (merged 2026-05-02, HEAD d39d0ba)
|
||||
|
||||
**PPCBUGs fixed**: 21 IDs across the 5a-5f sub-sections.
|
||||
- 5a (round-to-int): 221+227 (round_to_i64 NearestEven near 2^52, coupled), 432 (vrfin round-to-even)
|
||||
- 5b (FMA VXISI + NaN sign): 181, 182 (single fmaddsx/fmsubsx VXISI), 202, 203 (double fmaddx/fmsubx/fnmaddx/fnmsubx VXISI), 183, 205 (NaN sign preservation in fnmaddx/fnmsubx and *sx siblings)
|
||||
- 5c (XX-on-inexact): 223 (verified already correct), 224 (fcfidx XX), 225 (frspx XX), 229 (fctidx/fctidzx XX), 230 (fctiwx/fctiwzx XX)
|
||||
- 5d (subnormal flush): 435 (vaddfp/vsubfp/vmulfp128 missing flush), 436 (vmsum3fp128/vmsum4fp128 per-product flush), 437 (vmaddfp family output flush)
|
||||
- 5e (estimate precision): 184 (fresx canary parity via f32 input quantization)
|
||||
- 5f (saturation + single-FMA): 426 (vnmsubfp single FMA), 427 (vnmsubfp128 single FMA), 433 (vctsxs NaN→INT_MIN)
|
||||
|
||||
**Batches**:
|
||||
- Batch 1 (f6a444b): 5a round-to-int + vrfin
|
||||
- Batch 2 (26b9897): 5b FMA — new `check_invalid_fma_add` helper in fpscr.rs derives VXISI from input properties
|
||||
- Batch 3 (49bf74f): 5c XX bit on conversions
|
||||
- Batch 4 (538fa5a): 5d VSCR.NJ unconditional flush (matches Canary; Xbox 360 always boots NJ=1)
|
||||
- Batch 5 (6ba8f83): 5e fresx pre-quantize input
|
||||
- Batch 6 (6fe2cbf): 5f single-FMA + vctsxs NaN
|
||||
- Review-fix nit (05f2f72): vrfin → stdlib `f32::round_ties_even()`
|
||||
|
||||
**Deferred for focused sub-batches** (Status: open in audit-findings.md):
|
||||
- PPCBUG-201 (FPSCR.RN for double arithmetic) — requires MXCSR set/restore wrappers around 10+ FPU arms
|
||||
- PPCBUG-185 (FPSCR.NI flush for scalar FPU) — requires NI bit constant + post-op flush wrapper
|
||||
- PPCBUG-180 + PPCBUG-200 (XX/FR/FI in update_after_op) — requires pre-vs-post-round comparison
|
||||
|
||||
**Review findings**:
|
||||
- Independent reviewer verdict: **MERGE-READY**. No blocking issues.
|
||||
- Two non-blocking minor follow-ups noted: (a) `check_invalid_fma_add` doesn't catch the finite-product-overflow + infinite-b cancellation half of PPCBUG-202 (audit-acknowledged as rare); (b) vrfin used inline tie-breaker — replaced with stdlib `round_ties_even()` in 05f2f72.
|
||||
|
||||
**Gate results**:
|
||||
- `cargo test --workspace --release`: **498 passed, 0 failed** (up from 494 at P4 merge; 5 new regression tests across the batches)
|
||||
- **Acid test** `-n 4B --parallel --reservations-table`: deferred per user direction
|
||||
|
||||
**Conclusion**: P5 covers the FPU correctness foundation (round-to-int, VXISI, NaN preservation, XX bit, subnormal flush). Three substantive items deferred. Next: P6 — Other MEDIUM correctness (overflow.rs sweep, trap PC-after-advance, sc LEV, twi typed-trap, etc.).
|
||||
|
||||
---
|
||||
|
||||
### P6 — Other MEDIUM correctness (merged 2026-05-02, HEAD 112202c)
|
||||
|
||||
**PPCBUGs fixed**: 13 IDs across the misc-MEDIUM scope, plus 4 IDs closed by verification or auto-resolution (last bullet below).
|
||||
- Trap/sc/typed-trap (063/064/065): trap PC stays at CIA on Trap; sc LEV is logged; the SIMM type code of `twi 31, r0, IMM` is logged.
|
||||
- XER TBC infrastructure (123/124/161/566): new `xer_tbc: u8` field in `PpcContext`, wired into `xer()`/`set_xer()`; enables `lswx`/`stswx` (which were permanent no-ops without the TBC infrastructure).
|
||||
- Load-multiple cleanups (125/126/162): `lmw` skips writes to RA when in [RT..32) per ISA; `lswi`/`stswi` use `instr.nb()` instead of misnamed `instr.rb()`.
|
||||
- SPR/MSR/VSCR (068/078/080): `mcrfs` now recomputes the VX summary bit; `mtmsrd L=1` does the partial MSR write per ISA; `mfvscr` zero-extends the VSCR word into the upper 96 bits of VD.
|
||||
- Verification/auto-resolved (022/021/027/039): `mulld_ov` test confirms `checked_mul` handles INT_MIN*-1 correctly (audit's "missing" claim was incorrect); 021/027 auto-resolved by P4; 039 wontfix per audit.
|
||||
|
||||
**Batches**:
|
||||
- Batch 1 (d96986a): trap/sc semantics
|
||||
- Batch 2 (68c0ee5): XER TBC + load-multiple cleanups
|
||||
- Batch 3 (0f2a26c): SPR/MSR/VSCR
|
||||
- Batch 4 (99e7814): mulld_ov verification
|
||||
- Review-fix nit (5ece5e3): mcrfs uses existing `fpscr::VX_ALL` constant
|
||||
|
||||
**Deferred (Status: open in audit-findings.md)**:
|
||||
- Structural enum extensions (no consumer yet): `StepResult::HypervisorCall` for PPCBUG-064 sc 2 routing; `StepResult::Trap { type_code: u16 }` for PPCBUG-065 typed-trap C++ exception class routing — relevant if/when SEH dispatch lands.
|
||||
- Cosmetic/test-coverage: PPCBUG-642 (fmt_bcctr ISA-undefined edge), 643/644 (SIMM/D-form decimal vs hex — would re-baseline all goldens), 367/368 (vupkhpx/vpkpx channels), 487/495 (vsum naming), 515/516 (lvebx/lvsr docs), 601 (decode_op6 invariant doc).
|
||||
|
||||
**Review findings**: independent reviewer verdict was LGTM on all 4 commits, one cosmetic nit (use existing `fpscr::VX_ALL` instead of duplicate inline mask) applied immediately in 5ece5e3. No blocking issues. Reviewer specifically verified: trap-PC change against all `StepResult::Trap` consumers (none rely on `ctx.pc` for the faulting address); XER TBC field initialization through the single `PpcContext::new()` path that `Default` delegates to; `Vec128` lane ordering for `mfvscr` zero-extend.
|
||||
|
||||
**Gate results**:
|
||||
- `cargo test --workspace --release`: **498 passed, 0 failed**
|
||||
- **Acid test** `-n 4B --parallel --reservations-table`: deferred per user direction
|
||||
|
||||
**Conclusion**: P6 closes the misc-MEDIUM scope. All correctness fixes in scope have landed; structural enum extensions and cosmetic items are explicitly deferred and tracked. Remaining phases: P7 (frozen-snapshot drift, 8 opcodes), P8 (test gap closure, ~50 IDs).
|
||||
|
||||
---
|
||||
|
||||
### P7 — Frozen-snapshot drift sweep (2026-05-02, manual regen — no xenia-rs code change)
|
||||
|
||||
**PPCBUGs fixed**: 3 IDs.
|
||||
- PPCBUG-066: ppc-manual/branch/{td,tdi,tw,twi}.md — old unconditional-trap stub replaced with current TO-field-evaluating implementation snippet.
|
||||
- PPCBUG-117: ppc-manual/memory/ldarx.md — refreshed to current reservation_line/reservation_table model.
|
||||
- PPCBUG-145: ppc-manual/memory/stwcx.md — same reservation refresh.
|
||||
|
||||
**Methodology**: ran `python3 ppc-manual/generator/generate_manual.py` (the existing idempotent generator that scrapes xenia-rs and xenia-canary source for each opcode and emits a Markdown page). Output: 350 family pages updated, 598-key index.json refreshed.
|
||||
|
||||
**Verification**: post-regen `grep` confirms (a) the old "For now, just trace and continue" stub is gone from every page; (b) modern constructs (`trap::evaluate`, the current reservation pattern) appear in the trap and reservation pages.
|
||||
|
||||
**Note on scope**: the `ppc-manual/` directory is not versioned in `xenia-rs/.git`. The regen is therefore "done by running the script" with no commit landing in this repo. Documented for posterity here.
|
||||
|
||||
**Implicit drift cleared by earlier phases**: addicx (PPCBUG-003 fixed in P4), andisx (PPCBUG-023 fixed in P4), cmp/cmpi (PPCBUG-050 — no code change required; manual snapshot now reflects current behavior), extsbx/extshx (PPCBUG-036/037 fixed in P4 batch 2), 32 in batch 1 — all auto-resolved by re-running the generator after P1-P6.
|
||||
|
||||
**Conclusion**: P7 is functionally complete. No xenia-rs code change. Next: P8 — test gap closure.
|
||||
|
||||
---
|
||||
|
||||
### P8 — Test gap closure (merged 2026-05-02, HEAD 4029041)
|
||||
|
||||
**PPCBUGs closed**: 38 IDs across the test-gap LOW scope (audit listed ~50; 38 closed, ~12 remain Status: open as test-gap-only items that don't block functionality).
|
||||
|
||||
**Closed**:
|
||||
- Branch/CR/SPR/sync: 055, 067, 070, 081, 082, 083, 084, 085, 089
|
||||
- Loads: 091, 100, 109, 110, 111, 118, 127, 129
|
||||
- Stores: 132, 146, 147, 153, 163, 171
|
||||
- FPU: 187, 208, 228
|
||||
- VMX integer: 240, 277
|
||||
- VMX shift/rotate/logical: 316, 320, 321, 323
|
||||
- VMX permute: 370
|
||||
- VMX float compare/round/convert: 438, 439, 440
|
||||
- VMX multiply-add: 490
|
||||
- VMX load/store: 517
|
||||
|
||||
**Remaining open** (LOW test-gap, non-blocking): 045, 047, 066, 088 (disasm-only test gap), 117, 145, 279, 317, 322, 324, 325, 371-378, 491-494, 518, 519, 567. These can stay open until a focused test-coverage sprint, or be closed incidentally during ongoing work.
|
||||
|
||||
**Batches**:
|
||||
- Batch 1 (9827b03): branch/CR-logical/SPR/MSR/FPSCR/sync — 12 tests
|
||||
- Batch 2 (2d223ee): load/store base + XER-TBC-driven lswx/stswx — 15 tests
|
||||
- Batch 3 (ebfd18a): FPU + VMX float — 14 tests; reviewer caught a VX-form encoding nit (XO at bit 0 not bit 1) during this batch and the author re-encoded all VX/VC tests before commit
|
||||
- Batch 4 (2614806): VMX integer/permute/load-store — 12 tests
|
||||
- Review-fix nit (1f9696a): test rename `vmsum3fp_horizontal_3lane_sum` → `vmaddfp_lane_fma` (test body actually exercised vmaddfp)
|
||||
|
||||
**Review findings**: independent reviewer verdict was LGTM on all 4 batches with no blocking issues. Every hand-encoded raw was mechanically cross-checked against canary's `INSTRUCTION(0x..., ..., kVX|kVC|kX|kA, ...)` base raw — no encoding mismatches. The XER-TBC-driven `lswx`/`stswx` tests are particularly load-bearing: they exercise the new infrastructure landed in P6 (68c0ee5); both opcodes were permanent no-ops pre-P6.
|
||||
|
||||
**Gate results**:
|
||||
- `cargo test --workspace --release`: **551 passed, 0 failed** (up from 498 at the P6 merge, unchanged through P7, which landed no code — 53 net new tests; the `vmsum3fp_…` test rename is -1+1 = net 0)
|
||||
- **Acid test** `-n 4B --parallel --reservations-table`: deferred per user direction
|
||||
|
||||
**Conclusion**: P8 closes the meaningful test-coverage gaps for opcode groups that previously had near-zero unit tests. Combined with the regression tests embedded in P1-P6 commits, the test suite now exercises every primary opcode form (branch, CR, SPR, FPU, VMX integer, VMX float, VMX load/store, scalar load/store) at least once. Remaining LOW test-gap items can be closed incrementally without blocking the audit's functional fixes.
|
||||
|
||||
---
|
||||
|
||||
### Post-P8 — End-to-end review + acid test (2026-05-02)
|
||||
|
||||
**End-to-end reviewer findings** (cross-cutting after all 8 phases):
|
||||
|
||||
1. **BLOCKING-LIKELY**: `lwa`/`lwax`/`lwaux` were converted to zero-extend in P4 batch 5 (PPCBUG-105 "minimal-fix"); reviewer flagged this as ISA-deviating. Per PowerISA, "Load Word and Algebraic" must sign-extend. Hotfix landed at HEAD f1166d0 — restored `as i32 as i64 as u64` form, updated test from `lwa_high_bit_set_zero_extends_upper` to `lwa_sign_extends_to_i64`.
|
||||
2. **Cosmetic** `fpscr.rs:289` duplicate-branch typo in `round_single_toward_zero` — both branches were `adj_bits - 1`. Replaced with the unconditional form + comment. HEAD 09c6c92.
|
||||
3. **Minor** reservation table's `active_reservers` counter is slot-occupancy, not reserver-count — once dirtied via cross-line-collision displacement, stores eternally pay the `invalidate_for_write` Acquire-load cost. Correctness-preserving (counter is upper bound), but performance can degrade. Documented; deferred to a focused performance sub-batch.
|
||||
4. **Asymmetric** `extswx` is the only sign-extend opcode left at 64-bit ABI (P4 converted every other extsXx to 32-bit). Per PPCBUG-038 (audit `wontfix`), this matches ISA's documented "argument-register canonicalization in 64-bit mode" intent. No code change. Reviewer flagged the asymmetry — accepted.
|
||||
|
||||
**Acid test result** (`xenia-rs check sylpheed.iso -n 4000000000 --parallel --reservations-table`, 2026-05-02 12:28→12:46):
|
||||
- Exit code: 0 (clean termination, no panics, no RtlRaiseException, no halts)
|
||||
- swaps=1 (frame=1 XE_SWAP, fb=0x4b0d7000, 1280×720)
|
||||
- draws=0
|
||||
- 14 ExCreateThread spawns, 2 worker exits via LR sentinel
|
||||
- The renderer plateau is **NOT unblocked** by the cumulative P1-P8 correctness fixes
|
||||
- Note: the binary tested was pre-lwa-hotfix (built before commit f1166d0). The lwa change is unlikely to affect Sylpheed (compilers don't emit `lwa` in 32-bit-ABI code), but a re-run after the hotfix would be the conservative confirmation.
|
||||
|
||||
**Implication**: the root cause of the renderer plateau (`draws=0`) lies outside PPC instruction correctness. The audit's fixes were justified on correctness grounds independent of the renderer (the PPCBUGs are real bugs, well-grounded against canary), but the cumulative ~161 PPCBUG fixes do not unblock the specific Sylpheed rendering issue. The next investigation tracks should focus on:
|
||||
- Graphics-pipeline-side issues (EDRAM resolve gaps per `project_xenia_rs_edram_resolve_gap.md`, RT readback)
|
||||
- Kernel HLE divergences (event signaling, timer queues, file system)
|
||||
- The unresolved BST-validation paradox documented in `project_xenia_rs_sylpheed_event_chain_2026_04_29.md` (sub_82175E68 registers 0x828F3F68 in the BST but the validator doesn't find it eight instructions later)
|
||||
|
||||
These are out of scope for the PPC instruction audit.
|
||||
|
||||
---
|
||||
|
||||
## Index — every PPCBUG referenced (in numerical order)
|
||||
|
||||
This list intentionally includes every ID found in `audit-findings.md` so nothing is dropped. For each entry's full description / file:line / fix snippet / test recommendation, see the corresponding `### PPCBUG-NNN` heading in `audit-findings.md`.
|
||||
|
||||
001-022 (batch 1: integer ALU): 001, 002, 003, 004, 005, 006, 007, 008, 009, 010, 011, 012, 013, 014, 015, 016, 017, 018, 019, 020, 021, 022.
|
||||
|
||||
023 (batch 2 group 6 logic immediate): 023.
|
||||
|
||||
024-027 (batch 2 group 9 word rotate): 024, 025, 026, 027.
|
||||
|
||||
028-033 (batch 2 group 7 logic register): 028, 029, 030, 031, 032, 033.
|
||||
|
||||
034-039 (batch 2 group 8 sign-extend / count-leading-zeros): 034, 035, 036, 037, 038, 039.
|
||||
|
||||
040-045 (batch 2 group 11 shift): 040, 041, 042, 043, 044, 045.
|
||||
|
||||
046-047 (batch 2 group 10 doubleword rotate): 046, 047.
|
||||
|
||||
048-052 reserved (group 12 compare): 048, 049, 050.
|
||||
|
||||
053-055 (batch 3 group 13 branch): 053, 054, 055.
|
||||
|
||||
063-067 (batch 3 group 14 trap+sc): 063, 064, 065, 066, 067.
|
||||
|
||||
068-070 (batch 3 group 15 CR logical): 068, 069, 070.
|
||||
|
||||
078-085 (batch 3 group 16 SPR/MSR/TB/FPSCR/VSCR): 078, 079, 080, 081, 082, 083, 084, 085.
|
||||
|
||||
088-089 (batch 3 group 17 cache+sync): 088, 089.
|
||||
|
||||
090-091 (batch 4 group 18 load byte): 090, 091.
|
||||
|
||||
095-100 (batch 4 group 19 load halfword): 095, 096, 097, 098, 099, 100.
|
||||
|
||||
105-111 (batch 4 group 20 load word + reservation): 105, 106, 107, 108, 109, 110, 111.
|
||||
|
||||
115-118 (batch 4 group 21 load doubleword): 115, 116, 117, 118.
|
||||
|
||||
123-127 (batch 4 group 22 load multiple/string): 123, 124, 125, 126, 127.
|
||||
|
||||
128-129 (batch 4 group 23 load float): 128, 129.
|
||||
|
||||
130-132 (batch 5 group 24 store byte/halfword): 130, 131, 132.
|
||||
|
||||
140-147 (batch 5 group 25 store word + stwcx): 140, 141, 142, 143, 144, 145, 146, 147.
|
||||
|
||||
150-153 (batch 5 group 26 store doubleword): 150, 151, 152, 153.
|
||||
|
||||
160-163 (batch 5 group 27 store multiple/string): 160, 161, 162, 163.
|
||||
|
||||
165-171 (batch 5 group 28 store float): 165, 166, 167, 168, 169, 170, 171.
|
||||
|
||||
180-187 (batch 6 group 29 FPU single arithmetic): 180, 181, 182, 183, 184, 185, 186, 187.
|
||||
|
||||
200-208 (batch 6 group 30 FPU double arithmetic): 200, 201, 202, 203, 204, 205, 206, 207, 208.
|
||||
|
||||
220-231 (batch 6 group 31 FPU sign/move/compare/convert): 220 [retracted], 221, 222 [retracted], 223, 224, 225, 226 [retracted], 227, 228, 229, 230, 231.
|
||||
|
||||
240-243 (batch 7 group 32 VMX integer add/sub): 240, 241, 242, 243.
|
||||
|
||||
275-279 (batch 7 group 33 VMX integer compare/min/max/avg): 275, 276, 277, 278, 279.
|
||||
|
||||
315-325 (batch 7 group 34 VMX integer logical/shift/rotate): 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325.
|
||||
|
||||
360-378 (batch 8 group 35 VMX permute/pack): 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378.
|
||||
|
||||
420-440 (batch 8 group 36 VMX float arith+compare): 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440.
|
||||
|
||||
482-495 (batch 8 group 37 VMX multiply-sum + special): 482 [retracted], 483 [retracted], 487, 490, 491, 492, 493, 494, 495.
|
||||
|
||||
510-519 (batch 8 group 38 VMX load/store): 510, 511, 512, 513, 514, 515, 516, 517, 518, 519.
|
||||
|
||||
560-567 (Phase C1 decoder field extractors): 560, 561, 562, 563, 564, 565, 566, 567.
|
||||
|
||||
600-605 (Phase C2 decoder opcode-lookup): 600, 601, 602, 603, 604, 605.
|
||||
|
||||
640-654 (Phase C3 disassembler formatter): 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654.
|
||||
|
||||
**Counted IDs**: 253. **Retracted**: 220, 222, 226, 482, 483 (5). **Net actionable**: 248.
|
||||
|
||||
**Counted by phase here**: P1 (~17 IDs), P2 (~17 IDs), P3 (~7 IDs), P4 (~30 IDs), P5 (~30 IDs), P6 (~25 IDs), P7 (~5 IDs), P8 (~50 IDs), Notes (~30 wontfix/informational/retracted). Total accounts for all 253 IDs — every ID is either in a fix phase, the wontfix/informational list, or retracted. **Nothing has been dropped.**
|
||||
1013
audit-runs/audit-004/run-50m-probe-v2.txt
Normal file
1013
audit-runs/audit-004/run-50m-probe-v2.txt
Normal file
File diff suppressed because it is too large
Load Diff
1151
audit-runs/audit-004/run-50m-probe.txt
Normal file
1151
audit-runs/audit-004/run-50m-probe.txt
Normal file
File diff suppressed because it is too large
Load Diff
281
audit-runs/audit-006/canary_export_queue.md
Normal file
281
audit-runs/audit-006/canary_export_queue.md
Normal file
@@ -0,0 +1,281 @@
|
||||
# Canary-Only Export Fix Queue (audit-006)
|
||||
|
||||
- Status: **POST-KE-001 (2026-05-06): 2 canary-only (XamUserReadProfileSettings DROPPED post-XamUserGetSigninState landing earlier; KE-001 unsuspended audio workers but KeReleaseSemaphore producer is downstream-gated and did NOT fire).** `KeResumeThread` is now a real impl per canary `xboxkrnl_threading.cc:216-227` (KRNBUG-KE-001, branch `ke-resume-thread/p0-canary-mirror`). Cascade A passed: tids 9 (entry=0x824D2878) and 10 (entry=0x824D2940) leave Suspended → run prologue → park on `WaitAny` for audio buffer-completion semaphores `0x828A3254` / `0x828A3230`. Cascade B partial: `NtSetEvent 667→3334` (5×) but `KeReleaseSemaphore=0` and `XAudioSubmitRenderDriverFrame=0` — workers stuck before the producer. Cascade C predicted 2→1, actual 2→2 (`ExTerminateThread`, `KeReleaseSemaphore` both still canary-only). Cascade D: `--pc-probe=0x82184318,0x82184374` armed — neither fires; `--dump-addr=0x828F4070` no DUMP lines; γ-cluster blocker unchanged; signal_attempts on 0x1004/0x100c/0x1020/0x15e4 still 0. swaps=2 draws=0 plateau intact. Lockstep `instructions=100000003 imports=987516` deterministic ×2. Goldens re-baselined `sylpheed_n50m.json instructions 50000003→50000011, imports 407255→407247`. See KRNBUG-KE-001 in `audit-findings.md`.
|
||||
|
||||
- Prior status (superseded by KE-001): **POST-IO-004 (2026-05-06): 7 → 3 canary-only.** Real `XamNotifyCreateListener` + `XNotifyGetNext` landed (KRNBUG-IO-004). Dispatch arm at `0x822f1be8` now fires; `sub_82173DC8` runs in a tight loop on tid=1; renderer-cluster L1 entries `0x822c6870`, `0x824563e0`, `0x823ddb50` are reached for the first time. 4 reclassified RE-FIRES (now reached): `KeResetEvent`, `ObCreateSymbolicLink`, `XamTaskCloseHandle`, `XamTaskSchedule`. Still canary-only: `ExTerminateThread`, `KeReleaseSemaphore`, `XamUserReadProfileSettings` — all REAL_BUT_UNREACHED at the new boot horizon. Worker count 18 → 20. signal_attempts on 0x15e0 = 1 (was 0). draws=0 still expected at this step. See KRNBUG-IO-004 in `audit-findings.md` and `project_xenia_rs_io_004_xnotify_listener_2026_05_06.md`.
|
||||
|
||||
- Prior status (superseded by IO-004): **AUDIT-009 (2026-05-05): GATE IS HIGHER THAN THE CLUSTER ITSELF.** AUDIT-008's β-hypothesis (gate sits among the 5 callers of `sub_821800D8` in 0x82287000-0x82292FFF) is **falsified**: a 21-PC `--branch-probe` (the 6 parents + 5 shims + dispatcher + 9 audit-005 producer-callsites) shows **0/21 firings** at -n 500M (`audit-runs/audit-009/probe-500m.err`). The whole 0x82287000-0x82294000 cluster is unreached. Static analysis: the cluster's level-1 root functions (`sub_82293448`, `sub_822919C8`) have **zero non-call xrefs in sylpheed.db** — they are reached only via vtable / function-pointer that's never written. Main parks at `sub_822F1AA8` frame-poll loop forever (1.49M XNotifyGetNext iterations). Three canary-only exports (`ExTerminateThread`, `KeReleaseSemaphore`, `XamUserReadProfileSettings`) remain REAL_BUT_UNREACHED — same as audit-008. **DO NOT pull from this queue.** Next-session probe set: cluster L1 roots + new thread entry trampolines (0x822c6870 / 0x824563e0 / 0x823dde30 / 0x823ddb50) + main's frame-poll callees + main's post-poll continuation list. See KRNBUG-AUDIT-009 in `audit-findings.md` and `project_xenia_rs_audit_009_renderer_unreached_2026_05_05.md`.
|
||||
|
||||
- Prior status (superseded by AUDIT-009): **AUDIT-008 MODEL RESET (2026-05-05).** 0x100c worker IS spawned post-IO-003 as tid=3 (ctx=0x828F3D08), 0x1004 as tid=11, 0x15e0 as tid=17. AUDIT-008 hypothesized the gate among the 5 non-create-chain callers of `sub_821800D8` whose parents live in 0x82287000-0x82292FFF. AUDIT-009 falsified that — those parents are themselves never entered, so the gate is one level above.
|
||||
|
||||
- Prior status (superseded by AUDIT-008): **PARTIAL CASCADE (2026-05-04, post-KRNBUG-IO-003). 7 → 3 canary-only exports.** `NtDeviceIoControlFile` real impl landed; the priv-11 query (`XexCheckExecutablePrivilege(0xB)`) and `XamTaskSchedule` now fire. **Reclassified (now firing on our side):** `KeResetEvent`, `ObCreateSymbolicLink`, `XamTaskCloseHandle`, `XamTaskSchedule`. **Bonus pickups:** `XeCryptSha`, `XeKeysConsolePrivateKeySign` (both 0→1 — were not on the canary-only list because they were already in `ours_exports` but unreached). **Still canary-only:** `ExTerminateThread`, `KeReleaseSemaphore`, `XamUserReadProfileSettings`. ~~Worker thread spawn count unchanged at 19; handle 0x100c remains UNCREATED.~~ (audit-008: 0x100c worker IS spawned, claim was wrong.) See KRNBUG-IO-003 in `audit-findings.md` and `project_xenia_rs_io_003_ioctl_2026_05_04.md`.
|
||||
|
||||
- Prior status (now superseded): **SUPERSEDED by AUDIT-007 (2026-05-04). Real gate identified: `NtDeviceIoControlFile` (FsCtlCode=0x74004) is `stub_success` at `crates/xenia-kernel/src/exports.rs:90`. Game-side `sub_824ABD88:0x824abea8-ac` reads `[out_buf+8]` of the IOCTL response, finds zero (stub doesn't write OUT), assigns hardcoded `0xC0000034` (STATUS_OBJECT_NAME_NOT_FOUND); caller `sub_824A9710` exits at `0x824a9944` before priv-11. Tier 4 entries remain parked, classification unchanged (still REAL_BUT_UNREACHED), awaiting KRNBUG-IO-003. See `project_xenia_rs_audit_007_branch_probe_2026_05_04.md` for the runtime trace + decisive proof.**
|
||||
|
||||
- Prior status: **PARTIAL — KRNBUG-IO-002 landed, but predicted cascade did NOT fire (7 → 7). Tier 0 marked superseded; Tier 4 entries STILL parked. Re-audit needed to find the real upstream gate.**
|
||||
- Pre-state: master HEAD `556a8c3`, exports diff captured 2026-05-04
|
||||
- Post-IO-002 state: branch `xboxkrnl-vol-allocunit/p0-65536-cluster`, fresh 500 M trace at `audit-runs/post-IO-002/`. Canary-only kernel exports remain identical: `{ExTerminateThread, KeReleaseSemaphore, KeResetEvent, ObCreateSymbolicLink, XamTaskCloseHandle, XamTaskSchedule, XamUserReadProfileSettings}`.
|
||||
- Inputs:
|
||||
- `canary.log` (348720 B, identical to audit-005 oracle, canary build `9467c77f0`)
|
||||
- `ours.log` (692 MB, 5.6 M trace lines, run at 17:20–17:21 today, post-IO-001)
|
||||
- Tooling: `diff.py` + plain `comm -23` set-difference on extracted call names
|
||||
|
||||
## Headline finding
|
||||
|
||||
**7/7 canary-only entries classify as REAL_BUT_UNREACHED or STUB_BUT_UNREACHED.**
|
||||
Per the audit-006 spec stop condition ("if two-thirds of entries are
|
||||
REAL_BUT_UNREACHED, the problem isn't stubs — it's an upstream gate"),
|
||||
the next session should **NOT** pull a Tier-1 entry from this queue.
|
||||
Instead, it should fix the gate.
|
||||
|
||||
The gate is **KRNBUG-IO-002**: our `nt_query_volume_information_file`
|
||||
class-3 (FileFsSizeInformation) returns alloc_unit = 1 × 2048 = 2048,
|
||||
but Sylpheed's `main(1, 0x10000, 0xFF000)` expects alloc_unit = 65536
|
||||
(see `project_xenia_rs_io_nullfile_2026_05_04.md`).
|
||||
Sylpheed's verifier `sub_824ABA98` rejects 2048, propagates failure to
|
||||
`sub_824A9710`, which exits early before its `XexCheckExecutablePrivilege(0xB)`
|
||||
call site. Canary fires the priv-11 query *and* the entire downstream
|
||||
cluster (`XamTaskSchedule` → Cache0 callback thread → 0x100c worker spawn
|
||||
→ display-init pump → profile-settings cascade); we fire none of it.
|
||||
|
||||
Direct evidence (telemetry):
|
||||
- Our `XexCheckExecutablePrivilege` count = **1** (priv=0xA only).
|
||||
Canary count = **2** (priv=0xA + priv=0xB).
|
||||
- All 7 canary-only entries have ours-side count = **0** at -n 500M.
|
||||
- Our trace ends with main thread (hw=0) parked on `XNotifyGetNext +
|
||||
NtWaitForSingleObjectEx(0x10f4, lr=0x824ac578)` and hw=1 parked on
|
||||
`NtWaitForMultipleObjectsEx(lr=0x824ab214) + cs=0x828f3e70` —
|
||||
classic post-cache-recreate spin.
|
||||
- The 44 `NtWriteFile` calls in ours.log (cache zero-fill) are followed by
|
||||
more NtClose / NtCreateFile cycles, but `XexCheckExecutablePrivilege(0xB)`
|
||||
never fires → priv-11 site in `sub_824A9710` is unreached.
|
||||
- Memory's predicted `0xC000014F` does not yet appear in ours.log; first
|
||||
cache-related error is `0xC0000034` (OBJECT_NAME_NOT_FOUND) from
|
||||
`lr=0x824a97e4`. This still fits the gate hypothesis: the recreate path
|
||||
is reached, completes its writes, re-opens, queries volume info, and
|
||||
the *game-side* verifier rejects our reply silently (no kernel error).
|
||||
|
||||
---
|
||||
|
||||
## Tier 0 — upstream gate (SUPERSEDED 2026-05-04 — fix landed but cascade did NOT fire)
|
||||
|
||||
### KRNBUG-IO-002 — `nt_query_volume_information_file` block size — **LANDED, gate hypothesis FALSIFIED**
|
||||
|
||||
**Outcome:** the block-size literals at `exports.rs:1255-1256` were corrected
|
||||
to canary's NullDevice values (`sectors_per_unit=0x80, bytes_per_sector=0x200`,
|
||||
product `0x10000`). 591 → 592 tests, lockstep `instructions=100000010, swaps=2,
|
||||
draws=0` deterministic across two reruns (`audit-runs/post-IO-002/lock_n100m_run{1,2}.json`).
|
||||
sylpheed_n50m oracle still matches its existing golden (no observable change at -n 50M).
|
||||
|
||||
**However, the predicted cascade DID NOT fire.** Set-difference on a fresh
|
||||
500 M trace (`audit-runs/post-IO-002/ours.log`) produces the **identical**
|
||||
seven-entry canary-only set audit-006 captured pre-fix:
|
||||
|
||||
```
|
||||
ExTerminateThread, KeReleaseSemaphore, KeResetEvent,
|
||||
ObCreateSymbolicLink, XamTaskCloseHandle, XamTaskSchedule,
|
||||
XamUserReadProfileSettings
|
||||
```
|
||||
|
||||
`XexCheckExecutablePrivilege` count remains **1** (priv=0xA only, priv=0xB
|
||||
unreached). `XamTaskSchedule` count remains **0**. Worker thread spawns
|
||||
fell from 19 → 18 (within noise — single thread variance per call-site
|
||||
breakdown: `lr=0x824ac5f0×15 + 0x824cd984×1 + 0x824d2e68×2`). The 16
|
||||
NtQueryVolumeInformationFile calls in `ours.log` all originate from
|
||||
a single LR `0x82611f38` — meaning the `audit-006` premise that
|
||||
`sub_824ABA98`/`sub_824A9710` consume the volume-info reply at the
|
||||
priv-11 gate may be **incorrect**, or the gate consumes a *different*
|
||||
information class entirely.
|
||||
|
||||
**Stop-condition triggered.** Per the IO-002 task brief, this session does
|
||||
not pivot to a second fix. The fix is correct (it makes our reply
|
||||
byte-identical to canary's NullDevice and survives every test we have);
|
||||
it is just not load-bearing for the priv-11 gate. The branch landed as a
|
||||
strict no-op at our current boot horizon — kept because it's correct and
|
||||
introduces no regression.
|
||||
|
||||
**Next-gate hypotheses for the next session (untested):**
|
||||
- The audit-005 disasm of `sub_824ABA98` may have mis-attributed the consumer
|
||||
of bytes_per_sector. The IO-001 trace decisively located the failure at
|
||||
the `NtReadFile` inside `sub_824A9710`, not at any volume-info site.
|
||||
Re-read the `sub_824A9710` disasm with that in mind.
|
||||
- Volume-info LR `0x82611f38` is far downstream of the priv-10/priv-11
|
||||
cluster (the calls *complete* successfully — they don't gate anything
|
||||
visible). The actual gate may be `nt_query_information_file`,
|
||||
`nt_query_full_attributes_file`, an FsCtl IOCTL, or a different
|
||||
alloc-unit query path.
|
||||
- Per AUDIT-005 instrumentation, the priv-11 site at `sub_824A9710` PC
|
||||
cluster has **never fired** in any session. Probe `sub_824A9710` entry
|
||||
with `--pc-probe` and trace which conditional exits the function before
|
||||
the priv-11 query — that's the real gate.
|
||||
|
||||
---
|
||||
|
||||
### KRNBUG-IO-002 — `nt_query_volume_information_file` block size (original spec, kept for archaeology)
|
||||
|
||||
- **Where in our code:** `crates/xenia-kernel/src/exports.rs:1241-1269` (function
|
||||
`nt_query_volume_information_file`).
|
||||
- **Classification:** `REAL_BUT_BUGGY`. Registered at exports.rs:100, called
|
||||
16× in ours.log (16× in canary.log too — call counts match), returns
|
||||
`STATUS_SUCCESS`, but the FileFsSizeInformation payload is wrong.
|
||||
- **Bug:** class=3 branch writes `(total=0x100000, free=0,
|
||||
sectors_per_unit=1, bytes_per_sector=2048)`. Product = 2048 bytes per
|
||||
cluster.
|
||||
- **Canary reference:**
|
||||
- Entry function `NtQueryVolumeInformationFile_entry` at
|
||||
`xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_io_info.cc:323` (case
|
||||
`XFileFsSizeInformation` at lines 355–365). Canary delegates to per-device
|
||||
methods on `file->device()`.
|
||||
- `NullDevice` (the device backing `\Device\Harddisk0\Cache0`) returns
|
||||
`sectors_per_allocation_unit() = 0x80` and `bytes_per_sector() = 0x200`
|
||||
at `xenia-canary/src/xenia/vfs/devices/null_device.h:38-46`. Product =
|
||||
65536, matching Sylpheed's expectation.
|
||||
- Other device backings (HostPath, DiscImage, DiscZArchive) all return
|
||||
`(1, 0x200)` = 512. Sylpheed's first volume query at this site is
|
||||
against Cache0 (NullDevice), so the relevant value is 65536.
|
||||
- **Fix sketch (minimum):** in the class=3 branch, change the two writes to
|
||||
`mem.write_u32(info+16, 128); mem.write_u32(info+20, 512);` (and reduce
|
||||
TotalAllocationUnits accordingly so the disk size remains plausible —
|
||||
e.g. 0x10 units × 128 sectors × 512 bytes ≈ 1 MB, matching NullDevice).
|
||||
Total diff ≤ 4 lines.
|
||||
- **Fix sketch (proper, deferred until the cluster fires reliably):**
|
||||
introduce a per-handle device-info lookup so HostPath / DiscImage paths
|
||||
return their canary-correct values too. Skipped for now because Sylpheed
|
||||
only queries Cache0 at this gate.
|
||||
- **Expected observable post-fix:**
|
||||
- `XexCheckExecutablePrivilege` count: 1 → 2.
|
||||
- `XamTaskSchedule` count: 0 → 1 (callback=0x824A93C8, message=0x828A28F0).
|
||||
- `kernel.calls{XamTaskSchedule}` finally non-zero — closes the
|
||||
APUBUG-PRODUCER-001 / XAMBUG-PRODUCER-001 producer hunt that
|
||||
falsified XAudio + XamTaskSchedule producer hypotheses.
|
||||
- Spawn of the Cache0 callback thread (XThread::Execute thid 7 in
|
||||
canary, our equivalent to come).
|
||||
- Inside that thread: `StfsCreateDevice` (still undefined extern in
|
||||
canary too — does not block) + `ObCreateSymbolicLink` +
|
||||
`ExRegisterTitleTerminateNotification`.
|
||||
- Back on main: `KeResetEvent(0x8287094C)`, `NtCreateEvent`,
|
||||
`ExCreateThread(entry=0x82181830, ctx=0x828F3D08)` — and `0x82181830`
|
||||
is the worker entry for **dispatcher 0x100c**, one of the four
|
||||
parked-handle producers (per
|
||||
`project_xenia_rs_producer_stack_trace_2026_05_03.md`). Spawning
|
||||
that worker should advance handle 0x100c's `signal_attempts`
|
||||
counter off zero.
|
||||
- Eventually (further into the boot): `XamUserGetXUID`,
|
||||
`XamUserReadProfileSettings`, `XamContentCreateEnumerator`,
|
||||
`KeReleaseSemaphore` display-pump (268+ calls in canary at this
|
||||
horizon).
|
||||
- **Risk:** low. Two-line value change. NullDevice is the only device
|
||||
Sylpheed asks about at this gate; other devices are not yet hit.
|
||||
- **Effort:** trivial.
|
||||
- **Dependencies:** none. Land directly.
|
||||
- **Verification chain:** `cargo test -p xenia-kernel`,
|
||||
then `cargo run --release -p xenia-app -- exec sylpheed.iso -n 500_000_000`
|
||||
with kernel-call tracing on, then re-run audit-006's set-difference;
|
||||
expect canary-only count to drop from 7 toward 0 as the cluster fires.
|
||||
|
||||
---
|
||||
|
||||
## Tier 4 — REAL_BUT_UNREACHED / STUB_BUT_UNREACHED — do not fix yet
|
||||
|
||||
These are downstream of Tier 0. Reachability is blocked on KRNBUG-IO-002
|
||||
landing. After IO-002 lands, re-derive this list — most entries should
|
||||
have moved off, and any survivors will be classifiable on real evidence.
|
||||
|
||||
| # | Export | Ordinal | Library | Our state | Canary impl | Canary calls (at horizon) | Cascade rank |
|
||||
|---|--------|---------|---------|-----------|-------------|---------------------------|--------------|
|
||||
| 1 | `XamTaskSchedule` | 0x01AF | xam | REAL_BUT_UNREACHED (`xam_task_schedule`, xam.rs:213) | `xam_task.cc:43-80` | 1 (gate-pivot call) | upstream-of-cluster — fires the entire post-IO-002 cascade |
|
||||
| 2 | `XamTaskCloseHandle` | 0x01B1 | xam | STUB_BUT_UNREACHED (`stub_success`, xam.rs:33) | `xam_task.cc:83-93` (one-liner: `NtClose` + last-error) | 1 | low (cleanup after #1) |
|
||||
| 3 | `KeResetEvent` | 0x8F | xboxkrnl | REAL_BUT_UNREACHED (`ke_reset_event`, exports.rs:3172) | `xboxkrnl_threading.cc:566` | 1 | medium — clears 0x8287094C right before ExCreateThread(0x82181830) on main |
|
||||
| 4 | `ObCreateSymbolicLink` | 0x0103 | xboxkrnl | STUB_BUT_UNREACHED (`stub_success`, exports.rs:121) | `xboxkrnl_ob.cc:351` | 1 | low — Cache0-symlink registration; cosmetic for Sylpheed boot |
|
||||
| 5 | `KeReleaseSemaphore` | 0x88 | xboxkrnl | REAL_BUT_UNREACHED (`ke_release_semaphore`, exports.rs:3280) | `xboxkrnl_threading.cc:724` | 268 | high (in volume) — display-init pump on the post-cluster main loop |
|
||||
| 6 | `ExTerminateThread` | 0x19 | xboxkrnl | REAL_BUT_UNREACHED (`ex_terminate_thread`, exports.rs:312) | `xboxkrnl_threading.cc:173` | 2 | low — thread cleanup on Cache0 / profile threads |
|
||||
| 7 | `XamUserReadProfileSettings` | 0x0219 | xam | REAL_BUT_UNREACHED (`xam_user_read_profile_settings`, xam.rs:327) | `xam_user.cc:329` | 2 | medium — gates the `XamUserGetXUID → profile load` flow far downstream |
|
||||
|
||||
**Why every entry above is Tier 4 (not Tier 1):**
|
||||
|
||||
- Each entry's first call in `canary.log` falls **at or after** line 1210
|
||||
(`XamTaskSchedule(824A93C8, ...)`), which is the gate-pivot call.
|
||||
- Our trace contains zero calls to any of the seven, despite running 500 M
|
||||
instructions and reaching the post-cache-recreate horizon.
|
||||
- Five of the seven are already real implementations. The two stubs
|
||||
(`XamTaskCloseHandle`, `ObCreateSymbolicLink`) are minor cleanups; even
|
||||
upgrading them would not move boot progress until #1 (`XamTaskSchedule`)
|
||||
fires.
|
||||
- Therefore: fixing any of these in isolation is wasted effort. They
|
||||
should be re-classified after KRNBUG-IO-002 lands and the priv-11 /
|
||||
Cache0 callback chain runs.
|
||||
|
||||
---
|
||||
|
||||
## Tier 1 / 2 / 3 — empty for this audit
|
||||
|
||||
No entry qualifies as Tier 1, 2, or 3 in the current state. The single
|
||||
high-cascade fix worth pulling next is the Tier-0 gate (KRNBUG-IO-002),
|
||||
which is **not itself a canary-only export** — it's a wrong-value bug in
|
||||
an export both sides call, so the diff.py based set-difference doesn't
|
||||
surface it. That is exactly why audit-006 was scoped this way: to confirm
|
||||
the gate hypothesis from `project_xenia_rs_io_nullfile_2026_05_04.md`
|
||||
before another implementation session is started.
|
||||
|
||||
---
|
||||
|
||||
## Cross-check vs IO-001 snapshot
|
||||
|
||||
IO-001 memory recorded these 7 still-canary-only exports:
|
||||
> ExTerminateThread, KeReleaseSemaphore, KeResetEvent, ObCreateSymbolicLink,
|
||||
> XamTaskCloseHandle, XamTaskSchedule, XamUserReadProfileSettings.
|
||||
|
||||
Audit-006 set-difference produces the **identical** 7, in 1:1
|
||||
correspondence. No new canary-only export has appeared since IO-001
|
||||
landed; no entry has moved off. Cascade is still parked at the same gate.
|
||||
|
||||
The `XeCryptSha`, `XeKeysConsolePrivateKeySign`, and `NtDeviceIoControlFile`
|
||||
entries that IO-001 was credited with unblocking are confirmed: ours
|
||||
calls them 1, 1, 2 times respectively (canary calls them 1, 1, 2 — exact
|
||||
match). They are correctly off the canary-only list.
|
||||
|
||||
---
|
||||
|
||||
## Methodology notes
|
||||
|
||||
1. **"Cascade rank" definition:** estimated by where the export's first
|
||||
canary call falls in the boot sequence and how many downstream code
|
||||
paths depend on it. "high" = upstream-of-cluster (XamTaskSchedule).
|
||||
"medium" = intermediate (KeResetEvent, profile cascade).
|
||||
"low" = leaf cleanup or cosmetic (XamTaskCloseHandle, ObCreateSymbolicLink).
|
||||
Rank only matters once Tier 0 is landed; until then everything is parked.
|
||||
2. **Reachability oracle:** binary `grep -c "call=NAME"` against ours.log
|
||||
at -n 500M. Zero counts are conclusive for "unreached" because tracing
|
||||
is unconditional.
|
||||
3. **Canary log freshness:** the log is from 17:34 (3 h before this
|
||||
audit) but is byte-identical to audit-005's input — canary's behavior
|
||||
is deterministic given the same ROM and the canary build header
|
||||
(`canary_experimental@9467c77f0 on May 2 2026`) hasn't changed.
|
||||
Re-running through Lutris is unnecessary.
|
||||
4. **Gate confirmation:** memory predicted block-size mismatch as the
|
||||
IO-002 blocker; this audit confirmed it by eliminating the alternative
|
||||
(no Tier-1-eligible canary-only export exists in the current 7-entry
|
||||
list). The 0xC000014F status memory predicted is not yet visible in
|
||||
ours.log because the recreate path completes the writes — the
|
||||
verifier inside `sub_824ABA98` rejects the volume-info reply at the
|
||||
game level (no kernel error logged).
|
||||
5. **What this queue is *not*:** a list of fixes to land. The audit-006
|
||||
discipline was scoping; the discipline of subsequent sessions is to
|
||||
re-run audit-006's diff after IO-002, then either close audit-006 (if
|
||||
the cluster fires through and all 7 entries drop) or open audit-007
|
||||
on whatever new canary-only set surfaces.
|
||||
|
||||
---
|
||||
|
||||
## Recommended next session
|
||||
|
||||
**KRNBUG-IO-002 (block-size fix), one-shot.** Two-line edit at
|
||||
`crates/xenia-kernel/src/exports.rs:1255-1256`. Verify the cluster fires
|
||||
by re-running audit-006's set-difference; expect 7 → 0 (or close to 0)
|
||||
canary-only entries. If new entries surface in either direction, that's
|
||||
audit-007's input.
|
||||
|
||||
**Do not** open this queue's Tier 4 entries before IO-002 closes. Their
|
||||
classification is pending; their fix sketches will look very different
|
||||
once they're observably called and their actual return values can be
|
||||
compared to canary.
|
||||
0
audit-runs/audit-009/branch-probe.trace
Normal file
0
audit-runs/audit-009/branch-probe.trace
Normal file
131
audit-runs/audit-059-handle-disambiguation/FINDINGS.md
Normal file
131
audit-runs/audit-059-handle-disambiguation/FINDINGS.md
Normal file
@@ -0,0 +1,131 @@
|
||||
# AUDIT-059 — handle disambiguation (iterate 2.BD)
|
||||
|
||||
**Date:** 2026-06-06. **Engines:** ours `target/release/xenia-rs -n 50M` (3.9 s wall, 50M instr, 40k import calls), canary Wine `xenia_canary.exe --mute=true --audit_handle_lifecycle=true` (~35 s wall, 34k log lines, 0 fatals).
|
||||
|
||||
## Verdict — HANDOFF's wedge handles are stale
|
||||
|
||||
HANDOFF said: *"opt_callback signals 0x108c, tid=1 wedges on 0x10e8."* Both IDs are now `<UNCREATED>` in ours, along with `0x1090 / 0x10dc / 0x10fc / 0x1104` (also in HANDOFF's adjacent list). The allocation order shifted since that snapshot.
|
||||
|
||||
## Real wedges, current code state
|
||||
|
||||
| Handle | Kind | Engine state | Waiter | Notes |
|
||||
|---|---|---|---|---|
|
||||
| **0x12a4** | `<UNCREATED>` | `<AUDIT_BLIND>`, waiters=1 | **tid=1 main**, pc=0x824ac578 | Wait went via `do_wait_single` but creation never hit `NtCreateEvent` — `KeInitializeEvent` path. **This is the iterate-2.BC wedge** (recorded as "0x10e8" in HANDOFF — same site, different ID). |
|
||||
| **0x12ac** | Event/Auto | `<NO_SIGNALS_DESPITE_WAITS>`, waiters=1 | **tid=13** silph UI cluster, pc=0x824ac578 lr=0x821cb1e0 | Frame trail: `0x821cb1e0 → 0x821cbae0 → 0x821cc454 → 0x821c4f18 → 0x82174a80`. Frames 3-5 carry `silph::UImpl@GamePart_Title` / `silph::VGamePart_Title` vtables — **audit-049's cluster, unchanged**. |
|
||||
| 0x12b8 | Event/Auto | NO_SIGNALS, waiters=1 | (tid TBD) | Sibling, 0xC bytes from 0x12ac. |
|
||||
| 0x1020 | Event/Manual | NO_SIGNALS, waiters=1 | — | γ-class. |
|
||||
| 0x1040 | Event/Auto | NO_SIGNALS, waits=32 (hot poll) | — | Heavy wait, no signal. |
|
||||
| 0x10a8 | Event/Auto | NO_SIGNALS, waits=7 | — | γ-class. |
|
||||
| 0x10e4 | Event/Manual | NO_SIGNALS, waiters=1, waits=2 | — | γ-class. |
|
||||
|
||||
**Working handles** (sanity baseline): 0x1028 (Sema, 8 waits / 7 signals / 7 wakes), 0x10d0 (Sema, 2 waits / 1 signal / 1 wake), 0x10f0 (Event/Auto, 1/1/1 ✓ marked `<SUSPECT>` but actually fine), 0x10e0 (Event/Manual, 32 primary signals from somewhere).
|
||||
|
||||
## GPU interrupt delivery — the iterate-2.BC delta confirmed
|
||||
|
||||
| Engine | gpu.interrupt.delivered (vsync) | EmulateCPInterruptDPC / vblank pump |
|
||||
|---|---:|---:|
|
||||
| **ours** | 54 (source=0) + 1 (source=1) | — |
|
||||
| **canary** | — | **4712** in 30 s ≈ 157 Hz |
|
||||
|
||||
**~87× ratio.** Confirms HANDOFF's diagnosis: ours' victim-thread injector dies once guest threads all park; canary's host frame-limiter thread keeps firing regardless.
|
||||
|
||||
## Canary signaler attribution
|
||||
|
||||
Top KeSetEvent guest_ptrs in canary (30 s window):
|
||||
|
||||
| guest_ptr | KeSetEvent fires | Inferred role |
|
||||
|---|---:|---|
|
||||
| `0x828A3254` | 5729 | Audio host-pump worker (per AUDIT-032: `r3=0x828A3230` region) |
|
||||
| `0x828A3244` | 5728 | Audio host-pump sibling |
|
||||
| `0x828A3244` + 16-byte stride | — | Static XEX-image audio event struct |
|
||||
| `0xBCE25234` | 1301 | **silph UI cluster PKEVENT** (heap-allocated, 0x10 stride). Likely ours' 0x12ac analog. |
|
||||
| `0xBCE25214 / 0xBCE25244 / 0xBCE25224` | 648 / 603 / 603 | Sibling silph UI PKEVENTs (0x10 stride struct). Likely ours' 0x12a4 / 0x12b8 / 0x1040 analogs. |
|
||||
|
||||
Ours signals every one of those equivalents **0 times**.
|
||||
|
||||
## Round 2 — LR-extended probes name the producer
|
||||
|
||||
Extended the canary probes with guest-LR capture (5 sites in `xboxkrnl_threading.cc`, 10 LOC). Re-ran the harness. Now each `KeSetEvent` line carries the guest function that signaled the event. Result for the silph UI cluster:
|
||||
|
||||
| PKEVENT | KeSetEvent count | Producer LR(s) |
|
||||
|---|---:|---|
|
||||
| `0xBCE25214` | 574 | `0x82508510` (single producer) |
|
||||
| `0xBCE25224` | 565 | `0x82508358` (single producer) |
|
||||
| `0xBCE25234` | 1153 | `0x82506C90` (579) + `0x82508524` (574) |
|
||||
| `0xBCE25244` | 570 | `0x82506F9C` (single producer) |
|
||||
| `0xBCE25284` | 1 | `0x82507ABC` (one-shot 5th-worker init?) |
|
||||
|
||||
All 6 producer LRs sit in `0x82506000–0x82509000`. **This is exactly the `sub_825070F0` worker thread cluster** that audit-057/058 already named:
|
||||
|
||||
> *audit-057: "sub_825070F0 (4 missing, initializes 4 workers w/ shared ctx 0xBCE25340, entries 0x82506528/58/88/B8)"*
|
||||
|
||||
The 4 worker entries (`0x82506528/58/88/B8`) are inside `sub_82506xxx` — exactly where the producer LRs `0x82506C90`/`0x82506F9C` live. The other producer LRs `0x825083xx` / `0x825085xx` are in downstream callees (workers call deeper code which itself calls KeSetEvent).
|
||||
|
||||
For comparison, each event in the audio host-pump pair also has a single, sharply defined producer:
|
||||
- `0x828A3254` × 5271 ← `lr=0x824D2A44`
|
||||
- `0x828A3244` × 5271 ← `lr=0x824D292C`
|
||||
|
||||
(These match AUDIT-032's PC `0x824D229C / r3=0x828A3230` region — already-understood audio host-pump.)
|
||||
|
||||
## Verdict — 2.BE is INSUFFICIENT for the silph UI wedge
|
||||
|
||||
The silph UI PKEVENTs are signaled exclusively by threads spawned by `sub_825070F0`. Per audit-057/058, **`sub_825070F0` fires 0× in ours** — those 4 worker threads never spawn. Therefore the PKEVENTs are never signaled. Therefore tid=13 (`0x12ac` in ours) wedges forever.
|
||||
|
||||
**`sub_825070F0`'s call chain is gated by the audit-009 "unreachability island"** — a CRT-driven fnptr-array bootstrap that ours fails to enumerate. VSync delivery is irrelevant to that bootstrap; the host frame-limiter thread does not drive CRT initializers.
|
||||
|
||||
Therefore:
|
||||
- **2.BE alone CANNOT unwedge tid=13.** It will close the 54-vs-4712 VSync delivery gap and may unblock things downstream of vsync, but the silph UI wedge has an independent missing-signaler root cause.
|
||||
- **2.BE may still unwedge tid=1 main on `0x12a4`** — that wait went via `KeInitializeEvent` (handle never hit `NtCreateEvent` in ours, hence `<AUDIT_BLIND>`). Whether `0x12a4`'s signaler depends on VSync is unknown without further probing.
|
||||
|
||||
## Implications for next moves
|
||||
|
||||
A single fix won't take us to draws > 0. We need at least two:
|
||||
|
||||
1. **2.BE (VSync delivery)** — still worth landing for the architectural correctness it brings, AND because it's the only fix that can unwedge tid=1 main's `0x12a4` if that's vsync-derived. ~60–80 LOC per Agent C's plan.
|
||||
2. **2.BF (sub_825070F0 activation)** — this is the audit-058 unfinished business. Options:
|
||||
- (a) **Static work:** trace canary's CRT-driven fnptr-array path that activates the silph UI bootstrap; backport the missing init into ours. High info, slow. Requires more probing.
|
||||
- (b) **Direct synthetic spawn:** ours injects host-side `ExCreateThread` calls for the 4 worker entries at boot completion, mirroring AUDIT-048's audio-host-pump precedent. Pragmatic; ~40 LOC; risks getting context (`0xBCE25340`) wrong.
|
||||
|
||||
A possible third move:
|
||||
|
||||
3. **Re-probe with LR on Wait paths** (we already added it but didn't grep for it) — to tell us whether tid=1's wait on `0x12a4` is the same LR as `sub_825070F0`-chain or a totally different signaler. If different, it's a 3rd missing producer.
|
||||
|
||||
## Round 4 — wait-side guest LR via one-frame back-chain walk
|
||||
|
||||
After fixing the PPC stack-walk offset (Xbox 360 stores saved LR at `[prev_sp - 8]`, not at the `+4` offset used by the 32-bit SysV convention), wait-side LR comes through cleanly.
|
||||
|
||||
**Canary's top wait sites:**
|
||||
|
||||
| canary handle | wait count | guest_lr | LR region | mapping |
|
||||
|---|---:|---|---|---|
|
||||
| `F800005C` | 1635 | `0x8216EE14` | kernel early-boot infra | unrelated |
|
||||
| `F800000C` | 1597 | `0x824AFFC4` | xboxkrnl wrapper (scheduler / work-queue?) | unrelated |
|
||||
| **`F80000DC`** | **476** | **`0x821C7D3C`** | **silph::UImpl/GamePart** | **= ours' 0x12ac silph UI wedge** |
|
||||
| `F80000B0` | 6 across | `0x821CBAE0` + `0x821CC19C` + `0x822DFE2x/D0` | **exact match with audit-049's frame trail** | sibling silph UI wait |
|
||||
|
||||
Identity proof: ours' audit-049 frame trail for the silph UI wedge was `0x821cb1e0 / 0x821cbae0 / 0x821cc454 / 0x821c4f18 / 0x82174a80`. Round 4 captures `0x821CBAE0` and `0x821CC19C` (adjacent PCs) as wait LRs in canary — same cluster, same code.
|
||||
|
||||
**Refined verdict.** ours' `0x12a4` (tid=1 main, AUDIT_BLIND) and `0x12ac` (tid=13 silph UI) are 8 bytes apart — likely sibling KEVENT fields in the same silph UI struct. canary's analogs are in the `F80000xx` namespace, similarly clustered. The single fix that addresses both:
|
||||
|
||||
> **2.BF (b)** — synthetic host-side spawn of `sub_825070F0`'s 4 workers at the audit-058-identified context (`0xBCE25340`), entries `0x82506528/58/88/B8`. Once those workers run, they signal the silph UI PKEVENT cluster, unwedging BOTH tid=1 main and tid=13 silph UI in one shot.
|
||||
|
||||
2.BE (host-driven VSync ISR delivery) becomes follow-on work after the UI bootstrap completes and frame pacing actually matters.
|
||||
|
||||
## Open questions for iterate 2.BD′ / 2.BE planning
|
||||
|
||||
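1. **Does 2.BE alone unwedge tid=13?** Cheapest verification path: land 2.BE and re-run audit-059, see whether `0x12ac` signal count goes 0 → non-zero. (Note: the Round 2 verdict above argues the answer is no, because the silph UI wedge has an independent missing-signaler root cause; this remains the cheapest empirical confirmation.)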
|
||||
2. **What is the LR-pattern of canary's `KeSetEvent guest_ptr=0xBCE25234` callers?** The original probe didn't capture LR — extending the cvar to do so on a filtered subset would let us name the producer function in canary's namespace. (Answered by Round 2 above: the LR-extended probes attribute it to `0x82506C90` and `0x82508524`.)
|
||||
3. **Does the GPU frame-limiter's CP interrupt actually walk into the silph UI cluster?** I.e., does `EmulateCPInterruptDPC` → `interrupt_callback` → guest code ever hit `sub_821CB030` or its callees? An LR probe inside `EmulateCPInterruptDPC` would answer this.
|
||||
|
||||
## Artifacts
|
||||
|
||||
- `canary.log` 2.2 MB / 34,095 lines / 32,977 AUDIT-HLC lines
|
||||
- `canary.stdout` 2.2 MB (duplicate of canary.log due to log_file fallback)
|
||||
- `canary.stderr` 8.4 KB (Wine diagnostics)
|
||||
- `ours.log` 479 lines (focus ledger + thread diagnostics + final state)
|
||||
- `ours.stderr` 317 lines (kernel-call counters)
|
||||
- `vkd3d-proton.cache.write` 15 KB (build artifact, ignored)
|
||||
|
||||
Commits in play (xenia-canary, fork-local only):
|
||||
- `03362b59f` cross-build-wine (cross-compile toolchain)
|
||||
- `d031d7c51` audit-handle-lifecycle-probes (this audit's probes)
|
||||
116
audit-runs/audit-059-handle-disambiguation/ROUND_34_PLAN.md
Normal file
116
audit-runs/audit-059-handle-disambiguation/ROUND_34_PLAN.md
Normal file
@@ -0,0 +1,116 @@
|
||||
# Round 34 — silph_ui_synth.rs (cluster B sibling) — DEFERRED PLAN
|
||||
|
||||
## Background
|
||||
|
||||
Rounds 23-33 drove γ-cluster #2 down to the actual gate: **`sub_821741C8`** (silph worker-dispatch loop) fires 0× in ours / 471× in canary (tid=6). It's invoked through dynamic vtable slot 9 via the `sub_821752C0` thunk. The vtable writer sits in the audit-050 unreachability island, so there is no static caller chain to hook into.
|
||||
|
||||
The fix shape is a synth module analogous to `silph_synth.rs` (rounds 18-21):
|
||||
- Synthesize a singleton-like object with the right vtable
|
||||
- Spawn a guest thread at the right entry with this object as r3
|
||||
- Let the dispatch chain do the rest
|
||||
|
||||
Rounds 18-21 took 4 rounds to land cluster A's analog and ended at "workers run live but idle" because of missing foreign-pointer fields. Cluster B will face similar challenges.
|
||||
|
||||
## Sub-round breakdown (estimated 5-8 rounds)
|
||||
|
||||
### 34.α — Probe canary's dispatcher singleton (1 round)
|
||||
Capture canary's runtime state at `sub_821741C8` entry:
|
||||
- `r3 = 0xBCA44C00` (canary tid=6's dispatcher singleton)
|
||||
- Dump `r3..r3+0x80` to identify all fields
|
||||
- Note vtable address at `[r3+0]`
|
||||
|
||||
```bash
|
||||
WINEDEBUG=-all wine xenia_canary.exe --mute=true --audit_handle_lifecycle=true \
|
||||
--audit_jit_prolog_pc=0x821741C8 --audit_jit_prolog_r3_bytes=128 \
|
||||
--audit_jit_prolog_mem_dump=<vtable_va_from_r3+0> \
|
||||
...
|
||||
```
|
||||
|
||||
### 34.β — Probe full vtable layout (1 round)
|
||||
Read the vtable bytes statically from the PE (canary's `[r3+0]` IS a static XEX VA — same trick as round 21):
|
||||
- Read 32-64 slots from PE at file offset = vtable VA - 0x82000000
|
||||
- Confirm slot 9 = `sub_821C7CB8` and `vtable+0x24` thunk to `sub_821741C8`
|
||||
- Look at all other slots — do any reference deep guest code that needs more init?
|
||||
|
||||
Cross-reference each slot's DB reach. If a slot is the dispatcher's own method body, it'll be called from within the chain — needs to exist.
|
||||
|
||||
### 34.γ — Skeleton synth + thread spawn (1 round)
|
||||
Create `crates/xenia-kernel/src/silph_ui_synth.rs` mirroring `silph_synth.rs` structure:
|
||||
```rust
|
||||
pub fn spawn_silph_ui_dispatcher(state: &mut KernelState, mem: &GuestMemory, scheduler: &mut Scheduler) -> Result<u32, &'static str> {
|
||||
if state.silph_ui_synth_done { return Ok(state.silph_ui_synth_ctx); }
|
||||
|
||||
// Allocate ~0x100-0x200 bytes for the dispatcher singleton
|
||||
let ctx = state.heap_alloc(0x200, 16)?;
|
||||
mem.write_zeros(ctx, 0x200);
|
||||
|
||||
// Install static-XEX vtable at [+0]
|
||||
mem.write_u32(ctx + 0x00, VTABLE_VA); // discovered in 34.β
|
||||
|
||||
// Other init fields from 34.α dump
|
||||
// ...
|
||||
|
||||
// Spawn dispatcher thread at sub_821748F0 with r3=ctx
|
||||
scheduler.spawn(SpawnParams{
|
||||
entry: 0x821748F0,
|
||||
start_context: ctx,
|
||||
create_suspended: false,
|
||||
...
|
||||
})?;
|
||||
|
||||
state.silph_ui_synth_done = true;
|
||||
state.silph_ui_synth_ctx = ctx;
|
||||
Ok(ctx)
|
||||
}
|
||||
```
|
||||
|
||||
Hook point: first reach of `sub_821CB030` in the existing silph factory chain (the call site that should normally trigger this dispatcher's creation in canary).
|
||||
|
||||
Add a 3-mode env gate on `XENIA_SILPH_UI_SYNTH`: unset, `=suspend`, or `=1`.
|
||||
|
||||
### 34.δ — Run + diagnose first crash (1 round)
|
||||
Almost certainly crashes on a NULL deref of one of the singleton's fields. Use round 19's pattern:
|
||||
- Probe at thread entry + early BB heads
|
||||
- Identify the offset that's accessed
|
||||
- Compare to canary's value at that offset
|
||||
|
||||
### 34.ε..η — Iterate on field fills (2-4 rounds)
|
||||
Each crash identifies one more required field. Fill it. Re-run. Continue until workers idle (verdict D analog).
|
||||
|
||||
### 34.θ — Producer-side seeding (1 round)
|
||||
Even with the dispatcher running, work-items may not flow. Per round 32 it's pool 3 that's starved (271 fires in canary). The producers are `sub_821CBEA8 / sub_821D24A0 / sub_821CD458` — they may need their own bootstrap. Probe what triggers them in canary.
|
||||
|
||||
## Verification at each stage
|
||||
|
||||
After every commit:
|
||||
- `cargo test --release --workspace` — 765/765 must pass
|
||||
- `XENIA_CACHE_PERSIST=1 XENIA_SILPH_UI_SYNTH=1 ./target/release/xenia-rs exec <ISO> -n 50000000 --trace-handles-focus=0x1218,0x1224,0x12a4,0x12ac`
|
||||
- Check:
|
||||
- No crash
|
||||
- `sub_821741C8` fires
|
||||
- `sub_82450b68` r4=3 fires increase
|
||||
- Handle 0x1224 / 0x1218 transition out of NO_SIGNALS_DESPITE_WAITS
|
||||
- Eventually: `VdSwap > 1, draws > 0`
|
||||
|
||||
## Risk register
|
||||
|
||||
- **High**: dispatcher singleton may require many more fields than the analog WorkerCtx (rounds 18-21 needed 8 KEVENTs + ring + descriptors + index table; UI dispatcher likely has similar scope)
|
||||
- **High**: foreign-arena pointers in canary's heap (similar to round 19's `[+0x28/+0x2C/+0x30]`) may need their own synthesis
|
||||
- **Medium**: cluster B's worker may itself spawn threads which need contexts which need... cascading scope
|
||||
- **Low**: workspace tests breaking (probe infrastructure is solid)
|
||||
- **Low**: existing iterate-2BE work regressing (it's on a separate branch)
|
||||
|
||||
## Off-ramps
|
||||
|
||||
If we hit a wall at any sub-round, the off-ramps are:
|
||||
1. Land the infrastructure as opt-in (rounds 18-21 pattern) and ship cluster A + cluster B both as opt-in env vars
|
||||
2. Drop cluster B entirely and PR the iterate-2BE work to master (production-ready architectural fix)
|
||||
3. Pivot to lockstep diff of inflate function (round 30 hypothesis (i)) if cluster B keeps producing crash-fix layers
|
||||
|
||||
## Branch plan
|
||||
|
||||
New branch: `iterate-2BF/silph-ui-synth` off `iterate-2BF/synthetic-silph-spawn` HEAD `40f208e`. Each sub-round = 1 commit. All commits opt-in via env var; default behavior unchanged.
|
||||
|
||||
## When ready to execute
|
||||
|
||||
Dispatch using the prompt recommended by the round-33 agent, starting at sub-round 34.α.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,66 @@
|
||||
AUDIT-PC-PROBE pc=0x8216ea68 tid=1 hw=0 cycle=5362918 lr=0x824ab8e0 r3=0x00000000 r11=0x00000000 [r3+0]=0x00000000 [[r3+0]+24]=0x00000000 [r3+0x0C]=0x00000000 [r3+0x30]=0x00000000
|
||||
AUDIT-PC-PROBE pc=0x822f1aa8 tid=1 hw=0 cycle=6181256 lr=0x8216ee14 r3=0x40d09a40 r11=0x40111910 [r3+0]=0x00000021 [[r3+0]+24]=0x00000000 [r3+0x0C]=0x40541a40 [r3+0x30]=0x00000000
|
||||
AUDIT-PC-PROBE pc=0x822f1b38 tid=1 hw=0 cycle=6181641 lr=0x822f1b38 r3=0x00000001 r11=0x824b0000 [r3+0]=0x00000000 [[r3+0]+24]=0x00000000 [r3+0x0C]=0x00000000 [r3+0x30]=0x00000000
|
||||
AUDIT-PC-PROBE pc=0x821746b0 tid=1 hw=0 cycle=9229300 lr=0x82173c38 r3=0x40ba9a80 r11=0x00000000 [r3+0]=0x40111910 [[r3+0]+24]=0x00000000 [r3+0x0C]=0x00000000 [r3+0x30]=0x00000000
|
||||
AUDIT-PC-PROBE pc=0x821748f0 tid=13 hw=1 cycle=0 lr=0xbcbcbcbc r3=0x4024a840 r11=0x00000000 [r3+0]=0x40ba9a80 [[r3+0]+24]=0x00000000 [r3+0x0C]=0x00000000 [r3+0x30]=0x4250dec0
|
||||
|
||||
=== Final State ===
|
||||
PC: 0x00000000
|
||||
LR: 0xbcbcbcbc
|
||||
CTR: 0x00000000
|
||||
CR: 0x00000000
|
||||
XER: CA=0 OV=0 SO=0
|
||||
|
||||
=== Thread diagnostics ===
|
||||
hw=0 idx=0 tid=1 state=Blocked(WaitAny { handles: [4208], deadline: None }) pc=0x824ac578 lr=0x824ac578 sp=0x700ff6e0
|
||||
r0=0x82153bf0 r3=0x00000000 r4=0x00000001 r5=0x00000000 r6=0x00000000 r7=0x03a72328
|
||||
r8=0x43b77284 r9=0x43b77328 r10=0x00000001 r11=0x00000103 r12=0x82173c64 r13=0x7fff0000
|
||||
hw=0 idx=1 tid=11 state=Blocked(WaitAny { handles: [2190094916, 2190094880], deadline: None }) pc=0x824d2a94 lr=0x824d2a94 sp=0x71497d90
|
||||
r0=0x00000000 r3=0x00000000 r4=0x71497de0 r5=0x00000001 r6=0x00000003 r7=0x00000001
|
||||
r8=0x00000000 r9=0x00000000 r10=0x71497df0 r11=0x828a3244 r12=0xbcbcbcbc r13=0x4b9f1000
|
||||
hw=1 idx=0 tid=2 state=Blocked(WaitAny { handles: [2189887804], deadline: None }) pc=0x824a95f8 lr=0x824a95f8 sp=0x710ffd20
|
||||
r0=0x0000030c r3=0x00000000 r4=0x00000003 r5=0x00000001 r6=0x00000000 r7=0x00000000
|
||||
r8=0x00000001 r9=0x6f000000 r10=0x824a9178 r11=0x82870000 r12=0x824a94f0 r13=0x4acc3000
|
||||
hw=1 idx=1 tid=13 state=Blocked(WaitAny { handles: [4216], deadline: None }) pc=0x824ac578 lr=0x824ac578 sp=0x715a7a20
|
||||
r0=0x821511d0 r3=0x00000000 r4=0x00000001 r5=0x00000000 r6=0x00000000 r7=0x03a723d0
|
||||
r8=0x43b77334 r9=0x43b77334 r10=0x40541f80 r11=0x00000001 r12=0x821cb1e0 r13=0x4d1d4000
|
||||
hw=2 idx=0 tid=7 state=Blocked(WaitAny { handles: [1111821148], deadline: Some(42946672) }) pc=0x824cd4f4 lr=0x824cd4f4 sp=0x71187e60
|
||||
r0=0x00000000 r3=0x00000000 r4=0x00000003 r5=0x00000001 r6=0x00000000 r7=0x71187eb0
|
||||
r8=0x00000000 r9=0x00000000 r10=0x00000002 r11=0x00000002 r12=0xbcbcbcbc r13=0x4b1d6000
|
||||
hw=2 idx=1 tid=8 state=Blocked(WaitAny { handles: [4176, 4128], deadline: None }) pc=0x824ab214 lr=0x824ab214 sp=0x71287c90
|
||||
r0=0x00000000 r3=0x00000000 r4=0x71287cf0 r5=0x00000001 r6=0x00000001 r7=0x00000000
|
||||
r8=0x00000000 r9=0x00009030 r10=0x00000002 r11=0x00000020 r12=0x822f1ff0 r13=0x4b90a000
|
||||
hw=3 idx=0 tid=4 state=Blocked(WaitAny { handles: [4120], deadline: None }) pc=0x824ac578 lr=0x824ac578 sp=0x7112fb80
|
||||
r0=0x821511a0 r3=0x00000000 r4=0x00000001 r5=0x00000000 r6=0x00000000 r7=0x03a723d0
|
||||
r8=0x43b7732c r9=0x828f0000 r10=0x00000008 r11=0x00000000 r12=0x8245a660 r13=0x4adc6000
|
||||
hw=3 idx=1 tid=5 state=Blocked(WaitAny { handles: [4224], deadline: None }) pc=0x824ac578 lr=0x824ac578 sp=0x7116fbe0
|
||||
r0=0x821511a0 r3=0x00000000 r4=0x00000001 r5=0x00000000 r6=0x00000000 r7=0x03a723d0
|
||||
r8=0x43b7732c r9=0x828f0000 r10=0x00000001 r11=0x00000000 r12=0x82458b34 r13=0x4adc8000
|
||||
hw=4 idx=0 tid=9 state=Ready pc=0x824d140c lr=0x824d22b4 sp=0x71387df0
|
||||
r0=0x00000000 r3=0x4250dedc r4=0x4250e040 r5=0x00000001 r6=0x00000000 r7=0x00000000
|
||||
r8=0x4b9ec000 r9=0x01010000 r10=0x01010000 r11=0x00000000 r12=0x824d22a8 r13=0x4b9ec000
|
||||
hw=5 idx=0 tid=3 state=Blocked(WaitAny { handles: [4112], deadline: None }) pc=0x824ac578 lr=0x824ac578 sp=0x7111fdf0
|
||||
r0=0x82153bf0 r3=0x00000000 r4=0x00000001 r5=0x00000000 r6=0x00000000 r7=0x00000a10
|
||||
r8=0x00000010 r9=0x00000000 r10=0x00009030 r11=0x00000000 r12=0x82181988 r13=0x4adc4000
|
||||
hw=5 idx=1 tid=6 state=Ready pc=0x824ab214 lr=0x824ab214 sp=0x7117fc60
|
||||
r0=0x821511a0 r3=0x00000001 r4=0x7117fcc0 r5=0x00000001 r6=0x00000001 r7=0x00000000
|
||||
r8=0x7117fcb0 r9=0x00009030 r10=0x00000002 r11=0x00000020 r12=0x82458d68 r13=0x4adca000
|
||||
hw=5 idx=2 tid=10 state=Ready pc=0x824d1404 lr=0x824d22b4 sp=0x71487e00
|
||||
r0=0x00000000 r3=0x4250dedc r4=0x4250e040 r5=0x00000001 r6=0x00000000 r7=0x00000000
|
||||
r8=0x4b9ee000 r9=0x01010000 r10=0x01010000 r11=0x00000000 r12=0x824d22a8 r13=0x4b9ee000
|
||||
hw=5 idx=3 tid=12 state=Ready pc=0x824aa6a4 lr=0x824aa6a4 sp=0x714a7da0
|
||||
r0=0x00000000 r3=0x000000ff r4=0x00000020 r5=0x714a7df4 r6=0x00000000 r7=0x00000000
|
||||
r8=0x00000000 r9=0x00000000 r10=0x00000000 r11=0x00000001 r12=0x8217898c r13=0x4d1d2000
|
||||
|
||||
-- Handle waiter lists --
|
||||
handle=0x00001020 Semaphore(0/2147483647) waiters(tid)=[8]
|
||||
handle=0x42450b5c Event(sig=false, mr=true) waiters(tid)=[7]
|
||||
handle=0x828a3244 Event(sig=false, mr=false) waiters(tid)=[11]
|
||||
handle=0x00001018 Semaphore(0/2147483647) waiters(tid)=[4]
|
||||
handle=0x8287093c Event(sig=false, mr=false) waiters(tid)=[2]
|
||||
handle=0x00001070 Thread(id=13, exit=None) waiters(tid)=[1]
|
||||
handle=0x00001080 Event(sig=false, mr=false) waiters(tid)=[5]
|
||||
handle=0x00001078 Event(sig=false, mr=false) waiters(tid)=[13]
|
||||
handle=0x828a3220 Event(sig=false, mr=true) waiters(tid)=[11]
|
||||
handle=0x00001050 Event(sig=false, mr=true) waiters(tid)=[8]
|
||||
handle=0x00001010 Event(sig=false, mr=true) waiters(tid)=[3]
|
||||
@@ -0,0 +1,167 @@
|
||||
# Round-A1..A4 findings — canary tid=6 spawn chain & divergence frontier
|
||||
|
||||
## Anchor reframe (round-37 misread corrected)
|
||||
|
||||
The "factory/registry layer divergence at [0x828E1F08]" framing is falsified.
|
||||
Both engines install the SAME static-XEX `.rdata` vtable `0x820A183C` at the
|
||||
singleton's `[+0]`. The instance VAs differ only because of ε-class allocator
|
||||
divergence (audit-043).
|
||||
|
||||
| Probe | Canary | Ours |
|
||||
|----------------------------|----------------------|----------------------|
|
||||
| `[0x828E1F08]` | 0xBC22C910 (heap) | 0x40111910 (heap) |
|
||||
| `[[0x828E1F08]+0]` vtable | 0x820A183C | 0x820A183C (SAME) |
|
||||
| `vtable[+0]` thunk | 0x82175330 | 0x82175330 (SAME) |
|
||||
| `vtable[+8]` thunk | 0x82175340 → b sub_821741C8 | SAME (vtable bytes from XEX `.rdata`) |
|
||||
|
||||
The thunks at 0x82175330+ are 8-byte `lwz r3, 8(r3); b <real_method>`
|
||||
trampolines. Slot 2 (`+0x08`) is the worker dispatch entry that round 33
|
||||
identified as 471× in canary tid=6 / 0× in ours.
|
||||
|
||||
## A.1 — Canary dispatcher loop is in sub_822F1AA8 on tid=6
|
||||
|
||||
Probe `--audit_jit_prolog_pc=0x821741C8 --audit_jit_prolog_r3_bytes=256` on
|
||||
canary (35 s):
|
||||
|
||||
- ~1678 fires of sub_821741C8 on **tid=6**
|
||||
- r3 at entry = `0xBCCC4A80` (the inner sub-object of the silph::UImpl
|
||||
singleton — extracted via the thunk's `lwz r3, 8(r3)`)
|
||||
- LR at entry = `0x822F1D5C` (return PC after the `bctrl` at 0x822F1D58 inside
|
||||
sub_822F1AA8)
|
||||
- Singleton's `[+0xC0..+0xD0]` UTF-16 spells "HF Frequency" (a UI label)
|
||||
|
||||
The dispatch site in canary (the `bctrl`) is at PC 0x822F1D58 inside
|
||||
sub_822F1AA8:
|
||||
```
|
||||
0x822F1D40: lwz r3, 7944(r25) ; r3 = [r25+0x1F08] = [0x828E1F08]
|
||||
0x822F1D4C: lwz r11, 0(r3) ; vtable
|
||||
0x822F1D50: lwz r11, 8(r11) ; vtable[+8] = thunk 0x82175340
|
||||
0x822F1D54: mtctr r11
|
||||
0x822F1D58: bctrl ; → 0x82175340 → b 0x821741C8
|
||||
```
|
||||
|
||||
## A.2 — Canary tid=6 spawn site is sub_821746B0 at PC 0x82174824
|
||||
|
||||
Enumeration of `ExCreateThread` calls in canary (35 s, 21 unique tuples):
|
||||
|
||||
```
|
||||
entry=821748F0 start_ctx=BC365700 lr=824AC5F0 guest_lr=82174828 ← silph dispatcher #1
|
||||
entry=821748F0 start_ctx=BC366DA0 lr=824AC5F0 guest_lr=82174828 ← silph dispatcher #2
|
||||
```
|
||||
|
||||
PC `0x82174824` is the `bl 0x82172370` (the `ExCreateThread` thunk) inside
|
||||
`sub_821746B0`. The setup is:
|
||||
```
|
||||
0x8217480C: lis r11, 0x8217
|
||||
0x82174810: li r7, 0
|
||||
0x82174814: li r6, 4 ; priority
|
||||
0x82174818: mr r5, r29 ; start_ctx
|
||||
0x8217481C: addi r4, r11, 18672 ; r4 = 0x821748F0 (entry)
|
||||
0x82174820: li r3, 0
|
||||
0x82174824: bl 0x82172370 ; ExCreateThread
|
||||
```
|
||||
|
||||
The entry `0x821748F0` is a thread main that calls `bl 0x821749C0` (the
|
||||
inner dispatch).
|
||||
|
||||
## A.3 — sub_822F1AA8 spawns a SECOND thread at 0x822F1B08
|
||||
|
||||
The dispatch-loop function `sub_822F1AA8` itself ALSO spawns a thread at
|
||||
PC 0x822F1B08 with entry=`sub_822F1EE0` and `start_ctx=BCE24A40`:
|
||||
```
|
||||
0x822F1AEC: lis r11, 0x822F
|
||||
0x822F1AFC: addi r4, r11, 7904 ; r4 = 0x822F1EE0
|
||||
0x822F1B08: bl 0x82172370 ; ExCreateThread
|
||||
```
|
||||
|
||||
sub_822F1EE0 → sub_822F1F20 contains its own atomic state-machine + wait loop.
|
||||
|
||||
## A.3' — sub_822F1AA8 has exactly 2 callers, both in sub_8216EA68
|
||||
|
||||
```
|
||||
source=0x8216ECCC source_func=0x8216EA68 kind=call
|
||||
source=0x8216EE10 source_func=0x8216EA68 kind=call
|
||||
```
|
||||
|
||||
So sub_8216EA68 is the only function that drives sub_822F1AA8.
|
||||
|
||||
## A.4 — Ours' divergence is INSIDE the spawned thread, NOT at the spawn
|
||||
|
||||
Mirror-probed ours at `sub_821746B0` body BB heads (parallel mode, 50M
|
||||
instructions, XENIA_CACHE_PERSIST=1):
|
||||
|
||||
| PC | Fires | Notes |
|
||||
|-------------|-------|------------------------------------------------|
|
||||
| 0x821746B0 | 1 | Entry. r3=0x40ba9a80 |
|
||||
| 0x821746E0 | 1 | After `bl 0x8284DCFC` (critical-section) |
|
||||
| 0x82174798 | 1 | After the early `beq` (r28==0 branch) |
|
||||
| 0x821747B8 | 1 | **Past the gate**: `[0x828E2B14]=0x40105000` non-NULL; `bl 0x82150EF8` returned r3=0x4024a840 (NON-NULL) |
|
||||
| 0x821747D8 | 1 | After the inner `bl 0x821723F0` |
|
||||
| 0x8217480C | 1 | Enters the spawn block |
|
||||
| 0x82174828 | 1 | **Post-`bl ExCreateThread`**, r3=0x1070 = thread handle |
|
||||
|
||||
**OURS DOES SPAWN THE THREAD VIA THIS SITE.** The returned handle 0x1070 is
|
||||
**tid=13's thread handle** (per round 37 final state). So **ours' tid=13 IS
|
||||
the same logical thread as canary's tid=6** — spawned by the identical call
|
||||
site with the same entry (0x821748F0).
|
||||
|
||||
## A.4' — Divergence is INSIDE the spawned thread's body
|
||||
|
||||
Round 37's frame trail for ours' tid=13 wedge:
|
||||
`0x821CB1E0 → 0x821CBAE0 → 0x821CC454 → 0x821C4F18 → 0x82174A80`
|
||||
|
||||
The LAST frame `0x82174A80` is **inside sub_821749C0** (= the inner dispatch
|
||||
called from sub_821748F0). It's right after the vtable dispatch at
|
||||
0x82174A7C (`bctrl` through `[[r30+0]+16]`, i.e. slot +0x10 of `r30`'s vtable):
|
||||
|
||||
```
|
||||
0x82174a64: mr r3, r30 ; r3 = some object
|
||||
0x82174a68: lwz r11, 0(r30)
|
||||
0x82174a6c: lwz r4, 4(r29)
|
||||
0x82174a70: lwz r5, 8(r31)
|
||||
0x82174a74: lwz r11, 16(r11) ; r11 = vtable[+0x10]
|
||||
0x82174a78: mtctr r11
|
||||
0x82174a7c: bctrl ; dispatch
|
||||
0x82174a80: lwz r3, 0(r29) ; ← wedge frame top (LR after bctrl)
|
||||
```
|
||||
|
||||
So `sub_821749C0`'s vtable[+0x10] dispatch on tid=13/tid=6's `r30` object
|
||||
lands at audit-049 territory in ours (chain through sub_821CB030+0x128 that
|
||||
ends waiting forever on handle 0x1078). In canary, the same dispatch on the
|
||||
same object SHOULD land somewhere that ultimately reaches sub_822F1AA8's
|
||||
dispatch loop and runs sub_821741C8 1678× via vtable[+8].
|
||||
|
||||
**The object `r30` is the result of `bl 0x821CF3F0`** at PC 0x821749DC. So
|
||||
sub_821CF3F0 returns a registry-lookup object; the vtable on this object's
|
||||
slot +0x10 method's body determines whether the thread wedges or runs.
|
||||
|
||||
## Phase B classification
|
||||
|
||||
Class 3 — **Missing init-time precondition**. Ours reaches the spawn site,
|
||||
ours' tid=13 enters the chain, ours' tid=13 enters sub_821749C0, but the
|
||||
vtable[+0x10] dispatch (`bctrl` at PC 0x82174A7C) in ours lands in audit-049 territory
|
||||
(wait forever on 0x1078) rather than continuing through the canonical chain
|
||||
toward sub_822F1AA8's outer dispatch loop.
|
||||
|
||||
Possible classes to refine in next round:
|
||||
- **3a**: same vtable but state-dependent — `r30`'s field at a specific offset
|
||||
differs in ours vs canary, causing the method body to take a different
|
||||
branch.
|
||||
- **3b**: the vtable in `r30` is DIFFERENT in ours vs canary (e.g., ours has
|
||||
a base-class vtable but canary has a derived-class vtable).
|
||||
- **4**: synthesis fallback — spawn a SECOND thread that runs sub_822F1AA8's
|
||||
dispatch loop directly, bypassing the wedged sub_821749C0 chain.
|
||||
|
||||
## Next probe (A.4.5)
|
||||
|
||||
Probe both engines at sub_821749C0 entry filtering tid=13 (ours) / tid=6
|
||||
(canary), capturing:
|
||||
- `r3` and `r4` at entry (the factory-output object and the ctx)
|
||||
- After the `bl 0x821CF3F0` at 0x821749DC: capture r30 (= sub_821CF3F0
|
||||
return — the object whose vtable is dispatched by the `bctrl` at 0x82174A7C)
|
||||
- At PC 0x82174A78 (the `mtctr` feeding the divergent `bctrl` at 0x82174A7C): r30, `[r30+0]` (vtable), and `vtable[+0x10]`
|
||||
(the dispatch target)
|
||||
|
||||
If ours and canary have IDENTICAL `vtable[+0x10]` targets but the method
|
||||
body's behavior differs → class 3a (state divergence). If targets differ →
|
||||
class 3b (vtable identity divergence).
|
||||
@@ -0,0 +1,91 @@
|
||||
AUDIT-PC-PROBE pc=0x821746b0 tid=1 hw=0 cycle=9228833 lr=0x82173c38 r3=0x40ba9a80 r11=0x00000000 [r3+0]=0x40111910 [[r3+0]+24]=0x00000000 [r3+0x0C]=0x00000000 [r3+0x30]=0x00000000
|
||||
AUDIT-MEM-READ addr=0x828e2b14 val=0x40105000 vtable=0x40105004 vtable[0]=0x40105008 vtable[24]=0x40105020 pc=0x821746b0 tid=1 cycle=9228833
|
||||
AUDIT-PC-PROBE pc=0x821746e0 tid=1 hw=0 cycle=9228856 lr=0x821746e0 r3=0x00000000 r11=0x00000000 [r3+0]=0x00000000 [[r3+0]+24]=0x00000000 [r3+0x0C]=0x00000000 [r3+0x30]=0x00000000
|
||||
AUDIT-MEM-READ addr=0x828e2b14 val=0x40105000 vtable=0x40105004 vtable[0]=0x40105008 vtable[24]=0x40105020 pc=0x821746e0 tid=1 cycle=9228856
|
||||
AUDIT-PC-PROBE pc=0x82174798 tid=1 hw=0 cycle=9228859 lr=0x821746e0 r3=0x00000000 r11=0x00000000 [r3+0]=0x00000000 [[r3+0]+24]=0x00000000 [r3+0x0C]=0x00000000 [r3+0x30]=0x00000000
|
||||
AUDIT-MEM-READ addr=0x828e2b14 val=0x40105000 vtable=0x40105004 vtable[0]=0x40105008 vtable[24]=0x40105020 pc=0x82174798 tid=1 cycle=9228859
|
||||
AUDIT-PC-PROBE pc=0x821747b8 tid=1 hw=0 cycle=9229012 lr=0x821747ac r3=0x4024a840 r11=0x4024a840 [r3+0]=0x4024ace0 [[r3+0]+24]=0x43777290 [r3+0x0C]=0x4024a820 [r3+0x30]=0x4250dec0
|
||||
AUDIT-MEM-READ addr=0x828e2b14 val=0x40105000 vtable=0x40105004 vtable[0]=0x40105008 vtable[24]=0x40105020 pc=0x821747b8 tid=1 cycle=9229012
|
||||
AUDIT-PC-PROBE pc=0x821747d8 tid=1 hw=0 cycle=9229440 lr=0x821747cc r3=0x4024a840 r11=0xffffffff [r3+0]=0x40ba9a80 [[r3+0]+24]=0x00000000 [r3+0x0C]=0x00000000 [r3+0x30]=0x4250dec0
|
||||
AUDIT-MEM-READ addr=0x828e2b14 val=0x40105000 vtable=0x40105004 vtable[0]=0x40105008 vtable[24]=0x40105020 pc=0x821747d8 tid=1 cycle=9229440
|
||||
AUDIT-PC-PROBE pc=0x8217480c tid=1 hw=0 cycle=9229443 lr=0x821747cc r3=0x4024a840 r11=0xffffffff [r3+0]=0x40ba9a80 [[r3+0]+24]=0x00000000 [r3+0x0C]=0x00000000 [r3+0x30]=0x4250dec0
|
||||
AUDIT-MEM-READ addr=0x828e2b14 val=0x40105000 vtable=0x40105004 vtable[0]=0x40105008 vtable[24]=0x40105020 pc=0x8217480c tid=1 cycle=9229443
|
||||
AUDIT-PC-PROBE pc=0x82174828 tid=1 hw=0 cycle=9229509 lr=0x82174828 r3=0x00001070 r11=0x824b0000 [r3+0]=0x00000000 [[r3+0]+24]=0x00000000 [r3+0x0C]=0x00000000 [r3+0x30]=0x00000000
|
||||
AUDIT-MEM-READ addr=0x828e2b14 val=0x40105000 vtable=0x40105004 vtable[0]=0x40105008 vtable[24]=0x40105020 pc=0x82174828 tid=1 cycle=9229509
|
||||
|
||||
=== Final State ===
|
||||
PC: 0x824ac578
|
||||
LR: 0x824ac578
|
||||
CTR: 0x82153bf0
|
||||
CR: 0x24000028
|
||||
XER: CA=0 OV=0 SO=0
|
||||
r0 : 0x0000000082153bf0
|
||||
r1 : 0x00000000700ff6e0
|
||||
r2 : 0x0000000020000000
|
||||
r4 : 0x0000000000000001
|
||||
r7 : 0x0000000003a72328
|
||||
r8 : 0x0000000043b77284
|
||||
r9 : 0x0000000043b77328
|
||||
r10: 0x0000000000000001
|
||||
r11: 0x0000000000000103
|
||||
r12: 0x0000000082173c64
|
||||
r13: 0x000000007fff0000
|
||||
r18: 0x0000000040d09a7c
|
||||
r23: 0x00000000828f3844
|
||||
r26: 0x000000004024a620
|
||||
r27: 0x00000000820a17a8
|
||||
r31: 0x0000000000001070
|
||||
|
||||
=== Thread diagnostics ===
|
||||
hw=0 idx=0 tid=1 state=Blocked(WaitAny { handles: [4208], deadline: None }) pc=0x824ac578 lr=0x824ac578 sp=0x700ff6e0
|
||||
r0=0x82153bf0 r3=0x00000000 r4=0x00000001 r5=0x00000000 r6=0x00000000 r7=0x03a72328
|
||||
r8=0x43b77284 r9=0x43b77328 r10=0x00000001 r11=0x00000103 r12=0x82173c64 r13=0x7fff0000
|
||||
hw=0 idx=1 tid=11 state=Blocked(WaitAny { handles: [2190094916, 2190094880], deadline: None }) pc=0x824d2a94 lr=0x824d2a94 sp=0x71497d90
|
||||
r0=0x00000000 r3=0x00000000 r4=0x71497de0 r5=0x00000001 r6=0x00000003 r7=0x00000001
|
||||
r8=0x00000000 r9=0x00000000 r10=0x71497df0 r11=0x828a3244 r12=0xbcbcbcbc r13=0x4b9f1000
|
||||
hw=1 idx=0 tid=2 state=Blocked(WaitAny { handles: [2189887804], deadline: None }) pc=0x824a95f8 lr=0x824a95f8 sp=0x710ffd20
|
||||
r0=0x0000030c r3=0x00000000 r4=0x00000003 r5=0x00000001 r6=0x00000000 r7=0x00000000
|
||||
r8=0x00000001 r9=0x6f000000 r10=0x824a9178 r11=0x82870000 r12=0x824a94f0 r13=0x4acc3000
|
||||
hw=1 idx=1 tid=13 state=Blocked(WaitAny { handles: [4216], deadline: None }) pc=0x824ac578 lr=0x824ac578 sp=0x715a7a20
|
||||
r0=0x821511d0 r3=0x00000000 r4=0x00000001 r5=0x00000000 r6=0x00000000 r7=0x03a723d0
|
||||
r8=0x43b77334 r9=0x43b77334 r10=0x40541f80 r11=0x00000001 r12=0x821cb1e0 r13=0x4d1d4000
|
||||
hw=2 idx=0 tid=7 state=Blocked(WaitAny { handles: [1111821148], deadline: Some(42946672) }) pc=0x824cd4f4 lr=0x824cd4f4 sp=0x71187e60
|
||||
r0=0x00000000 r3=0x00000000 r4=0x00000003 r5=0x00000001 r6=0x00000000 r7=0x71187eb0
|
||||
r8=0x00000000 r9=0x00000000 r10=0x00000002 r11=0x00000002 r12=0xbcbcbcbc r13=0x4b1d6000
|
||||
hw=2 idx=1 tid=8 state=Blocked(WaitAny { handles: [4176, 4132], deadline: None }) pc=0x824ab214 lr=0x824ab214 sp=0x71287c90
|
||||
r0=0x00000000 r3=0x00000000 r4=0x71287cf0 r5=0x00000001 r6=0x00000001 r7=0x00000000
|
||||
r8=0x00000000 r9=0x00009030 r10=0x00000002 r11=0x00000020 r12=0x822f1ff0 r13=0x4b90a000
|
||||
hw=3 idx=0 tid=4 state=Blocked(WaitAny { handles: [4120], deadline: None }) pc=0x824ac578 lr=0x824ac578 sp=0x7112fb80
|
||||
r0=0x821511a0 r3=0x00000000 r4=0x00000001 r5=0x00000000 r6=0x00000000 r7=0x03a723d0
|
||||
r8=0x43b7732c r9=0x828f0000 r10=0x00000008 r11=0x00000000 r12=0x8245a660 r13=0x4adc6000
|
||||
hw=3 idx=1 tid=5 state=Blocked(WaitAny { handles: [4224], deadline: None }) pc=0x824ac578 lr=0x824ac578 sp=0x7116fbe0
|
||||
r0=0x821511a0 r3=0x00000000 r4=0x00000001 r5=0x00000000 r6=0x00000000 r7=0x03a723d0
|
||||
r8=0x43b7732c r9=0x828f0000 r10=0x00000001 r11=0x00000000 r12=0x82458b34 r13=0x4adc8000
|
||||
hw=4 idx=0 tid=9 state=Ready pc=0x824d140c lr=0x824d22b4 sp=0x71387df0
|
||||
r0=0x00000000 r3=0x4250dedc r4=0x4250e040 r5=0x00000001 r6=0x00000000 r7=0x00000000
|
||||
r8=0x4b9ec000 r9=0x01010000 r10=0x01010000 r11=0x00000000 r12=0x824d22a8 r13=0x4b9ec000
|
||||
hw=5 idx=0 tid=3 state=Blocked(WaitAny { handles: [4112], deadline: None }) pc=0x824ac578 lr=0x824ac578 sp=0x7111fdf0
|
||||
r0=0x82153bf0 r3=0x00000000 r4=0x00000001 r5=0x00000000 r6=0x00000000 r7=0x00000a10
|
||||
r8=0x00000010 r9=0x00000000 r10=0x00009030 r11=0x00000000 r12=0x82181988 r13=0x4adc4000
|
||||
hw=5 idx=1 tid=6 state=Ready pc=0x824ab214 lr=0x824ab214 sp=0x7117fc60
|
||||
r0=0x821511a0 r3=0x00000001 r4=0x7117fcc0 r5=0x00000001 r6=0x00000001 r7=0x00000000
|
||||
r8=0x7117fcb0 r9=0x00009030 r10=0x00000002 r11=0x00000020 r12=0x82458d68 r13=0x4adca000
|
||||
hw=5 idx=2 tid=10 state=Ready pc=0x824d140c lr=0x824d22b4 sp=0x71487e00
|
||||
r0=0x00000000 r3=0x4250dedc r4=0x4250e040 r5=0x00000001 r6=0x00000000 r7=0x00000000
|
||||
r8=0x4b9ee000 r9=0x01010000 r10=0x01010000 r11=0x00000000 r12=0x824d22a8 r13=0x4b9ee000
|
||||
hw=5 idx=3 tid=12 state=Ready pc=0x824aa6a4 lr=0x824aa6a4 sp=0x714a7da0
|
||||
r0=0x00000000 r3=0x000000ff r4=0x00000020 r5=0x714a7df4 r6=0x00000000 r7=0x00000000
|
||||
r8=0x00000000 r9=0x00000000 r10=0x00000000 r11=0x00000001 r12=0x8217898c r13=0x4d1d2000
|
||||
|
||||
-- Handle waiter lists --
|
||||
handle=0x00001024 Semaphore(0/2147483647) waiters(tid)=[8]
|
||||
handle=0x00001010 Event(sig=false, mr=true) waiters(tid)=[3]
|
||||
handle=0x00001070 Thread(id=13, exit=None) waiters(tid)=[1]
|
||||
handle=0x00001080 Event(sig=false, mr=false) waiters(tid)=[5]
|
||||
handle=0x828a3244 Event(sig=false, mr=false) waiters(tid)=[11]
|
||||
handle=0x00001018 Semaphore(0/2147483647) waiters(tid)=[4]
|
||||
handle=0x00001050 Event(sig=false, mr=true) waiters(tid)=[8]
|
||||
handle=0x00001078 Event(sig=false, mr=false) waiters(tid)=[13]
|
||||
handle=0x8287093c Event(sig=false, mr=false) waiters(tid)=[2]
|
||||
handle=0x828a3220 Event(sig=false, mr=true) waiters(tid)=[11]
|
||||
handle=0x42450b5c Event(sig=false, mr=true) waiters(tid)=[7]
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,136 @@
|
||||
# Phase A synthesis — canary tid=6 IS the main thread; the wedge is sub_822F1AA8's loop exit
|
||||
|
||||
## Top-line finding
|
||||
|
||||
**Canary's `tid=6` is canary's main thread.** Confirmed by probing `entry_point`
|
||||
(`sub_824AB748`) with `--audit_jit_prolog_pc=0x824AB748`: fires 1× on
|
||||
`tid=00000006` with `lr=BCBCBCBC` (= OS-initial / no caller). Ours numbers
|
||||
its main thread `tid=1`. Same logical thread; different label.
|
||||
|
||||
Therefore "tid=6 fires sub_821741C8 471×" (round 33) means **the main thread**
|
||||
loops inside `sub_822F1AA8` firing `sub_821741C8` ~1678×/30s in canary. In
|
||||
ours, the main thread (tid=1) runs `sub_822F1AA8` ONCE, exits the loop, and
|
||||
proceeds to thread-join on the spawned init thread (handle 0x1070 = tid=13),
|
||||
which is itself blocked forever on handle 0x1078.
|
||||
|
||||
## Call chain (identical in both engines, different runtime behavior)
|
||||
|
||||
```
|
||||
entry_point (sub_824AB748)
|
||||
│
|
||||
├─ sub_824ACB38 CRT-driven fnptr-array iterator (audit-050 region)
|
||||
├─ ...
|
||||
└─ sub_8216EA68 Many local calls including:
|
||||
├─ ExCreateThread(entry=sub_8217F0F8 ...) ; sibling thread
|
||||
├─ sub_822F1AA8(controller=...) ; FIRST call (PC 0x8216ECCC)
|
||||
└─ sub_822F1AA8(controller=0xBCE24A40 canary / ; SECOND call (PC 0x8216EE10)
|
||||
0x40d09a40 ours) ↑ this is the loop
|
||||
```
|
||||
|
||||
The SECOND call is what runs the dispatcher loop. Its LR = 0x8216EE14.
|
||||
Confirmed in both engines.
|
||||
|
||||
## sub_822F1AA8 loop structure
|
||||
|
||||
```
|
||||
0x822F1AA8: entry, r30 = r3 (controller)
|
||||
0x822F1AEC-0x822F1B08: ExCreateThread(entry=sub_822F1EE0, ctx=r30) → r29 = handle
|
||||
0x822F1B30-0x822F1B34: bl 0x824AA8B0(r3=r29) ; ?
|
||||
0x822F1B38-0x822F1B4C: first bctrl → vtable[+0] of [0x828E1F08]
|
||||
0x822F1B50-0x822F1B74: setup, bl 0x824AA330 INFINITE wait on [r22+32]
|
||||
0x822F1B80-0x822F1BA8: post-wait setup; [r30+0] |= 0x2
|
||||
0x822F1BB0-0x822F1BBC: TOP-OF-LOOP CHECK: if [r30+0] & 0x10000000 → goto 0x822F1E10 (exit)
|
||||
0x822F1BCC..0x822F1DEC: loop body (includes the vtable[+8] bctrl → sub_821741C8 at PC 0x822F1D58)
|
||||
0x822F1DEC-0x822F1DFC: bl 0x824AA330 INFINITE wait on [r23+0]
|
||||
0x822F1E00-0x822F1E0C: END-OF-ITERATION CHECK: if [r30+0] & 0x10000000 == 0 → goto 0x822F1BCC (re-loop)
|
||||
0x822F1E10-0x822F1E18: EXIT: [r30+0] |= 0x02000000 (bit 25 LSB / bit 6 MSB)
|
||||
0x822F1E1C-0x822F1E24: release something via bl 0x824AA2F0
|
||||
0x822F1E28-0x822F1E30: bl 0x824AA330 INFINITE on [r30+28] = SPAWNED THREAD HANDLE (thread join!)
|
||||
0x822F1E40: bl 0x824AA3E0
|
||||
0x822F1E44-0x822F1E5C: final cleanup: vtable[+24] bctrl on [0x828E1F08]
|
||||
0x822F1E60-0x822F1E78: [r30+0] = 0, then [r30+0] |= 1; bl 0x824567E0
|
||||
0x822F1E7C-0x822F1E88: epilogue
|
||||
```
|
||||
|
||||
**Loop exit gate**: `[r30+0] & 0x10000000` (bit 28 LSB / bit 3 MSB). Set →
|
||||
exit. Both top-of-loop check (0x822F1BBC) and end-of-iteration check
|
||||
(0x822F1E0C) gate on the same bit.
|
||||
|
||||
## What's different between engines
|
||||
|
||||
| Engine | [r30+0] at entry | Loop iterations | Exits sub_822F1AA8? |
|
||||
|--------|------------------|------------------|----------------------|
|
||||
| canary | 0x21 (per probe) | ~1678+ in 30s | NO (stays in loop) |
|
||||
| ours | 0x21 (per probe) | 0 (probes show none of the loop-body PCs fire after entry) | YES (exits quickly) |
|
||||
|
||||
Both engines have `[r30+0]=0x21` at entry — bit 28 NOT set. After the `ori
|
||||
r11, r11, 0x2` at 0x822F1B90, both should have `[r30+0]=0x23`. Bit 28 still
|
||||
not set.
|
||||
|
||||
So **some code sets bit 28 on [r30+0] between sub_822F1AA8 entry and the
|
||||
loop check** in ours but not in canary.
|
||||
|
||||
Mem-watch on 0x40d09a40 (ours' controller VA) shows **zero guest writes** in
|
||||
my 50M-instruction parallel run. Possible reasons:
|
||||
- The setter writes from kernel/runtime code that mem-watch doesn't capture
|
||||
(kernel-host store, not guest JIT store)
|
||||
- The setter writes via a computed alias (different VA but same backing)
|
||||
- The bit IS set via a probe-quantum-elided JIT store
|
||||
|
||||
## Phase B classification
|
||||
|
||||
**Class 3a — state-divergence on the controller object**. The vtable
|
||||
identity is the same (round-37 confirmed `0x820A183C` in both). The
|
||||
controller object's bit 28 of `[+0]` evolves differently during the setup
|
||||
between sub_822F1AA8 entry and the loop check.
|
||||
|
||||
Class 4 (synthesis) is now LESS attractive: ours' main thread DOES reach
|
||||
sub_822F1AA8 with the right controller. We don't need to spawn the
|
||||
dispatcher — we need to PREVENT the main thread from exiting the loop.
|
||||
|
||||
## Pragmatic next step — JIT instrumentation to find bit-28 setter
|
||||
|
||||
Most direct diagnostic: add a JIT hook in xenia-cpu that, for guest stores
|
||||
in the range [0x822F1AA8, 0x822F1E10), captures the guest PC + the written
|
||||
value when the store would set bit 28 of any address. This identifies the
|
||||
exact PC that sets the loop-exit bit.
|
||||
|
||||
Alternative: extend `--mem-watch` to also capture kernel-side stores by
|
||||
hooking the GuestMemory write path at the kernel-state level.
|
||||
|
||||
Even simpler: add a one-shot `--bit-watch=ADDR:MASK` cvar that fires when
|
||||
the value at ADDR has any bit in MASK transition from 0→1, regardless of
|
||||
who wrote it. This is the cleanest diagnostic for this exact pattern.
|
||||
|
||||
## Fix shape (when bit-28 setter is identified)
|
||||
|
||||
If the bit-28 setter is inside the vtable[+0] dispatch chain at 0x822F1B4C
|
||||
(target sub_82173990), then the fix might be a state-init issue in the
|
||||
kernel/runtime.
|
||||
|
||||
If the bit-28 setter is inside the inner wait or one of the kernel calls
|
||||
(`bl 0x824AA8B0`, `bl 0x824AA330`), the fix might be a missing event signal
|
||||
or a wrong handle-state evolution.
|
||||
|
||||
If we can't identify the setter cleanly, the synthesis fallback is to
|
||||
**inject a kernel-side hook that clears bit 28 of [r30+0] on every entry to
|
||||
sub_822F1AA8's bit-check site (0x822F1BB0)**. Crude but should keep the
|
||||
main thread in the loop.
|
||||
|
||||
## Why this is a clearer wedge picture than rounds 22-33
|
||||
|
||||
Rounds 22-33 chased the audit-049 wedge from various angles. The diagnoses
|
||||
landed on different layers:
|
||||
- R22: "wrong cluster targeted" (cluster A vs B)
|
||||
- R26-30: "state-machine progression bug"
|
||||
- R32-33: "pool 3 starvation; bootstrap walk-back"
|
||||
|
||||
This round establishes the simplest possible framing:
|
||||
|
||||
> **Canary's main thread loops forever in a dispatcher; ours' main thread
|
||||
> exits the loop after one setup phase. The exit is gated by a single bit
|
||||
> on the controller's flag word.**
|
||||
|
||||
If bit 28 of `[controller+0]` could be permanently cleared, ours' main
|
||||
thread would stay in the loop, sub_821741C8 would dispatch, signals would
|
||||
flow, tid=13 would complete, draws would happen.
|
||||
@@ -0,0 +1,79 @@
|
||||
AUDIT-PC-PROBE pc=0x822f1aa8 tid=1 hw=0 cycle=6180796 lr=0x8216ee14 r3=0x40d09a40 r11=0x40111910 [r3+0]=0x00000021 [[r3+0]+24]=0x00000000 [r3+0x0C]=0x40541a40 [r3+0x30]=0x00000000
|
||||
AUDIT-PC-PROBE pc=0x822f1b38 tid=1 hw=0 cycle=6181181 lr=0x822f1b38 r3=0x00000001 r11=0x824b0000 [r3+0]=0x00000000 [[r3+0]+24]=0x00000000 [r3+0x0C]=0x00000000 [r3+0x30]=0x00000000
|
||||
|
||||
=== Final State ===
|
||||
PC: 0x824ac578
|
||||
LR: 0x824ac578
|
||||
CTR: 0x82153bf0
|
||||
CR: 0x24000028
|
||||
XER: CA=0 OV=0 SO=0
|
||||
r0 : 0x0000000082153bf0
|
||||
r1 : 0x00000000700ff6e0
|
||||
r2 : 0x0000000020000000
|
||||
r4 : 0x0000000000000001
|
||||
r7 : 0x0000000003a72328
|
||||
r8 : 0x0000000043b77284
|
||||
r9 : 0x0000000043b77328
|
||||
r10: 0x0000000000000001
|
||||
r11: 0x0000000000000103
|
||||
r12: 0x0000000082173c64
|
||||
r13: 0x000000007fff0000
|
||||
r18: 0x0000000040d09a7c
|
||||
r23: 0x00000000828f3844
|
||||
r26: 0x000000004024a4e0
|
||||
r27: 0x00000000820a17a8
|
||||
r31: 0x0000000000001070
|
||||
|
||||
=== Thread diagnostics ===
|
||||
hw=0 idx=0 tid=1 state=Blocked(WaitAny { handles: [4208], deadline: None }) pc=0x824ac578 lr=0x824ac578 sp=0x700ff6e0
|
||||
r0=0x82153bf0 r3=0x00000000 r4=0x00000001 r5=0x00000000 r6=0x00000000 r7=0x03a72328
|
||||
r8=0x43b77284 r9=0x43b77328 r10=0x00000001 r11=0x00000103 r12=0x82173c64 r13=0x7fff0000
|
||||
hw=0 idx=1 tid=11 state=Blocked(WaitAny { handles: [2190094916, 2190094880], deadline: None }) pc=0x824d2a94 lr=0x824d2a94 sp=0x71497d90
|
||||
r0=0x00000000 r3=0x00000000 r4=0x71497de0 r5=0x00000001 r6=0x00000003 r7=0x00000001
|
||||
r8=0x00000000 r9=0x00000000 r10=0x71497df0 r11=0x828a3244 r12=0xbcbcbcbc r13=0x4b9f1000
|
||||
hw=1 idx=0 tid=2 state=Blocked(WaitAny { handles: [2189887804], deadline: None }) pc=0x824a95f8 lr=0x824a95f8 sp=0x710ffd20
|
||||
r0=0x0000030c r3=0x00000000 r4=0x00000003 r5=0x00000001 r6=0x00000000 r7=0x00000000
|
||||
r8=0x00000001 r9=0x6f000000 r10=0x824a9178 r11=0x82870000 r12=0x824a94f0 r13=0x4acc3000
|
||||
hw=1 idx=1 tid=13 state=Blocked(WaitAny { handles: [4216], deadline: None }) pc=0x824ac578 lr=0x824ac578 sp=0x715a7a20
|
||||
r0=0x821511d0 r3=0x00000000 r4=0x00000001 r5=0x00000000 r6=0x00000000 r7=0x03a723d0
|
||||
r8=0x43b77334 r9=0x43b77334 r10=0x40541f80 r11=0x00000001 r12=0x821cb1e0 r13=0x4d1d4000
|
||||
hw=2 idx=0 tid=7 state=Blocked(WaitAny { handles: [1111821148], deadline: Some(42946672) }) pc=0x824cd4f4 lr=0x824cd4f4 sp=0x71187e60
|
||||
r0=0x00000000 r3=0x00000000 r4=0x00000003 r5=0x00000001 r6=0x00000000 r7=0x71187eb0
|
||||
r8=0x00000000 r9=0x00000000 r10=0x00000002 r11=0x00000002 r12=0xbcbcbcbc r13=0x4b1d6000
|
||||
hw=2 idx=1 tid=8 state=Blocked(WaitAny { handles: [4176, 4132], deadline: None }) pc=0x824ab214 lr=0x824ab214 sp=0x71287c90
|
||||
r0=0x00000000 r3=0x00000000 r4=0x71287cf0 r5=0x00000001 r6=0x00000001 r7=0x00000000
|
||||
r8=0x00000000 r9=0x00009030 r10=0x00000002 r11=0x00000020 r12=0x822f1ff0 r13=0x4b90a000
|
||||
hw=3 idx=0 tid=4 state=Blocked(WaitAny { handles: [4120], deadline: None }) pc=0x824ac578 lr=0x824ac578 sp=0x7112fb80
|
||||
r0=0x821511a0 r3=0x00000000 r4=0x00000001 r5=0x00000000 r6=0x00000000 r7=0x03a723d0
|
||||
r8=0x43b7732c r9=0x828f0000 r10=0x00000008 r11=0x00000000 r12=0x8245a660 r13=0x4adc6000
|
||||
hw=3 idx=1 tid=5 state=Blocked(WaitAny { handles: [4224], deadline: None }) pc=0x824ac578 lr=0x824ac578 sp=0x7116fbe0
|
||||
r0=0x821511a0 r3=0x00000000 r4=0x00000001 r5=0x00000000 r6=0x00000000 r7=0x03a723d0
|
||||
r8=0x43b7732c r9=0x828f0000 r10=0x00000001 r11=0x00000000 r12=0x82458b34 r13=0x4adc8000
|
||||
hw=4 idx=0 tid=9 state=Ready pc=0x824d1404 lr=0x824d22b4 sp=0x71387df0
|
||||
r0=0x00000000 r3=0x4250dedc r4=0x4250e040 r5=0x00000001 r6=0x00000000 r7=0x00000000
|
||||
r8=0x4b9ec000 r9=0x01010000 r10=0x01010000 r11=0x00000000 r12=0x824d22a8 r13=0x4b9ec000
|
||||
hw=5 idx=0 tid=3 state=Blocked(WaitAny { handles: [4112], deadline: None }) pc=0x824ac578 lr=0x824ac578 sp=0x7111fdf0
|
||||
r0=0x82153bf0 r3=0x00000000 r4=0x00000001 r5=0x00000000 r6=0x00000000 r7=0x00000a10
|
||||
r8=0x00000010 r9=0x00000000 r10=0x00009030 r11=0x00000000 r12=0x82181988 r13=0x4adc4000
|
||||
hw=5 idx=1 tid=6 state=Ready pc=0x824ab214 lr=0x824ab214 sp=0x7117fc60
|
||||
r0=0x821511a0 r3=0x00000001 r4=0x7117fcc0 r5=0x00000001 r6=0x00000001 r7=0x00000000
|
||||
r8=0x7117fcb0 r9=0x00009030 r10=0x00000002 r11=0x00000020 r12=0x82458d68 r13=0x4adca000
|
||||
hw=5 idx=2 tid=10 state=Ready pc=0x824d1404 lr=0x824d22b4 sp=0x71487e00
|
||||
r0=0x00000000 r3=0x4250dedc r4=0x4250e040 r5=0x00000001 r6=0x00000000 r7=0x00000000
|
||||
r8=0x4b9ee000 r9=0x01010000 r10=0x01010000 r11=0x00000000 r12=0x824d22a8 r13=0x4b9ee000
|
||||
hw=5 idx=3 tid=12 state=Ready pc=0x824aa6a4 lr=0x824aa6a4 sp=0x714a7da0
|
||||
r0=0x00000000 r3=0x000000ff r4=0x00000020 r5=0x714a7df4 r6=0x00000000 r7=0x00000000
|
||||
r8=0x00000000 r9=0x00000000 r10=0x00000000 r11=0x00000001 r12=0x8217898c r13=0x4d1d2000
|
||||
|
||||
-- Handle waiter lists --
|
||||
handle=0x00001018 Semaphore(0/2147483647) waiters(tid)=[4]
|
||||
handle=0x8287093c Event(sig=false, mr=false) waiters(tid)=[2]
|
||||
handle=0x00001070 Thread(id=13, exit=None) waiters(tid)=[1]
|
||||
handle=0x42450b5c Event(sig=false, mr=true) waiters(tid)=[7]
|
||||
handle=0x00001078 Event(sig=false, mr=false) waiters(tid)=[13]
|
||||
handle=0x00001080 Event(sig=false, mr=false) waiters(tid)=[5]
|
||||
handle=0x828a3244 Event(sig=false, mr=false) waiters(tid)=[11]
|
||||
handle=0x00001024 Semaphore(0/2147483647) waiters(tid)=[8]
|
||||
handle=0x828a3220 Event(sig=false, mr=true) waiters(tid)=[11]
|
||||
handle=0x00001010 Event(sig=false, mr=true) waiters(tid)=[3]
|
||||
handle=0x00001050 Event(sig=false, mr=true) waiters(tid)=[8]
|
||||
@@ -0,0 +1,127 @@
|
||||
# Phase C.1 — Validation refutes Phase A's bit-28 setter hypothesis
|
||||
|
||||
## TL;DR
|
||||
|
||||
Phase A claimed: "bit 28 of `[0x40d09a40]` (controller word) gets set in ours, causing sub_822F1AA8's dispatcher loop to exit early; candidate setter is `sub_821B55D8` at PC `0x821B5DA4`."
|
||||
|
||||
**Phase C.1 falsifies this in 4 sub-rounds:**
|
||||
|
||||
1. **`sub_821B55D8` is dead code** in both engines — its `XamInputSetState` wrapper `sub_824AA858` fires 0× in both.
|
||||
2. **`[0x40d09a40]` is never written with a value that has bit 28 set** — `--dump-addr` at end of run shows `+0x00 = 0x00000021`, the entry value. Bit 28 is NEVER set.
|
||||
3. **The actual wedge is at the `bcctrl` at PC `0x822F1B4C`** (inside sub_822F1AA8 setup, BEFORE the dispatcher loop). tid=1 never reaches the loop top-check.
|
||||
4. **The bcctrl calls `sub_82173990`** (vtable[0] of the dispatcher singleton at `[0x828E1F08]`), which eventually waits for tid=13 to terminate. tid=13 wedges in the audit-049 silph::UImpl@GamePart_Title chain on handle `0x1078`.
|
||||
|
||||
The C.2 force-clear POC (the planned next step) would have **zero effect** because bit 28 is never set. Skipped per plan stopping criterion.
|
||||
|
||||
## Probe-fire counts (ours, 50M-instr parallel)
|
||||
|
||||
| PC | sub-round | fires | meaning |
|
||||
|---|---|---|---|
|
||||
| `0x821B55D8` (Phase A candidate fn entry) | 1 | **0** | function never reached → β/γ |
|
||||
| `0x821B5D98,DA0,DAC,D48` (loop BB heads) | 1 | **0** | function never reached |
|
||||
| `0x822F1AA8` (sub_822F1AA8 entry) | 2,3,4 | 2-3 | reached |
|
||||
| `0x822F1B38` (post-`bl 0x824AA8B0`) | 4 | 2 | reached |
|
||||
| `0x822F1B50` (post-`bcctrl`) | 4 | **0** | **bcctrl never returns** |
|
||||
| `0x822F1B60,B78,B80,BBC` (loop setup/top) | 3 | 0 | unreachable past bcctrl |
|
||||
| `0x822F1E10` (loop exit cleanup) | 2 | 0 | loop never entered, never exited |
|
||||
| `0x822F1E34` (post-thread-join) | 2 | 0 | never reached |
|
||||
| `0x82173990` (vtable[0] target) | 4 | 2 | called via bcctrl, r3=singleton (LR=0x822F1B50) |
|
||||
| `0x821748F0` (tid=13 entry) | 4 | 2 | tid=13 runs |
|
||||
| `0x821C4EB0` (silph::UImpl@GamePart_Title) | 4 | 2 | audit-009/049 reached on tid=13 |
|
||||
| `0x82457388,0x824574C0,0x82457408,0x82457490` (other oris candidates) | 2 | 0 | unreachable |
|
||||
|
||||
## Canary probe results
|
||||
|
||||
| PC | fires | meaning |
|
||||
|---|---|---|
|
||||
| `0x824AA858` (XamInputSetState wrapper) | **0** | sub_821B55D8 chain is dead code in CANARY too |
|
||||
| `0x822F1B50` (post-bcctrl, attempted) | **0** | canary's JitProlog only fires at function entries, so not directly testable; but per audit round-33 sub_821741C8 fires 471× in canary → bcctrl DOES return in canary |
|
||||
|
||||
## Critical evidence: `--dump-addr=0x40d09a40` at end of run
|
||||
|
||||
```
|
||||
addr=0x40d09a40
|
||||
+0x00: 00 00 00 21 00 00 00 01 42 44 df 00 40 54 1a 40
|
||||
^^^^^^^^^^^ ^^^^^^^^^^^
|
||||
+0x10: 40 54 1b 40 40 54 1b 80 40 54 1b c0 00 00 10 54
|
||||
+0x20: 00 00 00 00 40 24 a8 20 00 00 00 08 00 00 00 00
|
||||
```
|
||||
|
||||
- `[+0x00] = 0x00000021` ← bit 28 (mask 0x10000000) is NOT SET. Same value as at sub_822F1AA8 entry.
|
||||
- `[+0x1c] = 0x00001054` ← spawned init thread handle (= tid=8's thread handle, NOT 0x1070)
|
||||
- Thread state: tid=1 waits on handle `0x1070`, tid=13 waits on handle `0x1078`.
|
||||
|
||||
Handle `0x1070` is **tid=13's thread handle** (per stderr: `ExCreateThread: tid=13 handle=0x1070 entry=0x821748f0 ctx=0x4024a840 suspended=true`). So tid=1's wait at the wedge point is a **thread-join on tid=13**, NOT a thread-join on the dispatcher init thread (tid=8, handle 0x1054).
|
||||
|
||||
## Wedge path (corrected)
|
||||
|
||||
```
|
||||
entry_point (sub_824AB748) [tid=1 main]
|
||||
└─ sub_8216EA68
|
||||
└─ sub_822F1AA8(controller=0x40d09a40) [LR=0x8216EE14]
|
||||
├─ ExCreateThread(entry=sub_822F1EE0, ctx=controller) [PC 0x822F1B08]
|
||||
│ ⇒ tid=8 spawn, handle=0x1054 (suspended)
|
||||
├─ bl 0x824AA8B0 (no-op probe) [PC 0x822F1B34]
|
||||
└─ bcctrl on vtable[+0] of [0x828E1F08] singleton [PC 0x822F1B4C]
|
||||
│
|
||||
└─ sub_82173990(r3=singleton) [r3=0x40ba9a80, vtable=0x40111910]
|
||||
└─ ... (768-byte function with ≥18 calls; calls sub_82448AA0, sub_824AA7A0,
|
||||
sub_82448BC8, sub_82448C50, sub_8216F218, sub_8217C850, sub_82178E50,
|
||||
sub_821835E0, ...)
|
||||
└─ ... → KeWaitForSingleObject INFINITE on handle 0x1070
|
||||
(= tid=13's thread handle, thread-join)
|
||||
⇒ WEDGE — tid=13 never exits
|
||||
|
||||
(Concurrently — spawned somewhere else, not from sub_822F1AA8:)
|
||||
[tid=13, spawn-handle=0x1070, ctx=0x4024a840]
|
||||
└─ sub_821748F0 (worker boilerplate, entry from ExCreateThread)
|
||||
├─ sub_82172798, sub_82172818
|
||||
└─ sub_821749C0
|
||||
└─ sub_821CF3F0
|
||||
└─ ... → sub_821C4EB0 (UImpl@GamePart_Title@silph) [audit-009/049!]
|
||||
└─ ... → sub_821CB030 (creates KEVENT at +0x128)
|
||||
⇒ KeWaitForSingleObject INFINITE on handle 0x1078
|
||||
⇒ WEDGE — handle 0x1078 is never signaled in ours
|
||||
```
|
||||
|
||||
## Why Phase A's hypothesis is wrong
|
||||
|
||||
Phase A:
|
||||
1. Disassembled sub_822F1AA8's body, observed the bit-28 loop-exit check at `0x822F1BB8` and end-of-iter check at `0x822F1E0C`.
|
||||
2. Mem-watch on `0x40d09a40` showed zero stores → inferred "the setter writes via some path mem-watch doesn't capture."
|
||||
3. DB-scanned for `oris ?, ?, 0x1000` (49 sites) and found one inside `sub_821B55D8`, at PC `0x821B5DA4`, with the pattern `bl sub_824AA858 ; if r3 == 0xAA: oris r11, 0x1000 ; stw`.
|
||||
4. Concluded `sub_821B55D8` was the setter.
|
||||
|
||||
What Phase A missed:
|
||||
- Mem-watch's 0-stores result was correct: **NO setter exists**. Bit 28 is never set in either engine. The mem-watch null-result was a hint that the bit-28 hypothesis itself was wrong, but Phase A interpreted it as "mem-watch misses something."
|
||||
- The disasm-based hypothesis was visually compelling (a loop iterating arrays and setting bit 28 when a kernel call returns 0xAA) but was never verified at runtime.
|
||||
- `sub_821B55D8` is itself dead code in both engines.
|
||||
|
||||
## Reading-error class #19: disasm-pattern-match without runtime verification
|
||||
|
||||
When scanning for a hypothesized signal source via DB pattern-match (`oris ?, ?, 0x1000`), the analyst must run a probe to verify the suspected site is *both reached* and *takes the suspected path* before declaring it the cause. Phase A bypassed both checks. The single `--dump-addr=0x40d09a40` flag in sub-round 2 (literally 4 keystrokes added to the existing probe command) revealed the central assumption was wrong.
|
||||
|
||||
## Real divergence (handed to next session)
|
||||
|
||||
This is the **same wedge as audit-049/058/059**: tid=13 wedges in the silph::UImpl@GamePart_Title cluster on handle `0x1078`. tid=1 wedges on tid=13's thread-handle (`0x1070`) inside `sub_82173990`'s call chain.
|
||||
|
||||
`sub_82173990` is vtable[0] of the dispatcher singleton at `[0x828E1F08]`. It's a 768-byte function with ≥18 calls; the actual wait site is somewhere down its tree. To localize where in `sub_82173990` the wait happens, probe its BB heads + the `KeWaitForSingleObject` thunks (`sub_824AA330`, `sub_824AA708`).
|
||||
|
||||
The fix-shape is **NOT** "force-clear bit 28." The fix-shape is **"signal handle 0x1078 in the audit-049 cluster, or short-circuit tid=13's wait."** Round 22 (silph_synth.rs) attempted the cluster-A version of this. Cluster B (silph::UImpl) needs its own synthesis or a kernel-side signal of handle 0x1078.
|
||||
|
||||
## Phase C verdict
|
||||
|
||||
- C.1: 4 sub-rounds executed (within budget).
|
||||
- C.2: **NOT EXECUTED** — the POC would be a no-op since bit 28 is never set. Per plan stopping criterion, do not proceed to C.2 blind when C.1 refutes the diagnosis.
|
||||
- C.3: not applicable.
|
||||
- Branch state: no source changes. Audit artifacts only.
|
||||
|
||||
## Files in this directory
|
||||
|
||||
- `ours-c1-probe.log/stderr` — sub-round 1, probe at sub_821B55D8 BB heads (0 fires)
|
||||
- `ours-sr2-confirm-bit28.log/stderr` — sub-round 2, probe loop top/exit + dump-addr (bit 28 NEVER SET)
|
||||
- `ours-sr3-wait-trace.log/stderr` — sub-round 3, probe wait site + handle 0x1070 trace
|
||||
- `ours-sr4-bcctrl-trace.log/stderr` — sub-round 4, probe pre/post bcctrl + sub_82173990 entry + tid=13 entry (decisive)
|
||||
- canary side in `../round-C1-setter-validation-canary/`:
|
||||
- `canary-824AA858.log` — XamInputSetState wrapper fires 0× in canary too
|
||||
- `canary-822F1B50.log` — JitProlog can't probe at BB-internal PCs (function-entry-only)
|
||||
@@ -0,0 +1,144 @@
|
||||
# Phase D — Audit-049 Auto-Signal POC — FINDINGS
|
||||
|
||||
**Branch**: `iterate-2C/silph-ui-spawn-trace` (extends Phase C `481591f`)
|
||||
**Date**: 2026-06-11
|
||||
**Sub-rounds**: D2.SR1 → D2.SR4 (4/4 used)
|
||||
**Verdict**: **B — partial unwedge**
|
||||
|
||||
## Mission
|
||||
|
||||
Phase C diagnosed the audit-049 wedge as tid=13 (silph::UImpl@GamePart_Title) waiting INFINITE on a KEVENT created at `sub_821CB030+0x128` (`lr=0x821cb15c`, post-bl PC). The Phase D POC tests this diagnosis by hooking `NtCreateEvent` from that exact call site and auto-signaling the resulting handle after a configurable delay (`XENIA_SILPH_UI_AUTOSIGNAL_DELAY` instructions).
|
||||
|
||||
If tid=13 unblocks, the diagnosis is confirmed. If new wedges or new threads appear downstream, even better — that's actual game progression past the wedge.
|
||||
|
||||
## Result summary
|
||||
|
||||
| Symptom | SR2/SR3 baseline | SR4 (POC firing) |
|
||||
|---|---|---|
|
||||
| `silph autosignal: scheduled handle=0x1078 caller_lr=0x821cb15c` | yes (SR2/SR3) | yes |
|
||||
| `silph autosignal: firing handle=0x1078` | NO | **yes (cycle 16326209)** |
|
||||
| handle 0x1078 final | `signaled=false waiters=1 <NO_SIGNALS_DESPITE_WAITS>` | `signal_attempts=1 waiters=0` |
|
||||
| tid=13 final state | `Blocked(WaitAny[0x1078])` | **`Ready` pc=0x824a9108** |
|
||||
| tid=1 final state | `Blocked(WaitAny[0x1070])` thread-join | `Blocked(WaitAny[0x1070])` (tid=13 not yet exited) |
|
||||
| ExCreateThread total | 10 | **12 (+tid=14, +tid=15)** |
|
||||
| New downstream wedges | none past 0x1078 | **0x1084 (Event/Auto), 0x1088 (Event/Manual)** |
|
||||
| `cxx_throw` runtime_error decoded | none | **yes, stack depth 6, top L0=0x82612b50 → L4=sub_82450B60+0x1A8 → L6=sub_82450a50** |
|
||||
| VdSwap | 1 | 1 |
|
||||
| gpu.interrupt.delivered{source=0} | 6393 | 4539 (different trajectory, no draws) |
|
||||
|
||||
**Conclusion**: tid=13 unwedged cleanly from the audit-049 wait, spawned two follow-on threads (tid=14 entry=`silph` ctx=`0x40929c00`, tid=15 a worker), and progressed deep enough into the silph::UImpl state machine to throw a `runtime_error` from sub_82450a50 → sub_82450B60+0x1A8 (the dispatcher cluster from round 26). The auto-signal **is not** the proper signaler — it lets tid=13 proceed but downstream state-machine invariants the missing real signaler would have established are not in place, so the dispatcher trips on a "not-registered instance" lookup.
|
||||
|
||||
This is a **clean confirmation** of the Phase C diagnosis: the wedge handle, the wait site, and the LR filter are all correct. The fix shape is:
|
||||
- Either: synthesize the missing signaler properly (cluster-B silph_ui_synth.rs analogue from R33's deferred plan)
|
||||
- Or: track what the auto-signal needed to write into the work-item state (`[+8]` field per R26) BEFORE signaling, so the dispatcher's BST lookup succeeds
|
||||
|
||||
## Sub-round detail
|
||||
|
||||
### D2.SR1 — initial run, hook never fires (wrong LR filter)
|
||||
|
||||
Filter checked `creator_lr ∈ [0x821CB15C, 0x821CB160]` against `ctx.lr` at `nt_create_event` entry. But `ctx.lr` is the **thunk wrapper return slot** (`0x824a9f6c`), not the guest caller's post-bl PC. Confirmed via handle-audit `created stack` dump: frame 0 lr=`0x824a9f6c`, frame 1 lr=`0x821cb15c`. The guest caller's LR lives one frame up the PPC EABI back-chain.
|
||||
|
||||
Diagnosis classification: **D (filter mismatch)**. Reading-error class #20 (new).
|
||||
|
||||
### D2.SR2 — frame-1-LR fix; hook schedules, never fires
|
||||
|
||||
Refactored `maybe_register_silph_autosignal` to take `(ctx, mem)`, walk back-chain via existing `walk_guest_back_chain` (1 step), match the saved LR. Hook now fires:
|
||||
|
||||
```
|
||||
silph autosignal: scheduled handle=0x1078 caller_lr=0x821cb15c for cycle 10000 (now=0, delay=10000)
|
||||
```
|
||||
|
||||
But no "firing" log appears, and tid=13 stays Blocked. Classification: **D (drain site never reached)**.
|
||||
|
||||
### D2.SR3 — diagnostic added; confirms drain site never visited
|
||||
|
||||
Added a one-shot info-level "tick (first visit, none due)" log inside `fire_due_silph_autosignals` when pending is non-empty but nothing due. Re-ran. **The tick-diagnostic never fired either** — proving the function isn't being called at all in `--parallel` mode.
|
||||
|
||||
Root cause: `--parallel` dispatches to `run_execution_parallel` (line 2928 of main.rs), which has its own outer loop at line 3186. My Phase D wiring only touched the lockstep path at line 2763. Classification: **D (wrong code path wired)**.
|
||||
|
||||
### D2.SR4 — parallel-path wiring added; hook fires; tid=13 unblocks
|
||||
|
||||
Added the same `set_now_cycle_hint` + `fire_due_silph_autosignals` calls inside the parallel outer loop, right after `coord_pre_round` (and under the same `kernel_arc` guard, so no extra locking). Re-built, re-ran.
|
||||
|
||||
Now all three log lines appear:
|
||||
|
||||
```
|
||||
silph autosignal: scheduled handle=0x1078 caller_lr=0x821cb15c for cycle 16326202 (now=16316202, delay=10000)
|
||||
silph autosignal: tick (first visit, none due) now=16316213 pending=1 first_deadline=16326202
|
||||
silph autosignal: firing handle=0x1078 prev_signaled=Some(false) at cycle 16326209
|
||||
```
|
||||
|
||||
`now=16316202` at schedule time confirms `set_now_cycle_hint` is wired through correctly (the parallel path was simply never visited in SR2/SR3). Fire at cycle 16326209 = deadline 16326202 + 7-cycle scheduler granularity. Diagnostic classification: **B (partial unwedge — new waits and cxx_throw downstream)**.
|
||||
|
||||
## Code shape
|
||||
|
||||
POC is ~130 LOC across four files (per-file counts in the table below), all env-gated. Default off.
|
||||
|
||||
| File | Change | Lines |
|
||||
|---|---|---|
|
||||
| `crates/xenia-cpu/src/scheduler.rs` | `GuestThread.start_entry/start_context` fields; `spawn()` populates; `current_thread_entry_and_ctx()` helper | +18 |
|
||||
| `crates/xenia-kernel/src/state.rs` | `AutoSignalPending` struct; `silph_autosignal_*` fields; `set_now_cycle_hint`, `maybe_register_silph_autosignal`, `fire_due_silph_autosignals` methods | +95 |
|
||||
| `crates/xenia-kernel/src/exports.rs` | Hook in `nt_create_event` | +3 |
|
||||
| `crates/xenia-app/src/main.rs` | Fire-site wiring in lockstep loop (line 2788) **and** parallel loop (line 3215) | +12 |
|
||||
|
||||
Tests stay green at **655/655**.
|
||||
|
||||
## Reading-error class #20 (new)
|
||||
|
||||
**`ctx.lr` at kernel export entry ≠ guest caller's post-bl PC.** When a guest `bl` calls an export thunk, the thunk-wrapper has its own frame between the guest caller and the export body. At export-body entry, `ctx.lr` holds the *wrapper's* return slot, not the guest caller's post-bl PC.
|
||||
|
||||
To match a specific guest call site by LR, the export must walk one step up the back-chain (`walk_guest_back_chain(ctx.gpr[1], ctx.lr, mem, 2)`) and use `frames[1].lr`.
|
||||
|
||||
SR1 burned one full sub-round on this. Detect early in future POCs by comparing `ctx.lr` against the handle-audit's `created stack` frame dump for a known-good event (e.g. one created from a labelled site).
|
||||
|
||||
## Reading-error class #21 (new)
|
||||
|
||||
**`--parallel` and lockstep have separate outer loops in main.rs.** They share `coord_pre_round` (carved out exactly for this reason), but anything wired adjacent to that call site only takes effect on the path it's wired on. Lockstep is `run_execution` (line 2706, outer loop at 2763). Parallel is `run_execution_parallel` (line 2928, outer loop at 3186).
|
||||
|
||||
Per-round hooks must be wired in **both** paths, even if only one execution mode is being tested at the time. SR2/SR3 burned two sub-rounds on this.
|
||||
|
||||
## Files modified + LR mapping (for follow-up sessions)
|
||||
|
||||
**Wedge handle creation** (confirmed by handle-audit dump):
|
||||
```
|
||||
created cycle=0 tid=13 lr=0x824a9f6c [src=NtCreateEvent thunk return]
|
||||
created stack (6 frames):
|
||||
[ 0] fp=0x715a7a10 lr=0x824a9f6c ← ctx.lr at nt_create_event
|
||||
[ 1] fp=0x715a7aa0 lr=0x821cb15c ← guest caller's post-bl PC (filter on this)
|
||||
[ 2] fp=0x715a7bd0 lr=0x821cbae0 ← sub_821CBA08 frame
|
||||
[ 3] fp=0x715a7cd0 lr=0x821cc454 ← sub_821CC3F8 frame
|
||||
[ 4] fp=0x715a7d60 lr=0x821c4f18 ← sub_821C4EB0 frame (silph::UImpl@GamePart_Title)
|
||||
[ 5] fp=0x715a7e00 lr=0x82174a80 ← sub_821748F0 trampoline frame
|
||||
```
|
||||
|
||||
**Downstream cxx_throw stack** (after auto-signal fires, tid=5 throws runtime_error):
|
||||
```
|
||||
L0 lr=0x82612b50 std::exception throw path
|
||||
L1 lr=0x825f2444
|
||||
L2 lr=0x824547e8
|
||||
L3 lr=0x82451418
|
||||
L4 lr=0x82450d08 ← sub_82450B60+0x1A8 (dispatcher, audit-059 R26)
|
||||
L5 lr=0x82450b34
|
||||
L6 lr=0x82450a50 ← sub_82450a50 (worker dispatch)
|
||||
|
||||
cxx_throw runtime_error decoded magic=0x19930520
|
||||
cxx_throw BST ceil search candidate_key=0x828e2b2c match_found=false
|
||||
cxx_throw lhs (not-registered instance) lhs=0x715a7af0
|
||||
```
|
||||
|
||||
This confirms the dispatcher reached audit-049 territory (R26's `sub_82450B60+0x1A8` PC `0x82450D08`), looked up a runtime instance in its BST keyed by VA, and the instance was never registered. **The auto-signal bypassed an upstream registration step** the real signaler would have driven.
|
||||
|
||||
## Recommendation
|
||||
|
||||
Ship the POC env-gated (default off; no behavior change unless opted in). The verdict-B success makes it a useful diagnostic flag for future audit-049 work: future investigations can set `XENIA_SILPH_UI_AUTOSIGNAL_DELAY=10000` to skip the wedge and probe downstream behavior without first writing the proper signaler.
|
||||
|
||||
Long-term fix path remains the R33 silph_ui_synth.rs analogue: synthesize the missing signaler + its precondition state (BST instance registration at `0x715a7af0`-equivalent, work-item state `[+8]` per R26). The auto-signal POC is **not** the final fix — it confirms diagnosis but doesn't honor the dispatcher's BST registry invariant.
|
||||
|
||||
## Artifacts
|
||||
|
||||
- `poc-sr1.log`, `poc-sr1.stderr` — initial run, filter mismatch (D)
|
||||
- `poc-sr2.log`, `poc-sr2.stderr` — frame-1-LR fix, no fire (D)
|
||||
- `poc-sr3.log`, `poc-sr3.stderr` — diagnostic added, no fire (D, parallel path unwired)
|
||||
- `poc-sr4.log`, `poc-sr4.stderr` — parallel-path wired, **fires + partial unwedge (B)**
|
||||
|
||||
All `.log`/`.stderr` files are `.gitignore`d; this `FINDINGS.md` is the only artifact-side commit.
|
||||
@@ -0,0 +1,200 @@
|
||||
0x82450b60: lwz r18, 9792(r31)
|
||||
0x82450b64: lwz r16, 13880(r14)
|
||||
0x82450b68: mflr r12
|
||||
0x82450b6c: bl 0x825F0F74
|
||||
0x82450b70: subi r31, r1, 176
|
||||
0x82450b74: stwu r1, -176(r1)
|
||||
0x82450b78: mr r29, r4
|
||||
0x82450b7c: mr r27, r3
|
||||
0x82450b80: cmpwi cr6, r29, 5
|
||||
0x82450b84: bne cr6, 0x82450B94
|
||||
0x82450b88: addi r28, r27, 196
|
||||
0x82450b8c: addi r26, r27, 28
|
||||
0x82450b90: b 0x82450BAC
|
||||
0x82450b94: slwi r11, r29, 2
|
||||
0x82450b98: mr r26, r27
|
||||
0x82450b9c: add r11, r29, r11
|
||||
0x82450ba0: slwi r11, r11, 2
|
||||
0x82450ba4: add r11, r11, r27
|
||||
0x82450ba8: addi r28, r11, 96
|
||||
0x82450bac: addi r23, r27, 56
|
||||
0x82450bb0: mr r3, r23
|
||||
0x82450bb4: stw r23, 84(r31)
|
||||
0x82450bb8: bl 0x8284DCFC
|
||||
0x82450bbc: mr r3, r26
|
||||
0x82450bc0: bl 0x8284DCFC
|
||||
0x82450bc4: lwz r7, 16(r28)
|
||||
0x82450bc8: cntlzw r11, r7
|
||||
0x82450bcc: extrwi r11, r11, 1, 26
|
||||
0x82450bd0: cmplwi cr6, r11, 0x0
|
||||
0x82450bd4: beq cr6, 0x82450BEC
|
||||
0x82450bd8: mr r3, r26
|
||||
0x82450bdc: bl 0x8284DD0C
|
||||
0x82450be0: mr r3, r23
|
||||
0x82450be4: bl 0x8284DD0C
|
||||
0x82450be8: b 0x82450EE8
|
||||
0x82450bec: lwz r11, 12(r28)
|
||||
0x82450bf0: lwz r9, 8(r28)
|
||||
0x82450bf4: srwi r10, r11, 2
|
||||
0x82450bf8: clrlwi r8, r11, 30
|
||||
0x82450bfc: cmplw cr6, r9, r10
|
||||
0x82450c00: bgt cr6, 0x82450C08
|
||||
0x82450c04: sub r10, r10, r9
|
||||
0x82450c08: lwz r9, 4(r28)
|
||||
0x82450c0c: slwi r10, r10, 2
|
||||
0x82450c10: slwi r8, r8, 2
|
||||
0x82450c14: lwz r6, 8(r28)
|
||||
0x82450c18: addi r11, r11, 1
|
||||
0x82450c1c: slwi r6, r6, 2
|
||||
0x82450c20: li r24, 0
|
||||
0x82450c24: lwzx r10, r10, r9
|
||||
0x82450c28: cmplw cr6, r6, r11
|
||||
0x82450c2c: lwzx r30, r10, r8
|
||||
0x82450c30: stw r11, 12(r28)
|
||||
0x82450c34: stw r30, 80(r31)
|
||||
0x82450c38: bgt cr6, 0x82450C40
|
||||
0x82450c3c: stw r24, 12(r28)
|
||||
0x82450c40: subic. r11, r7, 1
|
||||
0x82450c44: stw r11, 16(r28)
|
||||
0x82450c48: bne 0x82450C50
|
||||
0x82450c4c: stw r24, 12(r28)
|
||||
0x82450c50: addi r25, r27, 28
|
||||
0x82450c54: mr r3, r25
|
||||
0x82450c58: bl 0x8284DCFC
|
||||
0x82450c5c: mr r3, r25
|
||||
0x82450c60: stw r30, 216(r27)
|
||||
0x82450c64: bl 0x8284DD0C
|
||||
0x82450c68: mr r3, r26
|
||||
0x82450c6c: bl 0x8284DD0C
|
||||
0x82450c70: lwz r11, 28(r30)
|
||||
0x82450c74: clrlwi r11, r11, 31
|
||||
0x82450c78: cmplwi cr6, r11, 0x0
|
||||
0x82450c7c: bne cr6, 0x82450D30
|
||||
0x82450c80: lwz r11, 8(r30)
|
||||
0x82450c84: cmplwi cr6, r11, 0x1
|
||||
0x82450c88: blt cr6, 0x82450CE4
|
||||
0x82450c8c: bne cr6, 0x82450D3C
|
||||
0x82450c90: lwz r11, 28(r30)
|
||||
0x82450c94: rlwinm r11, r11, 0, 29, 29
|
||||
0x82450c98: cmplwi cr6, r11, 0x0
|
||||
0x82450c9c: beq cr6, 0x82450CB0
|
||||
0x82450ca0: mr r4, r30
|
||||
0x82450ca4: mr r3, r27
|
||||
0x82450ca8: bl 0x824510E0
|
||||
0x82450cac: b 0x82450CBC
|
||||
0x82450cb0: mr r4, r30
|
||||
0x82450cb4: mr r3, r27
|
||||
0x82450cb8: bl 0x824517B0
|
||||
0x82450cbc: stw r29, 220(r27)
|
||||
0x82450cc0: bl 0x824AA830
|
||||
0x82450cc4: mr r11, r3
|
||||
0x82450cc8: lwz r3, 92(r27)
|
||||
0x82450ccc: li r5, 0
|
||||
0x82450cd0: addi r11, r11, 66
|
||||
0x82450cd4: li r4, 1
|
||||
0x82450cd8: stw r11, 224(r27)
|
||||
0x82450cdc: bl 0x824AB158
|
||||
0x82450ce0: b 0x82450D3C
|
||||
0x82450ce4: lwz r11, 28(r30)
|
||||
0x82450ce8: mr r4, r30
|
||||
0x82450cec: mr r3, r27
|
||||
0x82450cf0: rlwinm r11, r11, 0, 29, 29
|
||||
0x82450cf4: cmplwi cr6, r11, 0x0
|
||||
0x82450cf8: beq cr6, 0x82450D04
|
||||
0x82450cfc: bl 0x82450F68
|
||||
0x82450d00: b 0x82450D08
|
||||
0x82450d04: bl 0x82451238
|
||||
0x82450d08: stw r29, 220(r27)
|
||||
0x82450d0c: bl 0x824AA830
|
||||
0x82450d10: mr r11, r3
|
||||
0x82450d14: lwz r3, 92(r27)
|
||||
0x82450d18: li r5, 0
|
||||
0x82450d1c: addi r11, r11, 66
|
||||
0x82450d20: li r4, 1
|
||||
0x82450d24: stw r11, 224(r27)
|
||||
0x82450d28: bl 0x824AB158
|
||||
0x82450d2c: b 0x82450D3C
|
||||
0x82450d30: lwz r11, 28(r30)
|
||||
0x82450d34: ori r11, r11, 0x2
|
||||
0x82450d38: stw r11, 28(r30)
|
||||
0x82450d3c: lwz r11, 8(r30)
|
||||
0x82450d40: mr r29, r24
|
||||
0x82450d44: cmpwi cr6, r11, 2
|
||||
0x82450d48: blt cr6, 0x82450E08
|
||||
0x82450d4c: cmpwi cr6, r11, 3
|
||||
0x82450d50: ble cr6, 0x82450DA0
|
||||
0x82450d54: cmpwi cr6, r11, 4
|
||||
0x82450d58: bne cr6, 0x82450E08
|
||||
0x82450d5c: lwz r11, 28(r30)
|
||||
0x82450d60: rlwinm r11, r11, 0, 29, 29
|
||||
0x82450d64: cmplwi cr6, r11, 0x0
|
||||
0x82450d68: bne cr6, 0x82450D98
|
||||
0x82450d6c: lwz r29, 36(r30)
|
||||
0x82450d70: mr r3, r29
|
||||
0x82450d74: lwz r11, 0(r29)
|
||||
0x82450d78: lwz r11, 4(r11)
|
||||
0x82450d7c: mtctr r11
|
||||
0x82450d80: bctrl
|
||||
0x82450d84: clrlwi r11, r3, 24
|
||||
0x82450d88: cmplwi cr6, r11, 0x0
|
||||
0x82450d8c: beq cr6, 0x82450D98
|
||||
0x82450d90: mr r3, r29
|
||||
0x82450d94: bl 0x8244FB38
|
||||
0x82450d98: li r29, 1
|
||||
0x82450d9c: b 0x82450E28
|
||||
0x82450da0: addi r3, r30, 40
|
||||
0x82450da4: bl 0x82451DB8
|
||||
0x82450da8: lwz r11, 32(r30)
|
||||
0x82450dac: cmplwi cr6, r11, 0x0
|
||||
0x82450db0: beq cr6, 0x82450DCC
|
||||
0x82450db4: rlwinm r11, r11, 0, 0, 31
|
||||
0x82450db8: lwz r10, 4(r30)
|
||||
0x82450dbc: lwz r11, 4(r11)
|
||||
0x82450dc0: cmplw cr6, r10, r11
|
||||
0x82450dc4: li r11, 1
|
||||
0x82450dc8: beq cr6, 0x82450DD0
|
||||
0x82450dcc: mr r11, r24
|
||||
0x82450dd0: clrlwi r11, r11, 24
|
||||
0x82450dd4: cmplwi cr6, r11, 0x0
|
||||
0x82450dd8: beq cr6, 0x82450E00
|
||||
0x82450ddc: lwz r4, 8(r30)
|
||||
0x82450de0: lwz r5, 0(r30)
|
||||
0x82450de4: lwz r3, 32(r30)
|
||||
0x82450de8: cmpwi cr6, r4, 1
|
||||
0x82450dec: ble cr6, 0x82450DFC
|
||||
0x82450df0: bl 0x8245D9D8
|
||||
0x82450df4: li r29, 1
|
||||
0x82450df8: b 0x82450E28
|
||||
0x82450dfc: stw r4, 8(r3)
|
||||
0x82450e00: li r29, 1
|
||||
0x82450e04: b 0x82450E28
|
||||
0x82450e08: mr r3, r26
|
||||
0x82450e0c: stw r26, 88(r31)
|
||||
0x82450e10: bl 0x8284DCFC
|
||||
0x82450e14: addi r4, r31, 80
|
||||
0x82450e18: mr r3, r28
|
||||
0x82450e1c: bl 0x823232C0
|
||||
0x82450e20: mr r3, r26
|
||||
0x82450e24: bl 0x8284DD0C
|
||||
0x82450e28: clrlwi r11, r29, 24
|
||||
0x82450e2c: cmplwi cr6, r11, 0x0
|
||||
0x82450e30: beq cr6, 0x82450ECC
|
||||
0x82450e34: lwz r11, 28(r30)
|
||||
0x82450e38: rlwinm r11, r11, 0, 30, 30
|
||||
0x82450e3c: cmplwi cr6, r11, 0x0
|
||||
0x82450e40: beq cr6, 0x82450E68
|
||||
0x82450e44: mr r3, r26
|
||||
0x82450e48: stw r26, 88(r31)
|
||||
0x82450e4c: bl 0x8284DCFC
|
||||
0x82450e50: addi r4, r31, 80
|
||||
0x82450e54: mr r3, r28
|
||||
0x82450e58: bl 0x823232C0
|
||||
0x82450e5c: mr r3, r26
|
||||
0x82450e60: bl 0x8284DD0C
|
||||
0x82450e64: b 0x82450ECC
|
||||
0x82450e68: lwz r11, 40(r30)
|
||||
0x82450e6c: cmplwi cr6, r11, 0x0
|
||||
0x82450e70: beq cr6, 0x82450EA4
|
||||
0x82450e74: rlwinm r3, r11, 0, 0, 31
|
||||
0x82450e78: bl 0x82458A70
|
||||
0x82450e7c: lwz r29, 40(r30)
|
||||
@@ -0,0 +1,80 @@
|
||||
0x82451238: mflr r12
|
||||
0x8245123c: li r0, 0
|
||||
0x82451240: stw r0, 4(r1)
|
||||
0x82451244: bl 0x825F0F80
|
||||
0x82451248: subi r31, r1, 160
|
||||
0x8245124c: stwu r1, -160(r1)
|
||||
0x82451250: mr r30, r4
|
||||
0x82451254: li r9, 1
|
||||
0x82451258: lwz r10, 32(r30)
|
||||
0x8245125c: stw r30, 188(r31)
|
||||
0x82451260: stw r9, 8(r30)
|
||||
0x82451264: cmplwi cr6, r10, 0x0
|
||||
0x82451268: beq cr6, 0x82451288
|
||||
0x8245126c: lwz r11, 4(r30)
|
||||
0x82451270: lwz r8, 4(r10)
|
||||
0x82451274: cmplw cr6, r11, r8
|
||||
0x82451278: bne cr6, 0x82451288
|
||||
0x8245127c: mr r11, r9
|
||||
0x82451280: li r26, 0
|
||||
0x82451284: b 0x82451290
|
||||
0x82451288: li r26, 0
|
||||
0x8245128c: mr r11, r26
|
||||
0x82451290: clrlwi r11, r11, 24
|
||||
0x82451294: cmplwi cr6, r11, 0x0
|
||||
0x82451298: beq cr6, 0x824512A0
|
||||
0x8245129c: stw r9, 8(r10)
|
||||
0x824512a0: lwz r3, 36(r30)
|
||||
0x824512a4: lwz r11, 0(r3)
|
||||
0x824512a8: lwz r11, 32(r11)
|
||||
0x824512ac: mtctr r11
|
||||
0x824512b0: bctrl
|
||||
0x824512b4: mr r27, r3
|
||||
0x824512b8: stw r26, 84(r31)
|
||||
0x824512bc: stw r27, 96(r31)
|
||||
0x824512c0: bl 0x82454498
|
||||
0x824512c4: addi r4, r31, 84
|
||||
0x824512c8: bl 0x82454580
|
||||
0x824512cc: stw r26, 92(r31)
|
||||
0x824512d0: addi r11, r27, 2047
|
||||
0x824512d4: lis r10, 0x2
|
||||
0x824512d8: clrrwi r11, r11, 11
|
||||
0x824512dc: cmplw cr6, r11, r10
|
||||
0x824512e0: stw r11, 100(r31)
|
||||
0x824512e4: ble cr6, 0x824512F4
|
||||
0x824512e8: lis r11, 0x8207
|
||||
0x824512ec: addi r11, r11, 6724
|
||||
0x824512f0: b 0x824512F8
|
||||
0x824512f4: addi r11, r31, 100
|
||||
0x824512f8: addi r3, r31, 84
|
||||
0x824512fc: lwz r4, 0(r11)
|
||||
0x82451300: bl 0x82454B08
|
||||
0x82451304: mr r8, r8
|
||||
0x82451308: mr r28, r3
|
||||
0x8245130c: stw r28, 92(r31)
|
||||
0x82451310: b 0x82451324
|
||||
0x82451314: lwz r30, 188(r31)
|
||||
0x82451318: lwz r27, 96(r31)
|
||||
0x8245131c: li r26, 0
|
||||
0x82451320: lwz r28, 92(r31)
|
||||
0x82451324: addi r3, r31, 84
|
||||
0x82451328: bl 0x82454AA0
|
||||
0x8245132c: mr r29, r3
|
||||
0x82451330: cmplwi cr6, r28, 0x0
|
||||
0x82451334: beq cr6, 0x82451684
|
||||
0x82451338: lwz r3, 36(r30)
|
||||
0x8245133c: li r8, 0
|
||||
0x82451340: addi r7, r31, 88
|
||||
0x82451344: mr r6, r29
|
||||
0x82451348: mr r5, r29
|
||||
0x8245134c: mr r4, r28
|
||||
0x82451350: lwz r11, 0(r3)
|
||||
0x82451354: lwz r11, 28(r11)
|
||||
0x82451358: mtctr r11
|
||||
0x8245135c: bctrl
|
||||
0x82451360: clrlwi r11, r3, 24
|
||||
0x82451364: cmplwi cr6, r11, 0x0
|
||||
0x82451368: beq cr6, 0x82451684
|
||||
0x8245136c: lwz r11, 28(r30)
|
||||
0x82451370: rlwinm r11, r11, 0, 28, 28
|
||||
0x82451374: cmplwi cr6, r11, 0x0
|
||||
@@ -0,0 +1,52 @@
|
||||
=== Fire counts ===
|
||||
ours: 3
|
||||
canary: 7
|
||||
|
||||
=== Per-LR breakdown ===
|
||||
ours:
|
||||
lr=0x82458674: 3
|
||||
canary:
|
||||
lr=0x82457bd4: 2
|
||||
lr=0x82458674: 5
|
||||
|
||||
=== Side-by-side first 5 fires (entry registers) ===
|
||||
|
||||
--- fire #0 ---
|
||||
ours: tid=6 cycle=363 lr=0x82458674 r3=0x40ba9ac0
|
||||
dump: 419fecda 000007f6 00000000 41d7dd10 00001688 00000000 00000000 41f5dd80 82457958 823f53f0 00000000 00000000 00000001 00000000 00000000 4024a5c0
|
||||
canary: tid=11 cycle=<unk> lr=0x82458674 r3=0xbccc4ac0 r4=0x00000000 r5=0x00000001 r6=0x00000001 r7=0x00000000
|
||||
dump: bdb19cda 000007f6 00000000 bde98d10 00001688 00000000 00000000 be078d80 82457958 823f53f0 00000000 00000000 00000001 00000000 00000000 bc365760
|
||||
|
||||
--- fire #1 ---
|
||||
ours: tid=6 cycle=140548 lr=0x82458674 r3=0x40ba9b80
|
||||
dump: 42c0f09a 00018ff6 00000000 43777210 0004d055 00000000 00000000 41f60d80 82457958 823f53f0 00000000 00000000 00000001 00000000 00000000 4024a960
|
||||
canary: tid=11 cycle=<unk> lr=0x82458674 r3=0xbccc4b80 r4=0x00000000 r5=0x00000001 r6=0x00000001 r7=0x00000000
|
||||
dump: bed2a09a 00018ff6 00000000 bf892210 0004d055 00000000 00000000 be07bd80 82457958 823f53f0 00000000 00000000 00000001 00000000 00000000 bc365840
|
||||
|
||||
--- fire #2 ---
|
||||
ours: tid=6 cycle=5957876 lr=0x82458674 r3=0x40ba9b80
|
||||
dump: 419fecda 000007f6 00000000 414f5f70 000003b9 00000000 00000000 41f60d80 82457958 823f53f0 00000000 00000040 00000001 00000000 00000000 4024a980
|
||||
canary: tid=11 cycle=<unk> lr=0x82458674 r3=0xbccc4b80 r4=0x00000000 r5=0x00000001 r6=0x00000001 r7=0x00000000
|
||||
dump: bdb19cda 000007f6 00000000 bd610b90 000003b9 00000000 00000000 be07bd80 82457958 823f53f0 00000000 00000040 00000001 00000000 00000000 bc365860
|
||||
|
||||
--- fire #3 ---
|
||||
ours: <no fire>
|
||||
canary: tid=11 cycle=<unk> lr=0x82458674 r3=0xbccc5300 r4=0x00000000 r5=0x00000001 r6=0x00000001 r7=0x00000000
|
||||
dump: bdb1acda 000007f6 00000000 bce24ed0 00000167 00000000 00000000 be07bd80 82457958 823f53f0 00000000 00000000 00000001 00000000 00000000 bc365f40
|
||||
|
||||
--- fire #4 ---
|
||||
ours: <no fire>
|
||||
canary: tid=6 cycle=<unk> lr=0x82457bd4 r3=0x701cf3c0 r4=0x00000004 r5=0x00002530 r6=0x00008000 r7=0x00000001
|
||||
dump: be95af9a 0000c170 00000000 b2050010 000681e9 00000000 00000000 be07bd80 82457958 823f53f0 00000000 0000c17a 00000001 701cf4e0 00000000 be95af90
|
||||
|
||||
=== Equivalence check: u32 lanes at +0x04 and +0x10 (work-item magic + counter) ===
|
||||
Both fields are stable identifiers across engines (host VAs differ but data should match).
|
||||
|
||||
Index of fields:
|
||||
[+0x04] = work-item 'size?' (looks like a length field)
|
||||
[+0x10] = state counter (per round 30, this is [+128/4 ?]) — but in dump it's u32[4]
|
||||
|
||||
ours [+04,+10]: [(2038, 5768), (102390, 315477), (2038, 953)]
|
||||
canary [+04,+10]: [(2038, 5768), (102390, 315477), (2038, 953), (2038, 359), (49520, 426473), (232195, 999643), (6134, 13763)]
|
||||
|
||||
ours fires whose [+04,+10] match a canary fire: 3/3
|
||||
@@ -0,0 +1,175 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Round 35 lockstep diff: align sub_8280AD40 entry fires between
|
||||
ours (--audit-pc-probe-hex AUDIT-PC-PROBE / AUDIT-R3-DUMP) and
|
||||
canary (AUDIT-HLC JitProlog).
|
||||
|
||||
Outputs side-by-side rendering of:
|
||||
- per-fire entry register snapshot (r3..r10, lr)
|
||||
- 64-byte r3 dump (u32 lanes, big-endian)
|
||||
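Alignment is by invocation order only: fire #i of ours is shown next to fire #i of canary. tid is printed for reference but is not used for pairing (no input-equivalence required).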
|
||||
"""
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
|
||||
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
OURS_LOG = os.path.join(THIS_DIR, "ours.log")
|
||||
CANARY_LOG = os.path.join(
|
||||
os.path.dirname(THIS_DIR), "round35-lockstep-inflate-canary", "canary.log"
|
||||
)
|
||||
|
||||
PC_TARGET = 0x8280AD40
|
||||
|
||||
|
||||
def parse_ours(path):
    """Pair AUDIT-PC-PROBE lines with their following AUDIT-R3-DUMP lines.

    Args:
        path: path of the "ours" log file to read.

    Returns:
        A list of dicts, one per probe line whose pc equals PC_TARGET, in
        file order. Each dict has the keys "tid" and "cycle" (parsed as
        decimal), "lr" and "r3" (parsed as hex) and "dump": the list of hex
        values found on the AUDIT-R3-DUMP line that follows the probe, or
        [] when no such line follows.
    """
    # r11 is part of the probe line format (and must be present for the line
    # to match) but its value is not recorded.
    probe_re = re.compile(
        r"pc=0x([0-9a-f]+) tid=(\d+) hw=\d+ cycle=(\d+) lr=0x([0-9a-f]+) r3=0x([0-9a-f]+) r11=0x([0-9a-f]+)"
    )
    lane_re = re.compile(r"\+0x[0-9a-f]+=0x([0-9a-f]+)")

    fires = []
    pending = None  # fire still waiting for its AUDIT-R3-DUMP line
    with open(path) as log:
        for raw in log:
            line = raw.strip()
            if line.startswith("AUDIT-PC-PROBE"):
                # Any new probe line ends the previous fire's claim on the
                # next dump line, even if this probe turns out to be
                # unparsable or for another pc. Otherwise a dump belonging to
                # this probe would be attributed to the earlier fire.
                pending = None
                m = probe_re.search(line)
                if m is None:
                    continue
                if int(m.group(1), 16) != PC_TARGET:
                    continue
                pending = {
                    "tid": int(m.group(2)),
                    "cycle": int(m.group(3)),
                    "lr": int(m.group(4), 16),
                    "r3": int(m.group(5), 16),
                    "dump": [],
                }
                fires.append(pending)
            elif line.startswith("AUDIT-R3-DUMP") and pending is not None:
                pending["dump"] = [int(lane, 16) for lane in lane_re.findall(line)]
                # Only the first dump line after a probe belongs to it.
                pending = None
    return fires
|
||||
|
||||
|
||||
def parse_canary(path):
    """Pair AUDIT-HLC JitProlog header lines with following r3+NN dump lines.

    Only lines whose pc equals PC_TARGET (rendered as 8 upper-case hex
    digits) are considered, so this parser follows the same target as
    parse_ours.

    Args:
        path: path of the canary log file to read.

    Returns:
        A list of dicts, one per header line, in file order. Each dict has
        the keys "tid", "r3" .. "r10" and "lr" (all parsed as hex) and
        "dump": a list of u32 words indexed by byte offset // 4, filled from
        the "r3+NN: w w w w" lines seen after that header. Words that no dump
        line covered are 0.
    """
    pc_tag = f"{PC_TARGET:08X}"
    fields = ("tid", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr")
    hdr_re = re.compile(
        f"AUDIT-HLC JitProlog pc={pc_tag} "
        + " ".join(f"{name}=([0-9A-F]+)" for name in fields)
    )
    dump_re = re.compile(
        rf"AUDIT-HLC JitProlog pc={pc_tag} r3\+([0-9A-F]+): ([0-9A-F]+) ([0-9A-F]+) ([0-9A-F]+) ([0-9A-F]+)"
    )

    fires = []
    current = None  # most recent header; dump lines accumulate into it
    with open(path) as log:
        for raw in log:
            line = raw.strip()

            hdr = hdr_re.search(line)
            if hdr:
                current = {
                    name: int(text, 16) for name, text in zip(fields, hdr.groups())
                }
                current["dump"] = []
                fires.append(current)
                continue

            row = dump_re.search(line)
            if row is None or current is None:
                continue
            first = int(row.group(1), 16) // 4
            words = [int(text, 16) for text in row.groups()[1:]]
            dump = current["dump"]
            # Zero-pad so the row can be stored at its offset even when rows
            # arrive out of order or with gaps.
            shortfall = first + len(words) - len(dump)
            if shortfall > 0:
                dump.extend([0] * shortfall)
            dump[first:first + len(words)] = words
    return fires
|
||||
|
||||
|
||||
def fmt_dump(d):
    """Render at most the first 16 words of d as space-separated 8-digit lower-case hex."""
    shown = d[:16]
    return " ".join(format(word, "08x") for word in shown)
|
||||
|
||||
|
||||
def main():
    """Parse both logs and print the lockstep comparison report to stdout.

    The report has four sections: fire counts, a per-LR histogram for each
    engine, a side-by-side listing of the first 5 fires (paired by list
    index), and an equivalence check on the u32 lanes at +0x04 and +0x10 of
    each fire's r3 dump.
    """

    def lane_key(fire):
        # +0x04 is dump[1], +0x10 is dump[4]; None when the dump is too short.
        dump = fire["dump"]
        if len(dump) > 4:
            return (dump[1], dump[4])
        return None

    ours = parse_ours(OURS_LOG)
    canary = parse_canary(CANARY_LOG)

    print("=== Fire counts ===")
    print(f" ours: {len(ours)}")
    print(f" canary: {len(canary)}")
    print()

    print("=== Per-LR breakdown ===")
    for label, fires in (("ours", ours), ("canary", canary)):
        lr_counts = {}
        for fire in fires:
            lr_counts[fire["lr"]] = lr_counts.get(fire["lr"], 0) + 1
        print(f" {label}:")
        for lr, count in sorted(lr_counts.items()):
            print(f" lr=0x{lr:08x}: {count}")
    print()

    print("=== Side-by-side first 5 fires (entry registers) ===")
    shown = min(max(len(ours), len(canary)), 5)
    for i in range(shown):
        print(f"\n--- fire #{i} ---")
        if i < len(ours):
            f = ours[i]
            print(
                f" ours: tid={f['tid']:<3} cycle={f['cycle']:<10} lr=0x{f['lr']:08x} r3=0x{f['r3']:08x}"
            )
            print(f" dump: {fmt_dump(f['dump'])}")
        else:
            print(" ours: <no fire>")
        if i < len(canary):
            f = canary[i]
            print(
                f" canary: tid={f['tid']:<3} cycle=<unk> lr=0x{f['lr']:08x} r3=0x{f['r3']:08x} "
                f"r4=0x{f['r4']:08x} r5=0x{f['r5']:08x} r6=0x{f['r6']:08x} r7=0x{f['r7']:08x}"
            )
            print(f" dump: {fmt_dump(f['dump'])}")
        else:
            print(" canary: <no fire>")

    print()
    print("=== Equivalence check: u32 lanes at +0x04 and +0x10 (work-item magic + counter) ===")
    print(" Both fields are stable identifiers across engines (host VAs differ but data should match).")
    print()
    print(" Index of fields:")
    print(" [+0x04] = work-item 'size?' (looks like a length field)")
    print(" [+0x10] = state counter (per round 30, this is [+128/4 ?]) — but in dump it's u32[4]")
    print()

    ours_keys = [lane_key(fire) for fire in ours]
    canary_keys = [lane_key(fire) for fire in canary]
    print(f" ours [+04,+10]: {ours_keys}")
    print(f" canary [+04,+10]: {canary_keys}")
    print()

    # Cross-match: every ours key should appear in canary (canary is a superset).
    # A None key means "no usable dump" and must never count as a match,
    # even if some canary fire is also missing its dump.
    canary_known = {key for key in canary_keys if key is not None}
    matched = []
    unmatched_ours = []
    for key in ours_keys:
        if key is not None and key in canary_known:
            matched.append(key)
        else:
            unmatched_ours.append(key)
    print(f" ours fires whose [+04,+10] match a canary fire: {len(matched)}/{len(ours)}")
    if unmatched_ours:
        print(f" ours fires with NO canary match: {unmatched_ours}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,17 @@
|
||||
K> F8000008 AUDIT-HLC JitProlog pc=821741C8 tid=00000006 r3=BCCC4A80 r4=00000018 r5=828F3888 r6=701CF924 r7=82456F00 r8=00000000 r9=00000000 r10=00000018 lr=822F1D5C
|
||||
K> F8000008 AUDIT-HLC JitProlog pc=821741C8 r3+00: BC22C910 00010004 00000000 000003E8
|
||||
K> F8000008 AUDIT-HLC JitProlog pc=821741C8 r3+10: 0101FFFF 00000000 00000000 01010000
|
||||
K> F8000008 AUDIT-HLC JitProlog pc=821741C8 r3+20: FFFFFFFF 00000000 00000000 00000000
|
||||
K> F8000008 AUDIT-HLC JitProlog pc=821741C8 r3+30: 00000000 BC365BC0 00000000 00000000
|
||||
K> F8000008 AUDIT-HLC JitProlog pc=821741C8 r3+40: 00000000 00000000 00000000 BDE9A398
|
||||
K> F8000008 AUDIT-HLC JitProlog pc=821741C8 r3+50: BC365560 00000000 00000000 00000000
|
||||
K> F8000008 AUDIT-HLC JitProlog pc=821741C8 r3+60: 00000000 00000000 00000000 01010040
|
||||
K> F8000008 AUDIT-HLC JitProlog pc=821741C8 r3+70: 00000000 00000000 00000000 FFFFFFFF
|
||||
K> F8000008 AUDIT-HLC JitProlog pc=821741C8 r3+80: 00000000 00000000 00000000 BC22C930
|
||||
K> F8000008 AUDIT-HLC JitProlog pc=821741C8 r3+90: 00000000 00000001 00000800 00000000
|
||||
K> F8000008 AUDIT-HLC JitProlog pc=821741C8 r3+A0: F800004C 00000000 00000000 BC365220
|
||||
K> F8000008 AUDIT-HLC JitProlog pc=821741C8 r3+B0: BC3655C0 00000000 00000000 00000000
|
||||
K> F8000008 AUDIT-HLC JitProlog pc=821741C8 r3+C0: 00CC0048 00460020 00460072 00650071
|
||||
K> F8000008 AUDIT-HLC JitProlog pc=821741C8 r3+D0: 00750065 006E0063 00790000 01010000
|
||||
K> F8000008 AUDIT-HLC JitProlog pc=821741C8 r3+E0: 00000000 00000000 00000000 FFFFFFFF
|
||||
K> F8000008 AUDIT-HLC JitProlog pc=821741C8 r3+F0: 00000000 00000000 00000000 BD610B80
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,89 @@
|
||||
warn: CreateDXGIFactory2: Ignoring flags
|
||||
info: Game: xenia_canary.exe
|
||||
info: DXVK: v2.7.1
|
||||
info: Build: x86_64 gcc 15.1.0
|
||||
info: Vulkan: Found vkGetInstanceProcAddr in winevulkan.dll @ 0x6ffffbd84000
|
||||
info: Extension providers:
|
||||
info: Platform WSI
|
||||
info: OpenVR
|
||||
info: OpenVR: could not open registry key, status 2
|
||||
info: OpenVR: Failed to locate module
|
||||
info: OpenXR
|
||||
info: Enabled instance extensions:
|
||||
info: VK_EXT_surface_maintenance1
|
||||
info: VK_KHR_get_surface_capabilities2
|
||||
info: VK_KHR_surface
|
||||
info: VK_KHR_win32_surface
|
||||
info: Found device: NVIDIA GeForce GTX 1070 Ti (NVIDIA 580.159.3)
|
||||
info: Found device: llvmpipe (LLVM 20.1.2, 256 bits) (llvmpipe 25.2.8)
|
||||
info: Skipping: Software driver
|
||||
info: DXGI: Hiding actual GPU, reporting:
|
||||
info: vendor ID: 0x1002
|
||||
info: device ID: 0x73df
|
||||
warn: DxgiAdapter::QueryInterface: Unknown interface query
|
||||
warn: f0db4c7f-fe5a-42a2-bd62-f2a6cf6fc83e
|
||||
564.236:00dc:013c:info:vkd3d-proton:vkd3d_instance_apply_application_workarounds: Program name: "xenia_canary.exe" (hash: c099ade372da5277)
|
||||
564.236:00dc:013c:info:vkd3d-proton:vkd3d_instance_deduce_config_flags_from_environment: shader_cache is used, global_pipeline_cache is enforced.
|
||||
564.236:00dc:013c:info:vkd3d-proton:vkd3d_config_flags_init_once: VKD3D_CONFIG=''.
|
||||
564.240:00dc:013c:info:vkd3d-proton:vkd3d_get_vk_version: vkd3d-proton - applicationVersion: 3.0.1.
|
||||
564.240:00dc:013c:info:vkd3d-proton:vkd3d_instance_init: vkd3d-proton - build: 3b10bd7a7ec6a73.
|
||||
564.399:00dc:013c:info:vkd3d-proton:vkd3d_init_device_caps: Not all relevant pipeline stages are supported by EXT_dgc. Skipping.
|
||||
564.825:00dc:013c:info:vkd3d-proton:vkd3d_memory_info_decide_hvv_usage: Topology: Device heaps are split. Assuming small BAR situation.
|
||||
564.825:00dc:013c:info:vkd3d-proton:vkd3d_memory_info_upload_hvv_memory_properties: Topology: HVV usage is not allowed, using HOST_COHERENT for UPLOAD.
|
||||
564.827:00dc:013c:info:vkd3d-proton:vkd3d_bindless_state_get_bindless_flags: Device does not support VK_EXT_mutable_descriptor_type (or VALVE).
|
||||
564.827:00dc:013c:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
564.827:00dc:013c:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
564.827:00dc:013c:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
564.827:00dc:013c:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
564.827:00dc:013c:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
564.827:00dc:013c:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
564.827:00dc:013c:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
564.839:00dc:013c:info:vkd3d-proton:d3d12_device_caps_init_shader_model: Enabling support for SM 6.6.
|
||||
564.839:00dc:013c:fixme:vkd3d-proton:d3d12_device_caps_init_feature_options1: TotalLaneCount = 2432, may be inaccurate.
|
||||
564.839:00dc:013c:info:vkd3d-proton:d3d12_device_determine_ray_tracing_tier: DXR support enabled.
|
||||
564.840:00dc:013c:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Remapping VKD3D_SHADER_CACHE to: vkd3d-proton.cache.
|
||||
564.840:00dc:013c:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Attempting to load disk cache from: vkd3d-proton.cache.
|
||||
564.843:00dc:0154:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Performing async setup of stream archive ...
|
||||
564.844:00dc:0154:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_merge: Promoting write cache to read cache. No need to merge any disk caches.
|
||||
564.844:00dc:0154:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Merging pipeline libraries took 1.012 ms.
|
||||
564.845:00dc:0154:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Mapping read-only cache took 0.607 ms.
|
||||
564.845:00dc:0154:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Parsing stream archive took 0.370 ms.
|
||||
564.845:00dc:0154:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Done performing async setup of stream archive.
|
||||
564.903:00dc:013c:info:vkd3d-proton:vkd3d_get_vk_version: vkd3d-proton - applicationVersion: 3.0.1.
|
||||
564.903:00dc:013c:info:vkd3d-proton:vkd3d_instance_init: vkd3d-proton - build: 3b10bd7a7ec6a73.
|
||||
564.946:00dc:013c:info:vkd3d-proton:vkd3d_init_device_caps: Not all relevant pipeline stages are supported by EXT_dgc. Skipping.
|
||||
565.065:00dc:013c:info:vkd3d-proton:vkd3d_memory_info_decide_hvv_usage: Topology: Device heaps are split. Assuming small BAR situation.
|
||||
565.065:00dc:013c:info:vkd3d-proton:vkd3d_memory_info_upload_hvv_memory_properties: Topology: HVV usage is not allowed, using HOST_COHERENT for UPLOAD.
|
||||
565.066:00dc:013c:info:vkd3d-proton:vkd3d_bindless_state_get_bindless_flags: Device does not support VK_EXT_mutable_descriptor_type (or VALVE).
|
||||
565.066:00dc:013c:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
565.066:00dc:013c:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
565.066:00dc:013c:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
565.066:00dc:013c:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
565.066:00dc:013c:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
565.066:00dc:013c:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
565.066:00dc:013c:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
565.067:00dc:013c:info:vkd3d-proton:d3d12_device_caps_init_shader_model: Enabling support for SM 6.6.
|
||||
565.067:00dc:013c:fixme:vkd3d-proton:d3d12_device_caps_init_feature_options1: TotalLaneCount = 2432, may be inaccurate.
|
||||
565.067:00dc:013c:info:vkd3d-proton:d3d12_device_determine_ray_tracing_tier: DXR support enabled.
|
||||
565.067:00dc:013c:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Remapping VKD3D_SHADER_CACHE to: vkd3d-proton.cache.
|
||||
565.067:00dc:013c:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Attempting to load disk cache from: vkd3d-proton.cache.
|
||||
565.068:00dc:015c:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Performing async setup of stream archive ...
|
||||
565.068:00dc:015c:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_merge: No write cache exists. No need to merge any disk caches.
|
||||
565.068:00dc:015c:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Merging pipeline libraries took 0.136 ms.
|
||||
565.068:00dc:015c:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Mapping read-only cache took 0.221 ms.
|
||||
565.069:00dc:015c:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Parsing stream archive took 0.031 ms.
|
||||
565.069:00dc:015c:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Done performing async setup of stream archive.
|
||||
565.075:00dc:013c:fixme:vkd3d-proton:d3d12_command_queue_init: Ignoring priority 0x64.
|
||||
warn: DXGIGetDebugInterface1: Stub
|
||||
info: DXGI: Hiding actual GPU, reporting:
|
||||
info: vendor ID: 0x1002
|
||||
info: device ID: 0x73df
|
||||
565.173:00dc:00e0:info:vkd3d-proton:dxgi_vk_swap_chain_init: Creating swapchain (1280 x 720), BufferCount = 3.
|
||||
565.194:00dc:00e0:info:vkd3d-proton:dxgi_vk_swap_chain_init_sync_objects: Ensure maximum latency of 3 frames with KHR_present_wait.
|
||||
565.195:00dc:00e0:info:vkd3d-proton:dxgi_vk_swap_chain_init_sleep_state: Timer interval is 1.0 ms.
|
||||
warn: DXGI: MakeWindowAssociation: Ignoring flags
|
||||
warn: DxgiOutput::WaitForVBlank: Inaccurate
|
||||
info: Setting timer interval to 1000 us
|
||||
565.773:00dc:0164:info:vkd3d-proton:dxgi_vk_swap_chain_recreate_swapchain_in_present_task: Got 3 swapchain images.
|
||||
566.349:00dc:016c:fixme:vkd3d-proton:vkd3d_texture_view_desc_fixup: Remapping 2D to 2D_ARRAY. Needs Vulkan spec tightening to match D3D12 properly.
|
||||
566.387:00dc:0164:info:vkd3d-proton:dxgi_vk_swap_chain_recreate_swapchain_in_present_task: Got 3 swapchain images.
|
||||
@@ -0,0 +1,89 @@
|
||||
warn: CreateDXGIFactory2: Ignoring flags
|
||||
info: Game: xenia_canary.exe
|
||||
info: DXVK: v2.7.1
|
||||
info: Build: x86_64 gcc 15.1.0
|
||||
info: Vulkan: Found vkGetInstanceProcAddr in winevulkan.dll @ 0x6ffffbfb4000
|
||||
info: Extension providers:
|
||||
info: Platform WSI
|
||||
info: OpenVR
|
||||
info: OpenVR: could not open registry key, status 2
|
||||
info: OpenVR: Failed to locate module
|
||||
info: OpenXR
|
||||
info: Enabled instance extensions:
|
||||
info: VK_EXT_surface_maintenance1
|
||||
info: VK_KHR_get_surface_capabilities2
|
||||
info: VK_KHR_surface
|
||||
info: VK_KHR_win32_surface
|
||||
info: Found device: NVIDIA GeForce GTX 1070 Ti (NVIDIA 580.159.3)
|
||||
info: Found device: llvmpipe (LLVM 20.1.2, 256 bits) (llvmpipe 25.2.8)
|
||||
info: Skipping: Software driver
|
||||
info: DXGI: Hiding actual GPU, reporting:
|
||||
info: vendor ID: 0x1002
|
||||
info: device ID: 0x73df
|
||||
warn: DxgiAdapter::QueryInterface: Unknown interface query
|
||||
warn: f0db4c7f-fe5a-42a2-bd62-f2a6cf6fc83e
|
||||
805.907:00d0:0124:info:vkd3d-proton:vkd3d_instance_apply_application_workarounds: Program name: "xenia_canary.exe" (hash: c099ade372da5277)
|
||||
805.907:00d0:0124:info:vkd3d-proton:vkd3d_instance_deduce_config_flags_from_environment: shader_cache is used, global_pipeline_cache is enforced.
|
||||
805.907:00d0:0124:info:vkd3d-proton:vkd3d_config_flags_init_once: VKD3D_CONFIG=''.
|
||||
805.910:00d0:0124:info:vkd3d-proton:vkd3d_get_vk_version: vkd3d-proton - applicationVersion: 3.0.1.
|
||||
805.910:00d0:0124:info:vkd3d-proton:vkd3d_instance_init: vkd3d-proton - build: 3b10bd7a7ec6a73.
|
||||
805.955:00d0:0124:info:vkd3d-proton:vkd3d_init_device_caps: Not all relevant pipeline stages are supported by EXT_dgc. Skipping.
|
||||
806.100:00d0:0124:info:vkd3d-proton:vkd3d_memory_info_decide_hvv_usage: Topology: Device heaps are split. Assuming small BAR situation.
|
||||
806.100:00d0:0124:info:vkd3d-proton:vkd3d_memory_info_upload_hvv_memory_properties: Topology: HVV usage is not allowed, using HOST_COHERENT for UPLOAD.
|
||||
806.101:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_get_bindless_flags: Device does not support VK_EXT_mutable_descriptor_type (or VALVE).
|
||||
806.101:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
806.101:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
806.101:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
806.101:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
806.101:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
806.101:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
806.101:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
806.105:00d0:0124:info:vkd3d-proton:d3d12_device_caps_init_shader_model: Enabling support for SM 6.6.
|
||||
806.105:00d0:0124:fixme:vkd3d-proton:d3d12_device_caps_init_feature_options1: TotalLaneCount = 2432, may be inaccurate.
|
||||
806.105:00d0:0124:info:vkd3d-proton:d3d12_device_determine_ray_tracing_tier: DXR support enabled.
|
||||
806.105:00d0:0124:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Remapping VKD3D_SHADER_CACHE to: vkd3d-proton.cache.
|
||||
806.105:00d0:0124:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Attempting to load disk cache from: vkd3d-proton.cache.
|
||||
806.106:00d0:013c:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Performing async setup of stream archive ...
|
||||
806.106:00d0:013c:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_merge: No write cache exists. No need to merge any disk caches.
|
||||
806.106:00d0:013c:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Merging pipeline libraries took 0.161 ms.
|
||||
806.107:00d0:013c:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Mapping read-only cache took 0.185 ms.
|
||||
806.107:00d0:013c:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Parsing stream archive took 0.028 ms.
|
||||
806.107:00d0:013c:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Done performing async setup of stream archive.
|
||||
806.154:00d0:0124:info:vkd3d-proton:vkd3d_get_vk_version: vkd3d-proton - applicationVersion: 3.0.1.
|
||||
806.154:00d0:0124:info:vkd3d-proton:vkd3d_instance_init: vkd3d-proton - build: 3b10bd7a7ec6a73.
|
||||
806.197:00d0:0124:info:vkd3d-proton:vkd3d_init_device_caps: Not all relevant pipeline stages are supported by EXT_dgc. Skipping.
|
||||
806.310:00d0:0124:info:vkd3d-proton:vkd3d_memory_info_decide_hvv_usage: Topology: Device heaps are split. Assuming small BAR situation.
|
||||
806.310:00d0:0124:info:vkd3d-proton:vkd3d_memory_info_upload_hvv_memory_properties: Topology: HVV usage is not allowed, using HOST_COHERENT for UPLOAD.
|
||||
806.310:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_get_bindless_flags: Device does not support VK_EXT_mutable_descriptor_type (or VALVE).
|
||||
806.310:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
806.310:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
806.310:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
806.310:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
806.310:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
806.310:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
806.310:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
806.312:00d0:0124:info:vkd3d-proton:d3d12_device_caps_init_shader_model: Enabling support for SM 6.6.
|
||||
806.312:00d0:0124:fixme:vkd3d-proton:d3d12_device_caps_init_feature_options1: TotalLaneCount = 2432, may be inaccurate.
|
||||
806.312:00d0:0124:info:vkd3d-proton:d3d12_device_determine_ray_tracing_tier: DXR support enabled.
|
||||
806.312:00d0:0124:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Remapping VKD3D_SHADER_CACHE to: vkd3d-proton.cache.
|
||||
806.312:00d0:0124:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Attempting to load disk cache from: vkd3d-proton.cache.
|
||||
806.313:00d0:0144:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Performing async setup of stream archive ...
|
||||
806.313:00d0:0144:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_merge: No write cache exists. No need to merge any disk caches.
|
||||
806.313:00d0:0144:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Merging pipeline libraries took 0.156 ms.
|
||||
806.314:00d0:0144:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Mapping read-only cache took 0.659 ms.
|
||||
806.314:00d0:0144:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Parsing stream archive took 0.035 ms.
|
||||
806.314:00d0:0144:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Done performing async setup of stream archive.
|
||||
806.319:00d0:0124:fixme:vkd3d-proton:d3d12_command_queue_init: Ignoring priority 0x64.
|
||||
warn: DXGIGetDebugInterface1: Stub
|
||||
info: DXGI: Hiding actual GPU, reporting:
|
||||
info: vendor ID: 0x1002
|
||||
info: device ID: 0x73df
|
||||
806.408:00d0:00d4:info:vkd3d-proton:dxgi_vk_swap_chain_init: Creating swapchain (1280 x 720), BufferCount = 3.
|
||||
806.422:00d0:00d4:info:vkd3d-proton:dxgi_vk_swap_chain_init_sync_objects: Ensure maximum latency of 3 frames with KHR_present_wait.
|
||||
806.423:00d0:00d4:info:vkd3d-proton:dxgi_vk_swap_chain_init_sleep_state: Timer interval is 1.0 ms.
|
||||
warn: DXGI: MakeWindowAssociation: Ignoring flags
|
||||
warn: DxgiOutput::WaitForVBlank: Inaccurate
|
||||
info: Setting timer interval to 1000 us
|
||||
806.948:00d0:014c:info:vkd3d-proton:dxgi_vk_swap_chain_recreate_swapchain_in_present_task: Got 3 swapchain images.
|
||||
807.499:00d0:0154:fixme:vkd3d-proton:vkd3d_texture_view_desc_fixup: Remapping 2D to 2D_ARRAY. Needs Vulkan spec tightening to match D3D12 properly.
|
||||
807.521:00d0:014c:info:vkd3d-proton:dxgi_vk_swap_chain_recreate_swapchain_in_present_task: Got 3 swapchain images.
|
||||
@@ -0,0 +1,89 @@
|
||||
warn: CreateDXGIFactory2: Ignoring flags
|
||||
info: Game: xenia_canary.exe
|
||||
info: DXVK: v2.7.1
|
||||
info: Build: x86_64 gcc 15.1.0
|
||||
info: Vulkan: Found vkGetInstanceProcAddr in winevulkan.dll @ 0x6ffffbfb4000
|
||||
info: Extension providers:
|
||||
info: Platform WSI
|
||||
info: OpenVR
|
||||
info: OpenVR: could not open registry key, status 2
|
||||
info: OpenVR: Failed to locate module
|
||||
info: OpenXR
|
||||
info: Enabled instance extensions:
|
||||
info: VK_EXT_surface_maintenance1
|
||||
info: VK_KHR_get_surface_capabilities2
|
||||
info: VK_KHR_surface
|
||||
info: VK_KHR_win32_surface
|
||||
info: Found device: NVIDIA GeForce GTX 1070 Ti (NVIDIA 580.159.3)
|
||||
info: Found device: llvmpipe (LLVM 20.1.2, 256 bits) (llvmpipe 25.2.8)
|
||||
info: Skipping: Software driver
|
||||
info: DXGI: Hiding actual GPU, reporting:
|
||||
info: vendor ID: 0x1002
|
||||
info: device ID: 0x73df
|
||||
warn: DxgiAdapter::QueryInterface: Unknown interface query
|
||||
warn: f0db4c7f-fe5a-42a2-bd62-f2a6cf6fc83e
|
||||
893.096:00d4:0128:info:vkd3d-proton:vkd3d_instance_apply_application_workarounds: Program name: "xenia_canary.exe" (hash: c099ade372da5277)
|
||||
893.096:00d4:0128:info:vkd3d-proton:vkd3d_instance_deduce_config_flags_from_environment: shader_cache is used, global_pipeline_cache is enforced.
|
||||
893.096:00d4:0128:info:vkd3d-proton:vkd3d_config_flags_init_once: VKD3D_CONFIG=''.
|
||||
893.099:00d4:0128:info:vkd3d-proton:vkd3d_get_vk_version: vkd3d-proton - applicationVersion: 3.0.1.
|
||||
893.099:00d4:0128:info:vkd3d-proton:vkd3d_instance_init: vkd3d-proton - build: 3b10bd7a7ec6a73.
|
||||
893.145:00d4:0128:info:vkd3d-proton:vkd3d_init_device_caps: Not all relevant pipeline stages are supported by EXT_dgc. Skipping.
|
||||
893.308:00d4:0128:info:vkd3d-proton:vkd3d_memory_info_decide_hvv_usage: Topology: Device heaps are split. Assuming small BAR situation.
|
||||
893.308:00d4:0128:info:vkd3d-proton:vkd3d_memory_info_upload_hvv_memory_properties: Topology: HVV usage is not allowed, using HOST_COHERENT for UPLOAD.
|
||||
893.308:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_get_bindless_flags: Device does not support VK_EXT_mutable_descriptor_type (or VALVE).
|
||||
893.308:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
893.308:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
893.308:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
893.308:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
893.308:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
893.308:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
893.308:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
893.310:00d4:0128:info:vkd3d-proton:d3d12_device_caps_init_shader_model: Enabling support for SM 6.6.
|
||||
893.310:00d4:0128:fixme:vkd3d-proton:d3d12_device_caps_init_feature_options1: TotalLaneCount = 2432, may be inaccurate.
|
||||
893.310:00d4:0128:info:vkd3d-proton:d3d12_device_determine_ray_tracing_tier: DXR support enabled.
|
||||
893.310:00d4:0128:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Remapping VKD3D_SHADER_CACHE to: vkd3d-proton.cache.
|
||||
893.310:00d4:0128:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Attempting to load disk cache from: vkd3d-proton.cache.
|
||||
893.311:00d4:0140:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Performing async setup of stream archive ...
|
||||
893.311:00d4:0140:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_merge: No write cache exists. No need to merge any disk caches.
|
||||
893.311:00d4:0140:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Merging pipeline libraries took 0.187 ms.
|
||||
893.312:00d4:0140:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Mapping read-only cache took 0.161 ms.
|
||||
893.312:00d4:0140:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Parsing stream archive took 0.040 ms.
|
||||
893.312:00d4:0140:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Done performing async setup of stream archive.
|
||||
893.360:00d4:0128:info:vkd3d-proton:vkd3d_get_vk_version: vkd3d-proton - applicationVersion: 3.0.1.
|
||||
893.360:00d4:0128:info:vkd3d-proton:vkd3d_instance_init: vkd3d-proton - build: 3b10bd7a7ec6a73.
|
||||
893.405:00d4:0128:info:vkd3d-proton:vkd3d_init_device_caps: Not all relevant pipeline stages are supported by EXT_dgc. Skipping.
|
||||
893.520:00d4:0128:info:vkd3d-proton:vkd3d_memory_info_decide_hvv_usage: Topology: Device heaps are split. Assuming small BAR situation.
|
||||
893.520:00d4:0128:info:vkd3d-proton:vkd3d_memory_info_upload_hvv_memory_properties: Topology: HVV usage is not allowed, using HOST_COHERENT for UPLOAD.
|
||||
893.520:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_get_bindless_flags: Device does not support VK_EXT_mutable_descriptor_type (or VALVE).
|
||||
893.520:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
893.520:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
893.520:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
893.520:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
893.520:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
893.520:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
893.520:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
893.522:00d4:0128:info:vkd3d-proton:d3d12_device_caps_init_shader_model: Enabling support for SM 6.6.
|
||||
893.522:00d4:0128:fixme:vkd3d-proton:d3d12_device_caps_init_feature_options1: TotalLaneCount = 2432, may be inaccurate.
|
||||
893.522:00d4:0128:info:vkd3d-proton:d3d12_device_determine_ray_tracing_tier: DXR support enabled.
|
||||
893.522:00d4:0128:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Remapping VKD3D_SHADER_CACHE to: vkd3d-proton.cache.
|
||||
893.522:00d4:0128:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Attempting to load disk cache from: vkd3d-proton.cache.
|
||||
893.523:00d4:0148:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Performing async setup of stream archive ...
|
||||
893.523:00d4:0148:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_merge: No write cache exists. No need to merge any disk caches.
|
||||
893.523:00d4:0148:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Merging pipeline libraries took 0.153 ms.
|
||||
893.523:00d4:0148:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Mapping read-only cache took 0.199 ms.
|
||||
893.523:00d4:0148:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Parsing stream archive took 0.034 ms.
|
||||
893.523:00d4:0148:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Done performing async setup of stream archive.
|
||||
893.529:00d4:0128:fixme:vkd3d-proton:d3d12_command_queue_init: Ignoring priority 0x64.
|
||||
warn: DXGIGetDebugInterface1: Stub
|
||||
info: DXGI: Hiding actual GPU, reporting:
|
||||
info: vendor ID: 0x1002
|
||||
info: device ID: 0x73df
|
||||
893.622:00d4:00d8:info:vkd3d-proton:dxgi_vk_swap_chain_init: Creating swapchain (1280 x 720), BufferCount = 3.
|
||||
893.631:00d4:00d8:info:vkd3d-proton:dxgi_vk_swap_chain_init_sync_objects: Ensure maximum latency of 3 frames with KHR_present_wait.
|
||||
893.632:00d4:00d8:info:vkd3d-proton:dxgi_vk_swap_chain_init_sleep_state: Timer interval is 1.0 ms.
|
||||
warn: DXGI: MakeWindowAssociation: Ignoring flags
|
||||
warn: DxgiOutput::WaitForVBlank: Inaccurate
|
||||
info: Setting timer interval to 1000 us
|
||||
894.203:00d4:0150:info:vkd3d-proton:dxgi_vk_swap_chain_recreate_swapchain_in_present_task: Got 3 swapchain images.
|
||||
894.705:00d4:0158:fixme:vkd3d-proton:vkd3d_texture_view_desc_fixup: Remapping 2D to 2D_ARRAY. Needs Vulkan spec tightening to match D3D12 properly.
|
||||
894.727:00d4:0150:info:vkd3d-proton:dxgi_vk_swap_chain_recreate_swapchain_in_present_task: Got 3 swapchain images.
|
||||
@@ -0,0 +1,89 @@
|
||||
warn: CreateDXGIFactory2: Ignoring flags
|
||||
info: Game: xenia_canary.exe
|
||||
info: DXVK: v2.7.1
|
||||
info: Build: x86_64 gcc 15.1.0
|
||||
info: Vulkan: Found vkGetInstanceProcAddr in winevulkan.dll @ 0x6ffffbfb4000
|
||||
info: Extension providers:
|
||||
info: Platform WSI
|
||||
info: OpenVR
|
||||
info: OpenVR: could not open registry key, status 2
|
||||
info: OpenVR: Failed to locate module
|
||||
info: OpenXR
|
||||
info: Enabled instance extensions:
|
||||
info: VK_EXT_surface_maintenance1
|
||||
info: VK_KHR_get_surface_capabilities2
|
||||
info: VK_KHR_surface
|
||||
info: VK_KHR_win32_surface
|
||||
info: Found device: NVIDIA GeForce GTX 1070 Ti (NVIDIA 580.159.3)
|
||||
info: Found device: llvmpipe (LLVM 20.1.2, 256 bits) (llvmpipe 25.2.8)
|
||||
info: Skipping: Software driver
|
||||
info: DXGI: Hiding actual GPU, reporting:
|
||||
info: vendor ID: 0x1002
|
||||
info: device ID: 0x73df
|
||||
warn: DxgiAdapter::QueryInterface: Unknown interface query
|
||||
warn: f0db4c7f-fe5a-42a2-bd62-f2a6cf6fc83e
|
||||
956.778:00d0:0124:info:vkd3d-proton:vkd3d_instance_apply_application_workarounds: Program name: "xenia_canary.exe" (hash: c099ade372da5277)
|
||||
956.778:00d0:0124:info:vkd3d-proton:vkd3d_instance_deduce_config_flags_from_environment: shader_cache is used, global_pipeline_cache is enforced.
|
||||
956.778:00d0:0124:info:vkd3d-proton:vkd3d_config_flags_init_once: VKD3D_CONFIG=''.
|
||||
956.781:00d0:0124:info:vkd3d-proton:vkd3d_get_vk_version: vkd3d-proton - applicationVersion: 3.0.1.
|
||||
956.781:00d0:0124:info:vkd3d-proton:vkd3d_instance_init: vkd3d-proton - build: 3b10bd7a7ec6a73.
|
||||
956.826:00d0:0124:info:vkd3d-proton:vkd3d_init_device_caps: Not all relevant pipeline stages are supported by EXT_dgc. Skipping.
|
||||
956.983:00d0:0124:info:vkd3d-proton:vkd3d_memory_info_decide_hvv_usage: Topology: Device heaps are split. Assuming small BAR situation.
|
||||
956.983:00d0:0124:info:vkd3d-proton:vkd3d_memory_info_upload_hvv_memory_properties: Topology: HVV usage is not allowed, using HOST_COHERENT for UPLOAD.
|
||||
956.983:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_get_bindless_flags: Device does not support VK_EXT_mutable_descriptor_type (or VALVE).
|
||||
956.983:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
956.983:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
956.983:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
956.983:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
956.983:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
956.983:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
956.983:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
956.985:00d0:0124:info:vkd3d-proton:d3d12_device_caps_init_shader_model: Enabling support for SM 6.6.
|
||||
956.985:00d0:0124:fixme:vkd3d-proton:d3d12_device_caps_init_feature_options1: TotalLaneCount = 2432, may be inaccurate.
|
||||
956.985:00d0:0124:info:vkd3d-proton:d3d12_device_determine_ray_tracing_tier: DXR support enabled.
|
||||
956.985:00d0:0124:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Remapping VKD3D_SHADER_CACHE to: vkd3d-proton.cache.
|
||||
956.985:00d0:0124:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Attempting to load disk cache from: vkd3d-proton.cache.
|
||||
956.985:00d0:013c:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Performing async setup of stream archive ...
|
||||
956.986:00d0:013c:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_merge: No write cache exists. No need to merge any disk caches.
|
||||
956.986:00d0:013c:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Merging pipeline libraries took 0.171 ms.
|
||||
956.986:00d0:013c:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Mapping read-only cache took 0.269 ms.
|
||||
956.986:00d0:013c:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Parsing stream archive took 0.028 ms.
|
||||
956.986:00d0:013c:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Done performing async setup of stream archive.
|
||||
957.031:00d0:0124:info:vkd3d-proton:vkd3d_get_vk_version: vkd3d-proton - applicationVersion: 3.0.1.
|
||||
957.031:00d0:0124:info:vkd3d-proton:vkd3d_instance_init: vkd3d-proton - build: 3b10bd7a7ec6a73.
|
||||
957.075:00d0:0124:info:vkd3d-proton:vkd3d_init_device_caps: Not all relevant pipeline stages are supported by EXT_dgc. Skipping.
|
||||
957.186:00d0:0124:info:vkd3d-proton:vkd3d_memory_info_decide_hvv_usage: Topology: Device heaps are split. Assuming small BAR situation.
|
||||
957.186:00d0:0124:info:vkd3d-proton:vkd3d_memory_info_upload_hvv_memory_properties: Topology: HVV usage is not allowed, using HOST_COHERENT for UPLOAD.
|
||||
957.186:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_get_bindless_flags: Device does not support VK_EXT_mutable_descriptor_type (or VALVE).
|
||||
957.186:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
957.186:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
957.186:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
957.186:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
957.186:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
957.186:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
957.186:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
957.188:00d0:0124:info:vkd3d-proton:d3d12_device_caps_init_shader_model: Enabling support for SM 6.6.
|
||||
957.188:00d0:0124:fixme:vkd3d-proton:d3d12_device_caps_init_feature_options1: TotalLaneCount = 2432, may be inaccurate.
|
||||
957.188:00d0:0124:info:vkd3d-proton:d3d12_device_determine_ray_tracing_tier: DXR support enabled.
|
||||
957.188:00d0:0124:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Remapping VKD3D_SHADER_CACHE to: vkd3d-proton.cache.
|
||||
957.188:00d0:0124:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Attempting to load disk cache from: vkd3d-proton.cache.
|
||||
957.188:00d0:0144:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Performing async setup of stream archive ...
|
||||
957.188:00d0:0144:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_merge: No write cache exists. No need to merge any disk caches.
|
||||
957.189:00d0:0144:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Merging pipeline libraries took 0.172 ms.
|
||||
957.189:00d0:0144:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Mapping read-only cache took 0.231 ms.
|
||||
957.189:00d0:0144:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Parsing stream archive took 0.029 ms.
|
||||
957.189:00d0:0144:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Done performing async setup of stream archive.
|
||||
957.195:00d0:0124:fixme:vkd3d-proton:d3d12_command_queue_init: Ignoring priority 0x64.
|
||||
warn: DXGIGetDebugInterface1: Stub
|
||||
info: DXGI: Hiding actual GPU, reporting:
|
||||
info: vendor ID: 0x1002
|
||||
info: device ID: 0x73df
|
||||
957.285:00d0:00d4:info:vkd3d-proton:dxgi_vk_swap_chain_init: Creating swapchain (1280 x 720), BufferCount = 3.
|
||||
957.295:00d0:00d4:info:vkd3d-proton:dxgi_vk_swap_chain_init_sync_objects: Ensure maximum latency of 3 frames with KHR_present_wait.
|
||||
957.295:00d0:00d4:info:vkd3d-proton:dxgi_vk_swap_chain_init_sleep_state: Timer interval is 1.0 ms.
|
||||
warn: DXGI: MakeWindowAssociation: Ignoring flags
|
||||
warn: DxgiOutput::WaitForVBlank: Inaccurate
|
||||
info: Setting timer interval to 1000 us
|
||||
957.806:00d0:014c:info:vkd3d-proton:dxgi_vk_swap_chain_recreate_swapchain_in_present_task: Got 3 swapchain images.
|
||||
958.343:00d0:0154:fixme:vkd3d-proton:vkd3d_texture_view_desc_fixup: Remapping 2D to 2D_ARRAY. Needs Vulkan spec tightening to match D3D12 properly.
|
||||
958.382:00d0:014c:info:vkd3d-proton:dxgi_vk_swap_chain_recreate_swapchain_in_present_task: Got 3 swapchain images.
|
||||
@@ -0,0 +1,89 @@
|
||||
warn: CreateDXGIFactory2: Ignoring flags
|
||||
info: Game: xenia_canary.exe
|
||||
info: DXVK: v2.7.1
|
||||
info: Build: x86_64 gcc 15.1.0
|
||||
info: Vulkan: Found vkGetInstanceProcAddr in winevulkan.dll @ 0x6ffffbfb4000
|
||||
info: Extension providers:
|
||||
info: Platform WSI
|
||||
info: OpenVR
|
||||
info: OpenVR: could not open registry key, status 2
|
||||
info: OpenVR: Failed to locate module
|
||||
info: OpenXR
|
||||
info: Enabled instance extensions:
|
||||
info: VK_EXT_surface_maintenance1
|
||||
info: VK_KHR_get_surface_capabilities2
|
||||
info: VK_KHR_surface
|
||||
info: VK_KHR_win32_surface
|
||||
info: Found device: NVIDIA GeForce GTX 1070 Ti (NVIDIA 580.159.3)
|
||||
info: Found device: llvmpipe (LLVM 20.1.2, 256 bits) (llvmpipe 25.2.8)
|
||||
info: Skipping: Software driver
|
||||
info: DXGI: Hiding actual GPU, reporting:
|
||||
info: vendor ID: 0x1002
|
||||
info: device ID: 0x73df
|
||||
warn: DxgiAdapter::QueryInterface: Unknown interface query
|
||||
warn: f0db4c7f-fe5a-42a2-bd62-f2a6cf6fc83e
|
||||
1217.108:00d4:0128:info:vkd3d-proton:vkd3d_instance_apply_application_workarounds: Program name: "xenia_canary.exe" (hash: c099ade372da5277)
|
||||
1217.108:00d4:0128:info:vkd3d-proton:vkd3d_instance_deduce_config_flags_from_environment: shader_cache is used, global_pipeline_cache is enforced.
|
||||
1217.108:00d4:0128:info:vkd3d-proton:vkd3d_config_flags_init_once: VKD3D_CONFIG=''.
|
||||
1217.111:00d4:0128:info:vkd3d-proton:vkd3d_get_vk_version: vkd3d-proton - applicationVersion: 3.0.1.
|
||||
1217.111:00d4:0128:info:vkd3d-proton:vkd3d_instance_init: vkd3d-proton - build: 3b10bd7a7ec6a73.
|
||||
1217.160:00d4:0128:info:vkd3d-proton:vkd3d_init_device_caps: Not all relevant pipeline stages are supported by EXT_dgc. Skipping.
|
||||
1217.307:00d4:0128:info:vkd3d-proton:vkd3d_memory_info_decide_hvv_usage: Topology: Device heaps are split. Assuming small BAR situation.
|
||||
1217.307:00d4:0128:info:vkd3d-proton:vkd3d_memory_info_upload_hvv_memory_properties: Topology: HVV usage is not allowed, using HOST_COHERENT for UPLOAD.
|
||||
1217.307:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_get_bindless_flags: Device does not support VK_EXT_mutable_descriptor_type (or VALVE).
|
||||
1217.307:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1217.307:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1217.307:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1217.307:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1217.307:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1217.307:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1217.307:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1217.309:00d4:0128:info:vkd3d-proton:d3d12_device_caps_init_shader_model: Enabling support for SM 6.6.
|
||||
1217.309:00d4:0128:fixme:vkd3d-proton:d3d12_device_caps_init_feature_options1: TotalLaneCount = 2432, may be inaccurate.
|
||||
1217.309:00d4:0128:info:vkd3d-proton:d3d12_device_determine_ray_tracing_tier: DXR support enabled.
|
||||
1217.309:00d4:0128:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Remapping VKD3D_SHADER_CACHE to: vkd3d-proton.cache.
|
||||
1217.309:00d4:0128:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Attempting to load disk cache from: vkd3d-proton.cache.
|
||||
1217.310:00d4:0140:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Performing async setup of stream archive ...
|
||||
1217.310:00d4:0140:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_merge: No write cache exists. No need to merge any disk caches.
|
||||
1217.310:00d4:0140:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Merging pipeline libraries took 0.166 ms.
|
||||
1217.310:00d4:0140:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Mapping read-only cache took 0.173 ms.
|
||||
1217.310:00d4:0140:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Parsing stream archive took 0.031 ms.
|
||||
1217.310:00d4:0140:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Done performing async setup of stream archive.
|
||||
1217.360:00d4:0128:info:vkd3d-proton:vkd3d_get_vk_version: vkd3d-proton - applicationVersion: 3.0.1.
|
||||
1217.360:00d4:0128:info:vkd3d-proton:vkd3d_instance_init: vkd3d-proton - build: 3b10bd7a7ec6a73.
|
||||
1217.403:00d4:0128:info:vkd3d-proton:vkd3d_init_device_caps: Not all relevant pipeline stages are supported by EXT_dgc. Skipping.
|
||||
1217.515:00d4:0128:info:vkd3d-proton:vkd3d_memory_info_decide_hvv_usage: Topology: Device heaps are split. Assuming small BAR situation.
|
||||
1217.515:00d4:0128:info:vkd3d-proton:vkd3d_memory_info_upload_hvv_memory_properties: Topology: HVV usage is not allowed, using HOST_COHERENT for UPLOAD.
|
||||
1217.515:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_get_bindless_flags: Device does not support VK_EXT_mutable_descriptor_type (or VALVE).
|
||||
1217.515:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1217.515:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1217.515:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1217.515:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1217.515:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1217.515:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1217.515:00d4:0128:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1217.516:00d4:0128:info:vkd3d-proton:d3d12_device_caps_init_shader_model: Enabling support for SM 6.6.
|
||||
1217.516:00d4:0128:fixme:vkd3d-proton:d3d12_device_caps_init_feature_options1: TotalLaneCount = 2432, may be inaccurate.
|
||||
1217.516:00d4:0128:info:vkd3d-proton:d3d12_device_determine_ray_tracing_tier: DXR support enabled.
|
||||
1217.516:00d4:0128:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Remapping VKD3D_SHADER_CACHE to: vkd3d-proton.cache.
|
||||
1217.516:00d4:0128:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Attempting to load disk cache from: vkd3d-proton.cache.
|
||||
1217.517:00d4:0148:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Performing async setup of stream archive ...
|
||||
1217.517:00d4:0148:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_merge: No write cache exists. No need to merge any disk caches.
|
||||
1217.517:00d4:0148:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Merging pipeline libraries took 0.157 ms.
|
||||
1217.517:00d4:0148:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Mapping read-only cache took 0.208 ms.
|
||||
1217.518:00d4:0148:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Parsing stream archive took 0.032 ms.
|
||||
1217.518:00d4:0148:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Done performing async setup of stream archive.
|
||||
1217.524:00d4:0128:fixme:vkd3d-proton:d3d12_command_queue_init: Ignoring priority 0x64.
|
||||
warn: DXGIGetDebugInterface1: Stub
|
||||
info: DXGI: Hiding actual GPU, reporting:
|
||||
info: vendor ID: 0x1002
|
||||
info: device ID: 0x73df
|
||||
1217.612:00d4:00d8:info:vkd3d-proton:dxgi_vk_swap_chain_init: Creating swapchain (1280 x 720), BufferCount = 3.
|
||||
1217.622:00d4:00d8:info:vkd3d-proton:dxgi_vk_swap_chain_init_sync_objects: Ensure maximum latency of 3 frames with KHR_present_wait.
|
||||
1217.622:00d4:00d8:info:vkd3d-proton:dxgi_vk_swap_chain_init_sleep_state: Timer interval is 1.0 ms.
|
||||
warn: DXGI: MakeWindowAssociation: Ignoring flags
|
||||
warn: DxgiOutput::WaitForVBlank: Inaccurate
|
||||
info: Setting timer interval to 1000 us
|
||||
1218.136:00d4:0150:info:vkd3d-proton:dxgi_vk_swap_chain_recreate_swapchain_in_present_task: Got 3 swapchain images.
|
||||
1218.678:00d4:0158:fixme:vkd3d-proton:vkd3d_texture_view_desc_fixup: Remapping 2D to 2D_ARRAY. Needs Vulkan spec tightening to match D3D12 properly.
|
||||
1218.699:00d4:0150:info:vkd3d-proton:dxgi_vk_swap_chain_recreate_swapchain_in_present_task: Got 3 swapchain images.
|
||||
@@ -0,0 +1,89 @@
|
||||
warn: CreateDXGIFactory2: Ignoring flags
|
||||
info: Game: xenia_canary.exe
|
||||
info: DXVK: v2.7.1
|
||||
info: Build: x86_64 gcc 15.1.0
|
||||
info: Vulkan: Found vkGetInstanceProcAddr in winevulkan.dll @ 0x6ffffbfb4000
|
||||
info: Extension providers:
|
||||
info: Platform WSI
|
||||
info: OpenVR
|
||||
info: OpenVR: could not open registry key, status 2
|
||||
info: OpenVR: Failed to locate module
|
||||
info: OpenXR
|
||||
info: Enabled instance extensions:
|
||||
info: VK_EXT_surface_maintenance1
|
||||
info: VK_KHR_get_surface_capabilities2
|
||||
info: VK_KHR_surface
|
||||
info: VK_KHR_win32_surface
|
||||
info: Found device: NVIDIA GeForce GTX 1070 Ti (NVIDIA 580.159.3)
|
||||
info: Found device: llvmpipe (LLVM 20.1.2, 256 bits) (llvmpipe 25.2.8)
|
||||
info: Skipping: Software driver
|
||||
info: DXGI: Hiding actual GPU, reporting:
|
||||
info: vendor ID: 0x1002
|
||||
info: device ID: 0x73df
|
||||
warn: DxgiAdapter::QueryInterface: Unknown interface query
|
||||
warn: f0db4c7f-fe5a-42a2-bd62-f2a6cf6fc83e
|
||||
1413.916:00d0:0124:info:vkd3d-proton:vkd3d_instance_apply_application_workarounds: Program name: "xenia_canary.exe" (hash: c099ade372da5277)
|
||||
1413.916:00d0:0124:info:vkd3d-proton:vkd3d_instance_deduce_config_flags_from_environment: shader_cache is used, global_pipeline_cache is enforced.
|
||||
1413.916:00d0:0124:info:vkd3d-proton:vkd3d_config_flags_init_once: VKD3D_CONFIG=''.
|
||||
1413.919:00d0:0124:info:vkd3d-proton:vkd3d_get_vk_version: vkd3d-proton - applicationVersion: 3.0.1.
|
||||
1413.919:00d0:0124:info:vkd3d-proton:vkd3d_instance_init: vkd3d-proton - build: 3b10bd7a7ec6a73.
|
||||
1413.963:00d0:0124:info:vkd3d-proton:vkd3d_init_device_caps: Not all relevant pipeline stages are supported by EXT_dgc. Skipping.
|
||||
1414.109:00d0:0124:info:vkd3d-proton:vkd3d_memory_info_decide_hvv_usage: Topology: Device heaps are split. Assuming small BAR situation.
|
||||
1414.109:00d0:0124:info:vkd3d-proton:vkd3d_memory_info_upload_hvv_memory_properties: Topology: HVV usage is not allowed, using HOST_COHERENT for UPLOAD.
|
||||
1414.109:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_get_bindless_flags: Device does not support VK_EXT_mutable_descriptor_type (or VALVE).
|
||||
1414.109:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1414.109:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1414.109:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1414.109:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1414.109:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1414.109:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1414.109:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1414.111:00d0:0124:info:vkd3d-proton:d3d12_device_caps_init_shader_model: Enabling support for SM 6.6.
|
||||
1414.111:00d0:0124:fixme:vkd3d-proton:d3d12_device_caps_init_feature_options1: TotalLaneCount = 2432, may be inaccurate.
|
||||
1414.111:00d0:0124:info:vkd3d-proton:d3d12_device_determine_ray_tracing_tier: DXR support enabled.
|
||||
1414.111:00d0:0124:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Remapping VKD3D_SHADER_CACHE to: vkd3d-proton.cache.
|
||||
1414.111:00d0:0124:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Attempting to load disk cache from: vkd3d-proton.cache.
|
||||
1414.112:00d0:013c:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Performing async setup of stream archive ...
|
||||
1414.112:00d0:013c:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_merge: No write cache exists. No need to merge any disk caches.
|
||||
1414.112:00d0:013c:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Merging pipeline libraries took 0.173 ms.
|
||||
1414.113:00d0:013c:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Mapping read-only cache took 0.276 ms.
|
||||
1414.113:00d0:013c:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Parsing stream archive took 0.029 ms.
|
||||
1414.113:00d0:013c:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Done performing async setup of stream archive.
|
||||
1414.157:00d0:0124:info:vkd3d-proton:vkd3d_get_vk_version: vkd3d-proton - applicationVersion: 3.0.1.
|
||||
1414.157:00d0:0124:info:vkd3d-proton:vkd3d_instance_init: vkd3d-proton - build: 3b10bd7a7ec6a73.
|
||||
1414.199:00d0:0124:info:vkd3d-proton:vkd3d_init_device_caps: Not all relevant pipeline stages are supported by EXT_dgc. Skipping.
|
||||
1414.310:00d0:0124:info:vkd3d-proton:vkd3d_memory_info_decide_hvv_usage: Topology: Device heaps are split. Assuming small BAR situation.
|
||||
1414.310:00d0:0124:info:vkd3d-proton:vkd3d_memory_info_upload_hvv_memory_properties: Topology: HVV usage is not allowed, using HOST_COHERENT for UPLOAD.
|
||||
1414.311:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_get_bindless_flags: Device does not support VK_EXT_mutable_descriptor_type (or VALVE).
|
||||
1414.311:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1414.311:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1414.311:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1414.311:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1414.311:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1414.311:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1414.311:00d0:0124:info:vkd3d-proton:vkd3d_bindless_state_add_binding: Device supports VK_EXT_descriptor_buffer!
|
||||
1414.312:00d0:0124:info:vkd3d-proton:d3d12_device_caps_init_shader_model: Enabling support for SM 6.6.
|
||||
1414.312:00d0:0124:fixme:vkd3d-proton:d3d12_device_caps_init_feature_options1: TotalLaneCount = 2432, may be inaccurate.
|
||||
1414.312:00d0:0124:info:vkd3d-proton:d3d12_device_determine_ray_tracing_tier: DXR support enabled.
|
||||
1414.312:00d0:0124:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Remapping VKD3D_SHADER_CACHE to: vkd3d-proton.cache.
|
||||
1414.312:00d0:0124:info:vkd3d-proton:vkd3d_pipeline_library_init_disk_cache: Attempting to load disk cache from: vkd3d-proton.cache.
|
||||
1414.313:00d0:0144:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Performing async setup of stream archive ...
|
||||
1414.313:00d0:0144:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_merge: No write cache exists. No need to merge any disk caches.
|
||||
1414.313:00d0:0144:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Merging pipeline libraries took 0.158 ms.
|
||||
1414.313:00d0:0144:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Mapping read-only cache took 0.256 ms.
|
||||
1414.313:00d0:0144:info:vkd3d-proton:vkd3d_pipeline_library_disk_cache_initial_setup: Parsing stream archive took 0.031 ms.
|
||||
1414.313:00d0:0144:info:vkd3d-proton:vkd3d_pipeline_library_disk_thread_main: Done performing async setup of stream archive.
|
||||
1414.319:00d0:0124:fixme:vkd3d-proton:d3d12_command_queue_init: Ignoring priority 0x64.
|
||||
warn: DXGIGetDebugInterface1: Stub
|
||||
info: DXGI: Hiding actual GPU, reporting:
|
||||
info: vendor ID: 0x1002
|
||||
info: device ID: 0x73df
|
||||
1414.406:00d0:00d4:info:vkd3d-proton:dxgi_vk_swap_chain_init: Creating swapchain (1280 x 720), BufferCount = 3.
|
||||
1414.416:00d0:00d4:info:vkd3d-proton:dxgi_vk_swap_chain_init_sync_objects: Ensure maximum latency of 3 frames with KHR_present_wait.
|
||||
1414.416:00d0:00d4:info:vkd3d-proton:dxgi_vk_swap_chain_init_sleep_state: Timer interval is 1.0 ms.
|
||||
warn: DXGI: MakeWindowAssociation: Ignoring flags
|
||||
warn: DxgiOutput::WaitForVBlank: Inaccurate
|
||||
info: Setting timer interval to 1000 us
|
||||
1414.927:00d0:014c:info:vkd3d-proton:dxgi_vk_swap_chain_recreate_swapchain_in_present_task: Got 3 swapchain images.
|
||||
1415.477:00d0:0154:fixme:vkd3d-proton:vkd3d_texture_view_desc_fixup: Remapping 2D to 2D_ARRAY. Needs Vulkan spec tightening to match D3D12 properly.
|
||||
1415.500:00d0:014c:info:vkd3d-proton:dxgi_vk_swap_chain_recreate_swapchain_in_present_task: Got 3 swapchain images.
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
47
audit-runs/iterate-2D-deferred-fixes/DEFERRED_FIXES.md
Normal file
47
audit-runs/iterate-2D-deferred-fixes/DEFERRED_FIXES.md
Normal file
@@ -0,0 +1,47 @@
|
||||
# iterate-2D Deferred Structural Fixes — Outcome
|
||||
|
||||
Branch `iterate-2D/subsystem-fixes`. After verification + the user's go-ahead:
|
||||
|
||||
## Issue 1 — 32-bit word-form ALU truncation (PPCBUG-020) — ✅ FIXED & LANDED
|
||||
Commit **341196a**. Confirmed load-bearing via runtime ours-vs-canary capture:
|
||||
Sylpheed's ms→LARGE_INTEGER converter `sub_824ACA88` (`clrldi; mulli r11,r11,-10000; std`)
|
||||
produced `0x00000000_FFFD8F00` in ours vs canary's correct `0xFFFFFFFF_FFFD8F00` for a 16 ms
|
||||
wait — a positive (absolute) timeout → ~26000× over-wait that froze the main frame loop.
|
||||
Fixed the 17 data-losing word-form ops (full 64-bit result, CA/OV/CR0 preserved byte-identical),
|
||||
updated 7 bug-asserting tests, re-baselined `sylpheed_n50m` (imports 40454→1790936), `sylpheed_n2m`
|
||||
unchanged. 660/660 + ignored oracle green; lockstep determinism preserved. Boot unwedged
|
||||
(parallel NtWaitForMultipleObjectsEx 94→30428; frozen worker/critical-section loops now run).
|
||||
VdSwap still 1 — rendering progression needs the out-of-scope acd1656 fixes (nt_create_event
|
||||
polarity + 2.AF), not in this branch.
|
||||
|
||||
## Issue 2 — Memory page-size per-region collapse — DEFERRED (verified NOT load-bearing)
|
||||
Sylpheed requests `MmAllocatePhysicalMemoryEx` with flags=0, alignment(r8)=0 (default); ours returns
|
||||
self-consistent 4K-aligned addresses and boots. ours has no 0xA0/0xC0/0xE0 physical-region model at
|
||||
all, so a faithful fix is a region-model rewrite that shifts every physical guest VA (golden-breaking,
|
||||
invalidates the audit-059 VA map) with no demonstrated boot benefit. A partial page-size-only change
|
||||
would shift VAs for zero correctness gain — do NOT do it piecemeal. Pursue only if a render-path
|
||||
struct is proven to depend on physical region/alignment.
|
||||
|
||||
## Issue 3 — Timing — LEFT (not load-bearing / determinism-coupled)
|
||||
- 3d DPC/APC: INERT — the only timer (NtSetTimerEx) passes a NULL APC routine; no
|
||||
NtQueueApcThread/KeInsertQueueDpc imported.
|
||||
- 3b timeout sign: was a SYMPTOM of Issue 1 (the "positive absolute" timeouts were mulli-corruption
|
||||
artifacts) — resolved by the Issue 1 fix.
|
||||
- 3a/3c timebase/skew: timebase = instruction-count IS the deterministic lockstep clock; must not
|
||||
become wallclock. 2.AF deadline-drain already present. Not load-bearing for Sylpheed.
|
||||
|
||||
## Issue 4 — VFS synthesized-success-on-miss — LEFT (risky / coupled to Issue 1 trajectory)
|
||||
The synthesis fallback handles a MIX (writable-partition probes partition0/Cache0 + a genuine disc
|
||||
miss dat/files.tbl, verified absent from the ISO). Canary doesn't fire XamShowDirtyDiscErrorUI during
|
||||
boot (the one "DirtyDisc" log hit is the import-table declaration). Not cleanly separable without
|
||||
heuristic disc-vs-partition routing. Re-verify on the corrected post-Issue-1 (and post-acd1656)
|
||||
trajectory before changing.
|
||||
|
||||
## Issue 5 — Mutant object — SKIPPED (verified unused)
|
||||
Sylpheed's XEX import table contains NO mutant symbols (NtCreateMutant/NtReleaseMutant/KeReleaseMutant/
|
||||
KeInitializeMutant/NtQueryMutant) — the game cannot call them; unimplemented=0 across boot. A correct
|
||||
implementation needs mutant hand-off semantics + an owner-type redesign (the existing
|
||||
`Mutex { owner: Option<u8> }` tracks a HW slot, not a thread) in the determinism-critical wait path,
|
||||
for code that never executes. Per the mandate's skip-if-unused criterion, left unimplemented. Can be
|
||||
added on request as a pure canary-parity / future-title feature (determinism-safe since no Sylpheed
|
||||
mutant ever exists at runtime).
|
||||
@@ -7,7 +7,11 @@ build = "build.rs"
|
||||
|
||||
[dependencies]
|
||||
xenia-xex = { workspace = true }
|
||||
xenia-cpu = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
anyhow = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
rusqlite = { workspace = true }
|
||||
metrics = { workspace = true }
|
||||
duckdb = { workspace = true }
|
||||
msvc-demangler = "0.11"
|
||||
|
||||
570
crates/xenia-analysis/SCHEMA.md
Normal file
570
crates/xenia-analysis/SCHEMA.md
Normal file
@@ -0,0 +1,570 @@
|
||||
# `xenia-analysis` schema reference
|
||||
|
||||
Authoritative documentation for the DuckDB tables and SQL views produced by
|
||||
`xenia-rs dis --db sylpheed.db`. Track schema changes here alongside any
|
||||
update to the `db_schema_golden` test fixture.
|
||||
|
||||
The base + disasm tables (`metadata`, `sections`, `imports`, `functions`,
|
||||
`labels`, `instructions`, `xrefs`, opt-in `exec_trace` / `import_calls` /
|
||||
`branch_trace`) are documented inline in the `src/db.rs` doc comment. This file
|
||||
collects layered analysis additions and forward-work notes.
|
||||
|
||||
---
|
||||
|
||||
## Layer M1 — `.pdata` boundary correction (landed)
|
||||
|
||||
### Schema additions
|
||||
- `functions.pdata_validated BOOLEAN NOT NULL` — `true` when the row's
|
||||
`address` matches a `RUNTIME_FUNCTION.BeginAddress` from `.pdata`. Linker
|
||||
ground truth.
|
||||
- `functions.pdata_length BIGINT NULL` — `function_length` (bytes) from the
|
||||
matching pdata entry; `NULL` when the row is prologue-only.
|
||||
- New table `pdata_entries(begin_address BIGINT PRIMARY KEY, end_address
|
||||
BIGINT, function_length BIGINT, prolog_length BIGINT, flags BIGINT)` — every
|
||||
parsed `.pdata` `RUNTIME_FUNCTION` entry (raw, before any merge with
|
||||
prologue analysis).
|
||||
- Index `idx_functions_pdata_validated` on `functions(pdata_validated)`.
|
||||
|
||||
### What this layer does
|
||||
- Parses `.pdata` 8-byte `RUNTIME_FUNCTION` entries (PowerPC PE32 layout):
|
||||
word 0 `BeginAddress` (absolute VA), word 1 packed
|
||||
`{prolog_length:8, function_length:22, flags:2}`, both big-endian.
|
||||
- Unions pdata `BeginAddress` values into the function-candidate set fed to
|
||||
the prologue walker, so functions our prologue heuristic missed still get
|
||||
rows.
|
||||
- When pdata supplies a longer `function_length` than the prologue walk
|
||||
found, extends `end_address` to the pdata-implied end (catches mis-split
|
||||
where the walker stopped at an early `blr`).
|
||||
- After the walker, performs a forward pass that trims `function.end` to the
|
||||
next start when they overlap (catches mis-merge where one row spanned two
|
||||
prologues — the audit-031 `sub_824D23B0` / `sub_824D29F0` case).
|
||||
|
||||
### What this layer does NOT do
|
||||
- Does not adjust prolog-derived `frame_size` / `saved_gprs` from `.pdata`'s
|
||||
`prolog_length` field — those remain prologue-only inferences.
|
||||
- Does not classify functions further than the existing `is_leaf` /
|
||||
`is_saverestore` columns. Class membership is M3.
|
||||
- Does not detect functions whose entries are missing from BOTH `.pdata`
|
||||
and the bl-target scan (extremely rare; would require executable-byte
|
||||
linear sweep).
|
||||
|
||||
### Reference docs
|
||||
- Microsoft PE32+ exception data spec for PowerPC RUNTIME_FUNCTION.
|
||||
- xenia-canary `src/xenia/cpu/xex_module.cc:1570-1587` — canary's reference
|
||||
parser (extracts `BeginAddress` only; we additionally decode word 1).
|
||||
|
||||
### Validation queries
|
||||
```sql
|
||||
-- All pdata entries found
|
||||
SELECT COUNT(*) FROM pdata_entries; -- ~23073 for Sylpheed
|
||||
-- Functions cross-validated against pdata
|
||||
SELECT COUNT(*) FROM functions WHERE pdata_validated;
|
||||
-- Functions detected ONLY by prologue (orphans of pdata)
|
||||
SELECT COUNT(*) FROM functions WHERE NOT pdata_validated;
|
||||
-- Pdata orphans NOT yet in functions (should be 0 after this layer)
|
||||
SELECT COUNT(*) FROM pdata_entries p
|
||||
LEFT JOIN functions f ON f.address = p.begin_address
|
||||
WHERE f.address IS NULL;
|
||||
-- Audit-031 mis-merge resolved: 0x824D29F0 should have its own row
|
||||
SELECT name FROM functions WHERE address = 2186095088; -- 0x824D29F0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Layer M2 — MSVC C++ name demangler (landed)
|
||||
|
||||
### Schema additions
|
||||
- New table `demangled_names(address BIGINT NULL, mangled VARCHAR NOT NULL,
|
||||
raw_demangled VARCHAR NOT NULL, namespace_path VARCHAR NULL,
|
||||
class_name VARCHAR NULL, method_name VARCHAR NULL,
|
||||
params_signature VARCHAR NULL)`.
|
||||
- Indices on `address`, `class_name`, `method_name`.
|
||||
|
||||
### What this layer does
|
||||
- Wraps `msvc_demangler::demangle` (a Rust port of LLVM's
|
||||
`MicrosoftDemangle.cpp`) and splits the formatted output into structured
|
||||
fields via a heuristic top-level parser (handles templates and nested parens
|
||||
correctly).
|
||||
- Populates `demangled_names` from any label whose name starts with `?` plus
|
||||
any import name that happens to be mangled (defensive — typical kernel
|
||||
imports use C names).
|
||||
|
||||
### What this layer does NOT do
|
||||
- Does not parse the AST returned by `msvc_demangler::parse` — uses the formatted
|
||||
string and a heuristic split. Adequate for typical class member functions
|
||||
and RTTI strings; exotic template / lambda forms still get `raw_demangled`
|
||||
populated but may have NULL structured fields.
|
||||
- Does not yet ingest RTTI strings discovered in `.rdata` — that's M3's job;
|
||||
M3 will append rows to this table at the addresses where it finds RTTI
|
||||
TypeDescriptors.
|
||||
|
||||
### Reference docs
|
||||
- `msvc-demangler` crate (`https://docs.rs/msvc-demangler/0.11`).
|
||||
- LLVM `MicrosoftDemangle.cpp` (the parser this crate ports).
|
||||
|
||||
## Layer M3 — Vtable + RTTI detection (landed)
|
||||
|
||||
### Schema additions
|
||||
- `vtables(address PK, length, col_address NULL, class_name, rtti_present,
|
||||
base_classes_json NULL)` — every detected static vtable.
|
||||
- `methods(vtable_address, slot, function_address, mangled_name NULL,
|
||||
demangled_name NULL, PRIMARY KEY (vtable_address, slot))` — one row per
|
||||
method slot.
|
||||
- `classes(name PK, vtable_address, rtti_present, base_classes_json NULL)` —
|
||||
deduped by class name (first-detected vtable wins).
|
||||
- Indices: `methods.function_address`, `classes.rtti_present`.
|
||||
|
||||
### What this layer does
|
||||
- Walks `.rdata` and `.data` looking for runs of ≥3 consecutive 4-byte BE
|
||||
values where each value is a known function start (from M1's corrected
|
||||
`functions` table). One- and two-method vtables are intentionally rejected to
|
||||
control false-positive rate.
|
||||
- Attempts the MSVC RTTI walk `vtable[-1] → CompleteObjectLocator → TypeDescriptor`
|
||||
for each candidate. When successful, the demangled `class ClassName`
|
||||
string fills `class_name` and a best-effort
|
||||
`RTTIClassHierarchyDescriptor` walk fills `base_classes_json` (JSON array
|
||||
of base class names).
|
||||
- Falls back to `ANON_Class_<8-hex>` keyed by FNV-1a hash of the sorted
|
||||
method-PC tuple when RTTI is absent (typical for shipped game binaries).
|
||||
Identical vtables across the binary (multiple instances) collapse to the
|
||||
same anonymous name.
|
||||
|
||||
### What this layer does NOT do
|
||||
- Vtables built at runtime in heap-allocated memory (e.g. by ctors copying
|
||||
static templates) are out of scope — only static `.rdata`/`.data` content.
|
||||
- Multiple-inheritance "extra" vftables (one per base subobject) are detected
|
||||
as independent vtables with no link between them.
|
||||
- Inheritance-tree walking beyond `RTTIClassHierarchyDescriptor`'s direct
|
||||
base list is not attempted.
|
||||
|
||||
### Reference docs
|
||||
- openrce.org "Reversing Microsoft Visual C++" — RTTI layout articles
|
||||
(CompleteObjectLocator at vtable[-1]; TypeDescriptor at COL+0xC; mangled
|
||||
name at TD+0x8).
|
||||
|
||||
## Layer M4 — Class-aware probe targeting (landed)
|
||||
|
||||
CLI extension only — no schema changes. The probe-token grammar adds three
|
||||
symbolic forms on top of the existing `0xADDR` literal:
|
||||
|
||||
- `Class::method` — joins `classes` × `methods` × `demangled_names` to find
|
||||
every PC whose vtable belongs to that class and whose demangled
|
||||
`method_name` matches.
|
||||
- `Class::*` — joins `classes` × `methods` to find every method PC of that
|
||||
class.
|
||||
- `function_name` — falls back to `functions.name` lookup for free functions
|
||||
/ saverestore stubs / labels.
|
||||
|
||||
Numeric tokens never touch the DB (preserves zero-IO fast path; lockstep
|
||||
digest unaffected). Symbolic tokens require the DuckDB at `--probe-db PATH`
|
||||
or `XENIA_PROBE_DB`; default is `sylpheed.db` next to the .iso when present.
|
||||
|
||||
Resolution happens BEFORE guest exec begins, so it cannot affect the
|
||||
lockstep digest.
|
||||
|
||||
See `crates/xenia-analysis/src/lookup.rs`.
|
||||
|
||||
---
|
||||
|
||||
## Layer M5 — Indirect-dispatch reachability (landed)
|
||||
|
||||
### Schema additions
|
||||
- New value `'ind_call'` in the `xrefs.kind` set.
|
||||
- New SQL view `v_indirect_reachability_from_entry` — strict superset of
|
||||
`v_reachability_from_entry`, taking `ind_call` edges in the BFS.
|
||||
|
||||
### What this layer does
|
||||
- Walks each `FuncAnalysis.functions` entry with a per-basic-block register
|
||||
tracker. Recognises the canonical static-vtable pattern:
|
||||
`lis+addi → lwz off(rA) → mtctr → bcctrl`, where `rA` ends up holding a
|
||||
known vtable's start address from M3.
|
||||
- Honours the PowerPC ABI: `bl`-style calls (op 18 / 16 with LK=1) clobber
|
||||
volatile r0..r12 + ctr but preserve non-volatile r13..r31, so a vtable
|
||||
pointer parked in r30/r31 before a call survives.
|
||||
- Treats every M3 `loc_*` label as a basic-block boundary (kills register
|
||||
state) so jump-IN paths cannot induce false positives.
|
||||
|
||||
### What this layer does NOT do (and observed impact)
|
||||
- Vtable pointer loaded from a `this`-pointer field
|
||||
(`lwz r_vt, off(rA)` where `rA = this`) — by far the dominant pattern in
|
||||
real C++ — is unresolvable without alias / points-to analysis.
|
||||
- On Sylpheed: the layer detects 0 edges. The binary's 1,001 lis+addi
|
||||
references into vtables are mostly constructor-side **vptr writes**
|
||||
(`stw rVtable, vptr_offset(this)`), not direct dispatches. The renderer
|
||||
hunt's audit-009 cluster therefore needs M5.5 (since landed; see below) with `this`-flow
|
||||
tracking before this layer surfaces it.
|
||||
|
||||
### Reference docs
|
||||
- IBM PowerPC ABI: register-save convention (volatile r0..r12 + ctr,
|
||||
non-volatile r13..r31).
|
||||
|
||||
## Layer M7 — String / constant-pool detection (landed)
|
||||
|
||||
### Schema additions
|
||||
- New table `strings(address PK, encoding, length, content)`.
|
||||
- Index `idx_strings_encoding`.
|
||||
|
||||
### What this layer does
|
||||
- Scans `.rdata` for runs of length ≥ 6 of printable ASCII bytes followed by
|
||||
a NUL terminator.
|
||||
- Scans `.rdata` for UTF-16LE runs of length ≥ 6 code units (printable-ASCII
|
||||
basic plane only) followed by a u16 NUL terminator.
|
||||
- Cross-reference is implicit: existing `xrefs.kind='ref'` rows whose
|
||||
`target` exactly equals some `strings.address` value identify the referencing
|
||||
PCs. SQL: `SELECT s.content, x.source FROM xrefs x JOIN strings s
|
||||
ON s.address = x.target WHERE x.kind='ref'`.
|
||||
|
||||
### What this layer does NOT do
|
||||
- No UTF-8 multibyte / non-ASCII basic plane in either encoding.
|
||||
- No `.data` scan (read-only-section bias).
|
||||
- No multi-byte CJK encodings — Japanese text in localised builds may be
|
||||
represented in shift_jis / utf-8 with non-printable bytes that this
|
||||
scanner skips.
|
||||
|
||||
### Sylpheed yield
|
||||
- 6,311 ASCII strings (including full embedded HLSL shader source).
|
||||
- 0 UTF-16LE strings (binary uses ASCII / native CJK encoding).
|
||||
- 9,132 lis+addi sites cross-reference into the detected strings — naming
|
||||
the source PCs that reference each string.
|
||||
|
||||
## Layer M6 — Extended store-class xrefs + `addr_mode` column (landed)
|
||||
|
||||
### Schema additions
|
||||
- `xrefs.addr_mode VARCHAR NULL` — sub-classifies how the source instruction
|
||||
computes its target. NULL for control-flow edges (call / ind_call / j /
|
||||
br); one of the following tags for data edges:
|
||||
- `d_form` — standard signed-16 displacement (lwz/stw/lfs/stfs/etc.)
|
||||
- `lis_addi` — address materialised via `lis + addi` register tracking
|
||||
- `lis_ori` — address materialised via `lis + ori`
|
||||
- `multiword` — `lmw / stmw` (one xref per slot; up to 32-rS slots)
|
||||
- `x_form_indexed` — `stwx / stbx / sthx / stwux / stbux / sthux / stdx /
|
||||
stdux / lwzx / lbzx / lhzx / lhax / lwzux / lbzux / lhzux / lhaux / ldx /
|
||||
ldux` — emitted only when both rA and rB are tracked constants
|
||||
- `x_form_byterev` — `stwbrx / sthbrx / lwbrx / lhbrx`
|
||||
- `atomic` — `stwcx. / stdcx.` reservation-conditional stores
|
||||
- `dcbz` — cache-line clear (32-byte zero at rA+rB)
|
||||
- Index `idx_xrefs_addr_mode`.
|
||||
|
||||
### What this layer does
|
||||
- Tags every existing data xref with its addressing mode (`d_form` for the
|
||||
bulk; `lis_addi` / `lis_ori` for the lift-and-add cases that produce
|
||||
DataRef rows).
|
||||
- Adds new dispatch for opcode 47 (`stmw`) and 46 (`lmw`), expanding to
|
||||
per-slot DataWrite / DataRead rows.
|
||||
- Adds new dispatch for opcode 31 X-form: stores, atomic, byte-reverse,
|
||||
dcbz. X-form rows are emitted ONLY when both rA and rB resolve to known
|
||||
constants (otherwise the address is runtime-dependent and we skip).
|
||||
|
||||
### What this layer does NOT do
|
||||
- VMX / VMX128 vector stores (opcode 31 with vector XO codes) are not
|
||||
emitted — they always have register-indexed addresses that the
|
||||
lis+addi tracker can't usually resolve, and detecting them adds noise
|
||||
without improving target resolution. (Superseded for plain VMX loads/stores by the "Layer VMX" follow-up below.)
|
||||
- The dominant runtime `stwx` pattern (rA = base, rB = runtime index) is
|
||||
not resolved — by design; mem-watch covers the runtime side per VERIFY-B.
|
||||
|
||||
### Sylpheed yield
|
||||
- 28,834 `lis_addi` refs, 18,485 `d_form` reads, 3,288 `d_form` writes —
|
||||
the existing baseline now properly tagged.
|
||||
- **442 newly-detected `x_form_indexed` reads** — primarily lwzx/lhzx
|
||||
reads from in-table dispatch (each pair (rA,rB) resolved statically).
|
||||
- **40 newly-detected `atomic` writes** — every `stwcx.` site with a
|
||||
resolvable address; useful for reservation-table audits.
|
||||
- 9 `lis_ori` refs.
|
||||
- 0 multiword / dcbz / byterev — these instructions exist in the binary
|
||||
but are not in lis+addi-tracked code paths.
|
||||
|
||||
## Layer M8 + M11 — Function-pointer arrays beyond vtables (landed)
|
||||
|
||||
### Schema additions
|
||||
- New table `function_pointer_arrays(address PK, length, kind)` where
|
||||
`kind` is `'vtable'` (M3 re-emit), `'dispatch_table'` (M8), or
|
||||
`'static_init'` (M11).
|
||||
- New table `function_pointer_array_entries(array_address, slot,
|
||||
function_address, PRIMARY KEY (array_address, slot))` — one row per
|
||||
slot of every detected array (vtable + non-vtable).
|
||||
- Indices on `function_pointer_arrays.kind` and
|
||||
`function_pointer_array_entries.function_address`.
|
||||
|
||||
### What this layer does
|
||||
- Walks `.rdata` (only — `.data` produces too many false positives) for
|
||||
runs of ≥ 2 consecutive 4-byte BE values where each value is a known
|
||||
function entry from M1's `functions` table.
|
||||
- Skips runs whose start matches an M3 vtable head — those are re-emitted
|
||||
in this table with `kind='vtable'` for unified queries but not
|
||||
re-classified.
|
||||
- Heuristically classifies non-vtable runs:
|
||||
- `static_init` (M11): every entry's first instruction is `mfspr r12, LR`
|
||||
AND the next is `stwu r1, -N(r1)` with `N ≤ 0x80` (or a save-stub `bl`).
|
||||
Mirrors the typical C++ static-initialiser prologue.
|
||||
- `dispatch_table` (M8): everything else.
|
||||
|
||||
### What this layer does NOT do
|
||||
- Does not parse symbol-table-bracketed regions like `__xc_a` / `__xc_z`
|
||||
/ `__xi_a` / `__xi_z` directly — Sylpheed's symbol table is stripped.
|
||||
- Does not chain multi-segment static-init drivers; future M11.5 could
|
||||
walk the entry-point's static-init driver call chain to surface
|
||||
ground-truth ctor PCs.
|
||||
- 2-slot runs in `.rdata` may be false positives where two struct fields
|
||||
happen to alias function VAs; downstream queries should use a length
|
||||
filter (`WHERE length >= 3`) when high precision matters.
|
||||
|
||||
### Sylpheed yield
|
||||
- 722 vtables (M3 re-emit) + 388 dispatch_tables = 1,110 arrays in
|
||||
`function_pointer_arrays`.
|
||||
- 0 static_init detected — Sylpheed's ctors don't all match the
|
||||
conservative prologue heuristic. Lengths concentrate at 2 slots
|
||||
(typical of switch-case jump tables).
|
||||
|
||||
## Layer M9 — `has_eh` from `.pdata` exception flag (landed)
|
||||
|
||||
### Schema additions
|
||||
- `functions.has_eh BOOLEAN NOT NULL` — true when `.pdata`'s exception-
|
||||
handler-present bit (bit 31 of word 1, the high bit) is set.
|
||||
- Index `idx_functions_has_eh`.
|
||||
|
||||
### What this layer does
|
||||
- Derived directly from M1's already-parsed `pdata.flags` bit field (no
|
||||
new parsing). The bit was always available in `pdata_entries.flags`;
|
||||
this layer surfaces it as a first-class column on `functions`.
|
||||
|
||||
### What this layer does NOT do
|
||||
- Does not parse the actual `__CxxFrameHandler` / `__C_specific_handler`
|
||||
scope-table records that the exception bit gates. Walking those tables
|
||||
would let us name try/catch ranges and per-state cleanup actions, but
|
||||
is out of scope for a derive-only milestone.
|
||||
|
||||
### Sylpheed yield
|
||||
- 2,975 of 23,073 pdata-validated functions have `has_eh=true` (12.9%) —
|
||||
plausible MSVC C++ EH coverage rate. Largest EH function: 26,328 bytes
|
||||
(`sub_823518F0`).
|
||||
|
||||
## Layer M10 — `.tls` section / TLS directory (landed)
|
||||
|
||||
### Schema additions
|
||||
- New table `tls_info(raw_data_start, raw_data_end, index_address,
|
||||
callback_array, zero_fill_size, characteristics)` — at most one row
|
||||
(the IMAGE_TLS_DIRECTORY32).
|
||||
- New table `tls_callbacks(slot PK, address)` — one row per resolved TLS
|
||||
callback function.
|
||||
|
||||
### What this layer does
|
||||
- Reads the first 24 bytes of the `.tls` section as an
|
||||
`IMAGE_TLS_DIRECTORY32` and walks the zero-terminated callback array.
|
||||
- All addresses stored as absolute VAs.
|
||||
|
||||
### What this layer does NOT do
|
||||
- Does not parse the raw TLS template content (the variable initialiser
|
||||
block); just records its start/end VAs.
|
||||
|
||||
### Sylpheed yield
|
||||
- 0 rows — Sylpheed has no `.tls` section. Infrastructure ready for any
|
||||
binary that uses `__declspec(thread)` storage.
|
||||
|
||||
## Layer M12 — `--lr-trace` runtime canary-diff harness (landed)
|
||||
|
||||
### Runtime additions (no DB)
|
||||
- New CLI flag `--lr-trace=PC[,PC,...]` on `exec` — comma-separated PCs
|
||||
to capture as JSONL records on every fire. Symbolic tokens (`Class::method`)
|
||||
resolve via M4's lookup against `--probe-db`. Settable via
|
||||
`XENIA_LR_TRACE`.
|
||||
- New CLI flag `--lr-trace-out=PATH` — writes JSONL to a file (one
|
||||
record per line). Stdout when omitted. Settable via `XENIA_LR_TRACE_OUT`.
|
||||
- New kernel state fields `lr_trace_pcs: HashSet<u32>` +
|
||||
`lr_trace_writer: Option<Mutex<File>>` and helper
|
||||
`KernelState::fire_lr_trace_if_match(hw_id)` invoked from the
|
||||
per-instruction probe slot.
|
||||
|
||||
### JSONL record fields
|
||||
`pc, tid, hw, cycle, r3, r4, r5, r6, lr` — superset of what
|
||||
xenia-canary's `--log_lr_on_pc` patch emits, with a cycle counter added
|
||||
for cross-run reproducibility.
|
||||
|
||||
### What this layer does NOT do
|
||||
- Does not capture VMX / FP register state (only GPRs r3..r6).
|
||||
- Does not buffer / batch records — one `write_all` per fire. For
|
||||
high-frequency probes (e.g. tight loops at >1M fires/sec), redirect
|
||||
to a file and use an SSD.
|
||||
|
||||
### Determinism
|
||||
Lockstep digest unaffected: probe firing happens after the per-instr
|
||||
hooks for ctor/branch probes and only emits side-channel output. Verified
|
||||
end-of-session: `check sylpheed.iso --stable-digest -n 2M` ×2 produced
|
||||
byte-identical digests (`instructions=2000005`).
|
||||
|
||||
---
|
||||
|
||||
## Layer M5.5 — `this`-flow indirect-dispatch resolution (landed)
|
||||
|
||||
### Schema additions
|
||||
- New table `vptr_writes(writer_pc, vtable_address, vptr_offset, writer_function)` —
|
||||
every detected `stw rVtable, vptr_off(rThis)` site.
|
||||
- New table `indirect_dispatch_sites(dispatch_pc PK, vptr_offset, slot, candidate_count)` —
|
||||
one row per resolved dispatch.
|
||||
- New table `indirect_dispatch_candidates(dispatch_pc, vtable_address, method_address)` —
|
||||
one row per (dispatch × candidate vtable). Joined to existing
|
||||
`xrefs.kind='ind_call'` edges (one ind_call row per candidate).
|
||||
- New indices on `vptr_writes.vtable_address`, `vptr_writes.vptr_offset`,
|
||||
`indirect_dispatch_candidates.method_address`,
|
||||
`indirect_dispatch_candidates.vtable_address`,
|
||||
`indirect_dispatch_sites.(vptr_offset, slot)`.
|
||||
|
||||
### What this layer does (class-membership inference)
|
||||
1. **Phase 1 — vptr-write scan**: walk every function with the lis+addi
|
||||
tracker; whenever `stw rA, off(rB)` writes a known M3 vtable address,
|
||||
record `(vtable_addr, vptr_offset, writer_pc)`.
|
||||
2. **Phase 2 — invert**: build `vtables_by_offset[vptr_off] = {V}` for the
|
||||
set of vtables ever written at that offset.
|
||||
3. **Phase 3 — dispatch detection**: walk back ≤16 instructions from each
|
||||
`bcctrl`/`bctrl` (i.e. `bcctr` with LK=1), find the canonical
|
||||
`lwz vt, off(this); lwz fn, slot*4(vt); mtctr fn` chain. Extract
|
||||
`(vptr_off, slot)`. Bail on register clobber, branch, or label
|
||||
boundary.
|
||||
4. **Phase 4 — emit**: for each `(dispatch_pc, vptr_off, slot)`, emit one
|
||||
`xrefs.kind='ind_call'` row per candidate vtable that has a
|
||||
matching slot. Multi-candidate rows are an over-approximation.
|
||||
|
||||
### What this layer does NOT do
|
||||
- No alias resolution at multi-candidate sites — emits one edge per
|
||||
matching vtable. Downstream queries should filter
|
||||
`indirect_dispatch_sites WHERE candidate_count=1` for high-confidence
|
||||
edges.
|
||||
- No flow-sensitive analysis: register state is killed at every label
|
||||
(basic-block boundary) and at `bl`/`bcl` calls (volatile r0..r12 +
|
||||
ctr). We do NOT propagate values across calls in the chain-walker.
|
||||
- No tracking of vptr writes via X-form indexed (`stwx`), VMX, or
|
||||
multiword stores. Only D-form `stw rA, off(rB)`.
|
||||
- Does not synthesise vptr writes for inlined / elided constructors.
|
||||
If a class never has a writer at offset `vptr_off`, dispatches
|
||||
through that offset find no candidates.
|
||||
|
||||
### Sylpheed yield
|
||||
- 567 vptr writes covering 214 distinct vtables (~30% of M3's 722).
|
||||
- 29 distinct vptr offsets used; offset 0 dominates (501/567 = 88%,
|
||||
single-inheritance).
|
||||
- **6,842 dispatch sites resolved**: 97 single-candidate
|
||||
(high-confidence) + 6,745 multi-candidate (over-approximation).
|
||||
- 687,963 `ind_call` xref rows total.
|
||||
- **2,746 newly-reachable functions** via the M5 BFS view
|
||||
(`v_indirect_reachability_from_entry`) compared to call/j/br alone.
|
||||
- Audit-009 cluster (renderer plateau): functions newly visible
|
||||
include `0x823BC9E0`, `0x823BC290`, `0x823BC5A0`, `0x823BB158`,
|
||||
`0x823BB1E0`, `0x823BCAF0`, `0x823BC4C8` — actionable starting
|
||||
points for the cluster's reachability hunt.
|
||||
|
||||
### Reference docs
|
||||
- IBM PowerPC ABI (volatile/non-volatile register partition).
|
||||
- Itanium C++ ABI on vtable layout (offset-from-`this` model adapted
|
||||
by MSVC for Win32 PPC).
|
||||
|
||||
## Layer M9.5 — `__CxxFrameHandler` scope-table parsing (landed)
|
||||
|
||||
### Schema additions
|
||||
- New table `eh_funcinfo(address PK, magic, max_state, p_unwind_map,
|
||||
n_try_blocks, p_try_block_map, n_ip_map_entries, p_ip_to_state_map,
|
||||
p_es_type_list, eh_flags)`.
|
||||
- New table `eh_unwind_map(funcinfo_address, state_index, to_state, action_pc,
|
||||
PRIMARY KEY (funcinfo_address, state_index))`.
|
||||
- New table `eh_try_blocks(funcinfo_address, try_index, try_low, try_high,
|
||||
catch_high, n_catches, p_handler_array,
|
||||
PRIMARY KEY (funcinfo_address, try_index))`.
|
||||
|
||||
### What this layer does
|
||||
- Magic-scans `.rdata` for the documented MSVC FuncInfo signatures
|
||||
(0x19930520 / 0x19930521 / 0x19930522), reading 4-byte BE values
|
||||
on 4-byte alignment.
|
||||
- Sanity-checks `max_state` ≤ 10,000, `n_try_blocks` ≤ 1,000, all
|
||||
internal pointers landing in valid sections.
|
||||
- Walks `pUnwindMap` (8-byte UnwindMapEntry) and `pTryBlockMap`
|
||||
(20-byte TryBlockMapEntry) into one row each.
|
||||
|
||||
### What this layer does NOT do
|
||||
- Does not associate FuncInfo records with their owning function via
|
||||
the `bl __CxxFrameHandler` registration site — joins to `functions`
|
||||
by best-effort PC-range queries. A future M9.6 can chase the
|
||||
registration to make the link explicit.
|
||||
- Does not parse `pHandlerArray` (per-try-block catch type info).
|
||||
|
||||
### Sylpheed yield
|
||||
- 2,588 FuncInfo records (all version 0x19930522).
|
||||
- 10,019 unwind-map entries.
|
||||
- 315 try-blocks across the binary.
|
||||
|
||||
## Layer M11.5 — Static-init driver chain detection (landed)
|
||||
|
||||
### Schema additions
|
||||
- Reuses existing `function_pointer_arrays` table — drivers' arrays are
|
||||
emitted with `kind='static_init'`, replacing M11's prologue-heuristic
|
||||
output where the structurally-grounded pattern fires.
|
||||
|
||||
### What this layer does
|
||||
- Walks every detected function looking for the canonical `_initterm`-
|
||||
style loop: `lwz cursor; mtctr; bcctrl; addi cursor, cursor, 4`
|
||||
bounded by a comparison against another constant register.
|
||||
- Extracts `(array_start, array_end)` from the cursor's initial
|
||||
constant value and the end-comparand register.
|
||||
- Reads the array, validates each entry against
|
||||
`func_analysis.functions`, and emits the array as `static_init`.
|
||||
|
||||
### What this layer does NOT do
|
||||
- Doesn't handle drivers with multiple back-to-back trampoline loops.
|
||||
- Doesn't follow `_initterm_e` return-status semantics — both
|
||||
`_initterm` and `_initterm_e` match if the loop body matches.
|
||||
|
||||
### Sylpheed yield
|
||||
- 0 drivers detected. Sylpheed's static-init structure does not match
|
||||
the canonical CRT loop pattern; the binary likely calls ctors via
|
||||
another mechanism (inline at the entry point, or via a different
|
||||
driver shape). Infrastructure ready for any binary with the
|
||||
documented MSVC pattern.
|
||||
|
||||
## Layer VMX — Vector-store xrefs (M6 follow-up, landed)
|
||||
|
||||
Extends the M6 X-form opcode-31 dispatch in `xref.rs` with AltiVec/VMX
|
||||
vector loads and stores. New entries (XO codes):
|
||||
|
||||
- `lvx` (103), `lvxl` (359), `lvebx` (7), `lvehx` (39), `lvewx` (71)
|
||||
— `addr_mode='x_form_indexed'`, `kind='read'`.
|
||||
- `stvx` (231), `stvxl` (487), `stvebx` (135), `stvehx` (167),
|
||||
`stvewx` (199) — `addr_mode='x_form_indexed'`, `kind='write'`.
|
||||
|
||||
Same constraint as M6: rows emitted only when both `rA` and `rB`
|
||||
resolve to known constants (rare but useful).
|
||||
|
||||
### Sylpheed yield
|
||||
- 110 `stvx` writes newly resolved.
|
||||
|
||||
## Layer SJIS+UTF-8 — Localised-string detection (M7 follow-up, landed)
|
||||
|
||||
Extends `xenia_analysis::strings::analyze` with two additional scanners.
|
||||
|
||||
### Shift_JIS detection
|
||||
Per JIS X 0208: lead byte ∈ [0x81, 0x9F] ∪ [0xE0, 0xEF];
|
||||
trail byte ∈ [0x40, 0x7E] ∪ [0x80, 0xFC]. Single-byte ASCII and JIS
|
||||
half-width katakana (0xA1..=0xDF) are passed through. At least one
|
||||
multi-byte pair must be present (so we don't double-count pure ASCII).
|
||||
SJIS bytes are rendered as `\\xHH` escapes in the `content` column for
|
||||
diagnostic readability — full SJIS→UTF-8 decoding is a future
|
||||
enhancement.
|
||||
|
||||
### UTF-8 detection
|
||||
Validates 2-byte (`110xxxxx 10xxxxxx`) and 3-byte
|
||||
(`1110xxxx 10xxxxxx 10xxxxxx`) sequences plus printable ASCII. Skips
|
||||
4-byte (supplementary plane) which is rare in game text.
|
||||
|
||||
### Sylpheed yield
|
||||
- 790 Shift_JIS strings (Japanese debug + UI text, including
|
||||
`[WARNING] ノードに割り当てるエフェクトIDの指定がない ノードデータが見つからない` style mission strings).
|
||||
- 39 UTF-8 strings.
|
||||
- 6,311 ASCII strings (unchanged from M7).
|
||||
|
||||
## Forward work (not yet landed)
|
||||
|
||||
- **M9.6** — link `eh_funcinfo` records back to their owning functions
|
||||
via `bl __CxxFrameHandler` registration sites + per-try-block
|
||||
`pHandlerArray` parsing.
|
||||
- **M11.6** — relax M11.5 to detect non-canonical static-init driver
|
||||
shapes (`_initterm_e` with status return, custom drivers).
|
||||
- Full SJIS → UTF-8 decoding in the `strings.content` column.
|
||||
- VMX128 (opcode 4) vector-store xrefs — separate encoding space, low
|
||||
ROI; document if Sylpheed's renderer cluster uses it.
|
||||
File diff suppressed because it is too large
Load Diff
277
crates/xenia-analysis/src/demangle.rs
Normal file
277
crates/xenia-analysis/src/demangle.rs
Normal file
@@ -0,0 +1,277 @@
|
||||
//! MSVC C++ name demangling for Xbox 360 binaries.
|
||||
//!
|
||||
//! Wraps [`msvc_demangler::demangle`] (a Rust port of LLVM's
|
||||
//! `MicrosoftDemangle.cpp`) and splits the resulting human-readable string
|
||||
//! into structured fields (namespace path, class name, method name, params
|
||||
//! signature) for storage in the `demangled_names` DB table.
|
||||
//!
|
||||
//! The structured split is heuristic — it operates on the formatted output,
|
||||
//! not the parsed AST. This is good enough for typical RTTI strings of the
|
||||
//! form `?AVClassName@Namespace@@` and standard member functions; exotic
|
||||
//! template / lambda forms degrade gracefully (the structured fields end up
|
||||
//! `None` while `raw_demangled` retains the full LLVM-style output).
|
||||
//!
|
||||
//! Reference: <https://docs.rs/msvc-demangler> (LLVM `MicrosoftDemangle.cpp` port).
|
||||
|
||||
use msvc_demangler::DemangleFlags;
|
||||
|
||||
/// One demangled MSVC symbol, broken into the pieces stored per row.
///
/// `mangled` and `raw_demangled` are always populated; the four optional
/// fields are the result of a heuristic split of `raw_demangled` and are
/// `None` whenever that split could not identify the corresponding part.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Demangled {
    /// The mangled input, kept verbatim.
    pub mangled: String,
    /// Complete LLVM-style demangled text
    /// (e.g. `xe::apu::AudioSystem::Setup(void)`).
    pub raw_demangled: String,
    /// Namespaces enclosing the class, joined with `::` (e.g. `xe::apu`).
    /// `None` for symbols at global scope.
    pub namespace_path: Option<String>,
    /// Owning class of a member function (e.g. `AudioSystem`).
    /// `None` for free functions.
    pub class_name: Option<String>,
    /// Name of the method or free function (e.g. `Setup`). `None` when the
    /// heuristic could not isolate a name in the demangled text.
    pub method_name: Option<String>,
    /// Parameter list with the enclosing parentheses removed (e.g. `void`
    /// or `int, char *`). `None` when the symbol is not a function or no
    /// `(...)` group was found.
    pub params_signature: Option<String>,
}
|
||||
|
||||
/// Demangle one mangled MSVC C++ symbol. Returns `None` if the input does not
|
||||
/// start with `?` (early-out for non-mangled names) OR if the underlying
|
||||
/// demangler fails to parse it. Callers that want a "best effort" record
|
||||
/// (NULL fields + raw=mangled) should use [`demangle_or_raw`] instead.
|
||||
pub fn demangle(mangled: &str) -> Option<Demangled> {
|
||||
if !mangled.starts_with('?') {
|
||||
return None;
|
||||
}
|
||||
let raw = msvc_demangler::demangle(mangled, DemangleFlags::llvm()).ok()?;
|
||||
Some(split_structured(mangled.to_string(), raw))
|
||||
}
|
||||
|
||||
/// Demangle, or fall back to a record that just carries the original mangled
|
||||
/// string in `raw_demangled` and leaves all structured fields `None`. Useful
|
||||
/// for DB insert paths that want one row per mangled input regardless of
|
||||
/// parser success.
|
||||
pub fn demangle_or_raw(mangled: &str) -> Demangled {
|
||||
if let Some(d) = demangle(mangled) {
|
||||
return d;
|
||||
}
|
||||
Demangled {
|
||||
mangled: mangled.to_string(),
|
||||
raw_demangled: mangled.to_string(),
|
||||
namespace_path: None,
|
||||
class_name: None,
|
||||
method_name: None,
|
||||
params_signature: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Split a fully-formatted demangled string into structured fields.
|
||||
///
|
||||
/// Strategy:
|
||||
/// 1. Find the first un-nested `(` — everything before it is the qualified
|
||||
/// name; everything inside the matching parens is `params_signature`.
|
||||
/// 2. Strip leading return-type tokens before the qualified name (everything
|
||||
/// up to the LAST whitespace not inside `<...>` or `(...)` brackets).
|
||||
/// 3. Split the qualified name on `::` (top-level only) — last segment is
|
||||
/// `method_name`, second-to-last is `class_name`, the rest joined back
|
||||
/// with `::` is `namespace_path`.
|
||||
fn split_structured(mangled: String, raw: String) -> Demangled {
|
||||
let raw_view = raw.as_str();
|
||||
|
||||
let (qualified_name, params) = match find_paren_split(raw_view) {
|
||||
Some((before, inside)) => (before.trim_end().to_string(), Some(inside.to_string())),
|
||||
None => (raw_view.to_string(), None),
|
||||
};
|
||||
|
||||
// Drop any return-type prefix: keep everything after the last top-level
|
||||
// whitespace boundary (where "top-level" means depth-0 in <...>/(...)).
|
||||
let qname_clean = strip_return_type_prefix(&qualified_name);
|
||||
|
||||
let (namespace_path, class_name, method_name) = split_qname(&qname_clean);
|
||||
|
||||
Demangled {
|
||||
mangled,
|
||||
raw_demangled: raw,
|
||||
namespace_path,
|
||||
class_name,
|
||||
method_name,
|
||||
params_signature: params,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `(text_before_paren, text_inside_outer_parens)` for the first
/// top-level `(` in `s`, or `None` when there is no such paren.
///
/// "Top-level" means not nested inside `<...>`; a `(` that appears within
/// template angle brackets is skipped. Once the opening paren is chosen, the
/// matching `)` is found by counting nested parentheses only. If the opening
/// paren is never closed the result is `None`.
///
/// Angle-bracket tracking is purely textual, so a `<` that is not a template
/// bracket (for example in an `operator<` name) is counted like any other
/// `<`.
fn find_paren_split(s: &str) -> Option<(&str, &str)> {
    let bytes = s.as_bytes();

    // Pass 1: locate the first `(` that sits outside every `<...>` group.
    let mut depth_angle: i32 = 0;
    let mut open_at: Option<usize> = None;
    for (i, &b) in bytes.iter().enumerate() {
        match b {
            b'<' => depth_angle += 1,
            b'>' if depth_angle > 0 => depth_angle -= 1,
            b'(' if depth_angle == 0 => {
                open_at = Some(i);
                break;
            }
            _ => {}
        }
    }
    let open = open_at?;

    // Pass 2: find the `)` that balances it. Only paren nesting matters
    // here; angle brackets inside the argument list never influenced the
    // match, so no angle counter is kept.
    let mut depth_paren: i32 = 1;
    for (j, &b) in bytes.iter().enumerate().skip(open + 1) {
        match b {
            b'(' => depth_paren += 1,
            b')' => {
                depth_paren -= 1;
                if depth_paren == 0 {
                    // `(` and `)` are ASCII, so both indices are char
                    // boundaries and the slices below cannot panic.
                    return Some((&s[..open], &s[open + 1..j]));
                }
            }
            _ => {}
        }
    }
    None
}
|
||||
|
||||
/// Remove a leading return-type / calling-convention prefix from a
/// qualified name: everything up to and including the last space that lies
/// outside both `<...>` and `(...)` is discarded.
///
/// Example: `void __cdecl Foo::Bar` becomes `Foo::Bar`. Input without a
/// top-level space is returned unchanged (as an owned copy).
fn strip_return_type_prefix(s: &str) -> String {
    let mut angle: i32 = 0;
    let mut paren: i32 = 0;
    // Byte offset at which the kept suffix begins; 0 keeps the whole string.
    let mut keep_from = 0usize;

    for (pos, ch) in s.bytes().enumerate() {
        match ch {
            b'<' => angle += 1,
            // Closers never push a counter below zero.
            b'>' => angle = (angle - 1).max(0),
            b'(' => paren += 1,
            b')' => paren = (paren - 1).max(0),
            b' ' => {
                if angle == 0 && paren == 0 {
                    keep_from = pos + 1;
                }
            }
            _ => {}
        }
    }

    s[keep_from..].to_string()
}
|
||||
|
||||
/// Split a fully-qualified name on top-level `::` and tag the parts.
|
||||
fn split_qname(qname: &str) -> (Option<String>, Option<String>, Option<String>) {
|
||||
if qname.is_empty() {
|
||||
return (None, None, None);
|
||||
}
|
||||
let parts = top_level_split_colon_colon(qname);
|
||||
match parts.len() {
|
||||
0 => (None, None, None),
|
||||
1 => (None, None, Some(parts[0].clone())),
|
||||
2 => (None, Some(parts[0].clone()), Some(parts[1].clone())),
|
||||
_ => {
|
||||
let n = parts.len();
|
||||
let method = parts[n - 1].clone();
|
||||
let class = parts[n - 2].clone();
|
||||
let ns = parts[..n - 2].join("::");
|
||||
(Some(ns), Some(class), Some(method))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Split `s` at every `::` that is not nested inside `<...>` or `(...)`.
///
/// Separators inside template argument lists or parameter lists are left
/// in place. Empty segments (from a leading, trailing or doubled separator)
/// are dropped, so the result may be empty.
fn top_level_split_colon_colon(s: &str) -> Vec<String> {
    let raw = s.as_bytes();
    let mut angle: i32 = 0;
    let mut paren: i32 = 0;
    let mut pieces: Vec<String> = Vec::new();
    let mut seg_start = 0usize;
    // Set after a separator is recognised so its second `:` is not
    // examined again on the next iteration.
    let mut skip_second_colon = false;

    for (pos, &ch) in raw.iter().enumerate() {
        if skip_second_colon {
            skip_second_colon = false;
            continue;
        }
        match ch {
            b'<' => angle += 1,
            b'>' => {
                if angle > 0 {
                    angle -= 1;
                }
            }
            b'(' => paren += 1,
            b')' => {
                if paren > 0 {
                    paren -= 1;
                }
            }
            b':' => {
                let at_top_level = angle == 0 && paren == 0;
                if at_top_level && raw.get(pos + 1) == Some(&b':') {
                    // Empty segments are never emitted.
                    if pos > seg_start {
                        pieces.push(s[seg_start..pos].to_string());
                    }
                    seg_start = pos + 2;
                    skip_second_colon = true;
                }
            }
            _ => {}
        }
    }

    let tail = &s[seg_start..];
    if !tail.is_empty() {
        pieces.push(tail.to_string());
    }
    pieces
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Names without the leading `?` never reach the demangler.
    #[test]
    fn early_out_on_non_mangled() {
        for plain in ["plain_c_name", "Foo::Bar"] {
            assert!(demangle(plain).is_none());
        }
    }

    /// The fallback record echoes the input and carries no structured data.
    #[test]
    fn demangle_or_raw_records_failures() {
        let record = demangle_or_raw("not_mangled");
        assert_eq!(record.mangled, "not_mangled");
        assert_eq!(record.raw_demangled, "not_mangled");
        assert!(record.method_name.is_none());
    }

    /// A namespaced member function is split into all four structured fields.
    #[test]
    fn simple_member_function() {
        // ?Setup@AudioSystem@apu@xe@@QEAAXXZ → public: __cdecl xe::apu::AudioSystem::Setup(void)
        let parsed = demangle("?Setup@AudioSystem@apu@xe@@QEAAXXZ").expect("should parse");
        let Demangled {
            namespace_path,
            class_name,
            method_name,
            params_signature,
            ..
        } = parsed;
        assert_eq!(method_name.as_deref(), Some("Setup"));
        assert_eq!(class_name.as_deref(), Some("AudioSystem"));
        assert_eq!(namespace_path.as_deref(), Some("xe::apu"));
        assert_eq!(params_signature.as_deref(), Some("void"));
    }

    /// The bare class form used by RTTI type descriptors is accepted.
    #[test]
    fn rtti_type_descriptor_string() {
        // RTTI TypeDescriptor mangled name format: ".?AVClassName@@" → "class ClassName".
        // The leading "." is stripped before demangling the "?AV…" part (done in M3).
        // Here we only confirm the demangler handles the minimal class form.
        let parsed = demangle("?AVAudioSystem@apu@xe@@").expect("should parse");
        let text = &parsed.raw_demangled;
        assert!(text.contains("AudioSystem"), "raw='{}'", text);
    }

    /// Leading segments are folded into the namespace path.
    #[test]
    fn split_qname_handles_namespace_chain() {
        let (namespace, class, method) = split_qname("a::b::c::Klass::method");
        assert_eq!(namespace.as_deref(), Some("a::b::c"));
        assert_eq!(class.as_deref(), Some("Klass"));
        assert_eq!(method.as_deref(), Some("method"));
    }

    /// Templates inside the param list must not confuse paren matching.
    #[test]
    fn paren_split_handles_template_in_args() {
        let signature = "void __cdecl Foo::Bar(std::vector<int>, std::map<a, b>)";
        let (head, args) = find_paren_split(signature).expect("paren found");
        assert_eq!(head, "void __cdecl Foo::Bar");
        assert_eq!(args, "std::vector<int>, std::map<a, b>");
    }

    /// `::` inside template arguments is not a split point.
    #[test]
    fn double_colon_inside_template_not_split() {
        let segments = top_level_split_colon_colon("a::b<c::d>::e");
        assert_eq!(segments, vec!["a", "b<c::d>", "e"]);
    }
}
|
||||
51
crates/xenia-analysis/src/disasm.rs
Normal file
51
crates/xenia-analysis/src/disasm.rs
Normal file
@@ -0,0 +1,51 @@
|
||||
//! Analysis-side enrichment over [`xenia_cpu::disasm::iter_disasm`].
|
||||
//!
|
||||
//! Turns a stream of decoder-only [`xenia_cpu::disasm::DisasmItem`]s into a
|
||||
//! stream of [`RichDisasmItem`]s carrying section name + enclosing function +
|
||||
//! label name. The three sinks in [`crate::sinks`] (text, JSON, DuckDB) all
|
||||
//! consume `RichDisasmItem`.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use xenia_cpu::disasm::DisasmItem;
|
||||
|
||||
use crate::func::FuncAnalysis;
|
||||
|
||||
/// `DisasmItem` plus the analysis context (section/function/label).
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RichDisasmItem<'a> {
|
||||
pub item: DisasmItem,
|
||||
pub section: &'a str,
|
||||
pub function: Option<u32>,
|
||||
pub label: Option<&'a str>,
|
||||
}
|
||||
|
||||
/// Walk one code section, yielding rich items annotated with section name,
|
||||
/// rolling-window enclosing function, and label-at-address.
|
||||
///
|
||||
/// The `function` field tracks the most recent function-start the iterator
|
||||
/// has crossed — matching the legacy `current_func` behaviour in
|
||||
/// `db.rs::insert_instructions_streaming`.
|
||||
pub fn enrich_section<'a>(
|
||||
image: &'a [u8],
|
||||
image_base: u32,
|
||||
section_name: &'a str,
|
||||
va_start: u32,
|
||||
va_end: u32,
|
||||
func_analysis: &'a FuncAnalysis,
|
||||
labels: &'a HashMap<u32, String>,
|
||||
) -> impl Iterator<Item = RichDisasmItem<'a>> + 'a {
|
||||
let mut current_func: Option<u32> = None;
|
||||
xenia_cpu::disasm::iter_disasm(image, image_base, va_start, va_end).map(move |item| {
|
||||
if func_analysis.is_function_start(item.addr) {
|
||||
current_func = Some(item.addr);
|
||||
}
|
||||
let label = labels.get(&item.addr).map(|s| s.as_str());
|
||||
RichDisasmItem {
|
||||
item,
|
||||
section: section_name,
|
||||
function: current_func,
|
||||
label,
|
||||
}
|
||||
})
|
||||
}
|
||||
296
crates/xenia-analysis/src/eh_scope.rs
Normal file
296
crates/xenia-analysis/src/eh_scope.rs
Normal file
@@ -0,0 +1,296 @@
|
||||
//! M9.5 — MSVC `__CxxFrameHandler` scope-table parsing.
|
||||
//!
|
||||
//! When MSVC compiles C++ try/catch on Win32 PowerPC, the compiler emits
|
||||
//! per-function `FuncInfo` records in `.rdata` containing the scope-state
|
||||
//! tables that `__CxxFrameHandler` walks during unwinding. Each record
|
||||
//! starts with one of the documented magic numbers:
|
||||
//!
|
||||
//! - `0x19930520` — original FuncInfo (no aligned-state-array)
|
||||
//! - `0x19930521` — adds `pESTypeList` field
|
||||
//! - `0x19930522` — adds `EHFlags` field
|
||||
//!
|
||||
//! Layout (4-byte little-endian on x86; **on Xbox 360 PowerPC PE the
|
||||
//! struct is big-endian** because the binary is BE throughout):
|
||||
//!
|
||||
//! ```text
|
||||
//! +0x00 uint32 magicNumber (one of 0x199305{20,21,22})
|
||||
//! +0x04 int32 maxState (number of UnwindMapEntry rows)
|
||||
//! +0x08 uint32 pUnwindMap (VA → UnwindMapEntry[])
|
||||
//! +0x0C uint32 nTryBlocks
|
||||
//! +0x10 uint32 pTryBlockMap (VA → TryBlockMapEntry[])
|
||||
//! +0x14 uint32 nIPMapEntries (ignored on x86; present on PPC)
|
||||
//! +0x18 uint32 pIPtoStateMap (VA → IPtoStateMapEntry[])
|
||||
//! +0x1C uint32 pESTypeList (only when magic ≥ 0x19930521)
|
||||
//! +0x20 uint32 EHFlags (only when magic = 0x19930522)
|
||||
//! ```
|
||||
//!
|
||||
//! Each `UnwindMapEntry` is 8 bytes: `(toState i32, action u32)`.
|
||||
//! Each `TryBlockMapEntry` is 20 bytes:
|
||||
//! `(tryLow i32, tryHigh i32, catchHigh i32, nCatches u32, pHandlerArray u32)`.
|
||||
//!
|
||||
//! ### What this module does
|
||||
//!
|
||||
//! - Magic-scan `.rdata` for the three FuncInfo signatures (read as BE u32).
|
||||
//! - Parse the FuncInfo record + walk the unwind map and try-block map.
|
||||
//! - Skip records whose internal pointers don't land in valid sections,
|
||||
//! or whose lengths exceed sane caps.
|
||||
//!
|
||||
//! ### What this module does NOT do
|
||||
//!
|
||||
//! - Does not associate a FuncInfo back to its owning function. The
|
||||
//! `bl __CxxFrameHandler` registration would name that linkage, but
|
||||
//! it requires walking all `has_eh=true` functions' prologues; a
|
||||
//! future M9.6 can do that. For now the FuncInfo record stands on its
|
||||
//! own — joins to `functions` by best-effort PC range queries.
|
||||
//! - Does not parse the `pHandlerArray` per try-block (catch type info).
|
||||
//!
|
||||
//! Reference: LLVM `llvm/lib/CodeGen/AsmPrinter/WinException.cpp`,
|
||||
//! Microsoft openrce.org documentation on FuncInfo.
|
||||
|
||||
use xenia_xex::pe::PeSection;
|
||||
|
||||
/// `FuncInfo` magic: original record layout.
const MAGIC_OLD: u32 = 0x1993_0520;
/// `FuncInfo` magic: layout that adds the `pESTypeList` field.
const MAGIC_V21: u32 = 0x1993_0521;
/// `FuncInfo` magic: layout that adds the `EHFlags` field.
const MAGIC_V22: u32 = 0x1993_0522;
|
||||
|
||||
/// One 8-byte row of a `FuncInfo` unwind map: `(toState i32, action u32)`.
#[derive(Debug, Clone, Copy)]
pub struct UnwindMapEntry {
    /// The `toState` value of this row.
    pub to_state: i32,
    /// VA of the cleanup action; 0 if none.
    pub action_pc: u32,
}
|
||||
|
||||
/// One 20-byte row of a `FuncInfo` try-block map:
/// `(tryLow i32, tryHigh i32, catchHigh i32, nCatches u32, pHandlerArray u32)`.
#[derive(Debug, Clone, Copy)]
pub struct TryBlockMapEntry {
    /// The `tryLow` field.
    pub try_low: i32,
    /// The `tryHigh` field.
    pub try_high: i32,
    /// The `catchHigh` field.
    pub catch_high: i32,
    /// The `nCatches` field.
    pub n_catches: u32,
    /// The `pHandlerArray` field, stored as read; the array it points to is
    /// not parsed by this module.
    pub p_handler_array: u32,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct EhFuncInfo {
|
||||
pub address: u32, // VA of the FuncInfo record itself
|
||||
pub magic: u32,
|
||||
pub max_state: i32,
|
||||
pub p_unwind_map: u32,
|
||||
pub n_try_blocks: u32,
|
||||
pub p_try_block_map: u32,
|
||||
pub n_ip_map_entries: u32,
|
||||
pub p_ip_to_state_map: u32,
|
||||
pub p_es_type_list: Option<u32>,
|
||||
pub eh_flags: Option<u32>,
|
||||
pub unwind_map: Vec<UnwindMapEntry>,
|
||||
pub try_blocks: Vec<TryBlockMapEntry>,
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base)))]
|
||||
pub fn analyze(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
sections: &[PeSection],
|
||||
) -> Vec<EhFuncInfo> {
|
||||
let started = std::time::Instant::now();
|
||||
let mut out: Vec<EhFuncInfo> = Vec::new();
|
||||
|
||||
// Compute the union of valid VA ranges across all sections — used to
|
||||
// sanity-check internal pointers in the FuncInfo records.
|
||||
let valid_ranges: Vec<(u32, u32)> = sections.iter()
|
||||
.map(|s| (image_base + s.virtual_address,
|
||||
image_base + s.virtual_address + s.virtual_size))
|
||||
.collect();
|
||||
let in_valid = |va: u32| valid_ranges.iter().any(|(lo, hi)| va >= *lo && va < *hi);
|
||||
|
||||
let read_u32 = |abs: u32| -> Option<u32> {
|
||||
let off = abs.wrapping_sub(image_base) as usize;
|
||||
if off + 4 > pe.len() { return None; }
|
||||
Some(u32::from_be_bytes([pe[off], pe[off + 1], pe[off + 2], pe[off + 3]]))
|
||||
};
|
||||
let read_i32 = |abs: u32| -> Option<i32> { read_u32(abs).map(|u| u as i32) };
|
||||
|
||||
for section in sections {
|
||||
if section.name != ".rdata" { continue; }
|
||||
let raw_start = section.virtual_address as usize;
|
||||
let raw_end = (section.virtual_address + section.virtual_size) as usize;
|
||||
if raw_end > pe.len() { continue; }
|
||||
let bytes = &pe[raw_start..raw_end.min(pe.len())];
|
||||
let va_base = image_base + section.virtual_address;
|
||||
|
||||
// Walk on 4-byte alignment looking for the magic.
|
||||
let mut i = 0;
|
||||
while i + 4 <= bytes.len() {
|
||||
if !i.is_multiple_of(4) { i += 1; continue; }
|
||||
let m = u32::from_be_bytes([bytes[i], bytes[i + 1], bytes[i + 2], bytes[i + 3]]);
|
||||
if m == MAGIC_OLD || m == MAGIC_V21 || m == MAGIC_V22 {
|
||||
let addr = va_base + i as u32;
|
||||
if let Some(rec) = parse_funcinfo(addr, m, &read_u32, &read_i32, &in_valid) {
|
||||
out.push(rec);
|
||||
}
|
||||
}
|
||||
i += 4;
|
||||
}
|
||||
}
|
||||
|
||||
let elapsed_ms = started.elapsed().as_millis() as f64;
|
||||
let n_unwind: usize = out.iter().map(|r| r.unwind_map.len()).sum();
|
||||
let n_try: usize = out.iter().map(|r| r.try_blocks.len()).sum();
|
||||
metrics::histogram!("analysis.phase_ms", "phase" => "eh_scope").record(elapsed_ms);
|
||||
tracing::info!(
|
||||
records = out.len(),
|
||||
unwind_entries = n_unwind,
|
||||
try_blocks = n_try,
|
||||
elapsed_ms,
|
||||
"M9.5 EH scope-table scan complete",
|
||||
);
|
||||
out
|
||||
}
|
||||
|
||||
fn parse_funcinfo(
|
||||
addr: u32,
|
||||
magic: u32,
|
||||
read_u32: &impl Fn(u32) -> Option<u32>,
|
||||
read_i32: &impl Fn(u32) -> Option<i32>,
|
||||
in_valid: &impl Fn(u32) -> bool,
|
||||
) -> Option<EhFuncInfo> {
|
||||
let max_state = read_i32(addr + 0x04)?;
|
||||
let p_unwind_map = read_u32(addr + 0x08)?;
|
||||
let n_try_blocks = read_u32(addr + 0x0C)?;
|
||||
let p_try_block_map = read_u32(addr + 0x10)?;
|
||||
let n_ip_map_entries = read_u32(addr + 0x14)?;
|
||||
let p_ip_to_state_map = read_u32(addr + 0x18)?;
|
||||
|
||||
// Sanity caps: real FuncInfo records have max_state ≤ a few thousand,
|
||||
// n_try_blocks ≤ a few hundred. Reject obviously bogus values that
|
||||
// happened to alias the magic.
|
||||
if !(0..=10_000).contains(&max_state) { return None; }
|
||||
if n_try_blocks > 1_000 { return None; }
|
||||
if n_ip_map_entries > 100_000 { return None; }
|
||||
// Pointers must either be NULL or land in a valid section.
|
||||
if p_unwind_map != 0 && !in_valid(p_unwind_map) { return None; }
|
||||
if p_try_block_map != 0 && !in_valid(p_try_block_map) { return None; }
|
||||
if p_ip_to_state_map != 0 && !in_valid(p_ip_to_state_map) { return None; }
|
||||
|
||||
let (p_es_type_list, eh_flags) = if magic == MAGIC_V21 {
|
||||
(read_u32(addr + 0x1C), None)
|
||||
} else if magic == MAGIC_V22 {
|
||||
(read_u32(addr + 0x1C), read_u32(addr + 0x20))
|
||||
} else {
|
||||
(None, None)
|
||||
};
|
||||
|
||||
// Walk unwind map (8-byte entries).
|
||||
let mut unwind_map: Vec<UnwindMapEntry> = Vec::with_capacity(max_state as usize);
|
||||
if p_unwind_map != 0 && max_state > 0 {
|
||||
for i in 0..max_state {
|
||||
let p = p_unwind_map.wrapping_add((i * 8) as u32);
|
||||
let to_state = read_i32(p)?;
|
||||
let action_pc = read_u32(p + 4)?;
|
||||
unwind_map.push(UnwindMapEntry { to_state, action_pc });
|
||||
}
|
||||
}
|
||||
|
||||
// Walk try-block map (20-byte entries).
|
||||
let mut try_blocks: Vec<TryBlockMapEntry> = Vec::with_capacity(n_try_blocks as usize);
|
||||
if p_try_block_map != 0 && n_try_blocks > 0 {
|
||||
for i in 0..n_try_blocks {
|
||||
let p = p_try_block_map.wrapping_add(i * 20);
|
||||
let try_low = read_i32(p)?;
|
||||
let try_high = read_i32(p + 4)?;
|
||||
let catch_high = read_i32(p + 8)?;
|
||||
let n_catches = read_u32(p + 12)?;
|
||||
let p_handler_a = read_u32(p + 16)?;
|
||||
try_blocks.push(TryBlockMapEntry {
|
||||
try_low, try_high, catch_high, n_catches, p_handler_array: p_handler_a,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Some(EhFuncInfo {
|
||||
address: addr,
|
||||
magic,
|
||||
max_state,
|
||||
p_unwind_map,
|
||||
n_try_blocks,
|
||||
p_try_block_map,
|
||||
n_ip_map_entries,
|
||||
p_ip_to_state_map,
|
||||
p_es_type_list,
|
||||
eh_flags,
|
||||
unwind_map,
|
||||
try_blocks,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use xenia_xex::pe::PeSection;

    /// Build a section whose raw layout mirrors its virtual layout.
    fn mk_section(name: &str, va: u32, size: u32) -> PeSection {
        PeSection {
            name: name.into(),
            virtual_address: va, virtual_size: size,
            raw_offset: va, raw_size: size,
            flags: 0x4000_0040,
        }
    }

    /// Store `v` big-endian at byte offset `at`.
    fn write_be(pe: &mut [u8], at: usize, v: u32) {
        pe[at..at + 4].copy_from_slice(&v.to_be_bytes());
    }

    /// Signed counterpart of [`write_be`].
    fn write_be_i32(pe: &mut [u8], at: usize, v: i32) {
        pe[at..at + 4].copy_from_slice(&v.to_be_bytes());
    }

    /// A record with the original magic and a two-row unwind map is found
    /// and decoded.
    #[test]
    fn parses_minimal_funcinfo_v0() {
        let image_base = 0x82000000u32;
        let rdata_va = 0x1000u32;
        let mut pe = vec![0u8; 0x4000];

        // FuncInfo at .rdata + 0x10.
        let fi_off = (rdata_va + 0x10) as usize;
        let fi_va = image_base + rdata_va + 0x10;
        let unwind_off = (rdata_va + 0x80) as usize;
        let unwind_va = image_base + rdata_va + 0x80;

        write_be(&mut pe, fi_off, MAGIC_OLD); // magic
        write_be_i32(&mut pe, fi_off + 4, 2); // maxState
        write_be(&mut pe, fi_off + 8, unwind_va); // pUnwindMap
        write_be(&mut pe, fi_off + 12, 0); // nTryBlocks
        write_be(&mut pe, fi_off + 16, 0); // pTryBlockMap
        write_be(&mut pe, fi_off + 20, 0); // nIPMapEntries
        write_be(&mut pe, fi_off + 24, 0); // pIPtoStateMap

        // Two unwind entries.
        write_be_i32(&mut pe, unwind_off, -1); // to_state
        write_be(&mut pe, unwind_off + 4, image_base + 0x500); // action_pc
        write_be_i32(&mut pe, unwind_off + 8, 0);
        write_be(&mut pe, unwind_off + 12, image_base + 0x600);

        let sections = vec![mk_section(".rdata", rdata_va, 0x100)];
        let recs = analyze(&pe, image_base, &sections);
        assert_eq!(recs.len(), 1);
        let r = &recs[0];
        assert_eq!(r.address, fi_va);
        assert_eq!(r.magic, MAGIC_OLD);
        assert_eq!(r.max_state, 2);
        assert_eq!(r.unwind_map.len(), 2);
        assert_eq!(r.unwind_map[0].to_state, -1);
        assert_eq!(r.unwind_map[0].action_pc, image_base + 0x500);
        assert_eq!(r.try_blocks.len(), 0);
    }

    /// A magic hit whose `maxState` exceeds the sanity cap yields no record.
    #[test]
    fn rejects_bogus_max_state() {
        let image_base = 0x82000000u32;
        let rdata_va = 0x1000u32;
        let mut pe = vec![0u8; 0x4000];
        let fi_off = (rdata_va + 0x10) as usize;
        write_be(&mut pe, fi_off, MAGIC_OLD);
        write_be_i32(&mut pe, fi_off + 4, 0xFFFF); // bogus maxState
        let sections = vec![mk_section(".rdata", rdata_va, 0x100)];
        let recs = analyze(&pe, image_base, &sections);
        assert_eq!(recs.len(), 0);
    }
}
|
||||
@@ -6,8 +6,10 @@ use std::io::Write;
|
||||
use xenia_xex::header::ImportLibrary;
|
||||
use xenia_xex::pe::PeSection;
|
||||
|
||||
use crate::disasm::enrich_section;
|
||||
use crate::func::FuncAnalysis;
|
||||
use crate::xref::{XrefKind, Xref, XrefMap, section_for_addr, resolve_source_label};
|
||||
use crate::sinks::text::write_instr_line;
|
||||
use crate::xref::{XrefKind, Xref, XrefMap, resolve_source_label};
|
||||
|
||||
/// Metadata passed to the formatter (avoids exposing full Xex2Header internals).
|
||||
pub struct DisasmInfo<'a> {
|
||||
@@ -88,11 +90,14 @@ pub fn write_asm(
|
||||
writeln!(out)?;
|
||||
|
||||
let mut in_function = false;
|
||||
let mut addr = va_start;
|
||||
while addr < va_end {
|
||||
let abs_addr = info.image_base + addr;
|
||||
let off = (addr - va_start) as usize + file_start;
|
||||
if off + 4 > pe.len() { break; }
|
||||
let abs_start = info.image_base + va_start;
|
||||
let abs_end = info.image_base + va_end;
|
||||
|
||||
let items = enrich_section(
|
||||
pe, info.image_base, §ion.name, abs_start, abs_end, func_analysis, labels,
|
||||
);
|
||||
for ri in items {
|
||||
let abs_addr = ri.item.addr;
|
||||
|
||||
// Function start? Emit separator + header
|
||||
if let Some(fi) = func_analysis.get(abs_addr) {
|
||||
@@ -126,7 +131,6 @@ pub fn write_asm(
|
||||
writeln!(out, "; FUNCTION: {lbl}{detail_str}")?;
|
||||
}
|
||||
|
||||
// Xrefs for function entry
|
||||
if let Some(xref_lines) = format_xrefs(abs_addr, xrefs, func_analysis, labels) {
|
||||
for line in &xref_lines {
|
||||
writeln!(out, "{line}")?;
|
||||
@@ -141,7 +145,6 @@ pub fn write_asm(
|
||||
if let Some(lbl) = labels.get(&abs_addr) {
|
||||
if !func_analysis.is_function_start(abs_addr) {
|
||||
writeln!(out)?;
|
||||
// Xrefs for local labels
|
||||
if let Some(xref_lines) = format_xrefs(abs_addr, xrefs, func_analysis, labels) {
|
||||
for line in &xref_lines {
|
||||
writeln!(out, "{line}")?;
|
||||
@@ -159,37 +162,8 @@ pub fn write_asm(
|
||||
writeln!(out, " ; IMPORT: {imp_name}")?;
|
||||
}
|
||||
|
||||
let instr = u32::from_be_bytes([
|
||||
pe[off], pe[off+1], pe[off+2], pe[off+3]
|
||||
]);
|
||||
|
||||
let decoded = crate::ppc::disasm(instr, abs_addr);
|
||||
let disasm_text = decoded.display().to_string();
|
||||
|
||||
// Annotate branch targets with label names
|
||||
let mut annotated = annotate_branch(&disasm_text, labels);
|
||||
|
||||
// Annotate data references
|
||||
if let Some(&(data_addr, kind)) = data_annotations.get(&abs_addr) {
|
||||
let tag = match kind {
|
||||
XrefKind::DataRead => "[R]",
|
||||
XrefKind::DataWrite => "[W]",
|
||||
_ => "[&]",
|
||||
};
|
||||
let sec = section_for_addr(data_addr, info.sections, info.image_base)
|
||||
.unwrap_or("?");
|
||||
let data_lbl = labels.get(&data_addr)
|
||||
.map(|s| format!(" = {s}"))
|
||||
.unwrap_or_default();
|
||||
if !annotated.contains("; ->") {
|
||||
annotated = format!("{annotated:<40} ; {tag} 0x{data_addr:08X} ({sec}){data_lbl}");
|
||||
} else {
|
||||
annotated = format!("{annotated} {tag} 0x{data_addr:08X} ({sec}){data_lbl}");
|
||||
}
|
||||
}
|
||||
|
||||
writeln!(out, " {:08X}: {:08X} {}", abs_addr, instr, annotated)?;
|
||||
addr += 4;
|
||||
let data_annot = data_annotations.get(&abs_addr).copied();
|
||||
write_instr_line(out, &ri, labels, info.sections, info.image_base, data_annot)?;
|
||||
}
|
||||
if in_function {
|
||||
writeln!(out, "; end function")?;
|
||||
@@ -298,21 +272,3 @@ fn format_xrefs(
|
||||
|
||||
Some(lines)
|
||||
}
|
||||
|
||||
fn annotate_branch(disasm: &str, labels: &HashMap<u32, String>) -> String {
|
||||
if let Some(pos) = disasm.find("0x") {
|
||||
let hex_start = pos + 2;
|
||||
let hex_end = disasm[hex_start..].find(|c: char| !c.is_ascii_hexdigit())
|
||||
.map(|i| hex_start + i)
|
||||
.unwrap_or(disasm.len());
|
||||
let hex_str = &disasm[hex_start..hex_end];
|
||||
if hex_str.len() == 8 {
|
||||
if let Ok(addr) = u32::from_str_radix(hex_str, 16) {
|
||||
if let Some(lbl) = labels.get(&addr) {
|
||||
return format!("{disasm:<40} ; -> {lbl}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
disasm.to_string()
|
||||
}
|
||||
|
||||
@@ -32,6 +32,17 @@ pub struct FuncInfo {
|
||||
pub is_leaf: bool,
|
||||
/// True if this is a save/restore GPR helper stub.
|
||||
pub is_saverestore: bool,
|
||||
/// True if `.pdata` has a RUNTIME_FUNCTION whose `BeginAddress` matches `start`.
|
||||
/// Authoritative ground truth from the linker; rows without this flag are
|
||||
/// prologue-detected only and may carry boundary errors.
|
||||
pub pdata_validated: bool,
|
||||
/// Function size in bytes per `.pdata`'s `function_length` field, if known.
|
||||
/// Absent (None) when this row is prologue-only.
|
||||
pub pdata_length: Option<u32>,
|
||||
/// True when `.pdata`'s exception-flag bit is set on this entry — the
|
||||
/// function has a registered C++ EH (or SEH) frame handler. Always false
|
||||
/// for entries without `.pdata` coverage. (M9)
|
||||
pub has_eh: bool,
|
||||
}
|
||||
|
||||
/// Result of the function analysis pass.
|
||||
@@ -42,6 +53,9 @@ pub struct FuncAnalysis {
|
||||
pub save_gpr_base: Option<u32>,
|
||||
/// Addresses in the restore-GPR region (start of __restgprlr block).
|
||||
pub restore_gpr_base: Option<u32>,
|
||||
/// Raw `.pdata` entries from the binary, in original order. Empty when no
|
||||
/// `.pdata` was supplied. Mirrored into the DB as `pdata_entries`.
|
||||
pub pdata_entries: Vec<xenia_xex::pdata::PdataEntry>,
|
||||
}
|
||||
|
||||
// ── Instruction field helpers ──────────────────────────────────────────────
|
||||
@@ -184,12 +198,37 @@ fn find_saverestore_stubs(
|
||||
|
||||
// ── Main analysis ──────────────────────────────────────────────────────────
|
||||
|
||||
#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base), entry_point = format_args!("{:#010x}", entry_point)))]
|
||||
pub fn analyze(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
entry_point: u32,
|
||||
code_sections: &[(u32, u32, u32)], // (va_start, va_size, flags)
|
||||
) -> FuncAnalysis {
|
||||
analyze_with_pdata(pe, image_base, entry_point, code_sections, &[])
|
||||
}
|
||||
|
||||
/// Same as [`analyze`] but also unions `.pdata` `RUNTIME_FUNCTION` entries
|
||||
/// into the candidate set. Each surviving function carries `pdata_validated`
|
||||
/// when its start matches a pdata `BeginAddress`, and `pdata_length` when
|
||||
/// the linker-supplied length disagrees with the prologue walk.
|
||||
///
|
||||
/// Pdata entries that have no prologue match (orphans) are still emitted,
|
||||
/// using the linker-supplied length to bound the function.
|
||||
///
|
||||
/// What this layer does NOT do:
|
||||
/// - Does not edit the `prolog_length` we'd derive from prologue analysis;
|
||||
/// `frame_size` and `saved_gprs` remain best-effort prologue inferences.
|
||||
/// - Does not infer base/derived call edges — that's M3+M5.
|
||||
#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base), entry_point = format_args!("{:#010x}", entry_point), pdata_entries = pdata.len()))]
|
||||
pub fn analyze_with_pdata(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
entry_point: u32,
|
||||
code_sections: &[(u32, u32, u32)],
|
||||
pdata: &[xenia_xex::pdata::PdataEntry],
|
||||
) -> FuncAnalysis {
|
||||
let started = std::time::Instant::now();
|
||||
let code_ranges: Vec<(u32, u32)> = code_sections.iter()
|
||||
.map(|(va, sz, _)| (image_base + va, image_base + va + sz))
|
||||
.collect();
|
||||
@@ -197,10 +236,10 @@ pub fn analyze(
|
||||
// 1. Find save/restore stubs
|
||||
let (save_base, restore_base) = find_saverestore_stubs(pe, image_base, &code_ranges);
|
||||
if let Some(sb) = save_base {
|
||||
eprintln!("[func] __savegprlr stub at 0x{sb:08X}");
|
||||
tracing::debug!(addr = format_args!("{:#010x}", sb), "__savegprlr stub");
|
||||
}
|
||||
if let Some(rb) = restore_base {
|
||||
eprintln!("[func] __restgprlr stub at 0x{rb:08X}");
|
||||
tracing::debug!(addr = format_args!("{:#010x}", rb), "__restgprlr stub");
|
||||
}
|
||||
|
||||
// Set of addresses in the save/restore region (to exclude from function detection)
|
||||
@@ -214,32 +253,79 @@ pub fn analyze(
|
||||
for i in 0..21 { saverestore_addrs.insert(rb + i * 4); }
|
||||
}
|
||||
|
||||
// 2. Collect all bl targets as candidate function entries
|
||||
// 2. Collect all bl targets as candidate function entries.
|
||||
// Union: bl targets ∪ pdata BeginAddresses ∪ entry_point.
|
||||
let mut call_targets: HashSet<u32> = HashSet::new();
|
||||
call_targets.insert(entry_point);
|
||||
|
||||
for &(start, end) in &code_ranges {
|
||||
let mut addr = start;
|
||||
while addr < end {
|
||||
if let Some(instr) = read_instr(pe, addr, image_base) {
|
||||
if let Some(target) = bl_target(instr, addr) {
|
||||
if let Some(instr) = read_instr(pe, addr, image_base)
|
||||
&& let Some(target) = bl_target(instr, addr) {
|
||||
// Don't count calls into save/restore stubs as function entries
|
||||
if !saverestore_addrs.contains(&target) {
|
||||
call_targets.insert(target);
|
||||
}
|
||||
}
|
||||
}
|
||||
addr += 4;
|
||||
}
|
||||
}
|
||||
eprintln!("[func] {} bl targets (candidate functions)", call_targets.len());
|
||||
|
||||
// 3. For each candidate, detect prologue and walk to epilogue
|
||||
// Index pdata by begin_address for O(1) prologue → length lookup.
|
||||
let pdata_by_begin: HashMap<u32, &xenia_xex::pdata::PdataEntry> =
|
||||
pdata.iter().map(|e| (e.begin_address, e)).collect();
|
||||
for e in pdata {
|
||||
if !saverestore_addrs.contains(&e.begin_address) {
|
||||
call_targets.insert(e.begin_address);
|
||||
}
|
||||
}
|
||||
tracing::debug!(
|
||||
candidates = call_targets.len(),
|
||||
pdata_entries = pdata.len(),
|
||||
"function candidates (bl ∪ pdata)"
|
||||
);
|
||||
|
||||
// 3. For each candidate, detect prologue and walk to epilogue. Pdata
|
||||
// metadata is layered on after the prologue walk so a missing prologue
|
||||
// still yields an entry when pdata covers it.
|
||||
let mut functions: BTreeMap<u32, FuncInfo> = BTreeMap::new();
|
||||
|
||||
for &func_addr in &call_targets {
|
||||
if let Some(fi) = analyze_function(pe, image_base, func_addr, &code_ranges, save_base, restore_base) {
|
||||
let pdata_entry = pdata_by_begin.get(&func_addr).copied();
|
||||
|
||||
if let Some(mut fi) = analyze_function(
|
||||
pe, image_base, func_addr, &code_ranges, save_base, restore_base,
|
||||
) {
|
||||
if let Some(p) = pdata_entry {
|
||||
fi.pdata_validated = true;
|
||||
fi.pdata_length = Some(p.function_length);
|
||||
// NOTE(review): the mask below is 0x2 (bit 1 of the packed flags), taken as exception-handler-present — confirm the bit position against `PdataEntry`.
|
||||
fi.has_eh = (p.flags & 0x2) != 0;
|
||||
// If the prologue walk ended too early, trust pdata's length.
|
||||
let pdata_end = p.begin_address.wrapping_add(p.function_length);
|
||||
if pdata_end > fi.end {
|
||||
fi.end = pdata_end;
|
||||
}
|
||||
}
|
||||
functions.insert(func_addr, fi);
|
||||
} else if let Some(p) = pdata_entry {
|
||||
// Orphan: pdata claims a function here but no prologue matched.
|
||||
// Emit a synthetic entry so the row exists for downstream queries.
|
||||
functions.insert(
|
||||
func_addr,
|
||||
FuncInfo {
|
||||
start: func_addr,
|
||||
end: p.begin_address.wrapping_add(p.function_length),
|
||||
frame_size: 0,
|
||||
saved_gprs: 0,
|
||||
is_leaf: false,
|
||||
is_saverestore: false,
|
||||
pdata_validated: true,
|
||||
pdata_length: Some(p.function_length),
|
||||
has_eh: (p.flags & 0x2) != 0,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -247,6 +333,7 @@ pub fn analyze(
|
||||
if let Some(sb) = save_base {
|
||||
// The save block is one cascade: entry at each rN, falls through to blr
|
||||
// Treat as a single function with the first entry point
|
||||
let pe_sb = pdata_by_begin.get(&sb).copied();
|
||||
functions.insert(sb, FuncInfo {
|
||||
start: sb,
|
||||
end: sb + 20 * 4, // 18 std + stw r12 + blr
|
||||
@@ -254,9 +341,13 @@ pub fn analyze(
|
||||
saved_gprs: 18,
|
||||
is_leaf: true,
|
||||
is_saverestore: true,
|
||||
pdata_validated: pe_sb.is_some(),
|
||||
pdata_length: pe_sb.map(|p| p.function_length),
|
||||
has_eh: pe_sb.map(|p| (p.flags & 0x2) != 0).unwrap_or(false),
|
||||
});
|
||||
}
|
||||
if let Some(rb) = restore_base {
|
||||
let pe_rb = pdata_by_begin.get(&rb).copied();
|
||||
functions.insert(rb, FuncInfo {
|
||||
start: rb,
|
||||
end: rb + 21 * 4, // 18 ld + lwz r12 + mtspr LR + blr
|
||||
@@ -264,15 +355,43 @@ pub fn analyze(
|
||||
saved_gprs: 18,
|
||||
is_leaf: true,
|
||||
is_saverestore: true,
|
||||
pdata_validated: pe_rb.is_some(),
|
||||
pdata_length: pe_rb.map(|p| p.function_length),
|
||||
has_eh: pe_rb.map(|p| (p.flags & 0x2) != 0).unwrap_or(false),
|
||||
});
|
||||
}
|
||||
|
||||
eprintln!("[func] {} functions detected", functions.len());
|
||||
// 5. Fix up `end_address` collisions: if function A's `end` overlaps
|
||||
// function B's `start` (B > A), trim A. This catches mis-merged
|
||||
// prologue walks where pdata revealed an interleaved second prologue.
|
||||
// We do this in a single forward pass.
|
||||
let starts: Vec<u32> = functions.keys().copied().collect();
|
||||
for i in 0..starts.len().saturating_sub(1) {
|
||||
let cur = starts[i];
|
||||
let next = starts[i + 1];
|
||||
if let Some(fi) = functions.get_mut(&cur)
|
||||
&& fi.end > next
|
||||
{
|
||||
fi.end = next;
|
||||
}
|
||||
}
|
||||
|
||||
let elapsed_ms = started.elapsed().as_millis() as f64;
|
||||
metrics::histogram!("analysis.phase_ms", "phase" => "functions").record(elapsed_ms);
|
||||
let pdata_validated_count = functions.values().filter(|f| f.pdata_validated).count();
|
||||
tracing::info!(
|
||||
functions = functions.len(),
|
||||
pdata_entries = pdata.len(),
|
||||
pdata_validated = pdata_validated_count,
|
||||
elapsed_ms,
|
||||
"function detection complete"
|
||||
);
|
||||
|
||||
FuncAnalysis {
|
||||
functions,
|
||||
save_gpr_base: save_base,
|
||||
restore_gpr_base: restore_base,
|
||||
pdata_entries: pdata.to_vec(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -302,15 +421,13 @@ fn analyze_function(
|
||||
let instr1 = read_instr(pe, func_addr + 4, image_base).unwrap_or(0);
|
||||
|
||||
// Check if next is bl to save stub
|
||||
if let Some(target) = bl_target(instr1, func_addr + 4) {
|
||||
if let Some(sb) = save_base {
|
||||
if target >= sb && target < sb + 18 * 4 {
|
||||
if let Some(target) = bl_target(instr1, func_addr + 4)
|
||||
&& let Some(sb) = save_base
|
||||
&& target >= sb && target < sb + 18 * 4 {
|
||||
let idx = (target - sb) / 4;
|
||||
saved_gprs = 18 - idx;
|
||||
prologue_len = 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Next should be stwu r1, -N(r1)
|
||||
let stwu_instr = read_instr(pe, func_addr + prologue_len, image_base).unwrap_or(0);
|
||||
@@ -356,14 +473,12 @@ fn analyze_function(
|
||||
}
|
||||
|
||||
// Epilogue: b __restgprlr_NN (tail branch into restore stub)
|
||||
if let Some(target) = b_target(instr, addr) {
|
||||
if let Some(rb) = restore_base {
|
||||
if target >= rb && target < rb + 18 * 4 {
|
||||
if let Some(target) = b_target(instr, addr)
|
||||
&& let Some(rb) = restore_base
|
||||
&& target >= rb && target < rb + 18 * 4 {
|
||||
end_addr = addr + 4;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Epilogue: bctr (indirect tail call — end of function)
|
||||
if is_bctr(instr) {
|
||||
@@ -392,6 +507,9 @@ fn analyze_function(
|
||||
saved_gprs,
|
||||
is_leaf,
|
||||
is_saverestore: false,
|
||||
pdata_validated: false,
|
||||
pdata_length: None,
|
||||
has_eh: false,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -407,24 +525,22 @@ impl FuncAnalysis {
|
||||
for (&addr, fi) in &self.functions {
|
||||
if fi.is_saverestore {
|
||||
// Label the block start, plus individual register entry points
|
||||
if let Some(sb) = self.save_gpr_base {
|
||||
if addr == sb {
|
||||
if let Some(sb) = self.save_gpr_base
|
||||
&& addr == sb {
|
||||
for i in 0u32..18 {
|
||||
let reg = 14 + i;
|
||||
labels.insert(sb + i * 4, format!("__savegprlr_{reg}"));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if let Some(rb) = self.restore_gpr_base {
|
||||
if addr == rb {
|
||||
if let Some(rb) = self.restore_gpr_base
|
||||
&& addr == rb {
|
||||
for i in 0u32..18 {
|
||||
let reg = 14 + i;
|
||||
labels.insert(rb + i * 4, format!("__restgprlr_{reg}"));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
labels.insert(addr, format!("sub_{addr:08X}"));
|
||||
}
|
||||
|
||||
257
crates/xenia-analysis/src/funcptr_arrays.rs
Normal file
257
crates/xenia-analysis/src/funcptr_arrays.rs
Normal file
@@ -0,0 +1,257 @@
|
||||
//! Generic function-pointer array detection (M8 + M11).
|
||||
//!
|
||||
//! M3 already detects "vtable" candidates — runs of ≥3 contiguous function
|
||||
//! pointers in `.rdata` / `.data` (with COL/RTTI walk on top). This module
|
||||
//! widens the net:
|
||||
//!
|
||||
//! - **Dispatch tables** (M8): runs of ≥2 function pointers in `.rdata`
//!   (`.data` is not scanned) that do NOT start at an M3 vtable. Captures switch
|
||||
//! jump tables, callback registries, command tables, gameplay state
|
||||
//! machines, etc.
|
||||
//! - **Static initialiser tables** (M11): function-pointer arrays in
|
||||
//! `.rdata` whose entries all have classic constructor-like prologues
|
||||
//! (small frame; either leaf or calling well-known runtime helpers).
|
||||
//! The MSVC convention names the bracketing symbols `__xc_a` /
|
||||
//! `__xc_z` (C++ ctors) and `__xi_a` / `__xi_z` (C runtime), but the
|
||||
//! names are stripped from Sylpheed; we classify by structure.
|
||||
//!
|
||||
//! All findings are written to a single `function_pointer_arrays` table
|
||||
//! with a `kind` column — `"vtable"`, `"dispatch_table"`, or `"static_init"`.
|
||||
//! Vtable rows are duplicated from M3's `vtables` table for join
|
||||
//! convenience (so a single query covers all classification kinds).
|
||||
//!
|
||||
//! ### What this module does NOT do
|
||||
//!
|
||||
//! - No alias-based classification — `static_init` is heuristic and may
|
||||
//! include any function-pointer array near the binary's `__xc_*` region.
|
||||
//! - Does not parse the bracket symbols' actual addresses — we'd need
|
||||
//! debug symbols, which Sylpheed doesn't ship.
|
||||
//! - Two-element runs in `.data` are common false positives (struct fields
|
||||
//! that happen to alias function entries); we only emit `dispatch_table`
|
||||
//! rows for `.rdata`.
|
||||
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use xenia_xex::pe::PeSection;
|
||||
|
||||
use crate::vtables::Vtable;
|
||||
|
||||
/// A contiguous run of function pointers reported by this pass.
#[derive(Debug, Clone)]
pub struct FuncPtrArray {
    /// Virtual address of the first pointer in the run.
    pub address: u32,
    /// Number of pointers in the run.
    pub length: u32,
    /// Classification tag: `"vtable"`, `"dispatch_table"` or `"static_init"`.
    pub kind: &'static str,
    /// The pointed-to function VAs, in array order.
    pub entries: Vec<u32>,
}
|
||||
|
||||
/// Run the pass. `vtables` is the M3 result — those addresses are skipped
|
||||
/// in the dispatch-table scan to avoid duplication. `function_starts` is
|
||||
/// the M1 corrected function-start set (used to validate that each array
|
||||
/// entry actually points at a known function).
|
||||
#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base)))]
|
||||
pub fn analyze(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
sections: &[PeSection],
|
||||
function_starts: &BTreeSet<u32>,
|
||||
vtables: &[Vtable],
|
||||
) -> Vec<FuncPtrArray> {
|
||||
let started = std::time::Instant::now();
|
||||
let vtable_addrs: BTreeSet<u32> = vtables.iter().map(|v| v.address).collect();
|
||||
let mut out: Vec<FuncPtrArray> = Vec::new();
|
||||
|
||||
// Re-emit vtables in this table for unified-query convenience.
|
||||
for v in vtables {
|
||||
out.push(FuncPtrArray {
|
||||
address: v.address,
|
||||
length: v.length,
|
||||
kind: "vtable",
|
||||
entries: v.methods.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
// Scan only .rdata for dispatch tables — .data has too many false
|
||||
// positives from struct fields aliasing function VAs.
|
||||
for section in sections {
|
||||
if section.name != ".rdata" { continue; }
|
||||
let raw_start = section.virtual_address as usize;
|
||||
let raw_end = (section.virtual_address + section.virtual_size) as usize;
|
||||
if raw_end > pe.len() { continue; }
|
||||
let bytes = &pe[raw_start..raw_end.min(pe.len())];
|
||||
let va_base = image_base + section.virtual_address;
|
||||
|
||||
let mut i = 0usize;
|
||||
while i + 8 <= bytes.len() {
|
||||
if !i.is_multiple_of(4) { i += 1; continue; }
|
||||
let mut entries: Vec<u32> = Vec::new();
|
||||
let mut j = i;
|
||||
while j + 4 <= bytes.len() {
|
||||
let val = u32::from_be_bytes([bytes[j], bytes[j + 1], bytes[j + 2], bytes[j + 3]]);
|
||||
if function_starts.contains(&val) {
|
||||
entries.push(val);
|
||||
j += 4;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if entries.len() >= 2 {
|
||||
let address = va_base + (i as u32);
|
||||
if !vtable_addrs.contains(&address) {
|
||||
let kind = classify_run(image_base, &entries, pe);
|
||||
out.push(FuncPtrArray {
|
||||
address,
|
||||
length: entries.len() as u32,
|
||||
kind,
|
||||
entries,
|
||||
});
|
||||
}
|
||||
i += j - i;
|
||||
} else {
|
||||
i += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let elapsed_ms = started.elapsed().as_millis() as f64;
|
||||
let n_vt = out.iter().filter(|a| a.kind == "vtable").count();
|
||||
let n_dt = out.iter().filter(|a| a.kind == "dispatch_table").count();
|
||||
let n_si = out.iter().filter(|a| a.kind == "static_init").count();
|
||||
metrics::histogram!("analysis.phase_ms", "phase" => "funcptr_arrays").record(elapsed_ms);
|
||||
tracing::info!(
|
||||
total = out.len(), vtable = n_vt, dispatch_table = n_dt, static_init = n_si,
|
||||
elapsed_ms,
|
||||
"function-pointer array scan complete",
|
||||
);
|
||||
out
|
||||
}
|
||||
|
||||
/// Classify a non-vtable function-pointer array. Currently distinguishes
|
||||
/// only "static_init" (all entries have constructor-like prologues — a
|
||||
/// brief mfspr+stwu prologue with a small frame) from "dispatch_table"
|
||||
/// (anything else).
|
||||
fn classify_run(image_base: u32, entries: &[u32], pe: &[u8]) -> &'static str {
|
||||
// Heuristic: a static initialiser's prologue is small (frame ≤ 0x80,
|
||||
// typically ≤ 0x40). If every entry's first instruction is mfspr+LR
|
||||
// (opcode 31, xo 339, spr 8) followed by a small stwu, classify as
|
||||
// static_init.
|
||||
let mut all_ctor = true;
|
||||
let mut any_ctor = false;
|
||||
for &fn_va in entries {
|
||||
if !is_ctor_like(pe, image_base, fn_va) {
|
||||
all_ctor = false;
|
||||
} else {
|
||||
any_ctor = true;
|
||||
}
|
||||
}
|
||||
if all_ctor && any_ctor && entries.len() >= 3 {
|
||||
"static_init"
|
||||
} else {
|
||||
"dispatch_table"
|
||||
}
|
||||
}
|
||||
|
||||
/// True if the function at `fn_va` looks like a tiny C++ static initialiser.
///
/// The first instruction must be `mfspr rD, LR` (the destination register is
/// not checked). The second must be either
///
/// * `stwu r1, -N(r1)` with `0 ≤ N ≤ 0x80`, or
/// * a linking branch (`bl`, opcode 18 with LK = 1), taken to be the call to
///   a `__savegprlr_*` stub; the branch target and the eventual frame size
///   are not verified.
///
/// `fn_va` is translated to an offset into `pe` by subtracting `image_base`.
/// Returns `false` when the two instruction words do not lie fully inside
/// `pe` (including when `fn_va` is below `image_base`).
fn is_ctor_like(pe: &[u8], image_base: u32, fn_va: u32) -> bool {
    let off = fn_va.wrapping_sub(image_base) as usize;
    // `checked_add` + `get` keep this panic-free even where `usize` is 32
    // bits and `off + 8` could wrap.
    let words = match off.checked_add(8).and_then(|end| pe.get(off..end)) {
        Some(w) => w,
        None => return false,
    };
    let i0 = u32::from_be_bytes([words[0], words[1], words[2], words[3]]);
    let i1 = u32::from_be_bytes([words[4], words[5], words[6], words[7]]);

    // i0: mfspr rD, LR — opcode 31, xo 339, spr 8 (the two 5-bit halves of
    // the SPR field are stored swapped in the encoding).
    let op0 = i0 >> 26;
    let xo0 = (i0 >> 1) & 0x3FF;
    let spr0 = (((i0 >> 11) & 0x1F) << 5) | ((i0 >> 16) & 0x1F);
    if !(op0 == 31 && xo0 == 339 && spr0 == 8) {
        return false;
    }

    match i1 >> 26 {
        // stwu D-form: rS = 1, rA = 1, non-positive displacement of at most 0x80.
        37 => {
            let rs = (i1 >> 21) & 0x1F;
            let ra = (i1 >> 16) & 0x1F;
            let d = ((i1 & 0xFFFF) as i16) as i32;
            rs == 1 && ra == 1 && (-0x80..=0).contains(&d)
        }
        // Opcode 18 covers b/ba/bl/bla; only the linking forms can be a call
        // into a save stub, so a plain `b` is rejected.
        18 => (i1 & 1) != 0,
        _ => false,
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use xenia_xex::pe::PeSection;

    /// Build a section whose raw offset/size mirror its virtual address/size.
    fn mk_section(name: &str, va: u32, size: u32) -> PeSection {
        PeSection {
            name: name.into(),
            virtual_address: va,
            virtual_size: size,
            raw_offset: va,
            raw_size: size,
            flags: 0x4000_0040,
        }
    }

    /// Store `val` big-endian at byte offset `at` of `buf`.
    fn write_be_u32(buf: &mut [u8], at: usize, val: u32) {
        buf[at..at + 4].copy_from_slice(&val.to_be_bytes());
    }

    #[test]
    fn detects_dispatch_table_in_rdata() {
        let image_base = 0x82000000u32;
        let rdata_va = 0x1000u32;
        let mut pe = vec![0u8; 0x4000];

        // Two consecutive function pointers, no vtable shadowing them.
        let pcs = [image_base + 0x2000, image_base + 0x2010];
        for (i, p) in pcs.iter().enumerate() {
            write_be_u32(&mut pe, rdata_va as usize + i * 4, *p);
        }

        let sections = vec![mk_section(".rdata", rdata_va, 0x100)];
        let mut starts = BTreeSet::new();
        for &p in &pcs {
            starts.insert(p);
        }

        let arrs = analyze(&pe, image_base, &sections, &starts, &[]);
        assert_eq!(arrs.len(), 1);
        assert_eq!(arrs[0].kind, "dispatch_table");
        assert_eq!(arrs[0].length, 2);
    }

    #[test]
    fn vtable_overrides_dispatch_classification() {
        let image_base = 0x82000000u32;
        let rdata_va = 0x1000u32;
        let mut pe = vec![0u8; 0x4000];

        let pcs = [image_base + 0x2000, image_base + 0x2010, image_base + 0x2020];
        for (i, p) in pcs.iter().enumerate() {
            write_be_u32(&mut pe, rdata_va as usize + i * 4, *p);
        }
        let sections = vec![mk_section(".rdata", rdata_va, 0x100)];
        let mut starts = BTreeSet::new();
        for &p in &pcs {
            starts.insert(p);
        }

        let vt = Vtable {
            address: image_base + rdata_va,
            length: 3,
            col_address: None,
            class_name: "ANON_test".into(),
            rtti_present: false,
            base_classes_json: None,
            methods: pcs.to_vec(),
        };
        let arrs = analyze(&pe, image_base, &sections, &starts, &[vt]);
        // The M3 vtable is re-emitted once, and the scan does not classify
        // the run starting at the same address a second time.
        assert_eq!(arrs.len(), 1);
        assert_eq!(arrs[0].kind, "vtable");
    }
}
|
||||
636
crates/xenia-analysis/src/ind_dispatch_typed.rs
Normal file
636
crates/xenia-analysis/src/ind_dispatch_typed.rs
Normal file
@@ -0,0 +1,636 @@
|
||||
//! M5.5 — `this`-flow indirect-dispatch resolution.
|
||||
//!
|
||||
//! M5 only resolved the canonical `lis+addi → lwz off(vt) → mtctr → bcctrl`
|
||||
//! pattern (vtable address materialised statically; rare in real C++).
|
||||
//! This layer closes the dominant case, where the dispatch reads through
|
||||
//! the object's `vptr` field:
|
||||
//!
|
||||
//! ```text
|
||||
//! lwz rVt, vptr_off(this) ; rVt = this->vptr
|
||||
//! ... ; (rVt not clobbered)
|
||||
//! lwz rFn, slot*4(rVt) ; rFn = vtable[slot]
|
||||
//! ... ; (rFn / ctr not clobbered)
|
||||
//! mtctr rFn
|
||||
//! ...
|
||||
//! bcctrl
|
||||
//! ```
|
||||
//!
|
||||
//! Resolution strategy (class-membership inference):
|
||||
//!
|
||||
//! 1. **Phase 1 — vptr-write scan.** Walk every function with a tiny
|
||||
//! register tracker (mirrors the lis+addi propagation in
|
||||
//! `xenia_analysis::xref`). Whenever a `stw rA, off(rB)` writes a
|
||||
//! known M3 vtable address into `off(rB)`, record
|
||||
//! `(vtable_addr, vptr_offset, writer_pc)`. These are constructor-
|
||||
//! side vptr stores.
|
||||
//!
|
||||
//! 2. **Phase 2 — invert by offset.** Build
|
||||
//! `vtables_by_offset[vptr_off] = set of vtables ever written at
|
||||
//! that offset`. Most classes use offset 0 (single inheritance);
|
||||
//! multiple-inheritance secondary vptrs land at non-zero offsets.
|
||||
//!
|
||||
//! 3. **Phase 3 — dispatch-site scan.** For each `bcctrl`, walk back
|
||||
//! up to 16 instructions looking for the canonical sequence,
|
||||
//! extracting `(vptr_off, slot)`. Bail on any clobber of the
|
||||
//! tracked register, on any branch instruction, or on a label
|
||||
//! boundary.
|
||||
//!
|
||||
//! 4. **Phase 4 — emit edges.** For each detected
|
||||
//! `(dispatch_pc, vptr_off, slot)`:
|
||||
//! - Look up all candidate vtables `V` where:
|
||||
//! - `vtables_by_offset[vptr_off]` contains `V`, AND
|
||||
//! - `V.length > slot`
|
||||
//! - Emit one `ind_call` edge from `dispatch_pc` to
|
||||
//! `V.methods[slot]` per candidate.
|
||||
//!
|
||||
//! Multi-candidate sites are an over-approximation: the analysis can't
|
||||
//! distinguish without alias info which of the matching classes the
|
||||
//! `this` register actually holds. Downstream queries can filter by
|
||||
//! the exposed `candidate_count` column — single-candidate edges are
|
||||
//! high-confidence, multi-candidate edges are reachability-only.
|
||||
//!
|
||||
//! ### What this layer does NOT do
|
||||
//!
|
||||
//! - No flow-sensitive analysis: register state is killed at every
|
||||
//! label (basic-block boundary), and we do not propagate values
|
||||
//! across calls (since the ABI's volatile/non-volatile partition is
|
||||
//! unreliable for `this`-pointer chains).
|
||||
//! - No alias resolution: a multi-candidate site emits one edge per
|
||||
//! matching vtable, not the exact one used at runtime.
|
||||
//! - Does not handle vptr writes via X-form indexed stores (`stwx`)
|
||||
//! or VMX/VMX128 stores — only D-form `stw rA, off(rB)`. The MSVC
|
||||
//! compiler uses D-form for all canonical vptr writes we've seen.
|
||||
//! - Does not synthesise vptr writes for inlined / elided constructors.
|
||||
//! If a class never has a writer at offset `vptr_off`, dispatches
|
||||
//! through that offset will not find candidates.
|
||||
//!
|
||||
//! Reference: IBM PowerPC ABI, Itanium C++ ABI on vtable layout (the
|
||||
//! same offset-from-`this` model applies on Win32 PPC).
|
||||
|
||||
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
||||
|
||||
use crate::func::FuncAnalysis;
|
||||
use crate::vtables::Vtable;
|
||||
|
||||
/// A single indirect-call site together with everything it resolved to.
#[derive(Debug, Clone)]
pub struct TypedDispatch {
    /// Address of the dispatching branch instruction.
    pub dispatch_pc: u32,
    /// Displacement used when the vtable pointer was loaded from the object.
    pub vptr_offset: u32,
    /// Index of the vtable slot the call goes through.
    pub slot: u32,
    /// Addresses of the candidate vtables matching `(vptr_offset, slot)`.
    pub candidate_vtables: Vec<u32>,
    /// Resolved method PCs, one per candidate vtable.
    pub method_pcs: Vec<u32>,
}
|
||||
|
||||
/// Result of the M5.5 pass.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct TypedIndirectResult {
|
||||
pub dispatches: Vec<TypedDispatch>,
|
||||
/// Phase-1 raw output, exposed for diagnostics.
|
||||
pub vptr_writes: Vec<VptrWrite>,
|
||||
}
|
||||
|
||||
/// A store of a known vtable address into an object field.
#[derive(Debug, Clone, Copy)]
pub struct VptrWrite {
    /// The vtable address being stored.
    pub vtable_addr: u32,
    /// Displacement of the store, i.e. where the vptr lives in the object.
    pub vptr_offset: u32,
    /// Address of the `stw` instruction performing the store.
    pub writer_pc: u32,
    /// Start address of the function containing `writer_pc`.
    pub writer_function: u32,
}
|
||||
|
||||
// Primary opcodes (top six bits of the instruction word), in numeric order.

/// `addi` (`li` when rA = 0).
const OP_ADDI: u32 = 14;
/// `addis` (`lis` when rA = 0).
const OP_ADDIS: u32 = 15;
/// Opcode group 19; `bcctr`/`bcctrl` is selected by extended opcode 528.
const OP_BCCTR: u32 = 19;
/// `ori`.
const OP_ORI: u32 = 24;
/// Opcode group 31; the operation is selected by the extended opcode.
const OP_X_FORM: u32 = 31;
/// `lwz`.
const OP_LWZ: u32 = 32;
/// `stw`.
const OP_STW: u32 = 36;
|
||||
|
||||
/// Run the full M5.5 analysis.
|
||||
#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base)))]
|
||||
pub fn analyze(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
func_analysis: &FuncAnalysis,
|
||||
vtables: &[Vtable],
|
||||
labels: &HashMap<u32, String>,
|
||||
) -> TypedIndirectResult {
|
||||
let started = std::time::Instant::now();
|
||||
|
||||
let vtable_addrs: BTreeSet<u32> = vtables.iter().map(|v| v.address).collect();
|
||||
let vtable_by_addr: BTreeMap<u32, &Vtable> =
|
||||
vtables.iter().map(|v| (v.address, v)).collect();
|
||||
|
||||
let block_boundaries: HashSet<u32> = labels.keys().copied().collect();
|
||||
|
||||
// Phase 1: scan for vptr writes.
|
||||
let vptr_writes = scan_vptr_writes(
|
||||
pe, image_base, func_analysis, &vtable_addrs, &block_boundaries,
|
||||
);
|
||||
|
||||
// Phase 2: invert by offset.
|
||||
let mut vtables_by_offset: HashMap<u32, HashSet<u32>> = HashMap::new();
|
||||
for w in &vptr_writes {
|
||||
vtables_by_offset.entry(w.vptr_offset).or_default().insert(w.vtable_addr);
|
||||
}
|
||||
|
||||
// Phase 3 + 4: scan dispatches and emit edges.
|
||||
let dispatches = scan_dispatches_and_resolve(
|
||||
pe, image_base, func_analysis, &block_boundaries,
|
||||
&vtables_by_offset, &vtable_by_addr,
|
||||
);
|
||||
|
||||
let elapsed_ms = started.elapsed().as_millis() as f64;
|
||||
let single_candidate = dispatches.iter().filter(|d| d.candidate_vtables.len() == 1).count();
|
||||
let multi_candidate = dispatches.len() - single_candidate;
|
||||
let total_edges: usize = dispatches.iter().map(|d| d.method_pcs.len()).sum();
|
||||
metrics::histogram!("analysis.phase_ms", "phase" => "ind_dispatch_typed").record(elapsed_ms);
|
||||
tracing::info!(
|
||||
vptr_writes = vptr_writes.len(),
|
||||
offsets = vtables_by_offset.len(),
|
||||
dispatches = dispatches.len(),
|
||||
single = single_candidate,
|
||||
multi = multi_candidate,
|
||||
edges = total_edges,
|
||||
elapsed_ms,
|
||||
"M5.5 typed indirect-dispatch scan complete",
|
||||
);
|
||||
|
||||
TypedIndirectResult { dispatches, vptr_writes }
|
||||
}
|
||||
|
||||
/// Read the big-endian 32-bit instruction word at virtual address `addr`.
///
/// `addr` is converted to an offset into `pe` by subtracting `image_base`.
/// Returns `None` when the four bytes do not lie fully inside `pe`, which
/// includes any `addr` below `image_base`.
fn read_instr(pe: &[u8], image_base: u32, addr: u32) -> Option<u32> {
    let off = addr.wrapping_sub(image_base) as usize;
    // `checked_add` keeps the bounds test sound where `usize` is 32 bits and
    // `off + 4` could wrap past the end-of-slice check.
    let word = pe.get(off..off.checked_add(4)?)?;
    Some(u32::from_be_bytes([word[0], word[1], word[2], word[3]]))
}
|
||||
|
||||
/// Phase 1 — find every `stw rA, off(rB)` where the lis+addi-tracked
|
||||
/// value of `rA` equals a known vtable address.
|
||||
fn scan_vptr_writes(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
func_analysis: &FuncAnalysis,
|
||||
vtable_addrs: &BTreeSet<u32>,
|
||||
block_boundaries: &HashSet<u32>,
|
||||
) -> Vec<VptrWrite> {
|
||||
let mut writes: Vec<VptrWrite> = Vec::new();
|
||||
for (&fn_start, fi) in &func_analysis.functions {
|
||||
if fi.is_saverestore { continue; }
|
||||
let mut reg: [Option<u32>; 32] = [None; 32];
|
||||
let mut pc = fn_start;
|
||||
while pc < fi.end {
|
||||
if pc != fn_start && block_boundaries.contains(&pc) {
|
||||
reg = [None; 32];
|
||||
}
|
||||
let Some(instr) = read_instr(pe, image_base, pc) else { break };
|
||||
let op = instr >> 26;
|
||||
let rd = ((instr >> 21) & 0x1F) as usize;
|
||||
let ra = ((instr >> 16) & 0x1F) as usize;
|
||||
let simm = ((instr & 0xFFFF) as i16) as i32;
|
||||
let uimm = instr & 0xFFFF;
|
||||
match op {
|
||||
OP_ADDIS if ra == 0 => reg[rd] = Some(uimm << 16),
|
||||
OP_ADDIS => {
|
||||
reg[rd] = reg[ra].map(|b| b.wrapping_add(uimm << 16));
|
||||
}
|
||||
OP_ADDI if ra != 0 => {
|
||||
reg[rd] = reg[ra].map(|b| b.wrapping_add(simm as u32));
|
||||
}
|
||||
OP_ADDI => reg[rd] = Some(simm as u32),
|
||||
OP_ORI => {
|
||||
let rs = rd;
|
||||
reg[ra] = reg[rs].map(|b| b | uimm);
|
||||
}
|
||||
OP_STW => {
|
||||
// `stw rS, off(rA)` — rS in bits 21..25, rA in 16..20.
|
||||
if ra != 0
|
||||
&& let Some(vtable_addr) = reg[rd]
|
||||
&& vtable_addrs.contains(&vtable_addr)
|
||||
{
|
||||
// The vptr offset is the displacement; rB's value
|
||||
// is irrelevant for class-membership inference.
|
||||
writes.push(VptrWrite {
|
||||
vtable_addr,
|
||||
vptr_offset: simm as u32,
|
||||
writer_pc: pc,
|
||||
writer_function: fn_start,
|
||||
});
|
||||
}
|
||||
// stw doesn't write to rD.
|
||||
}
|
||||
OP_LWZ => reg[rd] = None,
|
||||
32..=35 | 40..=43 | 48..=51 => reg[rd] = None,
|
||||
OP_X_FORM => {
|
||||
let xo = (instr >> 1) & 0x3FF;
|
||||
if xo != 444 && xo != 467 { reg[rd] = None; }
|
||||
}
|
||||
18 => {
|
||||
// `bl` (LK=1) clobbers volatile r0..r12 + ctr. Plain
|
||||
// `b` makes the next instruction unreachable; the
|
||||
// label-based reset handles join points.
|
||||
if (instr & 1) != 0 {
|
||||
for r in 0..=12 { reg[r] = None; }
|
||||
}
|
||||
}
|
||||
16 => {
|
||||
if (instr & 1) != 0 {
|
||||
for r in 0..=12 { reg[r] = None; }
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
pc = pc.wrapping_add(4);
|
||||
}
|
||||
}
|
||||
writes
|
||||
}
|
||||
|
||||
/// Phase 3 + 4 — scan every `bcctrl`/`bctr` instruction; for each, walk
|
||||
/// backward up to 16 instructions to find the canonical
|
||||
/// `lwz vt, vptr_off(this); lwz fn, slot(vt); mtctr fn; bcctrl` sequence.
|
||||
/// Emit one `TypedDispatch` per dispatch site that resolves to ≥ 1
|
||||
/// candidate vtable.
|
||||
fn scan_dispatches_and_resolve(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
func_analysis: &FuncAnalysis,
|
||||
block_boundaries: &HashSet<u32>,
|
||||
vtables_by_offset: &HashMap<u32, HashSet<u32>>,
|
||||
vtable_by_addr: &BTreeMap<u32, &Vtable>,
|
||||
) -> Vec<TypedDispatch> {
|
||||
let mut out: Vec<TypedDispatch> = Vec::new();
|
||||
for (&fn_start, fi) in &func_analysis.functions {
|
||||
if fi.is_saverestore { continue; }
|
||||
let mut pc = fn_start;
|
||||
while pc < fi.end {
|
||||
let Some(instr) = read_instr(pe, image_base, pc) else { break };
|
||||
let op = instr >> 26;
|
||||
if op == OP_BCCTR {
|
||||
let xo = (instr >> 1) & 0x3FF;
|
||||
let lk = (instr & 1) != 0;
|
||||
if xo == 528 && lk
|
||||
&& let Some(d) = try_resolve_dispatch_site(
|
||||
pe, image_base, fn_start, fi.end, pc,
|
||||
block_boundaries, vtables_by_offset, vtable_by_addr,
|
||||
)
|
||||
{
|
||||
out.push(d);
|
||||
}
|
||||
}
|
||||
pc = pc.wrapping_add(4);
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Backwards scan from `bcctrl` at `pc` (looking back at most 16 instrs
|
||||
/// within the same basic block). Returns `Some(_)` only when the full
|
||||
/// `lwz vt, off(rA); lwz fn, slot(vt); mtctr fn` chain is present and the
|
||||
/// `(vptr_off, slot)` pair has at least one candidate vtable.
|
||||
fn try_resolve_dispatch_site(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
fn_start: u32,
|
||||
_fn_end: u32,
|
||||
bcctrl_pc: u32,
|
||||
block_boundaries: &HashSet<u32>,
|
||||
vtables_by_offset: &HashMap<u32, HashSet<u32>>,
|
||||
vtable_by_addr: &BTreeMap<u32, &Vtable>,
|
||||
) -> Option<TypedDispatch> {
|
||||
const LOOKBACK: u32 = 16;
|
||||
|
||||
// Walk back 1..LOOKBACK instrs to find `mtctr rFn`.
|
||||
let mut mtctr_rs: Option<usize> = None;
|
||||
let mut mtctr_pc: Option<u32> = None;
|
||||
for i in 1..=LOOKBACK {
|
||||
let p = bcctrl_pc.wrapping_sub(i * 4);
|
||||
if p < fn_start { break; }
|
||||
if block_boundaries.contains(&p) { break; }
|
||||
let Some(instr) = read_instr(pe, image_base, p) else { break };
|
||||
let op = instr >> 26;
|
||||
if op == OP_X_FORM {
|
||||
let xo = (instr >> 1) & 0x3FF;
|
||||
if xo == 467 {
|
||||
let spr = (((instr >> 11) & 0x1F) << 5) | ((instr >> 16) & 0x1F);
|
||||
if spr == 9 {
|
||||
mtctr_rs = Some(((instr >> 21) & 0x1F) as usize);
|
||||
mtctr_pc = Some(p);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
let mtctr_rs = mtctr_rs?;
|
||||
let mtctr_pc = mtctr_pc?;
|
||||
|
||||
// Walk back from mtctr to find `lwz rFn, slot(rVt)` defining mtctr_rs.
|
||||
let mut slot: Option<u32> = None;
|
||||
let mut vt_reg: Option<usize> = None;
|
||||
let mut fn_lwz_pc: Option<u32> = None;
|
||||
for i in 1..=LOOKBACK {
|
||||
let p = mtctr_pc.wrapping_sub(i * 4);
|
||||
if p < fn_start { break; }
|
||||
if block_boundaries.contains(&p) { break; }
|
||||
let Some(instr) = read_instr(pe, image_base, p) else { break };
|
||||
let op = instr >> 26;
|
||||
let rd = ((instr >> 21) & 0x1F) as usize;
|
||||
if op == OP_LWZ {
|
||||
if rd == mtctr_rs {
|
||||
let ra = ((instr >> 16) & 0x1F) as usize;
|
||||
if ra == 0 { return None; }
|
||||
let off = ((instr & 0xFFFF) as i16) as i32;
|
||||
if off < 0 || (off % 4) != 0 { return None; }
|
||||
slot = Some((off as u32) / 4);
|
||||
vt_reg = Some(ra);
|
||||
fn_lwz_pc = Some(p);
|
||||
break;
|
||||
}
|
||||
// Other lwz; if it writes our target reg, it's a clobber, but
|
||||
// the loop already keys on the lwz that produces the value, so
|
||||
// no clobber check needed beyond seeing rd == mtctr_rs.
|
||||
} else if writes_reg(instr, mtctr_rs as u32) {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
let slot = slot?;
|
||||
let vt_reg = vt_reg?;
|
||||
let fn_lwz_pc = fn_lwz_pc?;
|
||||
|
||||
// Walk back from the fn-lwz to find `lwz rVt, vptr_off(rThis)` defining vt_reg.
|
||||
let mut vptr_off: Option<u32> = None;
|
||||
for i in 1..=LOOKBACK {
|
||||
let p = fn_lwz_pc.wrapping_sub(i * 4);
|
||||
if p < fn_start { break; }
|
||||
if block_boundaries.contains(&p) { break; }
|
||||
let Some(instr) = read_instr(pe, image_base, p) else { break };
|
||||
let op = instr >> 26;
|
||||
let rd = ((instr >> 21) & 0x1F) as usize;
|
||||
if op == OP_LWZ && rd == vt_reg {
|
||||
let ra = ((instr >> 16) & 0x1F) as usize;
|
||||
if ra == 0 { return None; }
|
||||
let off = ((instr & 0xFFFF) as i16) as i32;
|
||||
// Negative offsets are valid in C++ (multiple inheritance casts
|
||||
// can produce them in some ABIs); reinterpret as u32 wrap.
|
||||
vptr_off = Some(off as u32);
|
||||
break;
|
||||
}
|
||||
if writes_reg(instr, vt_reg as u32) {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
let vptr_off = vptr_off?;
|
||||
|
||||
// Phase 4 — resolve to candidate vtables.
|
||||
let candidates = vtables_by_offset.get(&vptr_off)?;
|
||||
let mut candidate_vtables: Vec<u32> = Vec::new();
|
||||
let mut method_pcs: Vec<u32> = Vec::new();
|
||||
for &vt_addr in candidates {
|
||||
if let Some(vt) = vtable_by_addr.get(&vt_addr)
|
||||
&& vt.length > slot
|
||||
&& let Some(&method_pc) = vt.methods.get(slot as usize)
|
||||
{
|
||||
candidate_vtables.push(vt_addr);
|
||||
method_pcs.push(method_pc);
|
||||
}
|
||||
}
|
||||
if method_pcs.is_empty() { return None; }
|
||||
|
||||
Some(TypedDispatch {
|
||||
dispatch_pc: bcctrl_pc,
|
||||
vptr_offset: vptr_off,
|
||||
slot,
|
||||
candidate_vtables,
|
||||
method_pcs,
|
||||
})
|
||||
}
|
||||
|
||||
/// Conservative "does this instruction write GPR `r`" predicate.
///
/// Used to detect register clobbers between a value-producing `lwz` and its
/// consumer. It errs towards `true`: stores and FP loads in the D-form
/// load/store range still report their rS/frD field, and compare-style
/// X-forms report their bits 21..25 field, so a caller may give up on a site
/// it could have resolved but should not miss a real overwrite of `r`.
///
/// Destination rules applied per primary opcode:
/// - rD (bits 21..25): `mulli`, `subfic`, `addic[.]`, `addi`, `addis`,
///   D-form loads, `ld`/`lwa`, and X-forms not listed below.
/// - rA (bits 16..20): rotate/shift immediates (`rlwimi`, `rlwinm`,
///   `rlwnm`, `rld*`), logical immediates (`ori` .. `andis.`), X-form
///   logical / shift / extend / count-leading-zero ops, and the base-register
///   write-back of every update-form load or store.
/// - rD..=r31: `lmw`.
fn writes_reg(instr: u32, r: u32) -> bool {
    let op = instr >> 26;
    let rd = (instr >> 21) & 0x1F;
    let ra = (instr >> 16) & 0x1F;
    match op {
        // mulli, subfic, addic, addic., addi, addis.
        7 | 8 | 12 | 13 | 14 | 15 => rd == r,
        // rlwimi, rlwinm, rlwnm, ori, oris, xori, xoris, andi., andis., rld*.
        20 | 21 | 23 | 24..=30 => ra == r,
        // D-form loads/stores. Odd opcodes are the update forms, which also
        // write the effective address back into rA.
        32..=43 | 48..=51 => rd == r || (op % 2 == 1 && ra == r),
        // sthu, stfsu, stfdu: only the rA write-back touches a GPR.
        45 | 53 | 55 => ra == r,
        // lmw rD, d(rA) loads rD through r31.
        46 => (rd..=31).contains(&r),
        // DS-form ld / ldu / lwa; ldu (low bits == 1) also updates rA.
        58 => rd == r || ((instr & 3) == 1 && ra == r),
        // DS-form std / stdu; only stdu writes a GPR (rA).
        62 => (instr & 3) == 1 && ra == r,
        OP_X_FORM => match (instr >> 1) & 0x3FF {
            // slw, cntlzw, sld, and, cntlzd, andc, nor, eqv, xor, orc, or,
            // nand, srw, srd, sraw, srad, srawi, sradi (both sh[5] encodings),
            // extsh, extsb, extsw — all write rA, not the rS field.
            24 | 26 | 27 | 28 | 58 | 60 | 124 | 284 | 316 | 412 | 444 | 476 | 536 | 539 | 792
            | 794 | 824 | 826 | 827 | 922 | 954 | 986 => ra == r,
            // Indexed update forms (ldux, lwzux, lbzux, stdux, stwux, stbux,
            // lhzux, lwaux, lhaux, sthux, lfsux, lfdux, stfsux, stfdux):
            // rA receives the effective address; bits 21..25 are kept for
            // conservatism.
            53 | 55 | 119 | 181 | 183 | 247 | 311 | 373 | 375 | 439 | 567 | 631 | 695 | 759 => {
                rd == r || ra == r
            }
            // Everything else: assume bits 21..25 name the destination.
            _ => rd == r,
        },
        _ => false,
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::func::FuncInfo;
    use std::collections::BTreeMap;

    /// Load address of every synthetic image used below.
    const IMAGE_BASE: u32 = 0x82000000;
    /// Size in bytes of every synthetic image used below.
    const IMAGE_LEN: usize = 0x4000;
    /// Length in bytes given to every synthetic function.
    const FN_LEN: u32 = 0x40;

    /// Builds an anonymous, RTTI-less vtable at `addr` holding `methods`.
    fn mk_vtable(addr: u32, methods: Vec<u32>) -> Vtable {
        let length = methods.len() as u32;
        Vtable {
            address: addr,
            length,
            col_address: None,
            class_name: format!("ANON_{addr:08X}"),
            rtti_present: false,
            base_classes_json: None,
            methods,
        }
    }

    /// Builds a plain (non-leaf, non-save/restore) function record.
    fn mk_func_info(start: u32, len: u32) -> FuncInfo {
        FuncInfo {
            start,
            end: start + len,
            frame_size: 0,
            saved_gprs: 0,
            is_leaf: false,
            is_saverestore: false,
            pdata_validated: false,
            pdata_length: None,
            has_eh: false,
        }
    }

    /// Builds a `FuncAnalysis` with one `FN_LEN`-byte function per start PC.
    fn mk_func_analysis(starts: &[u32]) -> FuncAnalysis {
        let functions: BTreeMap<u32, FuncInfo> = starts
            .iter()
            .map(|&start| (start, mk_func_info(start, FN_LEN)))
            .collect();
        FuncAnalysis {
            functions,
            save_gpr_base: None,
            restore_gpr_base: None,
            pdata_entries: Vec::new(),
        }
    }

    /// File offset of `pc` inside the synthetic image.
    fn file_off(pc: u32) -> usize {
        (pc - IMAGE_BASE) as usize
    }

    /// Assembles a D-form word: primary opcode, rT/rS, rA, 16-bit immediate.
    fn d_form(op: u32, rt: u32, ra: u32, imm: u16) -> u32 {
        (op << 26) | (rt << 21) | (ra << 16) | u32::from(imm)
    }

    /// Stores `words` big-endian, back to back, starting at `at`.
    fn write_words(pe: &mut [u8], at: usize, words: &[u32]) {
        for (i, word) in words.iter().enumerate() {
            let lo = at + i * 4;
            pe[lo..lo + 4].copy_from_slice(&word.to_be_bytes());
        }
    }

    /// Encode a vptr-write site:
    /// `lis r3, hi(vt); addi r3, r3, lo(vt); stw r3, write_off(dest_reg)`.
    fn enc_vptr_write(pe: &mut [u8], at: usize, vt: u32, write_off: i16, dest_reg: u32) {
        let hi = (vt >> 16) as u16;
        let lo = (vt & 0xFFFF) as u16;
        let words = [
            d_form(15, 3, 0, hi),
            d_form(14, 3, 3, lo),
            d_form(36, 3, dest_reg, write_off as u16),
        ];
        write_words(pe, at, &words);
    }

    /// Encode a dispatch site:
    ///   lwz   r4, vptr_off(r3)   ; r4 = this->vptr
    ///   lwz   r5, slot*4(r4)     ; r5 = vptr[slot]
    ///   mtctr r5
    ///   bcctrl
    fn enc_dispatch(pe: &mut [u8], at: usize, vptr_off: i16, slot: u32) {
        let words = [
            d_form(32, 4, 3, vptr_off as u16),
            d_form(32, 5, 4, ((slot * 4) & 0xFFFF) as u16),
            // mtctr r5 = mtspr CTR(=9), r5: the low SPR half sits in bits 16..20.
            (31u32 << 26) | (5 << 21) | (9 << 16) | (467 << 1),
            (19u32 << 26) | (20 << 21) | (528 << 1) | 1,
        ];
        write_words(pe, at, &words);
    }

    #[test]
    fn single_candidate_vtable_resolves_to_one_method() {
        let mut pe = vec![0u8; IMAGE_LEN];

        // Constructor: writes vtable 0x82010000 at object offset 0.
        let ctor_pc = 0x82001000u32;
        enc_vptr_write(&mut pe, file_off(ctor_pc), 0x82010000, 0, 31);

        // Dispatcher: calls slot 2 through the vptr at offset 0.
        let disp_pc = 0x82002000u32;
        enc_dispatch(&mut pe, file_off(disp_pc), 0, 2);
        let bcctrl_pc = disp_pc + 12;

        let fa = mk_func_analysis(&[ctor_pc, disp_pc]);
        let vt = mk_vtable(0x82010000, vec![0xAA, 0xBB, 0xCC, 0xDD]);
        let labels: HashMap<u32, String> = HashMap::new();
        let r = analyze(&pe, IMAGE_BASE, &fa, &[vt], &labels);

        assert_eq!(r.vptr_writes.len(), 1);
        assert_eq!(r.vptr_writes[0].vtable_addr, 0x82010000);
        assert_eq!(r.vptr_writes[0].vptr_offset, 0);

        assert_eq!(r.dispatches.len(), 1);
        let d = &r.dispatches[0];
        assert_eq!(d.dispatch_pc, bcctrl_pc);
        assert_eq!(d.vptr_offset, 0);
        assert_eq!(d.slot, 2);
        assert_eq!(d.method_pcs, vec![0xCC]);
        assert_eq!(d.candidate_vtables, vec![0x82010000]);
    }

    #[test]
    fn multi_candidate_emits_one_edge_per_match() {
        let mut pe = vec![0u8; IMAGE_LEN];

        // Two constructors, each writing a different vtable at offset 0.
        let ctor_a = 0x82001000u32;
        let ctor_b = 0x82001100u32;
        enc_vptr_write(&mut pe, file_off(ctor_a), 0x82010000, 0, 31);
        enc_vptr_write(&mut pe, file_off(ctor_b), 0x82010040, 0, 31);

        // One dispatch through slot 1.
        let disp = 0x82002000u32;
        enc_dispatch(&mut pe, file_off(disp), 0, 1);

        let fa = mk_func_analysis(&[ctor_a, ctor_b, disp]);
        let vts = vec![
            mk_vtable(0x82010000, vec![0x11, 0x22, 0x33, 0x44]),
            mk_vtable(0x82010040, vec![0x55, 0x66, 0x77, 0x88]),
        ];
        let labels: HashMap<u32, String> = HashMap::new();
        let r = analyze(&pe, IMAGE_BASE, &fa, &vts, &labels);

        assert_eq!(r.vptr_writes.len(), 2);
        assert_eq!(r.dispatches.len(), 1);
        let d = &r.dispatches[0];
        assert_eq!(d.candidate_vtables.len(), 2);
        assert!(d.method_pcs.contains(&0x22));
        assert!(d.method_pcs.contains(&0x66));
    }

    #[test]
    fn out_of_bounds_slot_yields_no_dispatch() {
        let mut pe = vec![0u8; IMAGE_LEN];

        let ctor = 0x82001000u32;
        enc_vptr_write(&mut pe, file_off(ctor), 0x82010000, 0, 31);

        // Slot 10, but the vtable only has 4 methods.
        let disp = 0x82002000u32;
        enc_dispatch(&mut pe, file_off(disp), 0, 10);

        let fa = mk_func_analysis(&[ctor, disp]);
        let vt = mk_vtable(0x82010000, vec![0x11, 0x22, 0x33, 0x44]);
        let labels: HashMap<u32, String> = HashMap::new();
        let r = analyze(&pe, IMAGE_BASE, &fa, &[vt], &labels);
        assert_eq!(r.dispatches.len(), 0);
    }

    #[test]
    fn no_writer_at_offset_yields_no_dispatch() {
        let mut pe = vec![0u8; IMAGE_LEN];

        // The constructor writes its vptr at offset 0 ...
        let ctor = 0x82001000u32;
        enc_vptr_write(&mut pe, file_off(ctor), 0x82010000, 0, 31);

        // ... but the dispatch reads offset 8, where no class writes a vptr.
        let disp = 0x82002000u32;
        enc_dispatch(&mut pe, file_off(disp), 8, 1);

        let fa = mk_func_analysis(&[ctor, disp]);
        let vt = mk_vtable(0x82010000, vec![0x11, 0x22, 0x33, 0x44]);
        let labels: HashMap<u32, String> = HashMap::new();
        let r = analyze(&pe, IMAGE_BASE, &fa, &[vt], &labels);
        assert_eq!(r.dispatches.len(), 0);
    }
}
|
||||
471
crates/xenia-analysis/src/indirect.rs
Normal file
471
crates/xenia-analysis/src/indirect.rs
Normal file
@@ -0,0 +1,471 @@
|
||||
//! Indirect-dispatch reachability for vtable-bound `bcctrl`/`bctrl` sites.
|
||||
//!
|
||||
//! Walks each detected function with a tiny per-basic-block register tracker,
|
||||
//! recognising the canonical MSVC PowerPC pattern that loads a slot from a
|
||||
//! statically-addressed vtable into CTR and indirectly calls it:
|
||||
//!
|
||||
//! ```text
|
||||
//! lis rA, hi
|
||||
//! addi rA, rA, lo ; rA = vtable_address
|
||||
//! lwz rB, slot*4(rA) ; rB = vtable[slot]
|
||||
//! mtctr rB ; CTR = vtable[slot]
|
||||
//! bcctrl ; indirect call → vtable[slot]
|
||||
//! ```
|
||||
//!
|
||||
//! Pattern hits are emitted as `(source_pc, target_pc)` pairs that callers
|
||||
//! insert into the `xrefs` table with `kind='ind_call'`.
|
||||
//!
|
||||
//! ### What this does NOT cover
|
||||
//!
|
||||
//! - Vtable pointer loaded from a `this`-pointer field (`lwz rA, off(this)`)
|
||||
//! is the dominant pattern in real C++ code; resolving it requires
|
||||
//! alias / points-to analysis that's far beyond this layer's scope.
|
||||
//! - Indirect calls via function-pointer fields (callbacks) are similarly
|
||||
//! unresolvable without object-flow analysis.
|
||||
//! - Register state is intentionally killed at every label (basic-block
|
||||
//! boundary) — we don't try to do flow-sensitive merging across joins.
|
||||
//!
|
||||
//! Reference: IBM PowerPC ABI on register-save convention, plus the
|
||||
//! `xenia_analysis::xref` `lis+addi`/`lis+ori` tracker which we mirror
|
||||
//! conceptually.
|
||||
|
||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
|
||||
use crate::func::FuncAnalysis;
|
||||
use crate::vtables::Vtable;
|
||||
|
||||
/// A resolved indirect call: the `bcctrl` at `source` transfers control to
/// `target`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IndirectEdge {
    /// PC of the `bcctrl` instruction.
    pub source: u32,
    /// PC of the method the call lands on.
    pub target: u32,
    /// Address of the vtable the call was resolved through.
    pub via_vtable: u32,
    /// Index of the method slot inside `via_vtable`.
    pub slot: u32,
}
|
||||
|
||||
/// What the tracker knows about the contents of a register (or of CTR).
/// "Nothing known" is represented by wrapping this in `Option` and using `None`.
#[derive(Debug, Clone, Copy)]
enum RegVal {
    /// A compile-time constant, e.g. the result of `lis` + `addi`.
    Const(u32),
    /// A method pointer that was loaded out of a known vtable slot.
    MethodPtr {
        /// Address of the vtable the pointer was read from.
        vtable_addr: u32,
        /// Slot index inside that vtable.
        slot: u32,
        /// The method address stored in that slot.
        method_pc: u32,
    },
}
|
||||
|
||||
// Primary opcodes (the top six bits of an instruction word) that the scan
// recognises explicitly.

/// `addi` / `li`.
const OP_ADDI: u32 = 14;
/// `addis` / `lis`.
const OP_ADDIS: u32 = 15;
/// Opcode 19 family: `bcctr[l]`, and also `blr` — told apart by the XO field.
const OP_BCCTR: u32 = 19;
/// `lwz`.
const OP_LWZ: u32 = 32;
/// `ori`.
const OP_ORI: u32 = 24;
/// Opcode 31 family: `mtspr`, `mr`, and the other X-form integer ops.
const OP_X_FORM: u32 = 31;
|
||||
|
||||
/// Run the static indirect-dispatch scan. Returns one edge per resolvable
|
||||
/// `bcctrl` site.
|
||||
#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base)))]
|
||||
pub fn analyze(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
func_analysis: &FuncAnalysis,
|
||||
vtables: &[Vtable],
|
||||
labels: &HashMap<u32, String>,
|
||||
) -> Vec<IndirectEdge> {
|
||||
let started = std::time::Instant::now();
|
||||
// Index vtables by their start VA so the lwz handler can decide
|
||||
// whether a given Const(addr) is "really" a vtable.
|
||||
let vtable_by_addr: BTreeMap<u32, &Vtable> =
|
||||
vtables.iter().map(|v| (v.address, v)).collect();
|
||||
|
||||
// Set of all "label"-bearing PCs in the analyzed binary. We treat each
|
||||
// label as a basic-block boundary (anything `loc_*` is a jump target,
|
||||
// so register state arriving at it is unreliable).
|
||||
let mut block_boundaries: HashSet<u32> = HashSet::with_capacity(labels.len());
|
||||
for &addr in labels.keys() {
|
||||
block_boundaries.insert(addr);
|
||||
}
|
||||
|
||||
let mut edges: Vec<IndirectEdge> = Vec::new();
|
||||
|
||||
for (&fn_start, fi) in &func_analysis.functions {
|
||||
if fi.is_saverestore { continue; }
|
||||
let mut reg: [Option<RegVal>; 32] = [None; 32];
|
||||
let mut ctr: Option<RegVal> = None;
|
||||
let mut pc = fn_start;
|
||||
while pc < fi.end {
|
||||
// Reset register state on basic-block entry. We don't reset on
|
||||
// the function entry itself (PC == fn_start) because labels and
|
||||
// function-starts coincide; the initial state is already None.
|
||||
if pc != fn_start && block_boundaries.contains(&pc) {
|
||||
reg = [None; 32];
|
||||
ctr = None;
|
||||
}
|
||||
|
||||
let instr = match read_instr(pe, image_base, pc) {
|
||||
Some(i) => i,
|
||||
None => break,
|
||||
};
|
||||
|
||||
let op = instr >> 26;
|
||||
let rd = ((instr >> 21) & 0x1F) as usize;
|
||||
let ra = ((instr >> 16) & 0x1F) as usize;
|
||||
let simm = ((instr & 0xFFFF) as i16) as i32;
|
||||
let uimm = instr & 0xFFFF;
|
||||
|
||||
match op {
|
||||
// lis rD, IMM (== addis rD, r0, IMM)
|
||||
OP_ADDIS if ra == 0 => {
|
||||
reg[rd] = Some(RegVal::Const(uimm << 16));
|
||||
}
|
||||
// addis rD, rA, IMM
|
||||
OP_ADDIS => {
|
||||
if let Some(RegVal::Const(b)) = reg[ra] {
|
||||
reg[rd] = Some(RegVal::Const(b.wrapping_add(uimm << 16)));
|
||||
} else {
|
||||
reg[rd] = None;
|
||||
}
|
||||
}
|
||||
// addi rD, rA, IMM
|
||||
OP_ADDI if ra != 0 => {
|
||||
if let Some(RegVal::Const(b)) = reg[ra] {
|
||||
reg[rd] = Some(RegVal::Const(b.wrapping_add(simm as u32)));
|
||||
} else {
|
||||
reg[rd] = None;
|
||||
}
|
||||
}
|
||||
// li rD, IMM (== addi rD, 0, IMM)
|
||||
OP_ADDI => {
|
||||
reg[rd] = Some(RegVal::Const(simm as u32));
|
||||
}
|
||||
// ori rA, rS, IMM — note operand order: bits 21..25 = rS, 16..20 = rA
|
||||
OP_ORI => {
|
||||
let rs = rd; // bits 21..25 = source
|
||||
if let Some(RegVal::Const(b)) = reg[rs] {
|
||||
reg[ra] = Some(RegVal::Const(b | uimm));
|
||||
} else {
|
||||
reg[ra] = None;
|
||||
}
|
||||
}
|
||||
// lwz rD, off(rA) — try to resolve as vtable slot load.
|
||||
OP_LWZ => {
|
||||
if ra != 0
|
||||
&& let Some(RegVal::Const(base)) = reg[ra]
|
||||
{
|
||||
let target = base.wrapping_add(simm as u32);
|
||||
// Two-step lookup so we accept both:
|
||||
// (a) base = exact vtable head, simm/4 = slot
|
||||
// (b) base + simm = exact vtable head (rare;
|
||||
// compiler hoists the slot offset into addi)
|
||||
let resolved = resolve_vtable_slot(target, &vtable_by_addr)
|
||||
.or_else(|| resolve_vtable_slot_via_off(base, simm, &vtable_by_addr));
|
||||
reg[rd] = resolved.map(|(vt, slot, pc)| RegVal::MethodPtr {
|
||||
vtable_addr: vt, slot, method_pc: pc,
|
||||
});
|
||||
} else {
|
||||
reg[rd] = None;
|
||||
}
|
||||
}
|
||||
// X-form: mtspr/mtctr, bcctrl, mr, etc.
|
||||
OP_X_FORM => {
|
||||
let xo = (instr >> 1) & 0x3FF;
|
||||
match xo {
|
||||
467 => {
|
||||
// mtspr SPR, rS — PPC SPR field is split: high 5 bits
|
||||
// in PPC bits 16:20 (= Rust bits 11..15), low 5 bits
|
||||
// in PPC bits 11:15 (= Rust bits 16..20). Mirrors
|
||||
// the convention in `func.rs::is_mfspr_lr`.
|
||||
let spr = (((instr >> 11) & 0x1F) << 5) | ((instr >> 16) & 0x1F);
|
||||
if spr == 9 {
|
||||
ctr = reg[rd];
|
||||
}
|
||||
// Otherwise no observable effect on tracked state.
|
||||
}
|
||||
// Anything that writes rD (most arithmetic, loads, etc.) clobbers it.
|
||||
// Conservative: invalidate rD on any X-form that has rD in bits 21..25
|
||||
// and is NOT a comparison or branch.
|
||||
_ => {
|
||||
// Heuristic: most X-form ops with non-zero RC encode rD; we
|
||||
// invalidate to avoid stale Const propagation past arithmetic.
|
||||
// This is over-eager but safe (false negatives on edges, never
|
||||
// false positives).
|
||||
reg[rd] = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
// bcctr/bcctrl — opcode 19, XO=528. LK in low bit.
|
||||
OP_BCCTR => {
|
||||
let xo = (instr >> 1) & 0x3FF;
|
||||
if xo == 528 {
|
||||
let lk = (instr & 1) != 0;
|
||||
if lk
|
||||
&& let Some(RegVal::MethodPtr { vtable_addr, slot, method_pc }) = ctr
|
||||
{
|
||||
edges.push(IndirectEdge {
|
||||
source: pc,
|
||||
target: method_pc,
|
||||
via_vtable: vtable_addr,
|
||||
slot,
|
||||
});
|
||||
}
|
||||
// After the call, CTR is preserved but rD register
|
||||
// values across the call boundary are not trustworthy.
|
||||
// Don't touch reg state — most ABIs preserve only
|
||||
// some regs anyway.
|
||||
}
|
||||
}
|
||||
// op 18: b / bl / ba / bla. LK=1 is a call; LK=0 is an
|
||||
// unconditional branch with no fall-through (next PC is
|
||||
// reached only via a different basic block, which the
|
||||
// label-based reset already handles). On a call, the
|
||||
// PowerPC ABI marks r0..r12 + ctr as volatile and
|
||||
// r13..r31 as non-volatile (callee-saved); preserve the
|
||||
// non-volatile half so vtable pointers loaded into r30/r31
|
||||
// before a `bl` survive the call.
|
||||
18 => {
|
||||
let lk = (instr & 1) != 0;
|
||||
if lk {
|
||||
for r in 0..=12 { reg[r] = None; }
|
||||
ctr = None;
|
||||
}
|
||||
// LK=0 (`b`) makes fall-through unreachable; nothing to do —
|
||||
// any next reachable PC will hit a label boundary.
|
||||
}
|
||||
// Conditional branches (op 16) fall through; preserve all reg
|
||||
// state for the fall-through path. The label-based join-point
|
||||
// invalidation bounds false-positive risk for jump-IN paths.
|
||||
16 => {
|
||||
let lk = (instr & 1) != 0;
|
||||
if lk {
|
||||
for r in 0..=12 { reg[r] = None; }
|
||||
ctr = None;
|
||||
}
|
||||
}
|
||||
// Stores and loads we don't track explicitly clobber rD only
|
||||
// when rD is on the destination side; the conservative rule
|
||||
// is "any non-recognised opcode that may write rD invalidates it".
|
||||
36..=55 => {
|
||||
// Loads write rD; stores don't. The safe pessimisation is
|
||||
// to invalidate rD for the load family (32..=35, 40..=43, etc.)
|
||||
// and leave it alone for stores. We've already handled lwz
|
||||
// above; for the rest, invalidate rD.
|
||||
if matches!(op, 32..=35 | 40..=43 | 48..=51) {
|
||||
reg[rd] = None;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
pc = pc.wrapping_add(4);
|
||||
}
|
||||
}
|
||||
|
||||
let elapsed_ms = started.elapsed().as_millis() as f64;
|
||||
metrics::histogram!("analysis.phase_ms", "phase" => "indirect").record(elapsed_ms);
|
||||
tracing::info!(
|
||||
edges = edges.len(),
|
||||
elapsed_ms,
|
||||
"indirect-dispatch scan complete"
|
||||
);
|
||||
edges
|
||||
}
|
||||
|
||||
/// Reads the big-endian 32-bit instruction word at guest address `addr`.
///
/// `pe` holds the image bytes such that guest address `image_base` maps to
/// `pe[0]`. Returns `None` when `addr` lies below `image_base` or when the
/// four bytes at that offset are not all inside `pe`.
fn read_instr(pe: &[u8], image_base: u32, addr: u32) -> Option<u32> {
    let off = addr.checked_sub(image_base)? as usize;
    // `checked_add` + `get` keep the bounds test free of `usize` overflow,
    // which `off + 4` could hit on 32-bit hosts.
    let end = off.checked_add(4)?;
    match pe.get(off..end)? {
        &[b0, b1, b2, b3] => Some(u32::from_be_bytes([b0, b1, b2, b3])),
        _ => None,
    }
}
|
||||
|
||||
/// `target = base + simm` where `target` is an exact vtable head (rare,
|
||||
/// compiler hoists the slot offset into the addi).
|
||||
fn resolve_vtable_slot_via_off(
|
||||
base: u32,
|
||||
simm: i32,
|
||||
vtable_by_addr: &BTreeMap<u32, &Vtable>,
|
||||
) -> Option<(u32, u32, u32)> {
|
||||
let target = base.wrapping_add(simm as u32);
|
||||
if let Some(v) = vtable_by_addr.get(&target)
|
||||
&& !v.methods.is_empty()
|
||||
{
|
||||
return Some((v.address, 0, v.methods[0]));
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// `target` is an absolute address. If it falls inside a known vtable's
|
||||
/// `[address, address + length*4)` range AND is 4-aligned to a slot,
|
||||
/// return `(vtable_addr, slot, method_pc)`.
|
||||
fn resolve_vtable_slot(
|
||||
target: u32,
|
||||
vtable_by_addr: &BTreeMap<u32, &Vtable>,
|
||||
) -> Option<(u32, u32, u32)> {
|
||||
// BTreeMap range search for the largest key ≤ target.
|
||||
let (&vt_addr, vt) = vtable_by_addr.range(..=target).next_back()?;
|
||||
if target < vt_addr { return None; }
|
||||
let off = target - vt_addr;
|
||||
if !off.is_multiple_of(4) { return None; }
|
||||
let slot = off / 4;
|
||||
if slot >= vt.length { return None; }
|
||||
let method_pc = *vt.methods.get(slot as usize)?;
|
||||
Some((vt_addr, slot, method_pc))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::func::FuncInfo;
    use std::collections::BTreeMap;

    /// Load address of the synthetic image.
    const IMAGE_BASE: u32 = 0x82000000;
    /// Image-relative offset of the synthetic `.text` pattern.
    const TEXT_VA: u32 = 0x1000;
    /// Guest PC of the first pattern instruction.
    const PC_START: u32 = IMAGE_BASE + TEXT_VA;
    /// Address of the synthetic vtable.
    const VTABLE_ADDR: u32 = 0x82010000;

    /// Builds an anonymous, RTTI-less vtable at `addr` holding `methods`.
    fn mk_vtable(addr: u32, methods: Vec<u32>) -> Vtable {
        let length = methods.len() as u32;
        Vtable {
            address: addr,
            length,
            col_address: None,
            class_name: "ANON_test".into(),
            rtti_present: false,
            base_classes_json: None,
            methods,
        }
    }

    /// A `FuncAnalysis` containing one five-instruction function at `PC_START`.
    fn pattern_function() -> FuncAnalysis {
        let info = FuncInfo {
            start: PC_START,
            end: PC_START + 5 * 4,
            frame_size: 0,
            saved_gprs: 0,
            is_leaf: false,
            is_saverestore: false,
            pdata_validated: false,
            pdata_length: None,
            has_eh: false,
        };
        FuncAnalysis {
            functions: BTreeMap::from([(PC_START, info)]),
            save_gpr_base: None,
            restore_gpr_base: None,
            pdata_entries: Vec::new(),
        }
    }

    /// A zeroed image with the canonical pattern written at `TEXT_VA`.
    fn image_with_pattern(slot_off: i32) -> Vec<u8> {
        let mut pe = vec![0u8; 0x1100];
        encode_pattern(&mut pe, TEXT_VA as usize, VTABLE_ADDR, slot_off);
        pe
    }

    /// Encode the canonical pattern at byte offset `offset` of `buf`:
    ///   lis   r3, hi
    ///   addi  r3, r3, lo     ; r3 = vtable_addr
    ///   lwz   r4, slot_off(r3) ; r4 = vtable[slot]
    ///   mtctr r4
    ///   bcctrl
    fn encode_pattern(buf: &mut [u8], offset: usize, vtable_addr: u32, slot_off: i32) {
        let hi = u32::from((vtable_addr >> 16) as u16);
        // addi sign-extends its immediate; only the raw low half is encoded.
        let lo = u32::from((vtable_addr & 0xFFFF) as u16);
        let disp = u32::from(slot_off as u16);
        let words = [
            (15u32 << 26) | (3 << 21) | hi,
            (14u32 << 26) | (3 << 21) | (3 << 16) | lo,
            (32u32 << 26) | (4 << 21) | (3 << 16) | disp,
            // mtctr r4 = mtspr CTR(=9), r4: low SPR half (9) in bits 16..20,
            // high SPR half (0) in bits 11..15, Rc = 0.
            (31u32 << 26) | (4 << 21) | (9 << 16) | (467 << 1),
            // bcctrl 20, 0
            (19u32 << 26) | (20 << 21) | (528 << 1) | 1,
        ];
        for (chunk, word) in buf[offset..offset + words.len() * 4]
            .chunks_exact_mut(4)
            .zip(words)
        {
            chunk.copy_from_slice(&word.to_be_bytes());
        }
    }

    #[test]
    fn detects_canonical_lis_addi_lwz_mtctr_bcctrl() {
        // Displacement 8 selects slot 2.
        let pe = image_with_pattern(8);
        let func_analysis = pattern_function();
        let vtables = vec![mk_vtable(VTABLE_ADDR, vec![0xAA, 0xBB, 0xCC, 0xDD])];
        let labels: HashMap<u32, String> = HashMap::new();

        let edges = analyze(&pe, IMAGE_BASE, &func_analysis, &vtables, &labels);

        assert_eq!(edges.len(), 1);
        assert_eq!(edges[0].source, PC_START + 4 * 4); // bcctrl is the 5th instruction
        assert_eq!(edges[0].target, 0xCC); // slot 2
        assert_eq!(edges[0].via_vtable, VTABLE_ADDR);
        assert_eq!(edges[0].slot, 2);
    }

    #[test]
    fn out_of_range_slot_yields_no_edge() {
        // Displacement 48 selects slot 12, but the vtable only has 4 methods.
        let pe = image_with_pattern(48);
        let func_analysis = pattern_function();
        let vtables = vec![mk_vtable(VTABLE_ADDR, vec![0xAA, 0xBB, 0xCC, 0xDD])];
        let labels: HashMap<u32, String> = HashMap::new();

        let edges = analyze(&pe, IMAGE_BASE, &func_analysis, &vtables, &labels);
        assert_eq!(edges.len(), 0);
    }

    #[test]
    fn label_in_middle_kills_state() {
        let pe = image_with_pattern(0);
        let func_analysis = pattern_function();
        let vtables = vec![mk_vtable(VTABLE_ADDR, vec![0xAA, 0xBB])];

        // A label between the addi and the lwz must kill the Const tracking.
        let labels: HashMap<u32, String> =
            HashMap::from([(PC_START + 8, "loc_mid".to_string())]);

        let edges = analyze(&pe, IMAGE_BASE, &func_analysis, &vtables, &labels);
        assert_eq!(edges.len(), 0, "label in middle of pattern must kill register state");
    }
}
|
||||
@@ -2,9 +2,22 @@ pub mod ppc;
|
||||
pub mod func;
|
||||
pub mod xref;
|
||||
pub mod db;
|
||||
pub mod disasm;
|
||||
pub mod formatter;
|
||||
pub mod sinks;
|
||||
pub mod sql_views;
|
||||
pub mod demangle;
|
||||
pub mod vtables;
|
||||
pub mod lookup;
|
||||
pub mod indirect;
|
||||
pub mod ind_dispatch_typed;
|
||||
pub mod strings;
|
||||
pub mod funcptr_arrays;
|
||||
pub mod eh_scope;
|
||||
pub mod static_init;
|
||||
|
||||
mod ordinals;
|
||||
pub use ordinals::resolve_ordinal;
|
||||
pub use xref::{XrefKind, Xref, XrefMap, resolve_source_label};
|
||||
pub use db::{DbWriter, ExecTraceEntry, ImportCallEntry, BranchTraceEntry};
|
||||
pub use disasm::{RichDisasmItem, enrich_section};
|
||||
|
||||
222
crates/xenia-analysis/src/lookup.rs
Normal file
222
crates/xenia-analysis/src/lookup.rs
Normal file
@@ -0,0 +1,222 @@
|
||||
//! Symbolic-name resolution for runtime probes (M4).
|
||||
//!
|
||||
//! Lets `--pc-probe` / `--branch-probe` / `--ctor-probe` accept names like
|
||||
//! `xe::apu::AudioSystem::Setup` or `MyClass::*` instead of bare PC literals.
|
||||
//! Resolution joins the M3-produced `classes` × `methods` × `functions` tables
|
||||
//! and the M2 `demangled_names` table.
|
||||
//!
|
||||
//! Numeric tokens (`0x824D6640`, `2186674160`) are returned unchanged; symbolic
|
||||
//! tokens require a path to an existing `sylpheed.db` (passed by the caller).
|
||||
//!
|
||||
//! All DB access is read-only and happens before guest execution, so the
|
||||
//! lockstep digest is unaffected.
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use duckdb::params;
|
||||
|
||||
/// Parse one probe token into one or more PCs.
|
||||
///
|
||||
/// Recognized forms:
|
||||
/// - `0xADDR` / `ADDR` (decimal) → returns one PC unchanged.
|
||||
/// - `Class::method` → all `methods.function_address` matching that
|
||||
/// `class_name` + `method_name` pair.
|
||||
/// - `Class::*` → all `methods.function_address` for that class.
|
||||
/// - `func::Name` (free function) → falls back to `functions.name` lookup.
|
||||
///
|
||||
/// `db_path` is consulted ONLY if the token is non-numeric. When `db_path` is
|
||||
/// `None` and the token is symbolic, returns an error suggesting the user
|
||||
/// either pass `--db` or use a numeric address.
|
||||
pub fn resolve_probe_token(db_path: Option<&Path>, token: &str) -> Result<Vec<u32>> {
|
||||
let token = token.trim();
|
||||
if token.is_empty() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
if let Some(pc) = parse_numeric(token) {
|
||||
return Ok(vec![pc]);
|
||||
}
|
||||
|
||||
let db = db_path.ok_or_else(|| {
|
||||
anyhow!(
|
||||
"symbolic probe token {token:?} requires a sylpheed.db; \
|
||||
pass --probe-db=PATH or use a numeric 0x… address",
|
||||
)
|
||||
})?;
|
||||
|
||||
if !db.exists() {
|
||||
return Err(anyhow!("--probe-db not found: {}", db.display()));
|
||||
}
|
||||
|
||||
let conn = duckdb::Connection::open_with_flags(
|
||||
db,
|
||||
duckdb::Config::default().access_mode(duckdb::AccessMode::ReadOnly)?,
|
||||
)?;
|
||||
|
||||
// Class::method or Class::*
|
||||
if let Some((class, method)) = token.split_once("::") {
|
||||
if method == "*" {
|
||||
return resolve_class_star(&conn, class);
|
||||
}
|
||||
// Try Class::method first, then fall back to functions.name lookup.
|
||||
let pcs = resolve_class_method(&conn, class, method)?;
|
||||
if !pcs.is_empty() {
|
||||
return Ok(pcs);
|
||||
}
|
||||
}
|
||||
|
||||
// Last-resort: functions.name match (e.g. for `entry_point` or
|
||||
// `__savegprlr_22`). Substring-free; user gets a clear error if missing.
|
||||
resolve_function_name(&conn, token)
|
||||
}
|
||||
|
||||
/// Interpret `token` as a 32-bit address literal.
///
/// A `0x` / `0X` prefix selects hexadecimal; without the prefix the token is
/// tried as decimal. Returns `None` when the digits do not form a valid
/// `u32` (bad characters, empty digit string, or overflow).
fn parse_numeric(token: &str) -> Option<u32> {
    let hex_digits = token
        .strip_prefix("0x")
        .or_else(|| token.strip_prefix("0X"));
    match hex_digits {
        Some(digits) => u32::from_str_radix(digits, 16).ok(),
        None => token.parse::<u32>().ok(),
    }
}
|
||||
|
||||
fn resolve_class_method(conn: &duckdb::Connection, class: &str, method: &str) -> Result<Vec<u32>> {
|
||||
// Two-step lookup so we can give better errors:
|
||||
// 1. find matching methods rows joined to classes;
|
||||
// 2. surface the function_address column.
|
||||
let mut stmt = conn.prepare(
|
||||
"SELECT DISTINCT m.function_address FROM methods m
|
||||
JOIN classes c ON c.vtable_address = m.vtable_address
|
||||
JOIN demangled_names dn ON dn.address = m.function_address
|
||||
WHERE c.name = ? AND dn.method_name = ?",
|
||||
)?;
|
||||
let pcs: Vec<u32> = stmt
|
||||
.query_map(params![class, method], |r| r.get::<_, i64>(0).map(|x| x as u32))?
|
||||
.filter_map(|r| r.ok())
|
||||
.collect();
|
||||
Ok(pcs)
|
||||
}
|
||||
|
||||
fn resolve_class_star(conn: &duckdb::Connection, class: &str) -> Result<Vec<u32>> {
|
||||
let mut stmt = conn.prepare(
|
||||
"SELECT DISTINCT m.function_address FROM methods m
|
||||
JOIN classes c ON c.vtable_address = m.vtable_address
|
||||
WHERE c.name = ?",
|
||||
)?;
|
||||
let pcs: Vec<u32> = stmt
|
||||
.query_map(params![class], |r| r.get::<_, i64>(0).map(|x| x as u32))?
|
||||
.filter_map(|r| r.ok())
|
||||
.collect();
|
||||
if pcs.is_empty() {
|
||||
return Err(anyhow!(
|
||||
"no class named {class:?} found in classes table — has --dis populated this DB?",
|
||||
));
|
||||
}
|
||||
Ok(pcs)
|
||||
}
|
||||
|
||||
fn resolve_function_name(conn: &duckdb::Connection, name: &str) -> Result<Vec<u32>> {
|
||||
let mut stmt = conn.prepare("SELECT address FROM functions WHERE name = ?")?;
|
||||
let pcs: Vec<u32> = stmt
|
||||
.query_map(params![name], |r| r.get::<_, i64>(0).map(|x| x as u32))?
|
||||
.filter_map(|r| r.ok())
|
||||
.collect();
|
||||
if pcs.is_empty() {
|
||||
return Err(anyhow!(
|
||||
"probe token {name:?} did not match any classes::methods or functions row",
|
||||
));
|
||||
}
|
||||
Ok(pcs)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use duckdb::Connection;

    /// Minimal schema plus seed rows: one class `Foo` (vtable 11000) with two
    /// methods `bar` (12000) and `baz` (12100), also present in `functions`
    /// as `sub_2EE0` / `sub_2F44`.
    const SEED_SQL: &str = "
        CREATE TABLE functions (
            address BIGINT PRIMARY KEY,
            name VARCHAR
        );
        CREATE TABLE classes (
            name VARCHAR PRIMARY KEY,
            vtable_address BIGINT,
            rtti_present BOOLEAN,
            base_classes_json VARCHAR
        );
        CREATE TABLE methods (
            vtable_address BIGINT,
            slot BIGINT,
            function_address BIGINT,
            mangled_name VARCHAR,
            demangled_name VARCHAR,
            PRIMARY KEY (vtable_address, slot)
        );
        CREATE TABLE demangled_names (
            address BIGINT,
            mangled VARCHAR,
            raw_demangled VARCHAR,
            namespace_path VARCHAR,
            class_name VARCHAR,
            method_name VARCHAR,
            params_signature VARCHAR
        );
        INSERT INTO classes VALUES ('Foo', 11000, true, NULL);
        INSERT INTO functions VALUES (12000, 'sub_2EE0'), (12100, 'sub_2F44');
        INSERT INTO methods VALUES (11000, 0, 12000, NULL, NULL),
                                   (11000, 1, 12100, NULL, NULL);
        INSERT INTO demangled_names (address, mangled, raw_demangled, class_name, method_name)
        VALUES (12000, '?bar@Foo@@QEAAXXZ', 'void Foo::bar(void)', 'Foo', 'bar'),
               (12100, '?baz@Foo@@QEAAXXZ', 'void Foo::baz(void)', 'Foo', 'baz');
    ";

    /// Create the synthetic database at `path`.
    fn build_synthetic_db(path: &Path) {
        let conn = Connection::open(path).expect("open");
        conn.execute_batch(SEED_SQL).expect("seed");
    }

    /// Build a fresh synthetic DB named `file_name` under the temp dir,
    /// resolve `token` against it, then delete the file again.
    fn resolve_in_fresh_db(file_name: &str, token: &str) -> Vec<u32> {
        let db_file = std::env::temp_dir().join(file_name);
        let _ = std::fs::remove_file(&db_file);
        build_synthetic_db(&db_file);
        let pcs = resolve_probe_token(Some(db_file.as_path()), token).unwrap();
        let _ = std::fs::remove_file(&db_file);
        pcs
    }

    #[test]
    fn numeric_passthrough_no_db_needed() {
        let hex = resolve_probe_token(None, "0x824D6640").unwrap();
        assert_eq!(hex, vec![0x824D6640]);
        let decimal = resolve_probe_token(None, "2186095088").unwrap();
        assert_eq!(decimal, vec![0x824D29F0]);
    }

    #[test]
    fn symbolic_token_without_db_errors() {
        let err = resolve_probe_token(None, "Foo::bar").unwrap_err();
        assert!(format!("{err}").contains("requires a sylpheed.db"));
    }

    #[test]
    fn class_method_resolves() {
        let pcs = resolve_in_fresh_db("xenia_lookup_test.duckdb", "Foo::bar");
        assert_eq!(pcs, vec![12000]);
    }

    #[test]
    fn class_star_returns_all_methods() {
        let mut pcs = resolve_in_fresh_db("xenia_lookup_star.duckdb", "Foo::*");
        pcs.sort();
        assert_eq!(pcs, vec![12000, 12100]);
    }

    #[test]
    fn function_name_fallback() {
        let pcs = resolve_in_fresh_db("xenia_lookup_fn.duckdb", "sub_2EE0");
        assert_eq!(pcs, vec![12000]);
    }
}
|
||||
File diff suppressed because it is too large
Load Diff
37
crates/xenia-analysis/src/sinks/duckdb.rs
Normal file
37
crates/xenia-analysis/src/sinks/duckdb.rs
Normal file
@@ -0,0 +1,37 @@
|
||||
//! DuckDB sink — appends rich disasm items to the `instructions` table.
|
||||
//!
|
||||
//! Column layout matches [`crate::db`]: address, raw, mnemonic, operands,
|
||||
//! disasm, ext_mnemonic, ext_operands, ext_disasm, branch target, section, function, label.
|
||||
|
||||
use duckdb::{Appender, params};
|
||||
|
||||
use crate::disasm::RichDisasmItem;
|
||||
|
||||
/// Append every item to the appender. Returns the number of rows written.
|
||||
/// Does NOT flush — the caller decides when to flush, since multiple
|
||||
/// section iterators typically share one appender.
|
||||
pub fn append_instructions<'a>(
|
||||
appender: &mut Appender<'_>,
|
||||
items: impl IntoIterator<Item = RichDisasmItem<'a>>,
|
||||
) -> duckdb::Result<u64> {
|
||||
let mut count: u64 = 0;
|
||||
for ri in items {
|
||||
let t = &ri.item.text;
|
||||
appender.append_row(params![
|
||||
ri.item.addr as i64,
|
||||
ri.item.raw as i64,
|
||||
t.mnemonic.as_str(),
|
||||
t.operands.as_str(),
|
||||
t.disasm.as_str(),
|
||||
t.ext_mnemonic.as_deref(),
|
||||
t.ext_operands.as_deref(),
|
||||
t.ext_disasm.as_deref(),
|
||||
t.branch_target.map(|t| t as i64),
|
||||
ri.section,
|
||||
ri.function.map(|f| f as i64),
|
||||
ri.label,
|
||||
])?;
|
||||
count += 1;
|
||||
}
|
||||
Ok(count)
|
||||
}
|
||||
63
crates/xenia-analysis/src/sinks/json.rs
Normal file
63
crates/xenia-analysis/src/sinks/json.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
//! JSON Lines sink — one structured row per line, constant memory.
|
||||
//!
|
||||
//! Suited for piping into `jq`, importing into pandas / DuckDB's
|
||||
//! `read_json_auto`, or feeding downstream tooling that expects a
|
||||
//! line-delimited stream rather than a single megaobject.
|
||||
|
||||
use std::io::{self, Write};
|
||||
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::disasm::RichDisasmItem;
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct JsonRow<'a> {
|
||||
addr: u32,
|
||||
raw: u32,
|
||||
mnemonic: &'a str,
|
||||
operands: &'a str,
|
||||
disasm: &'a str,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
ext_mnemonic: Option<&'a str>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
ext_operands: Option<&'a str>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
ext_disasm: Option<&'a str>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
branch_target: Option<u32>,
|
||||
section: &'a str,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
function: Option<u32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
label: Option<&'a str>,
|
||||
}
|
||||
|
||||
/// Write each item as a single JSON object on its own line. Returns the
|
||||
/// number of rows written.
|
||||
pub fn write_jsonl<'a, W: Write>(
|
||||
out: &mut W,
|
||||
items: impl IntoIterator<Item = RichDisasmItem<'a>>,
|
||||
) -> io::Result<u64> {
|
||||
let mut count: u64 = 0;
|
||||
for ri in items {
|
||||
let t = &ri.item.text;
|
||||
let row = JsonRow {
|
||||
addr: ri.item.addr,
|
||||
raw: ri.item.raw,
|
||||
mnemonic: &t.mnemonic,
|
||||
operands: &t.operands,
|
||||
disasm: &t.disasm,
|
||||
ext_mnemonic: t.ext_mnemonic.as_deref(),
|
||||
ext_operands: t.ext_operands.as_deref(),
|
||||
ext_disasm: t.ext_disasm.as_deref(),
|
||||
branch_target: t.branch_target,
|
||||
section: ri.section,
|
||||
function: ri.function,
|
||||
label: ri.label,
|
||||
};
|
||||
serde_json::to_writer(&mut *out, &row)?;
|
||||
out.write_all(b"\n")?;
|
||||
count += 1;
|
||||
}
|
||||
Ok(count)
|
||||
}
|
||||
8
crates/xenia-analysis/src/sinks/mod.rs
Normal file
8
crates/xenia-analysis/src/sinks/mod.rs
Normal file
@@ -0,0 +1,8 @@
|
||||
//! Output sinks for [`crate::disasm::RichDisasmItem`] streams.
|
||||
//!
|
||||
//! Each sink consumes the same iterator shape and writes to a different
|
||||
//! medium: human-readable .asm text, JSON Lines, or DuckDB rows.
|
||||
|
||||
pub mod duckdb;
|
||||
pub mod json;
|
||||
pub mod text;
|
||||
58
crates/xenia-analysis/src/sinks/text.rs
Normal file
58
crates/xenia-analysis/src/sinks/text.rs
Normal file
@@ -0,0 +1,58 @@
|
||||
//! Text sink — renders one .asm instruction line with optional
|
||||
//! branch-target / data-ref annotations.
|
||||
//!
|
||||
//! The full `write_asm` orchestration (section headers, function prologue
|
||||
//! info, xref comment blocks, hex-dump of data sections) stays in
|
||||
//! [`crate::formatter`]; this sink only owns the per-instruction line.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::io::{self, Write};
|
||||
|
||||
use xenia_xex::pe::PeSection;
|
||||
|
||||
use crate::disasm::RichDisasmItem;
|
||||
use crate::xref::{XrefKind, section_for_addr};
|
||||
|
||||
/// Render one instruction line:
|
||||
/// ` 82000000: 60000000 nop`
|
||||
/// ` 82000004: 4800FFFC bl 0x82000000 ; -> entry_point`
|
||||
/// ` 82000010: 812A0000 lwz r9, 0(r10) ; [R] 0x828A0000 (.rdata) = dat_…`
|
||||
pub fn write_instr_line<W: Write + ?Sized>(
|
||||
out: &mut W,
|
||||
item: &RichDisasmItem<'_>,
|
||||
labels: &HashMap<u32, String>,
|
||||
sections: &[PeSection],
|
||||
image_base: u32,
|
||||
data_annotation: Option<(u32, XrefKind)>,
|
||||
) -> io::Result<()> {
|
||||
let disasm_text = item.item.text.display();
|
||||
|
||||
// Branch-target → label annotation. Uses the structured `branch_target`
|
||||
// field (cleaner than the legacy "find 0x in disasm string" regex).
|
||||
let mut annotated = match item.item.text.branch_target {
|
||||
Some(target) => match labels.get(&target) {
|
||||
Some(lbl) => format!("{disasm_text:<40} ; -> {lbl}"),
|
||||
None => disasm_text.to_string(),
|
||||
},
|
||||
None => disasm_text.to_string(),
|
||||
};
|
||||
|
||||
if let Some((data_addr, kind)) = data_annotation {
|
||||
let tag = match kind {
|
||||
XrefKind::DataRead => "[R]",
|
||||
XrefKind::DataWrite => "[W]",
|
||||
_ => "[&]",
|
||||
};
|
||||
let sec = section_for_addr(data_addr, sections, image_base).unwrap_or("?");
|
||||
let data_lbl = labels.get(&data_addr)
|
||||
.map(|s| format!(" = {s}"))
|
||||
.unwrap_or_default();
|
||||
if !annotated.contains("; ->") {
|
||||
annotated = format!("{annotated:<40} ; {tag} 0x{data_addr:08X} ({sec}){data_lbl}");
|
||||
} else {
|
||||
annotated = format!("{annotated} {tag} 0x{data_addr:08X} ({sec}){data_lbl}");
|
||||
}
|
||||
}
|
||||
|
||||
writeln!(out, " {:08X}: {:08X} {}", item.item.addr, item.item.raw, annotated)
|
||||
}
|
||||
165
crates/xenia-analysis/src/sql_views.rs
Normal file
165
crates/xenia-analysis/src/sql_views.rs
Normal file
@@ -0,0 +1,165 @@
|
||||
//! Additive SQL views over the Phase-3 ingest tables.
|
||||
//!
|
||||
//! These views are created when `--analyze=sql` or `--analyze=both` is set.
|
||||
//! They are *not* a replacement for the Rust passes ([`crate::xref`],
|
||||
//! [`crate::func`]) — those still own data-ref resolution and prologue
|
||||
//! pattern matching. The views cover the cleanly-relational parts:
|
||||
//!
|
||||
//! - branch xrefs (filter on non-NULL `instructions.target_hex`)
|
||||
//! - call graph + reachability (recursive CTE over `xrefs`)
|
||||
//! - convenience joins (function-first-instruction, imports-called)
|
||||
//!
|
||||
//! All views are read-only and stable across re-creation: dropping and
|
||||
//! recreating the database via [`crate::db::DbWriter::open_fresh`] re-runs
|
||||
//! these definitions.
|
||||
//!
|
||||
//! ## Cross-check semantics
|
||||
//!
|
||||
//! `v_branch_xrefs` is intended to produce *exactly* the same `(source,
|
||||
//! target, kind)` tuples as the Rust `xref.rs` first pass — given the same
|
||||
//! input image. [`crate::db::DbWriter::cross_check_branch_xrefs`] queries
|
||||
//! the symmetric difference and returns the row counts; both should be
|
||||
//! zero. A non-zero count means the formatter's `mnemonic` column or the
|
||||
//! kind-classification CASE drifted out of agreement with `xref.rs`, and
|
||||
//! is worth a one-line warning at log time.
|
||||
|
||||
/// `(view_name, CREATE VIEW … SQL)` pairs in the order they must run.
|
||||
/// Later views may depend on earlier ones (e.g. `v_call_graph` reads
|
||||
/// `xrefs`, which is the Rust-pass table; `v_branch_xrefs` is independent).
|
||||
pub const ALL_VIEWS: &[(&str, &str)] = &[
|
||||
("v_branch_xrefs", V_BRANCH_XREFS),
|
||||
("v_call_graph", V_CALL_GRAPH),
|
||||
("v_reachability_from_entry", V_REACHABILITY_FROM_ENTRY),
|
||||
("v_indirect_reachability_from_entry", V_INDIRECT_REACHABILITY_FROM_ENTRY),
|
||||
("v_function_first_instruction", V_FUNCTION_FIRST_INSTRUCTION),
|
||||
("v_imports_called", V_IMPORTS_CALLED),
|
||||
];
|
||||
|
||||
/// Branch cross-references computed from `instructions` alone: one row per
/// instruction whose `target_hex` is non-NULL.
///
/// The `kind` column follows the classification in
/// [`crate::xref::collect_branch_target`] and uses the short tags from
/// [`crate::xref::XrefKind::tag`], which are the values stored in
/// `xrefs.kind`:
/// - `bl` / `bla` → `"call"`
/// - `b` / `ba` → `"j"`
/// - `bc` / `bcl` / `bca` / `bcla`, and any other mnemonic → `"br"`
///
/// Indirect branches (`bclr` / `bcctr`) leave `target_hex` NULL, so they
/// never show up in this view.
const V_BRANCH_XREFS: &str = "
CREATE OR REPLACE VIEW v_branch_xrefs AS
SELECT
    address AS source,
    target_hex AS target,
    CASE
        WHEN mnemonic IN ('bl', 'bla') THEN 'call'
        WHEN mnemonic IN ('b', 'ba') THEN 'j'
        WHEN mnemonic IN ('bc', 'bcl', 'bca', 'bcla') THEN 'br'
        ELSE 'br'
    END AS kind,
    mnemonic AS instruction,
    function AS source_func
FROM instructions
WHERE target_hex IS NOT NULL;
";
|
||||
|
||||
/// Call edges (`xrefs.kind = 'call'`) decorated with function names.
///
/// `caller_addr` is the xref's source address and `caller_name` is the name
/// of the function at `xrefs.source_func`; `callee_name` is the name of the
/// function starting at the target. Both names come from LEFT JOINs and are
/// NULL when no `functions` row matches.
///
/// The view reads `xrefs` (the Rust-pass table), the canonical source for
/// all edge kinds: data-ref edges need register tracking that SQL cannot
/// reproduce cleanly. For pure branch edges `v_branch_xrefs` derives
/// equivalent rows straight from `instructions`.
const V_CALL_GRAPH: &str = "
CREATE OR REPLACE VIEW v_call_graph AS
SELECT
    x.source AS caller_addr,
    cf.name AS caller_name,
    x.target AS callee_addr,
    tf.name AS callee_name,
    x.kind AS edge_kind
FROM xrefs x
LEFT JOIN functions cf ON cf.address = x.source_func
LEFT JOIN functions tf ON tf.address = x.target
WHERE x.kind = 'call';
";
|
||||
|
||||
/// Functions transitively reachable from the entry point via `call`, `j`
/// and `br` edges.
///
/// The seed is the function of the instruction labelled `entry_point`. Each
/// recursive step adds the functions enclosing the targets of xrefs whose
/// source instruction lies in an already-reached function. `UNION` (rather
/// than `UNION ALL`) de-duplicates, which is what lets the recursion
/// terminate on call-graph cycles (recursive or mutually recursive
/// functions).
///
/// Typical uses: scoping analysis to the live subset, or finding dead code
/// with
/// `SELECT address FROM functions
///  WHERE address NOT IN (SELECT addr FROM v_reachability_from_entry)`.
const V_REACHABILITY_FROM_ENTRY: &str = "
CREATE OR REPLACE VIEW v_reachability_from_entry AS
WITH RECURSIVE reach(fn) AS (
    SELECT i.function FROM instructions i
    JOIN labels l ON l.address = i.address
    WHERE l.name = 'entry_point' AND i.function IS NOT NULL
    UNION
    SELECT tgt.function FROM xrefs x
    JOIN instructions src ON src.address = x.source
    JOIN instructions tgt ON tgt.address = x.target
    JOIN reach r ON src.function = r.fn
    WHERE x.kind IN ('call', 'j', 'br')
      AND tgt.function IS NOT NULL
)
SELECT fn AS addr FROM reach;
";
|
||||
|
||||
/// Same closure as `v_reachability_from_entry`, additionally following
/// `kind='ind_call'` edges (produced by M5).
///
/// The result is a superset of `v_reachability_from_entry`: the seed and
/// recursion are the same except for the extra edge kind, so it also
/// contains functions reached only through a vtable `bcctrl` whose
/// vtable+slot the M5 dataflow resolved. Spot-check a handful of the newly
/// reachable PCs in canary before relying on it broadly; alias-dependent
/// indirect calls (vtable loaded from a `this` field) are intentionally not
/// covered by that analysis.
const V_INDIRECT_REACHABILITY_FROM_ENTRY: &str = "
CREATE OR REPLACE VIEW v_indirect_reachability_from_entry AS
WITH RECURSIVE reach(fn) AS (
    SELECT i.function FROM instructions i
    JOIN labels l ON l.address = i.address
    WHERE l.name = 'entry_point' AND i.function IS NOT NULL
    UNION
    SELECT tgt.function FROM xrefs x
    JOIN instructions src ON src.address = x.source
    JOIN instructions tgt ON tgt.address = x.target
    JOIN reach r ON src.function = r.fn
    WHERE x.kind IN ('call', 'ind_call', 'j', 'br')
      AND tgt.function IS NOT NULL
)
SELECT fn AS addr FROM reach;
";
|
||||
|
||||
/// Pairs every function with the instruction located at its start address
/// (raw word, disasm and extended disasm). Handy for eyeballing prologue
/// patterns without computing offsets by hand. Functions with no
/// instruction row at their start address are omitted (inner join).
const V_FUNCTION_FIRST_INSTRUCTION: &str = "
CREATE OR REPLACE VIEW v_function_first_instruction AS
SELECT
    f.address AS function_addr,
    f.name AS function_name,
    i.raw AS first_raw,
    i.disasm AS first_disasm,
    i.ext_disasm AS first_ext_disasm
FROM functions f
JOIN instructions i ON i.address = f.address;
";
|
||||
|
||||
/// Lists, per calling function, the imports it calls: `call` xrefs whose
/// target carries a label of kind `import`. The caller's name comes from a
/// LEFT JOIN on `functions` and is NULL when `source_func` has no row there.
const V_IMPORTS_CALLED: &str = "
CREATE OR REPLACE VIEW v_imports_called AS
SELECT
    x.source_func AS function_addr,
    f.name AS function_name,
    x.target AS import_addr,
    l.name AS import_name
FROM xrefs x
JOIN labels l ON l.address = x.target
LEFT JOIN functions f ON f.address = x.source_func
WHERE x.kind = 'call'
  AND l.kind = 'import';
";
|
||||
399
crates/xenia-analysis/src/static_init.rs
Normal file
399
crates/xenia-analysis/src/static_init.rs
Normal file
@@ -0,0 +1,399 @@
|
||||
//! M11.5 — static-initialiser driver detection.
|
||||
//!
|
||||
//! MSVC's CRT static-init driver (`_initterm` / `_initterm_e` style)
|
||||
//! is a tight loop that walks a function-pointer array between two
|
||||
//! addresses, calling each non-null entry:
|
||||
//!
|
||||
//! ```text
|
||||
//! loop_top:
|
||||
//! cmpw[l] rA, rB ; compare cursor vs end
|
||||
//! beq done
|
||||
//! lwz rN, 0(rA) ; load fn ptr
|
||||
//! cmpwi rN, 0 ; null-skip (optional)
|
||||
//! beq skip
|
||||
//! mtctr rN
|
||||
//! bcctrl
|
||||
//! skip:
|
||||
//! addi rA, rA, 4
|
||||
//! b loop_top
|
||||
//! done:
|
||||
//! ```
|
||||
//!
|
||||
//! Two static addresses (`rA` and `rB` at loop start) bracket the
|
||||
//! function-pointer array. Detection strategy: scan every function for
|
||||
//! the canonical pattern; when found, extract the array bounds and
|
||||
//! emit one row in `function_pointer_arrays` with `kind='static_init'`.
|
||||
//!
|
||||
//! ### What this layer does
|
||||
//!
|
||||
//! - Walks each function looking for an `lwz; mtctr; bcctrl` sequence
|
||||
//! inside a loop bounded by a comparison against another constant.
|
||||
//! - When the loop's cursor register is observed to be incremented by
|
||||
//! exactly 4 per iteration, classifies it as a static-init driver
|
||||
//! and records the (start, end) array bounds.
|
||||
//!
|
||||
//! ### What this layer does NOT do
|
||||
//!
|
||||
//! - No support for back-to-back drivers sharing a common loop trampoline.
|
||||
//! - No detection of the M11 prologue-style heuristic; M11.5 is
|
||||
//! structure-grounded and replaces the prior heuristic where it fires.
|
||||
//! - Does not distinguish `_initterm` from `_initterm_e` (the `_e` variant
|
||||
//! returns a status); both are detected as long as the loop shape
|
||||
//! matches.
|
||||
//!
|
||||
//! Reference: Microsoft CRT `crt0.c::_initterm` source pattern.
|
||||
|
||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
|
||||
use crate::func::FuncAnalysis;
|
||||
use crate::funcptr_arrays::FuncPtrArray;
|
||||
use xenia_xex::pe::PeSection;
|
||||
|
||||
/// One detected static-initialiser driver loop and the array it walks.
#[derive(Clone, Copy, Debug)]
pub struct StaticInitDriver {
    /// Virtual address of the function that contains the loop.
    pub driver_function: u32,
    /// Virtual address of the first array slot.
    pub array_start: u32,
    /// Virtual address one past the last array slot.
    pub array_end: u32,
    /// Number of slots between `array_start` and `array_end`.
    pub length: u32,
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct StaticInitResult {
|
||||
pub drivers: Vec<StaticInitDriver>,
|
||||
/// Newly-detected static-init arrays, ready to be merged into the
|
||||
/// `function_pointer_arrays` table with `kind='static_init'`.
|
||||
pub arrays: Vec<FuncPtrArray>,
|
||||
}
|
||||
|
||||
// Primary opcodes (top six bits of the instruction word) that the scanner
// matches on.
/// `addi`
const OP_ADDI: u32 = 14;
/// `addis`
const OP_ADDIS: u32 = 15;
/// Opcode 19; the scanner looks for extended opcode 528 (`bcctr[l]`) here.
const OP_BCCTR: u32 = 19;
/// `lwz`
const OP_LWZ: u32 = 32;
/// Opcode 31; further decoded through the extended-opcode field.
const OP_X_FORM: u32 = 31;
|
||||
|
||||
/// What the scanner knows about a general-purpose register. An unknown
/// register is represented by `None` in the surrounding `Option`.
#[derive(Clone, Copy, Debug)]
enum RegVal {
    /// The register holds this statically known 32-bit value.
    Const(u32),
}
|
||||
|
||||
#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base)))]
|
||||
pub fn analyze(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
sections: &[PeSection],
|
||||
func_analysis: &FuncAnalysis,
|
||||
function_starts: &BTreeSet<u32>,
|
||||
labels: &HashMap<u32, String>,
|
||||
) -> StaticInitResult {
|
||||
let started = std::time::Instant::now();
|
||||
let block_boundaries: HashSet<u32> = labels.keys().copied().collect();
|
||||
|
||||
let mut drivers: Vec<StaticInitDriver> = Vec::new();
|
||||
|
||||
for (&fn_start, fi) in &func_analysis.functions {
|
||||
if fi.is_saverestore { continue; }
|
||||
if let Some(d) = scan_function_for_driver(
|
||||
pe, image_base, fn_start, fi.end, &block_boundaries,
|
||||
) {
|
||||
drivers.push(d);
|
||||
}
|
||||
}
|
||||
|
||||
// Build arrays from the discovered drivers + section data.
|
||||
let mut arrays: Vec<FuncPtrArray> = Vec::new();
|
||||
for d in &drivers {
|
||||
if let Some(entries) = read_array(pe, image_base, sections, d.array_start, d.array_end, function_starts) {
|
||||
arrays.push(FuncPtrArray {
|
||||
address: d.array_start,
|
||||
length: entries.len() as u32,
|
||||
kind: "static_init",
|
||||
entries,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let elapsed_ms = started.elapsed().as_millis() as f64;
|
||||
metrics::histogram!("analysis.phase_ms", "phase" => "static_init").record(elapsed_ms);
|
||||
tracing::info!(
|
||||
drivers = drivers.len(),
|
||||
arrays = arrays.len(),
|
||||
elapsed_ms,
|
||||
"M11.5 static-init driver scan complete",
|
||||
);
|
||||
|
||||
StaticInitResult { drivers, arrays }
|
||||
}
|
||||
|
||||
/// Read the function-pointer array between [start, end) from .rdata/.data.
|
||||
/// NULL entries are skipped (CRT _initterm explicitly tolerates them).
|
||||
/// Non-function-start entries cause us to bail (the driver bounds were
|
||||
/// likely misidentified).
|
||||
fn read_array(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
sections: &[PeSection],
|
||||
start: u32,
|
||||
end: u32,
|
||||
function_starts: &BTreeSet<u32>,
|
||||
) -> Option<Vec<u32>> {
|
||||
if end <= start || (end - start) > 4096 { return None; }
|
||||
let _section = sections.iter().find(|s| {
|
||||
let lo = image_base + s.virtual_address;
|
||||
let hi = lo + s.virtual_size;
|
||||
start >= lo && end <= hi && (s.name == ".rdata" || s.name == ".data")
|
||||
})?;
|
||||
let mut entries = Vec::new();
|
||||
let mut p = start;
|
||||
while p < end {
|
||||
let off = p.wrapping_sub(image_base) as usize;
|
||||
if off + 4 > pe.len() { return None; }
|
||||
let v = u32::from_be_bytes([pe[off], pe[off + 1], pe[off + 2], pe[off + 3]]);
|
||||
if v != 0 {
|
||||
if !function_starts.contains(&v) { return None; }
|
||||
entries.push(v);
|
||||
}
|
||||
p = p.wrapping_add(4);
|
||||
}
|
||||
if entries.is_empty() { return None; }
|
||||
Some(entries)
|
||||
}
|
||||
|
||||
/// Walk one function looking for the canonical static-init driver shape.
|
||||
/// Returns Some when the loop's cursor register starts at a known constant
|
||||
/// `rA`, terminates at another known constant `rB` via a compare, and
|
||||
/// increments by 4 per iteration with an `lwz; mtctr; bcctrl` body.
|
||||
fn scan_function_for_driver(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
fn_start: u32,
|
||||
fn_end: u32,
|
||||
block_boundaries: &HashSet<u32>,
|
||||
) -> Option<StaticInitDriver> {
|
||||
let mut reg: [Option<RegVal>; 32] = [None; 32];
|
||||
// Pattern features observed during the walk.
|
||||
let mut cursor_reg: Option<usize> = None;
|
||||
let mut cursor_init: Option<u32> = None;
|
||||
let mut end_reg: Option<usize> = None;
|
||||
let mut end_init: Option<u32> = None;
|
||||
let mut saw_lwz_through_cursor = false;
|
||||
let mut saw_mtctr = false;
|
||||
let mut saw_bcctrl = false;
|
||||
let mut saw_addi_4 = false;
|
||||
|
||||
let mut pc = fn_start;
|
||||
while pc < fn_end {
|
||||
if pc != fn_start && block_boundaries.contains(&pc) {
|
||||
// Heuristic: when we cross a basic-block boundary that
|
||||
// is not the loop-top, accumulated state remains valid for
|
||||
// pattern-matching purposes — but we drop register Const
|
||||
// tracking to be safe.
|
||||
reg = [None; 32];
|
||||
}
|
||||
let off = pc.wrapping_sub(image_base) as usize;
|
||||
if off + 4 > pe.len() { break; }
|
||||
let instr = u32::from_be_bytes([pe[off], pe[off + 1], pe[off + 2], pe[off + 3]]);
|
||||
let op = instr >> 26;
|
||||
let rd = ((instr >> 21) & 0x1F) as usize;
|
||||
let ra = ((instr >> 16) & 0x1F) as usize;
|
||||
let simm = ((instr & 0xFFFF) as i16) as i32;
|
||||
let uimm = instr & 0xFFFF;
|
||||
|
||||
match op {
|
||||
OP_ADDIS if ra == 0 => reg[rd] = Some(RegVal::Const(uimm << 16)),
|
||||
OP_ADDIS => {
|
||||
if let Some(RegVal::Const(b)) = reg[ra] {
|
||||
reg[rd] = Some(RegVal::Const(b.wrapping_add(uimm << 16)));
|
||||
} else { reg[rd] = None; }
|
||||
}
|
||||
OP_ADDI if ra != 0 => {
|
||||
let prev = reg[ra];
|
||||
if let Some(RegVal::Const(b)) = prev {
|
||||
let v = b.wrapping_add(simm as u32);
|
||||
reg[rd] = Some(RegVal::Const(v));
|
||||
// Was this an `addi r, r, 4`? Mark cursor-increment.
|
||||
if rd == ra && simm == 4 {
|
||||
if Some(rd) == cursor_reg {
|
||||
saw_addi_4 = true;
|
||||
}
|
||||
} else if cursor_reg.is_none() {
|
||||
// First time we see a known-constant register that
|
||||
// *could* be the cursor — defer the choice until we
|
||||
// see a load through it.
|
||||
cursor_init = Some(v);
|
||||
cursor_reg = Some(rd);
|
||||
} else if end_reg.is_none() && Some(rd) != cursor_reg {
|
||||
end_init = Some(v);
|
||||
end_reg = Some(rd);
|
||||
}
|
||||
} else { reg[rd] = None; }
|
||||
}
|
||||
OP_LWZ => {
|
||||
if ra != 0 && Some(ra) == cursor_reg {
|
||||
saw_lwz_through_cursor = true;
|
||||
}
|
||||
reg[rd] = None;
|
||||
}
|
||||
OP_X_FORM => {
|
||||
let xo = (instr >> 1) & 0x3FF;
|
||||
if xo == 467 {
|
||||
let spr = (((instr >> 11) & 0x1F) << 5) | ((instr >> 16) & 0x1F);
|
||||
if spr == 9 && saw_lwz_through_cursor { saw_mtctr = true; }
|
||||
}
|
||||
if xo != 444 && xo != 467 { reg[rd] = None; }
|
||||
}
|
||||
OP_BCCTR => {
|
||||
let xo = (instr >> 1) & 0x3FF;
|
||||
let lk = (instr & 1) != 0;
|
||||
if xo == 528 && lk && saw_mtctr {
|
||||
saw_bcctrl = true;
|
||||
}
|
||||
}
|
||||
18 => {
|
||||
if (instr & 1) != 0 {
|
||||
for r in 0..=12 { reg[r] = None; }
|
||||
}
|
||||
}
|
||||
16 => {
|
||||
if (instr & 1) != 0 {
|
||||
for r in 0..=12 { reg[r] = None; }
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
pc = pc.wrapping_add(4);
|
||||
}
|
||||
|
||||
// Validate that all four pattern features fired.
|
||||
if !(saw_lwz_through_cursor && saw_mtctr && saw_bcctrl && saw_addi_4) {
|
||||
return None;
|
||||
}
|
||||
let cursor_init = cursor_init?;
|
||||
let end_init = end_init?;
|
||||
if end_init <= cursor_init { return None; }
|
||||
if end_init - cursor_init > 4096 { return None; }
|
||||
|
||||
Some(StaticInitDriver {
|
||||
driver_function: fn_start,
|
||||
array_start: cursor_init,
|
||||
array_end: end_init,
|
||||
length: (end_init - cursor_init) / 4,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::func::FuncInfo;
    use std::collections::BTreeMap;
    use xenia_xex::pe::PeSection;

    /// Build a section descriptor whose raw layout mirrors its virtual layout.
    fn mk_section(name: &str, va: u32, size: u32) -> PeSection {
        PeSection {
            name: name.into(),
            virtual_address: va,
            virtual_size: size,
            raw_offset: va,
            raw_size: size,
            flags: 0x4000_0040,
        }
    }

    /// Store `v` big-endian at byte offset `at` of the fake image.
    fn write_be(pe: &mut [u8], at: usize, v: u32) {
        pe[at..at + 4].copy_from_slice(&v.to_be_bytes());
    }

    /// Function table containing exactly one 0x40-byte function at `start`.
    fn single_function_analysis(start: u32, is_leaf: bool) -> FuncAnalysis {
        let mut functions: BTreeMap<u32, FuncInfo> = BTreeMap::new();
        functions.insert(start, FuncInfo {
            start,
            end: start + 0x40,
            frame_size: 0,
            saved_gprs: 0,
            is_leaf,
            is_saverestore: false,
            pdata_validated: false,
            pdata_length: None,
            has_eh: false,
        });
        FuncAnalysis {
            functions,
            save_gpr_base: None,
            restore_gpr_base: None,
            pdata_entries: Vec::new(),
        }
    }

    #[test]
    fn detects_canonical_initterm_loop() {
        // Build a tiny driver that loops over a 3-entry array.
        let image_base = 0x82000000u32;
        let mut pe = vec![0u8; 0x4000];

        // Array at .rdata + 0x800: 3 function pointers.
        let arr_va_lo = 0x800u32;
        let fns = [image_base + 0x2000, image_base + 0x2010, image_base + 0x2020];
        for (i, p) in fns.iter().enumerate() {
            write_be(&mut pe, arr_va_lo as usize + i * 4, *p);
        }
        let array_start = image_base + arr_va_lo;
        let array_end = array_start + 12;

        // Driver function at 0x82001000:
        //   lis   r3, hi(array_start)
        //   addi  r3, r3, lo(array_start)
        //   lis   r4, hi(array_end)
        //   addi  r4, r4, lo(array_end)
        //   lwz   r5, 0(r3)
        //   mtctr r5
        //   bcctrl
        //   addi  r3, r3, 4
        //   blr
        let driver = 0x82001000u32;
        let off = (driver - image_base) as usize;
        let lis_r3 = (15u32 << 26) | (3 << 21) | ((array_start >> 16) as u32);
        let addi_r3 = (14u32 << 26) | (3 << 21) | (3 << 16) | ((array_start as u16) as u32);
        let lis_r4 = (15u32 << 26) | (4 << 21) | ((array_end >> 16) as u32);
        let addi_r4 = (14u32 << 26) | (4 << 21) | (4 << 16) | ((array_end as u16) as u32);
        let lwz = (32u32 << 26) | (5 << 21) | (3 << 16);
        let mtctr = (31u32 << 26) | (5 << 21) | (9 << 16) | (467 << 1);
        let bcctrl = (19u32 << 26) | (20 << 21) | (528 << 1) | 1;
        let addi_inc = (14u32 << 26) | (3 << 21) | (3 << 16) | 4;
        let blr = (19u32 << 26) | (20 << 21) | (16 << 1);
        let program = [lis_r3, addi_r3, lis_r4, addi_r4, lwz, mtctr, bcctrl, addi_inc, blr];
        for (i, w) in program.iter().enumerate() {
            write_be(&mut pe, off + i * 4, *w);
        }

        let fa = single_function_analysis(driver, false);
        let sections = vec![mk_section(".rdata", 0x800, 0x100)];
        let mut starts = BTreeSet::new();
        for &p in &fns {
            starts.insert(p);
        }
        let labels: HashMap<u32, String> = HashMap::new();

        let r = analyze(&pe, image_base, &sections, &fa, &starts, &labels);

        assert_eq!(r.drivers.len(), 1, "should detect one driver");
        let d = &r.drivers[0];
        assert_eq!(d.driver_function, driver);
        assert_eq!(d.array_start, array_start);
        assert_eq!(d.array_end, array_end);
        assert_eq!(d.length, 3);

        assert_eq!(r.arrays.len(), 1);
        assert_eq!(r.arrays[0].kind, "static_init");
        assert_eq!(r.arrays[0].entries.len(), 3);
    }

    #[test]
    fn rejects_function_without_pattern() {
        let image_base = 0x82000000u32;
        let mut pe = vec![0u8; 0x4000];
        let driver = 0x82001000u32;
        // Just a blr — no driver pattern.
        let blr = (19u32 << 26) | (20 << 21) | (16 << 1);
        write_be(&mut pe, (driver - image_base) as usize, blr);

        let fa = single_function_analysis(driver, true);
        let sections = vec![mk_section(".rdata", 0x800, 0x100)];
        let starts: BTreeSet<u32> = BTreeSet::new();
        let labels: HashMap<u32, String> = HashMap::new();
        let r = analyze(&pe, image_base, &sections, &fa, &starts, &labels);
        assert_eq!(r.drivers.len(), 0);
    }
}
|
||||
382
crates/xenia-analysis/src/strings.rs
Normal file
382
crates/xenia-analysis/src/strings.rs
Normal file
@@ -0,0 +1,382 @@
|
||||
//! String / constant-pool detection in `.rdata`.
|
||||
//!
|
||||
//! Scans the `.rdata` section for runs of printable ASCII or null-terminated
|
||||
//! UTF-16LE characters of length ≥ 6, emitting one row per discovered string.
|
||||
//! Cross-references against `xrefs.target` are computed by the caller —
|
||||
//! this module only finds the strings; downstream queries can join.
|
||||
//!
|
||||
//! ### What this layer does NOT do
|
||||
//!
|
||||
//! - UTF-8 detection is limited to 2- and 3-byte sequences (4-byte forms are
//!   skipped) — Xbox 360 game binaries reliably use ASCII for debug strings
//!   and UTF-16LE for localised text.
|
||||
//! - Strings in `.data` (mutable globals) are not scanned by default.
|
||||
//! - Wide strings on Xbox 360 are little-endian (compiler convention even
|
||||
//! on this big-endian platform); we do NOT try big-endian UTF-16.
|
||||
//! - No language detection / classification beyond encoding.
|
||||
//!
|
||||
//! Extends the original ASCII / UTF-16LE pass with Shift_JIS detection
|
||||
//! (Sylpheed is originally Japanese — likely yields mission/UI text
|
||||
//! invisible to ASCII-only) and UTF-8 multi-byte detection.
|
||||
//!
|
||||
//! Reference: `objdump -s` `.rdata` walks rely on the same heuristic;
|
||||
//! Shift_JIS lead/trail byte ranges per JIS X 0208.
|
||||
|
||||
use xenia_xex::pe::PeSection;
|
||||
|
||||
/// A single string literal located by one of the scanners in this module.
#[derive(Debug, Clone)]
pub struct DetectedString {
    /// Absolute virtual address of the string's first byte.
    pub address: u32,
    /// Encoding tag: one of `"ascii"`, `"utf16le"`, `"shift_jis"` or `"utf8"`.
    pub encoding: &'static str,
    /// Size of the string in bytes; the NUL terminator is not counted.
    pub length: u32,
    /// The string's text, stored as UTF-8.
    pub content: String,
}
|
||||
|
||||
/// Scan all `.rdata` sections (and any other read-only data section the user
|
||||
/// configures) for ASCII and UTF-16LE strings.
|
||||
#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base)))]
|
||||
pub fn analyze(pe: &[u8], image_base: u32, sections: &[PeSection]) -> Vec<DetectedString> {
|
||||
let started = std::time::Instant::now();
|
||||
let mut out: Vec<DetectedString> = Vec::new();
|
||||
|
||||
for section in sections {
|
||||
if section.name != ".rdata" { continue; }
|
||||
let raw_start = section.virtual_address as usize;
|
||||
let raw_end = (section.virtual_address + section.virtual_size) as usize;
|
||||
if raw_end > pe.len() { continue; }
|
||||
let bytes = &pe[raw_start..raw_end.min(pe.len())];
|
||||
let va_base = image_base + section.virtual_address;
|
||||
|
||||
scan_ascii(bytes, va_base, &mut out);
|
||||
scan_utf16le(bytes, va_base, &mut out);
|
||||
scan_shift_jis(bytes, va_base, &mut out);
|
||||
scan_utf8(bytes, va_base, &mut out);
|
||||
}
|
||||
|
||||
let elapsed_ms = started.elapsed().as_millis() as f64;
|
||||
let n_ascii = out.iter().filter(|s| s.encoding == "ascii").count();
|
||||
let n_utf16 = out.iter().filter(|s| s.encoding == "utf16le").count();
|
||||
let n_sjis = out.iter().filter(|s| s.encoding == "shift_jis").count();
|
||||
let n_utf8 = out.iter().filter(|s| s.encoding == "utf8").count();
|
||||
metrics::histogram!("analysis.phase_ms", "phase" => "strings").record(elapsed_ms);
|
||||
tracing::info!(
|
||||
ascii = n_ascii,
|
||||
utf16le = n_utf16,
|
||||
shift_jis = n_sjis,
|
||||
utf8 = n_utf8,
|
||||
total = out.len(),
|
||||
elapsed_ms,
|
||||
"string scan complete"
|
||||
);
|
||||
out
|
||||
}
|
||||
|
||||
/// Shortest run a scanner will report. The ASCII, Shift_JIS and UTF-8
/// scanners compare it against a byte count; the UTF-16LE scanner compares it
/// against a code-unit count.
const MIN_LEN: usize = 6;

/// Returns `true` for bytes that may appear inside a detected string: the
/// visible ASCII range plus space, tab, line feed and carriage return.
fn is_printable_ascii(b: u8) -> bool {
    // `is_ascii_graphic` covers 0x21..=0x7E; the whitespace is added back by hand.
    b.is_ascii_graphic() || b == b' ' || b == b'\t' || b == b'\n' || b == b'\r'
}
|
||||
|
||||
fn scan_ascii(bytes: &[u8], va_base: u32, out: &mut Vec<DetectedString>) {
|
||||
let mut i = 0;
|
||||
while i < bytes.len() {
|
||||
if !is_printable_ascii(bytes[i]) {
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
let start = i;
|
||||
while i < bytes.len() && is_printable_ascii(bytes[i]) { i += 1; }
|
||||
let run_len = i - start;
|
||||
// Require NUL termination and minimum length.
|
||||
if run_len >= MIN_LEN && i < bytes.len() && bytes[i] == 0 {
|
||||
let s = std::str::from_utf8(&bytes[start..i]).unwrap_or("");
|
||||
out.push(DetectedString {
|
||||
address: va_base + start as u32,
|
||||
encoding: "ascii",
|
||||
length: run_len as u32,
|
||||
content: s.to_string(),
|
||||
});
|
||||
}
|
||||
// Skip the NUL (if any) before continuing.
|
||||
if i < bytes.len() && bytes[i] == 0 { i += 1; }
|
||||
}
|
||||
}
|
||||
|
||||
fn scan_utf16le(bytes: &[u8], va_base: u32, out: &mut Vec<DetectedString>) {
|
||||
// UTF-16LE strings are 2-byte aligned in MSVC output. Walk on even
|
||||
// offsets to avoid misaligned hits.
|
||||
let mut i = 0;
|
||||
while i + 2 <= bytes.len() {
|
||||
if !i.is_multiple_of(2) { i += 1; continue; }
|
||||
let lo = bytes[i];
|
||||
let hi = bytes[i + 1];
|
||||
// Restrict scan-start to printable ASCII range with a zero high byte —
|
||||
// this is what real Xbox 360 wide strings look like.
|
||||
if hi != 0 || !is_printable_ascii(lo) {
|
||||
i += 2;
|
||||
continue;
|
||||
}
|
||||
let start = i;
|
||||
let mut codeunits: Vec<u16> = Vec::new();
|
||||
while i + 2 <= bytes.len() {
|
||||
let l = bytes[i];
|
||||
let h = bytes[i + 1];
|
||||
if h != 0 || !is_printable_ascii(l) { break; }
|
||||
codeunits.push((h as u16) << 8 | l as u16);
|
||||
i += 2;
|
||||
}
|
||||
// Require NUL u16 terminator.
|
||||
let nul_terminated = i + 2 <= bytes.len() && bytes[i] == 0 && bytes[i + 1] == 0;
|
||||
if codeunits.len() >= MIN_LEN && nul_terminated {
|
||||
let s: String = String::from_utf16_lossy(&codeunits);
|
||||
out.push(DetectedString {
|
||||
address: va_base + start as u32,
|
||||
encoding: "utf16le",
|
||||
length: ((i - start) as u32),
|
||||
content: s,
|
||||
});
|
||||
}
|
||||
// Skip past the terminator.
|
||||
if nul_terminated { i += 2; }
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when `b` can open a two-byte Shift_JIS character, i.e. it
/// lies in `0x81..=0x9F` or `0xE0..=0xEF`.
fn is_sjis_lead(b: u8) -> bool {
    matches!(b, 0x81..=0x9F | 0xE0..=0xEF)
}
|
||||
/// Returns `true` when `b` can be the second byte of a two-byte Shift_JIS
/// character, i.e. it lies in `0x40..=0x7E` or `0x80..=0xFC`.
fn is_sjis_trail(b: u8) -> bool {
    matches!(b, 0x40..=0x7E | 0x80..=0xFC)
}
|
||||
fn is_sjis_singlebyte(b: u8) -> bool {
|
||||
is_printable_ascii(b) || (0xA1..=0xDF).contains(&b)
|
||||
}
|
||||
|
||||
/// Scan for Shift_JIS strings — runs of ≥ 6 bytes consisting of valid
|
||||
/// SJIS code units (single-byte ASCII / half-width katakana, OR a
|
||||
/// lead+trail pair). At least one multi-byte pair must be present so we
|
||||
/// don't double-count strings that are purely ASCII.
|
||||
fn scan_shift_jis(bytes: &[u8], va_base: u32, out: &mut Vec<DetectedString>) {
|
||||
let mut i = 0;
|
||||
while i < bytes.len() {
|
||||
let start = i;
|
||||
let mut has_multibyte = false;
|
||||
let mut nbytes = 0;
|
||||
while i < bytes.len() {
|
||||
let b = bytes[i];
|
||||
if is_sjis_lead(b) && i + 1 < bytes.len() && is_sjis_trail(bytes[i + 1]) {
|
||||
has_multibyte = true;
|
||||
nbytes += 2;
|
||||
i += 2;
|
||||
} else if is_sjis_singlebyte(b) {
|
||||
nbytes += 1;
|
||||
i += 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Require NUL terminator + min length + at least one multi-byte char.
|
||||
if has_multibyte
|
||||
&& nbytes >= MIN_LEN
|
||||
&& i < bytes.len() && bytes[i] == 0
|
||||
{
|
||||
// Decode SJIS → UTF-8 best-effort. We don't ship a full
|
||||
// SJIS decoder; keep the bytes as a `\u{XX}\u{YY}…` style
|
||||
// rendering for diagnostic readability, and let downstream
|
||||
// tooling re-decode if needed.
|
||||
let raw = &bytes[start..i];
|
||||
let mut s = String::with_capacity(raw.len() * 4);
|
||||
let mut p = 0;
|
||||
while p < raw.len() {
|
||||
let b = raw[p];
|
||||
if is_sjis_lead(b) && p + 1 < raw.len() && is_sjis_trail(raw[p + 1]) {
|
||||
// Render as SJIS hex pair so the string is identifiable
|
||||
// even without a decoder. Real Japanese decoding is a
|
||||
// future enhancement.
|
||||
s.push_str(&format!("\\x{:02X}\\x{:02X}", b, raw[p + 1]));
|
||||
p += 2;
|
||||
} else {
|
||||
s.push(b as char);
|
||||
p += 1;
|
||||
}
|
||||
}
|
||||
out.push(DetectedString {
|
||||
address: va_base + start as u32,
|
||||
encoding: "shift_jis",
|
||||
length: nbytes as u32,
|
||||
content: s,
|
||||
});
|
||||
i += 1; // skip NUL
|
||||
} else {
|
||||
// Advance past whatever didn't match.
|
||||
i = start + 1;
|
||||
if i < bytes.len() && bytes[i] == 0 { i += 1; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Scan for UTF-8 strings carrying multi-byte sequences (we already
|
||||
/// catch pure-ASCII via `scan_ascii`). Validates 2/3-byte sequences;
|
||||
/// 4-byte (supplementary plane) is uncommon in game text and skipped.
|
||||
fn scan_utf8(bytes: &[u8], va_base: u32, out: &mut Vec<DetectedString>) {
|
||||
let mut i = 0;
|
||||
while i < bytes.len() {
|
||||
let start = i;
|
||||
let mut has_multibyte = false;
|
||||
let mut nbytes = 0;
|
||||
while i < bytes.len() {
|
||||
let b = bytes[i];
|
||||
if b < 0x80 {
|
||||
if !is_printable_ascii(b) { break; }
|
||||
nbytes += 1;
|
||||
i += 1;
|
||||
} else if (b & 0xE0) == 0xC0 {
|
||||
// 2-byte: 110xxxxx 10xxxxxx
|
||||
if i + 1 >= bytes.len() || (bytes[i + 1] & 0xC0) != 0x80 { break; }
|
||||
has_multibyte = true;
|
||||
nbytes += 2;
|
||||
i += 2;
|
||||
} else if (b & 0xF0) == 0xE0 {
|
||||
// 3-byte: 1110xxxx 10xxxxxx 10xxxxxx
|
||||
if i + 2 >= bytes.len()
|
||||
|| (bytes[i + 1] & 0xC0) != 0x80
|
||||
|| (bytes[i + 2] & 0xC0) != 0x80 { break; }
|
||||
has_multibyte = true;
|
||||
nbytes += 3;
|
||||
i += 3;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if has_multibyte
|
||||
&& nbytes >= MIN_LEN
|
||||
&& i < bytes.len() && bytes[i] == 0
|
||||
&& let Ok(s) = std::str::from_utf8(&bytes[start..i])
|
||||
{
|
||||
out.push(DetectedString {
|
||||
address: va_base + start as u32,
|
||||
encoding: "utf8",
|
||||
length: nbytes as u32,
|
||||
content: s.to_string(),
|
||||
});
|
||||
i += 1; // skip NUL
|
||||
} else {
|
||||
i = start + 1;
|
||||
if i < bytes.len() && bytes[i] == 0 { i += 1; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a section descriptor whose raw layout mirrors its virtual layout.
    fn mk_section(name: &str, va: u32, size: u32) -> PeSection {
        PeSection {
            name: name.into(),
            virtual_address: va,
            virtual_size: size,
            raw_offset: va,
            raw_size: size,
            flags: 0x4000_0040,
        }
    }

    #[test]
    fn detects_ascii_string() {
        let image_base = 0x82000000u32;
        let mut pe = vec![0u8; 0x1100];
        let off = 0x1000usize;
        let s = b"Hello, world!\0";
        pe[off..off + s.len()].copy_from_slice(s);
        let sections = vec![mk_section(".rdata", 0x1000, 0x100)];
        let strings = analyze(&pe, image_base, &sections);
        assert_eq!(strings.len(), 1);
        assert_eq!(strings[0].encoding, "ascii");
        assert_eq!(strings[0].content, "Hello, world!");
        assert_eq!(strings[0].address, image_base + 0x1000);
    }

    #[test]
    fn rejects_short_runs() {
        let image_base = 0x82000000u32;
        let mut pe = vec![0u8; 0x1100];
        let off = 0x1000usize;
        let s = b"Hi\0longer string here\0";
        pe[off..off + s.len()].copy_from_slice(s);
        let sections = vec![mk_section(".rdata", 0x1000, 0x100)];
        let strings = analyze(&pe, image_base, &sections);
        assert_eq!(strings.len(), 1);
        assert_eq!(strings[0].content, "longer string here");
    }

    #[test]
    fn detects_utf16le_string() {
        let image_base = 0x82000000u32;
        let mut pe = vec![0u8; 0x1100];
        let off = 0x1000usize;
        // "Hello!" in UTF-16LE + NUL u16
        let s: &[u8] = b"H\0e\0l\0l\0o\0!\0\0\0";
        pe[off..off + s.len()].copy_from_slice(s);
        let sections = vec![mk_section(".rdata", 0x1000, 0x100)];
        let strings = analyze(&pe, image_base, &sections);
        // The ASCII pass sees no contiguous run here because of the
        // interleaved zero bytes, so only the UTF-16 hit is checked.
        let utf16: Vec<_> = strings.iter().filter(|s| s.encoding == "utf16le").collect();
        assert!(utf16.iter().any(|s| s.content == "Hello!"));
    }

    #[test]
    fn detects_shift_jis_string() {
        let image_base = 0x82000000u32;
        let mut pe = vec![0u8; 0x1100];
        let off = 0x1000usize;
        // "ABC" + (SJIS hiragana 'a' = 0x82 0xA0) + (SJIS 'i' = 0x82 0xA2) + NUL
        let s: &[u8] = b"ABC\x82\xA0\x82\xA2\0";
        pe[off..off + s.len()].copy_from_slice(s);
        let sections = vec![mk_section(".rdata", 0x1000, 0x100)];
        let strings = analyze(&pe, image_base, &sections);
        let sjis: Vec<_> = strings.iter().filter(|s| s.encoding == "shift_jis").collect();
        assert_eq!(sjis.len(), 1);
        assert!(sjis[0].content.contains("ABC"));
        assert!(sjis[0].content.contains("\\x82\\xA0"));
    }

    #[test]
    fn detects_utf8_multibyte_string() {
        let image_base = 0x82000000u32;
        let mut pe = vec![0u8; 0x1100];
        let off = 0x1000usize;
        // "Café" = 'C', 'a', 'f', 0xC3 0xA9 (é), then more ASCII to reach min length
        let s: &[u8] = b"Caf\xC3\xA9eteria\0";
        pe[off..off + s.len()].copy_from_slice(s);
        let sections = vec![mk_section(".rdata", 0x1000, 0x100)];
        let strings = analyze(&pe, image_base, &sections);
        let u8s: Vec<_> = strings.iter().filter(|s| s.encoding == "utf8").collect();
        assert_eq!(u8s.len(), 1);
        assert_eq!(u8s[0].content, "Café".to_string() + "eteria");
    }

    #[test]
    fn requires_nul_terminator() {
        let image_base = 0x82000000u32;
        let mut pe = vec![0u8; 0x1100];
        // No trailing NUL — should NOT be detected.
        let off = 0x1000usize;
        let s = b"abcdefghij";
        pe[off..off + s.len()].copy_from_slice(s);
        // Fill rest of section with 0xFF so the run terminates cleanly without NUL.
        pe[off + s.len()..off + 0x100].fill(0xFF);
        let sections = vec![mk_section(".rdata", 0x1000, 0x100)];
        let strings = analyze(&pe, image_base, &sections);
        assert_eq!(strings.len(), 0);
    }
}
|
||||
424
crates/xenia-analysis/src/vtables.rs
Normal file
424
crates/xenia-analysis/src/vtables.rs
Normal file
@@ -0,0 +1,424 @@
|
||||
//! MSVC vtable + RTTI detection.
|
||||
//!
|
||||
//! Heuristic two-pass scan over the binary's read-only data sections. Pass 1
|
||||
//! finds candidate vtables — runs of ≥3 contiguous big-endian u32 values that
|
||||
//! all land on known function entries. Pass 2 attempts the MSVC RTTI walk
|
||||
//! `vtable[-1] → CompleteObjectLocator → TypeDescriptor → mangled name`. When
|
||||
//! RTTI is stripped (typical for shipped game binaries), each anonymous vtable
|
||||
//! gets a deterministic name `ANON_Class_<hex>` keyed by a hash of its
|
||||
//! sorted method PCs (so identical vtables across multiple class instances
|
||||
//! collapse to one entry).
|
||||
//!
|
||||
//! What this module does NOT do:
|
||||
//! - Vtables in heap-allocated memory (built at runtime by ctors) are out of
|
||||
//! scope — only vtables present statically in `.rdata` / `.data`.
|
||||
//! - RTTI inheritance (`BaseClassDescriptor` walk) is best-effort; we record
|
||||
//! the first-level base list when present and leave it NULL otherwise.
|
||||
//! - Multiple-inheritance "extra" vftables (one per base subobject) are
|
||||
//! detected as independent vtables; we don't link them.
|
||||
//!
|
||||
//! Reference: openrce.org "Reversing Microsoft Visual C++" RTTI articles
|
||||
//! (CompleteObjectLocator / TypeDescriptor / BaseClassDescriptor layout).
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use xenia_xex::pe::PeSection;
|
||||
|
||||
use crate::demangle;
|
||||
|
||||
/// A virtual-function table found by the scan, with whatever RTTI could be
/// recovered for it.
#[derive(Debug, Clone)]
pub struct Vtable {
    /// Absolute VA of slot 0, the first method pointer.
    pub address: u32,
    /// How many method slots the table has.
    pub length: u32,
    /// Absolute VA of the `CompleteObjectLocator` taken from `vtable[-1]`.
    /// `None` when the RTTI walk did not succeed (no RTTI / stripped).
    pub col_address: Option<u32>,
    /// Name of the owning class: demangled from RTTI when that is available,
    /// otherwise a synthetic `ANON_Class_<hex>` identifier.
    pub class_name: String,
    /// Whether the COL → TypeDescriptor walk succeeded.
    pub rtti_present: bool,
    /// Base class names read from `RTTIClassHierarchyDescriptor`, encoded as
    /// a JSON array. `None` when they could not be parsed.
    pub base_classes_json: Option<String>,
    /// Function VA for each slot, in slot order.
    pub methods: Vec<u32>,
}
|
||||
|
||||
/// Run the vtable scan + RTTI walk. `function_starts` is the set of valid
|
||||
/// `.text` function entry VAs from M1's corrected `functions` table.
|
||||
#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base)))]
|
||||
pub fn analyze(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
sections: &[PeSection],
|
||||
function_starts: &std::collections::BTreeSet<u32>,
|
||||
) -> Vec<Vtable> {
|
||||
let started = std::time::Instant::now();
|
||||
// Sections we'll scan for vtable bodies.
|
||||
let scan_targets: Vec<&PeSection> = sections
|
||||
.iter()
|
||||
.filter(|s| matches!(s.name.as_str(), ".rdata" | ".data"))
|
||||
.collect();
|
||||
|
||||
// Range table for "is this VA in .rdata or .data?"
|
||||
let rdata_ranges: Vec<(u32, u32)> = sections
|
||||
.iter()
|
||||
.filter(|s| s.name == ".rdata")
|
||||
.map(|s| (image_base + s.virtual_address, image_base + s.virtual_address + s.virtual_size))
|
||||
.collect();
|
||||
|
||||
let mut candidates: Vec<Vtable> = Vec::new();
|
||||
|
||||
for section in scan_targets {
|
||||
let va_start = image_base + section.virtual_address;
|
||||
let va_end = va_start + section.virtual_size;
|
||||
let raw_start = section.virtual_address as usize;
|
||||
let raw_end = (section.virtual_address + section.virtual_size) as usize;
|
||||
if raw_end > pe.len() { continue; }
|
||||
let bytes = &pe[raw_start..raw_end.min(pe.len())];
|
||||
|
||||
let mut i = 0usize;
|
||||
while i + 12 <= bytes.len() {
|
||||
// Try to start a run at this 4-aligned offset.
|
||||
if !i.is_multiple_of(4) { i += 1; continue; }
|
||||
let mut run_len = 0usize;
|
||||
let mut methods: Vec<u32> = Vec::new();
|
||||
let mut j = i;
|
||||
while j + 4 <= bytes.len() {
|
||||
let val = u32::from_be_bytes([bytes[j], bytes[j + 1], bytes[j + 2], bytes[j + 3]]);
|
||||
if function_starts.contains(&val) {
|
||||
methods.push(val);
|
||||
run_len += 1;
|
||||
j += 4;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if run_len >= 3 {
|
||||
let address = va_start + (i as u32);
|
||||
candidates.push(Vtable {
|
||||
address,
|
||||
length: run_len as u32,
|
||||
col_address: None,
|
||||
class_name: synth_anon_name(&methods),
|
||||
rtti_present: false,
|
||||
base_classes_json: None,
|
||||
methods,
|
||||
});
|
||||
i += run_len * 4;
|
||||
} else {
|
||||
i += 4;
|
||||
}
|
||||
}
|
||||
let _ = (va_start, va_end);
|
||||
}
|
||||
|
||||
// RTTI walk: for each candidate, look at vtable[-1].
|
||||
let pe_image_base = image_base;
|
||||
for v in &mut candidates {
|
||||
if v.address < 4 { continue; }
|
||||
let col_off = (v.address - pe_image_base - 4) as usize;
|
||||
if col_off + 4 > pe.len() { continue; }
|
||||
let col_ptr = u32::from_be_bytes([pe[col_off], pe[col_off + 1], pe[col_off + 2], pe[col_off + 3]]);
|
||||
if col_ptr == 0 { continue; }
|
||||
if !is_in_ranges(col_ptr, &rdata_ranges) { continue; }
|
||||
|
||||
// Try to extract the TypeDescriptor mangled-name string.
|
||||
if let Some((td_ptr, hierarchy_ptr)) = read_col(pe, image_base, col_ptr)
|
||||
&& let Some(mangled) = read_typedescriptor_name(pe, image_base, td_ptr, &rdata_ranges)
|
||||
&& let Some(class) = demangle_rtti_typename(&mangled)
|
||||
{
|
||||
v.col_address = Some(col_ptr);
|
||||
v.class_name = class;
|
||||
v.rtti_present = true;
|
||||
v.base_classes_json = read_class_hierarchy(pe, image_base, hierarchy_ptr, &rdata_ranges);
|
||||
}
|
||||
}
|
||||
|
||||
let elapsed_ms = started.elapsed().as_millis() as f64;
|
||||
let rtti_count = candidates.iter().filter(|v| v.rtti_present).count();
|
||||
metrics::histogram!("analysis.phase_ms", "phase" => "vtables").record(elapsed_ms);
|
||||
tracing::info!(
|
||||
vtables = candidates.len(),
|
||||
rtti = rtti_count,
|
||||
anon = candidates.len() - rtti_count,
|
||||
elapsed_ms,
|
||||
"vtable scan complete"
|
||||
);
|
||||
candidates
|
||||
}
|
||||
|
||||
/// Returns `true` when `addr` lies inside at least one half-open
/// `[start, end)` interval of `ranges`.
fn is_in_ranges(addr: u32, ranges: &[(u32, u32)]) -> bool {
    for &(lo, hi) in ranges {
        if (lo..hi).contains(&addr) {
            return true;
        }
    }
    false
}
|
||||
|
||||
/// Read 4 big-endian bytes at absolute VA `addr` from the PE image.
///
/// `image_base` is the VA that corresponds to `pe[0]`. Returns `None` when
/// the four bytes are not entirely inside `pe`. That includes an `addr`
/// below `image_base`: the wrapping subtraction then yields an offset far
/// past the end of any realistic image.
fn read_be_u32(pe: &[u8], image_base: u32, addr: u32) -> Option<u32> {
    let off = addr.wrapping_sub(image_base) as usize;
    // `checked_add` keeps the end bound from overflowing `usize` on 32-bit
    // targets; `get` performs the bounds check.
    let word = pe.get(off..off.checked_add(4)?)?;
    Some(u32::from_be_bytes([word[0], word[1], word[2], word[3]]))
}
|
||||
|
||||
/// Parse a `CompleteObjectLocator` at VA `col`. Returns
|
||||
/// `(type_descriptor_ptr, class_hierarchy_descriptor_ptr)` on success.
|
||||
///
|
||||
/// Layout (32-bit MSVC):
|
||||
/// ```text
|
||||
/// +0x00 signature (0 for x86 without /GR-, can be 1)
|
||||
/// +0x04 offset within complete object
|
||||
/// +0x08 cdOffset (this-pointer adjuster)
|
||||
/// +0x0C TypeDescriptor *
|
||||
/// +0x10 RTTIClassHierarchyDescriptor *
|
||||
/// ```
|
||||
fn read_col(pe: &[u8], image_base: u32, col: u32) -> Option<(u32, u32)> {
|
||||
let td = read_be_u32(pe, image_base, col + 0x0C)?;
|
||||
let chd = read_be_u32(pe, image_base, col + 0x10)?;
|
||||
if td == 0 { return None; }
|
||||
Some((td, chd))
|
||||
}
|
||||
|
||||
/// Read a TypeDescriptor's mangled-name string at VA `td`.
|
||||
///
|
||||
/// Layout: `+0x00` vftable ptr, `+0x04` "spare", `+0x08` zero-terminated
|
||||
/// mangled name (e.g. `.?AVClassName@@`).
|
||||
fn read_typedescriptor_name(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
td: u32,
|
||||
rdata_ranges: &[(u32, u32)],
|
||||
) -> Option<String> {
|
||||
if !is_in_ranges(td, rdata_ranges) { return None; }
|
||||
let name_va = td + 0x08;
|
||||
let off = name_va.wrapping_sub(image_base) as usize;
|
||||
if off + 1 > pe.len() { return None; }
|
||||
// Read up to 256 bytes or until NUL.
|
||||
let mut end = off;
|
||||
while end < pe.len().min(off + 256) && pe[end] != 0 { end += 1; }
|
||||
if end == off { return None; }
|
||||
let s = std::str::from_utf8(&pe[off..end]).ok()?;
|
||||
// Sanity: MSVC RTTI names always start with `.?A`.
|
||||
if !s.starts_with(".?A") { return None; }
|
||||
Some(s.to_string())
|
||||
}
|
||||
|
||||
/// Demangle an RTTI type-name string of the form `.?AVClassName@ns@@`.
|
||||
/// MSVC convention: leading `.` is the marker for an RTTI string; strip it
|
||||
/// before passing to the demangler.
|
||||
fn demangle_rtti_typename(rtti_name: &str) -> Option<String> {
|
||||
let stripped = rtti_name.strip_prefix('.')?;
|
||||
let raw = msvc_demangler::demangle(stripped, msvc_demangler::DemangleFlags::llvm()).ok()?;
|
||||
// Output looks like `class xe::apu::AudioSystem` or `struct foo::Bar`.
|
||||
let cls = raw
|
||||
.strip_prefix("class ")
|
||||
.or_else(|| raw.strip_prefix("struct "))
|
||||
.or_else(|| raw.strip_prefix("union "))
|
||||
.unwrap_or(&raw);
|
||||
Some(cls.to_string())
|
||||
}
|
||||
|
||||
/// Best-effort `RTTIClassHierarchyDescriptor` walk: read the
|
||||
/// `BaseClassArray` entries and demangle each base's TypeDescriptor name.
|
||||
/// Returns a JSON array string on success.
|
||||
///
|
||||
/// Layout:
|
||||
/// ```text
|
||||
/// RTTIClassHierarchyDescriptor:
|
||||
/// +0x00 signature
|
||||
/// +0x04 attributes
|
||||
/// +0x08 numBaseClasses
|
||||
/// +0x0C BaseClassArray * (-> array of BaseClassDescriptor *)
|
||||
/// BaseClassDescriptor:
|
||||
/// +0x00 TypeDescriptor *
|
||||
/// +0x04 numContainedBases
|
||||
/// ...
|
||||
/// ```
|
||||
fn read_class_hierarchy(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
chd: u32,
|
||||
rdata_ranges: &[(u32, u32)],
|
||||
) -> Option<String> {
|
||||
if !is_in_ranges(chd, rdata_ranges) { return None; }
|
||||
let num_bases = read_be_u32(pe, image_base, chd + 0x08)?;
|
||||
if num_bases == 0 || num_bases > 256 { return None; } // sanity cap
|
||||
let bca_ptr = read_be_u32(pe, image_base, chd + 0x0C)?;
|
||||
if !is_in_ranges(bca_ptr, rdata_ranges) { return None; }
|
||||
|
||||
let mut names: Vec<String> = Vec::new();
|
||||
for i in 0..num_bases {
|
||||
let bcd_ptr = match read_be_u32(pe, image_base, bca_ptr + i * 4) {
|
||||
Some(p) if is_in_ranges(p, rdata_ranges) => p,
|
||||
_ => return None,
|
||||
};
|
||||
let td_ptr = match read_be_u32(pe, image_base, bcd_ptr) {
|
||||
Some(p) if is_in_ranges(p, rdata_ranges) => p,
|
||||
_ => return None,
|
||||
};
|
||||
let mangled = match read_typedescriptor_name(pe, image_base, td_ptr, rdata_ranges) {
|
||||
Some(s) => s,
|
||||
None => return None,
|
||||
};
|
||||
let cls = demangle_rtti_typename(&mangled).unwrap_or(mangled);
|
||||
names.push(cls);
|
||||
}
|
||||
serde_json::to_string(&names).ok()
|
||||
}
|
||||
|
||||
/// Produce the placeholder class name used for a vtable without RTTI.
///
/// The name is `ANON_Class_` followed by eight upper-case hex digits: the low
/// 32 bits of a 64-bit FNV-1a hash over the little-endian bytes of the method
/// PCs. The PCs are sorted before hashing, so the result depends only on
/// which PCs are present and not on their slot order.
fn synth_anon_name(methods: &[u32]) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const FNV_PRIME: u64 = 0x100000001b3;

    let mut pcs: Vec<u32> = methods.iter().copied().collect();
    pcs.sort_unstable();

    let digest = pcs
        .iter()
        .flat_map(|pc| pc.to_le_bytes())
        .fold(FNV_OFFSET_BASIS, |acc, byte| {
            (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
        });

    format!("ANON_Class_{:08X}", digest as u32)
}
|
||||
|
||||
/// Build the per-method `(vtable_address, slot, function_address)` list for
|
||||
/// DB insertion, with optional demangled-name lookup for any function that
|
||||
/// has a matching `?…` label. Skips slots whose function isn't in the
|
||||
/// supplied label map.
|
||||
pub fn methods_table(
|
||||
vtables: &[Vtable],
|
||||
labels: &std::collections::HashMap<u32, String>,
|
||||
) -> Vec<(u32, u32, u32, Option<String>, Option<String>)> {
|
||||
let mut out = Vec::new();
|
||||
for v in vtables {
|
||||
for (slot, &fn_va) in v.methods.iter().enumerate() {
|
||||
let label = labels.get(&fn_va).cloned();
|
||||
let demangled = label.as_ref()
|
||||
.and_then(|l| demangle::demangle(l).map(|d| d.raw_demangled));
|
||||
out.push((v.address, slot as u32, fn_va, label, demangled));
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Build a `class_name → Vtable` summary for the `classes` table. Multiple
|
||||
/// vtables sharing the same class name (multiple instances at link time)
|
||||
/// collapse via `BTreeMap` — the first detected vtable wins.
|
||||
pub fn classes_table(vtables: &[Vtable]) -> Vec<(String, u32, bool, Option<String>)> {
|
||||
let mut by_name: BTreeMap<String, &Vtable> = BTreeMap::new();
|
||||
for v in vtables {
|
||||
by_name.entry(v.class_name.clone()).or_insert(v);
|
||||
}
|
||||
by_name
|
||||
.into_iter()
|
||||
.map(|(name, v)| (name, v.address, v.rtti_present, v.base_classes_json.clone()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// The synthetic name must not depend on the order of the method PCs.
    #[test]
    fn synth_anon_name_is_stable() {
        let a = synth_anon_name(&[0x82001000, 0x82001100, 0x82001200]);
        let b = synth_anon_name(&[0x82001200, 0x82001000, 0x82001100]);
        assert_eq!(a, b, "anon name must be order-independent");
    }

    /// Different method sets must yield different synthetic names.
    #[test]
    fn synth_anon_name_differs_for_different_methods() {
        let a = synth_anon_name(&[0x82001000, 0x82001100]);
        let b = synth_anon_name(&[0x82002000, 0x82002100]);
        assert_ne!(a, b);
    }

    /// A run of three function pointers at the start of `.rdata` is reported
    /// as a single RTTI-less vtable.
    #[test]
    fn detects_3_method_vtable_in_rdata() {
        let image_base = 0x82000000u32;
        let rdata_va = 0x1000u32;
        let text_va = 0x2000u32;
        let rdata_size = 16u32;
        let text_size = 0x100u32;

        // PE buffer big enough for both sections.
        let total = (text_va + text_size) as usize;
        let mut pe = vec![0u8; total];

        // Vtable: 3 method PCs at .rdata start, all valid function entries.
        let m: [u32; 3] = [
            image_base + text_va,
            image_base + text_va + 0x10,
            image_base + text_va + 0x20,
        ];
        for (i, val) in m.iter().enumerate() {
            pe[rdata_va as usize + i * 4..rdata_va as usize + (i + 1) * 4]
                .copy_from_slice(&val.to_be_bytes());
        }

        let sections = vec![
            PeSection {
                name: ".rdata".into(),
                virtual_address: rdata_va,
                virtual_size: rdata_size,
                raw_offset: rdata_va,
                raw_size: rdata_size,
                flags: 0x4000_0040,
            },
            PeSection {
                name: ".text".into(),
                virtual_address: text_va,
                virtual_size: text_size,
                raw_offset: text_va,
                raw_size: text_size,
                flags: 0x6000_0020,
            },
        ];
        let mut function_starts = std::collections::BTreeSet::new();
        for &pc in &m {
            function_starts.insert(pc);
        }

        let vtables = analyze(&pe, image_base, &sections, &function_starts);
        assert_eq!(vtables.len(), 1);
        assert_eq!(vtables[0].length, 3);
        assert_eq!(vtables[0].address, image_base + rdata_va);
        assert!(vtables[0].class_name.starts_with("ANON_Class_"));
        assert!(!vtables[0].rtti_present);
    }

    /// A run of only two function pointers must not be reported as a vtable.
    #[test]
    fn rejects_2_method_run() {
        let image_base = 0x82000000u32;
        let rdata_va = 0x1000u32;
        let text_va = 0x2000u32;

        let total = (text_va + 0x100) as usize;
        let mut pe = vec![0u8; total];
        let m: [u32; 2] = [image_base + text_va, image_base + text_va + 0x10];
        for (i, val) in m.iter().enumerate() {
            pe[rdata_va as usize + i * 4..rdata_va as usize + (i + 1) * 4]
                .copy_from_slice(&val.to_be_bytes());
        }
        let sections = vec![
            PeSection {
                name: ".rdata".into(),
                virtual_address: rdata_va,
                virtual_size: 8,
                raw_offset: rdata_va,
                raw_size: 8,
                flags: 0x4000_0040,
            },
            PeSection {
                name: ".text".into(),
                virtual_address: text_va,
                virtual_size: 0x100,
                raw_offset: text_va,
                raw_size: 0x100,
                flags: 0x6000_0020,
            },
        ];
        let mut function_starts = std::collections::BTreeSet::new();
        for &pc in &m {
            function_starts.insert(pc);
        }
        let vtables = analyze(&pe, image_base, &sections, &function_starts);
        assert_eq!(vtables.len(), 0, "runs of 2 must be rejected to keep false-positive rate down");
    }
}
|
||||
@@ -8,23 +8,25 @@ use crate::func::FuncAnalysis;
|
||||
|
||||
/// Classification of a cross-reference edge by the kind of instruction that
/// produced it. The derived ordering follows declaration order.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum XrefKind {
    Call,         // bl
    IndirectCall, // bcctrl through a statically-resolvable vtable slot (M5)
    Jump,         // b (unconditional)
    Branch,       // bc / bXX (conditional)
    DataRead,     // lwz, lbz, lhz, lha, lfs, lfd, etc. from resolved address
    DataWrite,    // stw, stb, sth, stfs, stfd, etc. to resolved address
    DataRef,      // address computed via lis+addi/ori but not directly loaded/stored
}
|
||||
|
||||
impl XrefKind {
|
||||
pub fn tag(self) -> &'static str {
|
||||
match self {
|
||||
XrefKind::Call => "call",
|
||||
XrefKind::Jump => "j",
|
||||
XrefKind::Branch => "br",
|
||||
XrefKind::DataRead => "read",
|
||||
XrefKind::DataWrite => "write",
|
||||
XrefKind::DataRef => "ref",
|
||||
XrefKind::Call => "call",
|
||||
XrefKind::IndirectCall => "ind_call",
|
||||
XrefKind::Jump => "j",
|
||||
XrefKind::Branch => "br",
|
||||
XrefKind::DataRead => "read",
|
||||
XrefKind::DataWrite => "write",
|
||||
XrefKind::DataRef => "ref",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,10 +39,56 @@ impl XrefKind {
|
||||
}
|
||||
}
|
||||
|
||||
/// How the instruction at an xref's `source` arrives at its target address.
///
/// Carried only by data xrefs (`read` / `write` / `ref`). Control-flow rows
/// (call / jump / branch / ind_call) store `None` instead.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub enum AddrMode {
    /// Signed 16-bit displacement off a base register, e.g.
    /// `lwz rD, simm(rA)` or `stw rS, simm(rA)`, including the FP D-forms
    /// (`lfs/lfd/stfs/stfd`) and the update variants. The dominant case.
    DForm,

    /// The address was built by a tracked `lis + addi` pair; this site does
    /// not load or store through it yet.
    LisAddi,

    /// The address was built by a tracked `lis + ori` pair.
    LisOri,

    /// Multi-word D-form (`lmw / stmw rS, simm(rA)`). One xref is emitted per
    /// register slot: 32-rS slots beginning at the resolved base.
    Multiword,

    /// X-form indexed access. Covers the indexed stores (`stwx / stbx / sthx /
    /// stwux / stbux / sthux / stdx / stdux`), the AltiVec/VMX vector stores
    /// (`stvx / stvxl / stvebx / stvehx / stvewx`), and the matching indexed
    /// and vector loads. Resolving it statically needs both rA and rB to be
    /// known constants. (M6 + VMX follow-up.)
    XFormIndexed,

    /// X-form byte-reversed access: `stwbrx / sthbrx / lwbrx / lhbrx`.
    XFormByteRev,

    /// Store-conditional half of a reservation sequence: `stwcx. / stdcx.`.
    Atomic,

    /// `dcbz rA, rB` cache-line clear, which zeroes 32 bytes at rA+rB.
    DCBZ,
}
|
||||
|
||||
impl AddrMode {
|
||||
pub fn tag(self) -> &'static str {
|
||||
match self {
|
||||
AddrMode::DForm => "d_form",
|
||||
AddrMode::LisAddi => "lis_addi",
|
||||
AddrMode::LisOri => "lis_ori",
|
||||
AddrMode::Multiword => "multiword",
|
||||
AddrMode::XFormIndexed => "x_form_indexed",
|
||||
AddrMode::XFormByteRev => "x_form_byterev",
|
||||
AddrMode::Atomic => "atomic",
|
||||
AddrMode::DCBZ => "dcbz",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Xref {
|
||||
pub source: u32,
|
||||
pub kind: XrefKind,
|
||||
/// `None` for control-flow edges; `Some(...)` for data edges.
|
||||
pub addr_mode: Option<AddrMode>,
|
||||
}
|
||||
|
||||
pub type XrefMap = HashMap<u32, Vec<Xref>>;
|
||||
@@ -53,6 +101,7 @@ pub struct XrefResult {
|
||||
}
|
||||
|
||||
/// Perform full cross-reference analysis on a PE image.
|
||||
#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base), entry_point = format_args!("{:#010x}", entry_point)))]
|
||||
pub fn analyze_xrefs(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
@@ -61,6 +110,7 @@ pub fn analyze_xrefs(
|
||||
func_analysis: &FuncAnalysis,
|
||||
import_map: &HashMap<u32, String>,
|
||||
) -> XrefResult {
|
||||
let started = std::time::Instant::now();
|
||||
let func_labels = func_analysis.generate_labels();
|
||||
let mut labels: HashMap<u32, String> = func_labels;
|
||||
labels.insert(entry_point, "entry_point".to_string());
|
||||
@@ -124,7 +174,7 @@ pub fn analyze_xrefs(
|
||||
let rd = ((instr >> 21) & 0x1F) as usize;
|
||||
let ra = ((instr >> 16) & 0x1F) as usize;
|
||||
let simm = ((instr & 0xFFFF) as i16) as i32;
|
||||
let uimm = (instr & 0xFFFF) as u32;
|
||||
let uimm = instr & 0xFFFF;
|
||||
|
||||
// Reset tracking on function boundaries (prologue = mfspr rN, LR)
|
||||
if opcode == 31 {
|
||||
@@ -156,7 +206,10 @@ pub fn analyze_xrefs(
|
||||
let data_addr = base.wrapping_add(simm as u32);
|
||||
if is_in_ranges(data_addr, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataRef));
|
||||
xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataRef });
|
||||
xrefs.entry(data_addr).or_default().push(Xref {
|
||||
source: abs_addr, kind: XrefKind::DataRef,
|
||||
addr_mode: Some(AddrMode::LisAddi),
|
||||
});
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
reg_hi[rd] = Some(data_addr); // propagate for chained access
|
||||
@@ -171,7 +224,10 @@ pub fn analyze_xrefs(
|
||||
let data_addr = base | uimm;
|
||||
if is_in_ranges(data_addr, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataRef));
|
||||
xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataRef });
|
||||
xrefs.entry(data_addr).or_default().push(Xref {
|
||||
source: abs_addr, kind: XrefKind::DataRef,
|
||||
addr_mode: Some(AddrMode::LisOri),
|
||||
});
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
reg_hi[ra] = Some(data_addr);
|
||||
@@ -180,33 +236,163 @@ pub fn analyze_xrefs(
|
||||
}
|
||||
}
|
||||
// Load instructions: lwz, lbz, lhz, lha, lfs, lfd, lwzu, etc.
|
||||
32 | 33 | 34 | 35 | 40 | 41 | 42 | 43 | 46 | 48 | 49 | 50 | 51 => {
|
||||
if ra != 0 {
|
||||
if let Some(base) = reg_hi[ra] {
|
||||
32 | 33 | 34 | 35 | 40 | 41 | 42 | 43 | 48 | 49 | 50 | 51 => {
|
||||
if ra != 0
|
||||
&& let Some(base) = reg_hi[ra] {
|
||||
let data_addr = base.wrapping_add(simm as u32);
|
||||
if is_in_ranges(data_addr, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataRead));
|
||||
xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataRead });
|
||||
xrefs.entry(data_addr).or_default().push(Xref {
|
||||
source: abs_addr, kind: XrefKind::DataRead,
|
||||
addr_mode: Some(AddrMode::DForm),
|
||||
});
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
}
|
||||
}
|
||||
// Load into rD may clobber the tracked value
|
||||
reg_hi[rd] = None;
|
||||
}
|
||||
// lmw rD, simm(rA) — D-form multi-word load. Reads (32-rD)
|
||||
// consecutive 4-byte words starting at base+simm into
|
||||
// rD..r31. Emits one DataRead per slot.
|
||||
46 => {
|
||||
if ra != 0
|
||||
&& let Some(base) = reg_hi[ra]
|
||||
{
|
||||
let mut addr_w = base.wrapping_add(simm as u32);
|
||||
for _slot in (rd as u32)..32 {
|
||||
if is_in_ranges(addr_w, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (addr_w, XrefKind::DataRead));
|
||||
xrefs.entry(addr_w).or_default().push(Xref {
|
||||
source: abs_addr, kind: XrefKind::DataRead,
|
||||
addr_mode: Some(AddrMode::Multiword),
|
||||
});
|
||||
labels.entry(addr_w).or_insert_with(|| format!("dat_{addr_w:08X}"));
|
||||
}
|
||||
addr_w = addr_w.wrapping_add(4);
|
||||
}
|
||||
}
|
||||
reg_hi[rd] = None;
|
||||
}
|
||||
// Store instructions: stw, stb, sth, stfs, stfd, stwu, etc.
|
||||
36 | 37 | 38 | 39 | 44 | 45 | 47 | 52 | 53 | 54 | 55 => {
|
||||
if ra != 0 {
|
||||
if let Some(base) = reg_hi[ra] {
|
||||
36 | 37 | 38 | 39 | 44 | 45 | 52 | 53 | 54 | 55 => {
|
||||
if ra != 0
|
||||
&& let Some(base) = reg_hi[ra] {
|
||||
let data_addr = base.wrapping_add(simm as u32);
|
||||
if is_in_ranges(data_addr, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataWrite));
|
||||
xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataWrite });
|
||||
xrefs.entry(data_addr).or_default().push(Xref {
|
||||
source: abs_addr, kind: XrefKind::DataWrite,
|
||||
addr_mode: Some(AddrMode::DForm),
|
||||
});
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
}
|
||||
}
|
||||
// stmw rS, simm(rA) — D-form multi-word store. Writes
|
||||
// (32-rS) consecutive 4-byte words from rS..r31 to
|
||||
// base+simm onward. Emits one DataWrite per slot.
|
||||
47 => {
|
||||
if ra != 0
|
||||
&& let Some(base) = reg_hi[ra]
|
||||
{
|
||||
let mut addr_w = base.wrapping_add(simm as u32);
|
||||
for _slot in (rd as u32)..32 {
|
||||
if is_in_ranges(addr_w, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (addr_w, XrefKind::DataWrite));
|
||||
xrefs.entry(addr_w).or_default().push(Xref {
|
||||
source: abs_addr, kind: XrefKind::DataWrite,
|
||||
addr_mode: Some(AddrMode::Multiword),
|
||||
});
|
||||
labels.entry(addr_w).or_insert_with(|| format!("dat_{addr_w:08X}"));
|
||||
}
|
||||
addr_w = addr_w.wrapping_add(4);
|
||||
}
|
||||
}
|
||||
}
|
||||
// X-form: opcode 31 — indexed loads/stores, atomic ops, dcbz.
|
||||
// We can't statically resolve `rA + rB` without tracking rB
|
||||
// too; we record an xref ONLY when rB is also a known
|
||||
// constant (rare) OR when rB is r0 (which encodes as zero).
|
||||
// Falls through to the generic-clobber arm afterwards via
|
||||
// the explicit reg_hi update.
|
||||
31 => {
|
||||
let xo = (instr >> 1) & 0x3FF;
|
||||
let rb = ((instr >> 11) & 0x1F) as usize;
|
||||
let resolve_rab = |reg_hi: &[Option<u32>; 32]| -> Option<u32> {
|
||||
let a = if ra == 0 { Some(0u32) } else { reg_hi[ra] };
|
||||
let b = if rb == 0 { Some(0u32) } else { reg_hi[rb] };
|
||||
match (a, b) {
|
||||
(Some(av), Some(bv)) => Some(av.wrapping_add(bv)),
|
||||
_ => None,
|
||||
}
|
||||
};
|
||||
let mode_for_xo = |xo: u32| -> Option<(AddrMode, XrefKind)> {
|
||||
match xo {
|
||||
// Atomic store-conditional
|
||||
150 => Some((AddrMode::Atomic, XrefKind::DataWrite)), // stwcx.
|
||||
214 => Some((AddrMode::Atomic, XrefKind::DataWrite)), // stdcx.
|
||||
// Byte-reverse stores
|
||||
662 => Some((AddrMode::XFormByteRev, XrefKind::DataWrite)), // stwbrx
|
||||
918 => Some((AddrMode::XFormByteRev, XrefKind::DataWrite)), // sthbrx
|
||||
// Byte-reverse loads
|
||||
534 => Some((AddrMode::XFormByteRev, XrefKind::DataRead)), // lwbrx
|
||||
790 => Some((AddrMode::XFormByteRev, XrefKind::DataRead)), // lhbrx
|
||||
// dcbz — cache-line zero (32-byte clear). Treat as a write.
|
||||
1014 => Some((AddrMode::DCBZ, XrefKind::DataWrite)),
|
||||
// Plain X-form indexed stores (the common ones)
|
||||
151 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stwx
|
||||
215 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stbx
|
||||
407 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // sthx
|
||||
183 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stwux
|
||||
247 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stbux
|
||||
439 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // sthux
|
||||
149 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stdx
|
||||
181 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stdux
|
||||
// Plain X-form indexed loads
|
||||
23 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lwzx
|
||||
87 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lbzx
|
||||
279 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lhzx
|
||||
343 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lhax
|
||||
55 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lwzux
|
||||
119 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lbzux
|
||||
311 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lhzux
|
||||
375 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lhaux
|
||||
21 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // ldx
|
||||
53 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // ldux
|
||||
// AltiVec/VMX (opcode 31) loads & stores. Element
|
||||
// variants store one byte/halfword/word; full
|
||||
// `stvx` stores 16 bytes. Address resolution still
|
||||
// requires both rA and rB constant — common only
|
||||
// in static-table setup loops.
|
||||
231 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stvx
|
||||
487 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stvxl
|
||||
135 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stvebx
|
||||
167 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stvehx
|
||||
199 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stvewx
|
||||
// AltiVec/VMX loads — same XO range, kind=read.
|
||||
103 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lvx
|
||||
359 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lvxl
|
||||
7 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lvebx
|
||||
39 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lvehx
|
||||
71 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lvewx
|
||||
_ => None,
|
||||
}
|
||||
};
|
||||
if let Some((addr_mode, kind)) = mode_for_xo(xo)
|
||||
&& let Some(data_addr) = resolve_rab(®_hi)
|
||||
&& is_in_ranges(data_addr, &data_ranges)
|
||||
{
|
||||
data_annotations.insert(abs_addr, (data_addr, kind));
|
||||
xrefs.entry(data_addr).or_default().push(Xref {
|
||||
source: abs_addr, kind,
|
||||
addr_mode: Some(addr_mode),
|
||||
});
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
// Fall through: any X-form op may write rD; invalidate.
|
||||
reg_hi[rd] = None;
|
||||
}
|
||||
// Any other instruction writing to rD: invalidate
|
||||
_ => {
|
||||
// Conservatively invalidate for instructions that modify rD
|
||||
@@ -221,6 +407,17 @@ pub fn analyze_xrefs(
|
||||
}
|
||||
}
|
||||
|
||||
let elapsed_ms = started.elapsed().as_millis() as f64;
|
||||
metrics::histogram!("analysis.phase_ms", "phase" => "xrefs").record(elapsed_ms);
|
||||
let total_xrefs: usize = xrefs.values().map(|v| v.len()).sum();
|
||||
tracing::info!(
|
||||
labels = labels.len(),
|
||||
xrefs = total_xrefs,
|
||||
data_annotations = data_annotations.len(),
|
||||
elapsed_ms,
|
||||
"xref analysis complete"
|
||||
);
|
||||
|
||||
XrefResult { labels, xrefs, data_annotations }
|
||||
}
|
||||
|
||||
@@ -235,7 +432,7 @@ fn collect_branch_target(instr: u32, addr: u32, labels: &mut HashMap<u32, String
|
||||
let target = if aa { li as u32 } else { addr.wrapping_add(li as u32) };
|
||||
labels.entry(target).or_insert_with(|| format!("loc_{target:08X}"));
|
||||
let kind = if lk { XrefKind::Call } else { XrefKind::Jump };
|
||||
xrefs.entry(target).or_default().push(Xref { source: addr, kind });
|
||||
xrefs.entry(target).or_default().push(Xref { source: addr, kind, addr_mode: None });
|
||||
}
|
||||
16 => {
|
||||
// B-form: bc/bcl
|
||||
@@ -243,7 +440,7 @@ fn collect_branch_target(instr: u32, addr: u32, labels: &mut HashMap<u32, String
|
||||
let aa = instr & 2 != 0;
|
||||
let target = if aa { bd as u32 } else { addr.wrapping_add(bd as u32) };
|
||||
labels.entry(target).or_insert_with(|| format!("loc_{target:08X}"));
|
||||
xrefs.entry(target).or_default().push(Xref { source: addr, kind: XrefKind::Branch });
|
||||
xrefs.entry(target).or_default().push(Xref { source: addr, kind: XrefKind::Branch, addr_mode: None });
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
@@ -262,7 +459,7 @@ fn is_in_ranges(addr: u32, ranges: &[(u32, u32)]) -> bool {
|
||||
}
|
||||
|
||||
/// Find which section a data address falls in.
|
||||
pub fn section_for_addr<'a>(addr: u32, sections: &'a [PeSection], image_base: u32) -> Option<&'a str> {
|
||||
pub fn section_for_addr(addr: u32, sections: &[PeSection], image_base: u32) -> Option<&str> {
|
||||
for s in sections {
|
||||
let start = image_base + s.virtual_address;
|
||||
let end = start + s.virtual_size;
|
||||
@@ -285,12 +482,44 @@ pub fn resolve_source_label(
|
||||
}
|
||||
|
||||
// Find the containing function (largest start <= addr)
|
||||
if let Some((&func_start, _fi)) = func_analysis.functions.range(..=addr).next_back() {
|
||||
if let Some(func_label) = labels.get(&func_start) {
|
||||
if let Some((&func_start, _fi)) = func_analysis.functions.range(..=addr).next_back()
|
||||
&& let Some(func_label) = labels.get(&func_start) {
|
||||
let offset = addr - func_start;
|
||||
return format!("{func_label}+0x{offset:X}");
|
||||
}
|
||||
}
|
||||
|
||||
format!("0x{addr:08X}")
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// No two `AddrMode` variants may map to the same tag string.
    #[test]
    fn addr_mode_tags_are_distinct() {
        let all_modes = [
            AddrMode::DForm,
            AddrMode::LisAddi,
            AddrMode::LisOri,
            AddrMode::Multiword,
            AddrMode::XFormIndexed,
            AddrMode::XFormByteRev,
            AddrMode::Atomic,
            AddrMode::DCBZ,
        ];
        let mut seen = std::collections::HashSet::new();
        for mode in all_modes {
            seen.insert(mode.tag());
        }
        assert_eq!(seen.len(), all_modes.len(), "every AddrMode variant must have a unique tag");
    }

    /// A data edge keeps the addressing mode it was built with.
    #[test]
    fn xref_struct_carries_addr_mode_for_data_edges() {
        let edge = Xref {
            source: 0x1234,
            kind: XrefKind::DataWrite,
            addr_mode: Some(AddrMode::DForm),
        };
        assert_eq!(edge.addr_mode.map(AddrMode::tag), Some("d_form"));
    }

    /// A call edge carries no addressing mode.
    #[test]
    fn xref_struct_addr_mode_is_none_for_call_edges() {
        let edge = Xref {
            source: 0x1234,
            kind: XrefKind::Call,
            addr_mode: None,
        };
        assert_eq!(edge.addr_mode, None);
    }
}
|
||||
|
||||
362
crates/xenia-analysis/tests/db_schema_golden.rs
Normal file
362
crates/xenia-analysis/tests/db_schema_golden.rs
Normal file
@@ -0,0 +1,362 @@
|
||||
//! DB schema golden — locks the column layout (names + types) of every
|
||||
//! table written by `DbWriter`. A schema change here without a fixture
|
||||
//! update fails the test, forcing a conscious decision before downstream
|
||||
//! query consumers break.
|
||||
//!
|
||||
//! The fixture is constructed in-process (no XEX/ISO needed): a small
|
||||
//! synthetic PE-shaped byte slice with one `.text` section of 4
|
||||
//! instructions, plus an empty import-library list and one detected
|
||||
//! function.
|
||||
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::io::Write;
|
||||
|
||||
use duckdb::Connection;
|
||||
|
||||
use xenia_analysis::DbWriter;
|
||||
use xenia_analysis::formatter::DisasmInfo;
|
||||
use xenia_analysis::func::{FuncAnalysis, FuncInfo};
|
||||
use xenia_analysis::xref::XrefMap;
|
||||
use xenia_xex::pe::PeSection;
|
||||
|
||||
/// Build a 16-byte `.text` section: 4 instructions (mflr / nop / blr / nop).
|
||||
fn synthetic_pe() -> (Vec<u8>, Vec<PeSection>, Vec<xenia_xex::header::ImportLibrary>) {
|
||||
// VA layout: image_base + 0x1000 = .text start (so RVA = 0x1000).
|
||||
// The DB writer expects pe[rva] to hold the byte at that RVA, so the
|
||||
// buffer must be at least 0x1000 + section_size bytes long.
|
||||
const RVA: usize = 0x1000;
|
||||
const TEXT: [u32; 4] = [
|
||||
// mfspr r12, LR (a.k.a. mflr r12) — opcode 31, xo 339, spr 8 (LR).
|
||||
// Encoded with spr halves swapped per the ISA: spr_field = (8<<5).
|
||||
(31u32 << 26) | (12 << 21) | ((8 << 5) << 11) | (339 << 1),
|
||||
0x60000000, // nop (ori r0, r0, 0)
|
||||
(19u32 << 26) | (20 << 21) | (16 << 1), // blr (bclr 20, 0)
|
||||
0x60000000, // nop
|
||||
];
|
||||
|
||||
let mut pe = vec![0u8; RVA + 16];
|
||||
for (i, &word) in TEXT.iter().enumerate() {
|
||||
pe[RVA + i * 4..RVA + i * 4 + 4].copy_from_slice(&word.to_be_bytes());
|
||||
}
|
||||
|
||||
let sections = vec![PeSection {
|
||||
name: ".text".to_string(),
|
||||
virtual_address: 0x1000,
|
||||
virtual_size: 16,
|
||||
raw_offset: 0x1000,
|
||||
raw_size: 16,
|
||||
flags: 0x60000020, // CODE | EXECUTE | READ
|
||||
}];
|
||||
|
||||
let import_libraries = vec![]; // No imports in the fixture.
|
||||
(pe, sections, import_libraries)
|
||||
}
|
||||
|
||||
fn synthetic_func_analysis(image_base: u32) -> FuncAnalysis {
|
||||
// Single function covering all four .text instructions.
|
||||
let entry = image_base + 0x1000;
|
||||
let mut functions = BTreeMap::new();
|
||||
functions.insert(
|
||||
entry,
|
||||
FuncInfo {
|
||||
start: entry,
|
||||
end: entry + 16,
|
||||
frame_size: 0,
|
||||
saved_gprs: 0,
|
||||
is_leaf: true,
|
||||
is_saverestore: false,
|
||||
pdata_validated: false,
|
||||
pdata_length: None,
|
||||
has_eh: false,
|
||||
},
|
||||
);
|
||||
FuncAnalysis {
|
||||
functions,
|
||||
save_gpr_base: None,
|
||||
restore_gpr_base: None,
|
||||
pdata_entries: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn db_schema_matches_expected_columns() {
|
||||
let (pe, sections, libs) = synthetic_pe();
|
||||
let image_base = 0x82000000u32;
|
||||
let entry = image_base + 0x1000;
|
||||
|
||||
let info = DisasmInfo {
|
||||
image_base,
|
||||
entry_point: entry,
|
||||
original_pe_name: Some("synthetic.exe"),
|
||||
title_id: Some(0xDEADBEEF),
|
||||
media_id: Some(0xCAFEF00D),
|
||||
sections: §ions,
|
||||
import_libraries: &libs,
|
||||
};
|
||||
|
||||
let func_analysis = synthetic_func_analysis(image_base);
|
||||
let mut labels: HashMap<u32, String> = HashMap::new();
|
||||
labels.insert(entry, "entry_point".to_string());
|
||||
let xrefs: XrefMap = XrefMap::new();
|
||||
|
||||
let tmp = std::env::temp_dir().join("xenia_rs_schema_golden.duckdb");
|
||||
let _ = std::fs::remove_file(&tmp);
|
||||
|
||||
{
|
||||
let mut w = DbWriter::open_fresh(&tmp).expect("open fresh DB");
|
||||
w.write_base(&info).expect("write_base");
|
||||
w.ingest_instructions(&pe, &info, &func_analysis, &labels)
|
||||
.expect("ingest_instructions");
|
||||
w.write_analysis_results(&pe, &info, &func_analysis, &labels, &xrefs, &[], &[], &[], None, &[])
|
||||
.expect("write_analysis_results");
|
||||
w.create_sql_views().expect("create_sql_views");
|
||||
}
|
||||
|
||||
let conn = Connection::open(&tmp).expect("reopen DB");
|
||||
|
||||
// Lock the column layout per table. Pairs are (name, type).
|
||||
let expected: &[(&str, &[(&str, &str)])] = &[
|
||||
("metadata", &[
|
||||
("key", "VARCHAR"),
|
||||
("value", "VARCHAR"),
|
||||
]),
|
||||
("sections", &[
|
||||
("name", "VARCHAR"),
|
||||
("virtual_address", "BIGINT"),
|
||||
("virtual_size", "BIGINT"),
|
||||
("raw_offset", "BIGINT"),
|
||||
("raw_size", "BIGINT"),
|
||||
("flags", "BIGINT"),
|
||||
("is_code", "BOOLEAN"),
|
||||
]),
|
||||
("imports", &[
|
||||
("library", "VARCHAR"),
|
||||
("ordinal", "BIGINT"),
|
||||
("name", "VARCHAR"),
|
||||
("record_type", "BIGINT"),
|
||||
("address", "BIGINT"),
|
||||
]),
|
||||
("instructions", &[
|
||||
("address", "BIGINT"),
|
||||
("raw", "BIGINT"),
|
||||
("mnemonic", "VARCHAR"),
|
||||
("operands", "VARCHAR"),
|
||||
("disasm", "VARCHAR"),
|
||||
("ext_mnemonic", "VARCHAR"),
|
||||
("ext_operands", "VARCHAR"),
|
||||
("ext_disasm", "VARCHAR"),
|
||||
("target_hex", "BIGINT"),
|
||||
("section", "VARCHAR"),
|
||||
("function", "BIGINT"),
|
||||
("label", "VARCHAR"),
|
||||
]),
|
||||
("functions", &[
|
||||
("address", "BIGINT"),
|
||||
("name", "VARCHAR"),
|
||||
("end_address", "BIGINT"),
|
||||
("frame_size", "BIGINT"),
|
||||
("saved_gprs", "BIGINT"),
|
||||
("is_leaf", "BOOLEAN"),
|
||||
("is_saverestore", "BOOLEAN"),
|
||||
("pdata_validated", "BOOLEAN"),
|
||||
("pdata_length", "BIGINT"),
|
||||
("has_eh", "BOOLEAN"),
|
||||
]),
|
||||
("pdata_entries", &[
|
||||
("begin_address", "BIGINT"),
|
||||
("end_address", "BIGINT"),
|
||||
("function_length", "BIGINT"),
|
||||
("prolog_length", "BIGINT"),
|
||||
("flags", "BIGINT"),
|
||||
]),
|
||||
("labels", &[
|
||||
("address", "BIGINT"),
|
||||
("name", "VARCHAR"),
|
||||
("kind", "VARCHAR"),
|
||||
]),
|
||||
("demangled_names", &[
|
||||
("address", "BIGINT"),
|
||||
("mangled", "VARCHAR"),
|
||||
("raw_demangled", "VARCHAR"),
|
||||
("namespace_path", "VARCHAR"),
|
||||
("class_name", "VARCHAR"),
|
||||
("method_name", "VARCHAR"),
|
||||
("params_signature", "VARCHAR"),
|
||||
]),
|
||||
("vtables", &[
|
||||
("address", "BIGINT"),
|
||||
("length", "BIGINT"),
|
||||
("col_address", "BIGINT"),
|
||||
("class_name", "VARCHAR"),
|
||||
("rtti_present", "BOOLEAN"),
|
||||
("base_classes_json", "VARCHAR"),
|
||||
]),
|
||||
("methods", &[
|
||||
("vtable_address", "BIGINT"),
|
||||
("slot", "BIGINT"),
|
||||
("function_address", "BIGINT"),
|
||||
("mangled_name", "VARCHAR"),
|
||||
("demangled_name", "VARCHAR"),
|
||||
]),
|
||||
("classes", &[
|
||||
("name", "VARCHAR"),
|
||||
("vtable_address", "BIGINT"),
|
||||
("rtti_present", "BOOLEAN"),
|
||||
("base_classes_json", "VARCHAR"),
|
||||
]),
|
||||
("strings", &[
|
||||
("address", "BIGINT"),
|
||||
("encoding", "VARCHAR"),
|
||||
("length", "BIGINT"),
|
||||
("content", "VARCHAR"),
|
||||
]),
|
||||
("tls_info", &[
|
||||
("raw_data_start", "BIGINT"),
|
||||
("raw_data_end", "BIGINT"),
|
||||
("index_address", "BIGINT"),
|
||||
("callback_array", "BIGINT"),
|
||||
("zero_fill_size", "BIGINT"),
|
||||
("characteristics", "BIGINT"),
|
||||
]),
|
||||
("tls_callbacks", &[
|
||||
("slot", "BIGINT"),
|
||||
("address", "BIGINT"),
|
||||
]),
|
||||
("function_pointer_arrays", &[
|
||||
("address", "BIGINT"),
|
||||
("length", "BIGINT"),
|
||||
("kind", "VARCHAR"),
|
||||
]),
|
||||
("function_pointer_array_entries", &[
|
||||
("array_address", "BIGINT"),
|
||||
("slot", "BIGINT"),
|
||||
("function_address", "BIGINT"),
|
||||
]),
|
||||
("indirect_dispatch_sites", &[
|
||||
("dispatch_pc", "BIGINT"),
|
||||
("vptr_offset", "BIGINT"),
|
||||
("slot", "BIGINT"),
|
||||
("candidate_count", "BIGINT"),
|
||||
]),
|
||||
("indirect_dispatch_candidates", &[
|
||||
("dispatch_pc", "BIGINT"),
|
||||
("vtable_address", "BIGINT"),
|
||||
("method_address", "BIGINT"),
|
||||
]),
|
||||
("vptr_writes", &[
|
||||
("writer_pc", "BIGINT"),
|
||||
("vtable_address", "BIGINT"),
|
||||
("vptr_offset", "BIGINT"),
|
||||
("writer_function", "BIGINT"),
|
||||
]),
|
||||
("eh_funcinfo", &[
|
||||
("address", "BIGINT"),
|
||||
("magic", "BIGINT"),
|
||||
("max_state", "BIGINT"),
|
||||
("p_unwind_map", "BIGINT"),
|
||||
("n_try_blocks", "BIGINT"),
|
||||
("p_try_block_map", "BIGINT"),
|
||||
("n_ip_map_entries", "BIGINT"),
|
||||
("p_ip_to_state_map", "BIGINT"),
|
||||
("p_es_type_list", "BIGINT"),
|
||||
("eh_flags", "BIGINT"),
|
||||
]),
|
||||
("eh_unwind_map", &[
|
||||
("funcinfo_address", "BIGINT"),
|
||||
("state_index", "BIGINT"),
|
||||
("to_state", "BIGINT"),
|
||||
("action_pc", "BIGINT"),
|
||||
]),
|
||||
("eh_try_blocks", &[
|
||||
("funcinfo_address", "BIGINT"),
|
||||
("try_index", "BIGINT"),
|
||||
("try_low", "BIGINT"),
|
||||
("try_high", "BIGINT"),
|
||||
("catch_high", "BIGINT"),
|
||||
("n_catches", "BIGINT"),
|
||||
("p_handler_array", "BIGINT"),
|
||||
]),
|
||||
("xrefs", &[
|
||||
("source", "BIGINT"),
|
||||
("target", "BIGINT"),
|
||||
("kind", "VARCHAR"),
|
||||
("addr_mode", "VARCHAR"),
|
||||
("instruction", "VARCHAR"),
|
||||
("source_func", "BIGINT"),
|
||||
("source_label", "VARCHAR"),
|
||||
("target_label", "VARCHAR"),
|
||||
]),
|
||||
];
|
||||
|
||||
let mut errs: Vec<String> = Vec::new();
|
||||
for (table, cols) in expected {
|
||||
let mut stmt = conn
|
||||
.prepare(&format!("PRAGMA table_info('{}')", table))
|
||||
.unwrap_or_else(|e| panic!("prepare PRAGMA for {table}: {e}"));
|
||||
let rows: Vec<(String, String)> = stmt
|
||||
.query_map([], |row| {
|
||||
let name: String = row.get(1)?;
|
||||
let ty: String = row.get(2)?;
|
||||
Ok((name, ty))
|
||||
})
|
||||
.expect("query")
|
||||
.map(|r| r.unwrap())
|
||||
.collect();
|
||||
|
||||
if rows.len() != cols.len() {
|
||||
writeln!(
|
||||
std::io::stderr(),
|
||||
"{table}: column count mismatch (got {}, expected {})",
|
||||
rows.len(),
|
||||
cols.len()
|
||||
).ok();
|
||||
errs.push(format!("{table}: count {} vs {}", rows.len(), cols.len()));
|
||||
}
|
||||
for (i, (got, expected_col)) in rows.iter().zip(cols.iter()).enumerate() {
|
||||
if got.0 != expected_col.0 || got.1 != expected_col.1 {
|
||||
errs.push(format!(
|
||||
"{table} col {i}: got ({}, {}) expected ({}, {})",
|
||||
got.0, got.1, expected_col.0, expected_col.1
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert!(errs.is_empty(), "schema drift detected:\n {}", errs.join("\n "));
|
||||
|
||||
// Verify row counts in the populated tables.
|
||||
let n_instr: i64 = conn
|
||||
.query_row("SELECT COUNT(*) FROM instructions", [], |r| r.get(0))
|
||||
.unwrap();
|
||||
assert_eq!(n_instr, 4, "expected 4 instruction rows from the synthetic PE");
|
||||
|
||||
// The synthetic mflr should produce target_hex = NULL, blr likewise (indirect).
|
||||
let n_with_target: i64 = conn
|
||||
.query_row("SELECT COUNT(target_hex) FROM instructions", [], |r| r.get(0))
|
||||
.unwrap();
|
||||
assert_eq!(n_with_target, 0, "indirect-only fixture should have no direct branch targets");
|
||||
|
||||
// SQL views must be queryable. The `_` in SQL LIKE is a single-char
|
||||
// wildcard, so we list the names explicitly rather than `LIKE 'v_%'`
|
||||
// (which also matches DuckDB's built-in `views` system view).
|
||||
let expected_views = [
|
||||
"v_branch_xrefs",
|
||||
"v_call_graph",
|
||||
"v_function_first_instruction",
|
||||
"v_imports_called",
|
||||
"v_indirect_reachability_from_entry",
|
||||
"v_reachability_from_entry",
|
||||
];
|
||||
for v in expected_views {
|
||||
let exists: i64 = conn
|
||||
.query_row(
|
||||
"SELECT COUNT(*) FROM duckdb_views() WHERE view_name = ?",
|
||||
[v],
|
||||
|r| r.get(0),
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(exists, 1, "missing SQL view: {v}");
|
||||
}
|
||||
|
||||
let _ = std::fs::remove_file(&tmp);
|
||||
}
|
||||
123
crates/xenia-analysis/tests/disasm_goldens.rs
Normal file
123
crates/xenia-analysis/tests/disasm_goldens.rs
Normal file
@@ -0,0 +1,123 @@
|
||||
//! Analysis-side goldens: every row in the xenia-cpu fixtures must
|
||||
//! round-trip cleanly through the [`xenia_analysis::ppc`] shim. This
|
||||
//! pins the shim's behaviour to the canonical `xenia_cpu::disasm::format`
|
||||
//! output so that any future refactor of the shim layer surfaces here.
|
||||
//!
|
||||
//! Loads the same JSON fixtures committed under
|
||||
//! `crates/xenia-cpu/tests/golden/`. No separate analysis-side fixture
|
||||
//! files — the cpu canon is the source of truth.
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use serde::Deserialize;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GoldenRow {
|
||||
label: String,
|
||||
raw: String,
|
||||
addr: String,
|
||||
mnemonic: String,
|
||||
operands: String,
|
||||
#[serde(default)]
|
||||
ext_mnemonic: Option<String>,
|
||||
#[serde(default)]
|
||||
ext_operands: Option<String>,
|
||||
#[serde(default)]
|
||||
branch_target: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GoldenFile {
|
||||
rows: Vec<GoldenRow>,
|
||||
}
|
||||
|
||||
fn cpu_fixture(name: &str) -> PathBuf {
|
||||
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.join("..")
|
||||
.join("xenia-cpu")
|
||||
.join("tests")
|
||||
.join("golden")
|
||||
.join(name)
|
||||
}
|
||||
|
||||
/// Parses a hexadecimal `u32`, accepting an optional `0x` / `0X` prefix.
///
/// # Panics
///
/// Panics when the remaining text is not valid base-16 or does not fit in a
/// `u32`. The message quotes the offending input so a malformed fixture value
/// can be located without a debugger.
fn parse_hex(s: &str) -> u32 {
    let digits = s
        .strip_prefix("0x")
        .or_else(|| s.strip_prefix("0X"))
        .unwrap_or(s);
    u32::from_str_radix(digits, 16)
        .unwrap_or_else(|e| panic!("hex u32: cannot parse {s:?}: {e}"))
}
|
||||
|
||||
/// Verify the shim's `Decoded { base, ext }` mirrors the canonical fields
|
||||
/// from `xenia_cpu::disasm::format` for every fixture row.
|
||||
fn check_fixture(fixture_name: &str) {
|
||||
let path = cpu_fixture(fixture_name);
|
||||
assert!(
|
||||
path.exists(),
|
||||
"missing fixture {} — run `cargo test -p xenia-cpu --test disasm_goldens` to (re)generate it",
|
||||
path.display()
|
||||
);
|
||||
let src = std::fs::read_to_string(&path).unwrap();
|
||||
let golden: GoldenFile = serde_json::from_str(&src).unwrap();
|
||||
|
||||
for row in &golden.rows {
|
||||
let raw = parse_hex(&row.raw);
|
||||
let addr = parse_hex(&row.addr);
|
||||
|
||||
let canonical =
|
||||
xenia_cpu::disasm::format(&xenia_cpu::decode(raw, addr));
|
||||
let shim = xenia_analysis::ppc::disasm(raw, addr);
|
||||
|
||||
assert_eq!(
|
||||
shim.base, canonical.disasm,
|
||||
"shim.base drifted for {} (raw={})",
|
||||
row.label, row.raw,
|
||||
);
|
||||
assert_eq!(
|
||||
shim.ext, canonical.ext_disasm,
|
||||
"shim.ext drifted for {} (raw={})",
|
||||
row.label, row.raw,
|
||||
);
|
||||
|
||||
// Also pin against the fixture's structured fields — guards against
|
||||
// someone changing the cpu canon without regenerating the fixture.
|
||||
assert_eq!(canonical.mnemonic, row.mnemonic, "mnemonic drift: {}", row.label);
|
||||
assert_eq!(canonical.operands, row.operands, "operands drift: {}", row.label);
|
||||
assert_eq!(canonical.ext_mnemonic, row.ext_mnemonic, "ext_mnemonic drift: {}", row.label);
|
||||
assert_eq!(canonical.ext_operands, row.ext_operands, "ext_operands drift: {}", row.label);
|
||||
|
||||
let target_str = canonical.branch_target.map(|t| format!("0x{t:08X}"));
|
||||
assert_eq!(target_str, row.branch_target, "branch_target drift: {}", row.label);
|
||||
}
|
||||
}
|
||||
|
||||
/// The shim must agree with the cpu canon on the base-mnemonic fixture.
#[test]
fn analysis_shim_matches_base_mnemonics() {
    let fixture = "base_mnemonics.json";
    check_fixture(fixture);
}
|
||||
|
||||
/// The shim must agree with the cpu canon on the extended-mnemonic fixture.
#[test]
fn analysis_shim_matches_extended_mnemonics() {
    let fixture = "extended_mnemonics.json";
    check_fixture(fixture);
}
|
||||
|
||||
/// The shim must agree with the cpu canon on the VMX128 register fixture.
#[test]
fn analysis_shim_matches_vmx128_registers() {
    let fixture = "vmx128_registers.json";
    check_fixture(fixture);
}
|
||||
|
||||
/// Spot-check the `display()` contract that `formatter.rs` and the .asm
/// output rely on: the extended form wins when one exists, otherwise the
/// base form is returned.
#[test]
fn shim_display_prefers_extended() {
    // ori r0, r0, 0 → base "ori r0, r0, 0x0", ext "nop"
    let nop = xenia_analysis::ppc::disasm(0x60000000, 0);
    assert_eq!(nop.display(), "nop");

    // addi r3, r1, 16 → no extended form, display falls back to base
    let addi_word = (14u32 << 26) | (3 << 21) | (1 << 16) | 16;
    let addi = xenia_analysis::ppc::disasm(addi_word, 0);
    assert!(
        addi.ext.is_none(),
        "addi r3, r1, 16 has no extended form (only addi r3, r0, … → li)"
    );
    assert_eq!(addi.display(), addi.base);
}
|
||||
@@ -20,9 +20,21 @@ xenia-apu = { workspace = true }
|
||||
xenia-hid = { workspace = true }
|
||||
xenia-debugger = { workspace = true }
|
||||
xenia-analysis = { workspace = true }
|
||||
xenia-ui = { workspace = true }
|
||||
winit = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
tracing-subscriber = { workspace = true }
|
||||
tracing-appender = { workspace = true }
|
||||
tracing-chrome = { workspace = true }
|
||||
tracing-error = { workspace = true }
|
||||
metrics = { workspace = true }
|
||||
metrics-util = { workspace = true }
|
||||
pprof = { workspace = true, optional = true }
|
||||
anyhow = { workspace = true }
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
|
||||
[features]
|
||||
default = ["profiling"]
|
||||
profiling = ["dep:pprof"]
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
384
crates/xenia-app/src/observability.rs
Normal file
384
crates/xenia-app/src/observability.rs
Normal file
@@ -0,0 +1,384 @@
|
||||
//! Logging, tracing, and profiling wiring for the `xenia-rs` CLI.
|
||||
//!
|
||||
//! Owns the `tracing-subscriber` registry, optional file / Chrome-trace sinks,
|
||||
//! the `metrics` debugging recorder, and (behind the `profiling` feature) the
|
||||
//! `pprof-rs` sampling profiler. All drop-time cleanup (flushing appenders,
|
||||
//! finalising Chrome output, writing flamegraphs, printing the metrics
|
||||
//! summary) is carried by [`ObservabilityGuards`] so `main` just has to hold
|
||||
//! the value until return.
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use tracing::Level;
|
||||
use tracing_error::{ErrorLayer, SpanTrace};
|
||||
use tracing_subscriber::fmt::format::FmtSpan;
|
||||
use tracing_subscriber::layer::SubscriberExt;
|
||||
use tracing_subscriber::util::SubscriberInitExt;
|
||||
use tracing_subscriber::{fmt, EnvFilter, Layer, Registry};
|
||||
|
||||
/// Observability settings chosen by the user via CLI flags and environment.
#[derive(Debug, Clone, Default)]
pub struct ObservabilityConfig {
    /// Render console logs as JSON (`true`) or compact text (`false`).
    pub log_json: bool,
    /// Extra log sink. A `.json` extension (case-insensitive) selects the JSON
    /// formatter; any other path gets the text formatter.
    pub log_file: Option<PathBuf>,
    /// Explicit filter directive. Takes precedence over `RUST_LOG` and is
    /// parsed with `EnvFilter::try_new`.
    pub log_filter: Option<String>,
    /// Fallback filter directive, used when neither
    /// [`log_filter`](Self::log_filter) nor `RUST_LOG` supplies one.
    pub default_level: &'static str,
    /// When set, a Chrome `about:tracing` JSON trace is written to this path.
    pub trace_chrome: Option<PathBuf>,
    /// When set, the pprof sampling profiler runs and its report is written
    /// here on drop. `.svg` (or no extension) → flamegraph; `.pb`, `.proto`
    /// or `.pprof` → protobuf.
    pub profile: Option<PathBuf>,
}
|
||||
|
||||
impl ObservabilityConfig {
|
||||
#[allow(dead_code)]
|
||||
pub fn new(default_level: &'static str) -> Self {
|
||||
Self {
|
||||
default_level,
|
||||
..Self::default()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// RAII handle returned by [`init`]. Drop flushes the appender, finalises
|
||||
/// Chrome output, writes the pprof report, and prints the metrics summary.
|
||||
#[must_use = "drop of ObservabilityGuards is what flushes logs, profiles, and metrics"]
|
||||
pub struct ObservabilityGuards {
|
||||
_appender: Option<tracing_appender::non_blocking::WorkerGuard>,
|
||||
_chrome: Option<tracing_chrome::FlushGuard>,
|
||||
#[cfg(feature = "profiling")]
|
||||
pprof: Option<(pprof::ProfilerGuard<'static>, PathBuf)>,
|
||||
metrics_snapshotter: Option<metrics_util::debugging::Snapshotter>,
|
||||
}
|
||||
|
||||
impl Drop for ObservabilityGuards {
|
||||
fn drop(&mut self) {
|
||||
#[cfg(feature = "profiling")]
|
||||
if let Some((guard, path)) = self.pprof.take() {
|
||||
if let Err(e) = write_pprof_report(&guard, &path) {
|
||||
eprintln!("profile write failed: {e:#}");
|
||||
} else {
|
||||
tracing::info!(path = %path.display(), "pprof report written");
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(snap) = self.metrics_snapshotter.take() {
|
||||
print_metrics_summary(&snap);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Build and install the global tracing subscriber + metrics recorder.
|
||||
pub fn init(config: &ObservabilityConfig) -> Result<ObservabilityGuards> {
|
||||
let span_events = parse_span_events();
|
||||
|
||||
// Resolve the filter directive once; attach a freshly-built `EnvFilter`
|
||||
// per sink layer via `.with_filter()`. Previously the filter was pushed
|
||||
// into the layer-`Vec` but that only gates what *itself* sees in a
|
||||
// boxed-Vec setup; sibling fmt / chrome / file layers kept emitting
|
||||
// filtered-out events. Per-layer filtering is the idiomatic tracing-
|
||||
// subscriber pattern and works cleanly with boxed layer dispatch.
|
||||
let directive = resolve_filter_directive(config);
|
||||
|
||||
let mut layers: Vec<Box<dyn Layer<Registry> + Send + Sync + 'static>> = Vec::new();
|
||||
|
||||
// Console fmt layer — compact text or JSON, always stderr.
|
||||
let console_layer: Box<dyn Layer<Registry> + Send + Sync + 'static> = if config.log_json {
|
||||
fmt::layer()
|
||||
.json()
|
||||
.with_span_events(span_events.clone())
|
||||
.with_writer(std::io::stderr)
|
||||
.with_filter(EnvFilter::try_new(&directive).context("invalid filter")?)
|
||||
.boxed()
|
||||
} else {
|
||||
fmt::layer()
|
||||
.compact()
|
||||
.with_span_events(span_events.clone())
|
||||
.with_writer(std::io::stderr)
|
||||
.with_filter(EnvFilter::try_new(&directive).context("invalid filter")?)
|
||||
.boxed()
|
||||
};
|
||||
layers.push(console_layer);
|
||||
|
||||
// Optional file sink — also filtered.
|
||||
let appender_guard = match &config.log_file {
|
||||
Some(path) => {
|
||||
let (layer, guard) = build_file_layer(path, span_events)?;
|
||||
layers.push(
|
||||
layer
|
||||
.with_filter(EnvFilter::try_new(&directive).context("invalid filter")?)
|
||||
.boxed(),
|
||||
);
|
||||
Some(guard)
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
// Optional Chrome `about:tracing` sink — intentionally UNFILTERED so
|
||||
// traces capture the full picture even when the console is quiet.
|
||||
let chrome_guard = match &config.trace_chrome {
|
||||
Some(path) => {
|
||||
let (layer, guard) = tracing_chrome::ChromeLayerBuilder::new()
|
||||
.file(path.clone())
|
||||
.include_args(true)
|
||||
.build();
|
||||
layers.push(layer.boxed());
|
||||
Some(guard)
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
// `tracing-error` layer enables SpanTrace capture in `with_span_trace`.
|
||||
layers.push(ErrorLayer::default().boxed());
|
||||
|
||||
tracing_subscriber::registry()
|
||||
.with(layers)
|
||||
.try_init()
|
||||
.context("tracing subscriber already initialized")?;
|
||||
// `build_env_filter` is retained for compatibility with older callers;
|
||||
// `resolve_filter_directive` above is what actually drives the layer
|
||||
// filters.
|
||||
let _ = build_env_filter(config);
|
||||
|
||||
// Install the metrics debugging recorder. `install` sets the global
|
||||
// recorder; its snapshotter is held in the guards struct.
|
||||
let recorder = metrics_util::debugging::DebuggingRecorder::new();
|
||||
let snapshotter = recorder.snapshotter();
|
||||
if recorder.install().is_err() {
|
||||
tracing::warn!("a metrics recorder was already installed; skipping xenia-rs recorder");
|
||||
}
|
||||
|
||||
#[cfg(feature = "profiling")]
|
||||
let pprof = match &config.profile {
|
||||
Some(path) => {
|
||||
let guard = pprof::ProfilerGuardBuilder::default()
|
||||
.frequency(100)
|
||||
.blocklist(&["libc", "libgcc", "pthread", "vdso"])
|
||||
.build()
|
||||
.context("failed to start pprof sampling profiler")?;
|
||||
Some((guard, path.clone()))
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
#[cfg(not(feature = "profiling"))]
|
||||
if config.profile.is_some() {
|
||||
bail!("--profile requires building with --features profiling");
|
||||
}
|
||||
|
||||
Ok(ObservabilityGuards {
|
||||
_appender: appender_guard,
|
||||
_chrome: chrome_guard,
|
||||
#[cfg(feature = "profiling")]
|
||||
pprof,
|
||||
metrics_snapshotter: Some(snapshotter),
|
||||
})
|
||||
}
|
||||
|
||||
fn resolve_filter_directive(config: &ObservabilityConfig) -> String {
|
||||
if let Some(ref f) = config.log_filter {
|
||||
return f.clone();
|
||||
}
|
||||
if let Ok(f) = std::env::var("RUST_LOG")
|
||||
&& !f.is_empty() {
|
||||
return f;
|
||||
}
|
||||
config.default_level.to_string()
|
||||
}
|
||||
|
||||
fn build_env_filter(config: &ObservabilityConfig) -> Result<EnvFilter> {
|
||||
// Precedence: explicit --log-filter > RUST_LOG > default_level.
|
||||
if let Some(ref f) = config.log_filter {
|
||||
return EnvFilter::try_new(f).context("invalid --log-filter directive");
|
||||
}
|
||||
if let Ok(f) = EnvFilter::try_from_default_env() {
|
||||
return Ok(f);
|
||||
}
|
||||
EnvFilter::try_new(config.default_level)
|
||||
.with_context(|| format!("invalid default filter `{}`", config.default_level))
|
||||
}
|
||||
|
||||
fn parse_span_events() -> FmtSpan {
|
||||
match std::env::var("RUST_LOG_SPAN_EVENTS").as_deref() {
|
||||
Ok("full") => FmtSpan::FULL,
|
||||
Ok("close") => FmtSpan::CLOSE,
|
||||
Ok("active") => FmtSpan::ACTIVE,
|
||||
Ok("enter") => FmtSpan::ENTER,
|
||||
Ok("exit") => FmtSpan::EXIT,
|
||||
Ok("new") => FmtSpan::NEW,
|
||||
_ => FmtSpan::NONE,
|
||||
}
|
||||
}
|
||||
|
||||
type FileLayerBox = Box<dyn Layer<Registry> + Send + Sync + 'static>;
|
||||
|
||||
fn build_file_layer(
|
||||
path: &Path,
|
||||
span_events: FmtSpan,
|
||||
) -> Result<(FileLayerBox, tracing_appender::non_blocking::WorkerGuard)> {
|
||||
let parent = path.parent().unwrap_or_else(|| Path::new("."));
|
||||
let file_name = path
|
||||
.file_name()
|
||||
.ok_or_else(|| anyhow::anyhow!("log file path has no file name: {}", path.display()))?;
|
||||
std::fs::create_dir_all(parent)
|
||||
.with_context(|| format!("failed to create {}", parent.display()))?;
|
||||
|
||||
let appender = tracing_appender::rolling::never(parent, file_name);
|
||||
let (non_blocking, guard) = tracing_appender::non_blocking(appender);
|
||||
|
||||
let ext = path
|
||||
.extension()
|
||||
.and_then(|e| e.to_str())
|
||||
.unwrap_or_default();
|
||||
let layer: FileLayerBox = if ext.eq_ignore_ascii_case("json") {
|
||||
fmt::layer()
|
||||
.json()
|
||||
.with_span_events(span_events)
|
||||
.with_writer(non_blocking)
|
||||
.with_ansi(false)
|
||||
.boxed()
|
||||
} else {
|
||||
fmt::layer()
|
||||
.with_span_events(span_events)
|
||||
.with_writer(non_blocking)
|
||||
.with_ansi(false)
|
||||
.boxed()
|
||||
};
|
||||
|
||||
Ok((layer, guard))
|
||||
}
|
||||
|
||||
/// Wrap an error with a captured span-trace so the top-level `main` can
|
||||
/// render "failed in `cmd_exec > load_image > …`" alongside the regular
|
||||
/// anyhow context chain.
|
||||
#[allow(dead_code)]
|
||||
pub fn with_span_trace<E>(err: E) -> anyhow::Error
|
||||
where
|
||||
E: std::error::Error + Send + Sync + 'static,
|
||||
{
|
||||
anyhow::Error::new(err).context(SpanTraceDisplay(SpanTrace::capture()))
|
||||
}
|
||||
|
||||
/// Attach a captured span-trace to an existing `anyhow::Error` as extra
|
||||
/// context. Used at command boundaries where errors already bubble as
|
||||
/// `anyhow::Error`.
|
||||
pub fn attach_span_trace(err: anyhow::Error) -> anyhow::Error {
|
||||
err.context(SpanTraceDisplay(SpanTrace::capture()))
|
||||
}
|
||||
|
||||
struct SpanTraceDisplay(SpanTrace);
|
||||
|
||||
impl std::fmt::Display for SpanTraceDisplay {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "span trace:\n{}", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for SpanTraceDisplay {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
<Self as std::fmt::Display>::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the profiler report and writes it to `path`, choosing the format
/// from the lower-cased file extension: `svg` or none → flamegraph;
/// `pb`, `proto` or `pprof` → protobuf.
///
/// # Errors
///
/// Fails when the report cannot be built, rendered, encoded, or written, and
/// for any other extension.
#[cfg(feature = "profiling")]
fn write_pprof_report(guard: &pprof::ProfilerGuard<'static>, path: &Path) -> Result<()> {
    let report = guard.report().build().context("pprof report build failed")?;
    let ext = path
        .extension()
        .and_then(|e| e.to_str())
        .unwrap_or("")
        .to_ascii_lowercase();

    // Best effort: if this fails, the create/write below reports the error.
    let dir = path.parent().unwrap_or_else(|| Path::new("."));
    let _ = std::fs::create_dir_all(dir);

    let format = ext.as_str();
    if matches!(format, "svg" | "") {
        let file =
            std::fs::File::create(path).with_context(|| format!("create {}", path.display()))?;
        report
            .flamegraph(file)
            .context("flamegraph render failed")?;
    } else if matches!(format, "pb" | "proto" | "pprof") {
        use pprof::protos::Message;
        let profile = report.pprof().context("pprof protobuf build failed")?;
        let buf = profile
            .write_to_bytes()
            .context("pprof protobuf encode failed")?;
        std::fs::write(path, &buf).with_context(|| format!("write {}", path.display()))?;
    } else {
        bail!("unknown --profile extension `.{format}` (use .svg or .pb)");
    }
    Ok(())
}
|
||||
|
||||
fn print_metrics_summary(snap: &metrics_util::debugging::Snapshotter) {
|
||||
use metrics_util::debugging::DebugValue;
|
||||
use metrics_util::MetricKind;
|
||||
|
||||
let snapshot = snap.snapshot();
|
||||
let rows = snapshot.into_vec();
|
||||
if rows.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Group counters, gauges, histograms into simple lines. Use tracing so
|
||||
// the summary honours the installed subscriber (can land in file + JSON
|
||||
// sinks and not just stderr).
|
||||
let mut lines: Vec<String> = Vec::with_capacity(rows.len());
|
||||
for (key, _unit, _desc, value) in rows {
|
||||
let kind = match key.kind() {
|
||||
MetricKind::Counter => "counter",
|
||||
MetricKind::Gauge => "gauge",
|
||||
MetricKind::Histogram => "histogram",
|
||||
};
|
||||
let name = key.key().name();
|
||||
let labels: Vec<String> = key
|
||||
.key()
|
||||
.labels()
|
||||
.map(|l| format!("{}={}", l.key(), l.value()))
|
||||
.collect();
|
||||
let labels_str = if labels.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
format!("{{{}}}", labels.join(","))
|
||||
};
|
||||
let value_str = match value {
|
||||
DebugValue::Counter(n) => n.to_string(),
|
||||
DebugValue::Gauge(g) => format!("{}", g.into_inner()),
|
||||
DebugValue::Histogram(samples) => {
|
||||
if samples.is_empty() {
|
||||
"empty".to_string()
|
||||
} else {
|
||||
let floats: Vec<f64> = samples.iter().map(|s| s.into_inner()).collect();
|
||||
let count = floats.len();
|
||||
let sum: f64 = floats.iter().copied().sum();
|
||||
let min = floats.iter().copied().fold(f64::INFINITY, f64::min);
|
||||
let max = floats.iter().copied().fold(f64::NEG_INFINITY, f64::max);
|
||||
format!(
|
||||
"count={} sum={:.3} min={:.3} max={:.3} mean={:.3}",
|
||||
count,
|
||||
sum,
|
||||
min,
|
||||
max,
|
||||
sum / count as f64
|
||||
)
|
||||
}
|
||||
}
|
||||
};
|
||||
lines.push(format!(" {kind:<9} {name}{labels_str} = {value_str}"));
|
||||
}
|
||||
|
||||
if tracing::enabled!(Level::INFO) {
|
||||
tracing::info!("metrics summary:\n{}", lines.join("\n"));
|
||||
} else {
|
||||
eprintln!("metrics summary:\n{}", lines.join("\n"));
|
||||
}
|
||||
}
|
||||
72
crates/xenia-app/tests/golden/README.md
Normal file
72
crates/xenia-app/tests/golden/README.md
Normal file
@@ -0,0 +1,72 @@
|
||||
# Sylpheed regression goldens
|
||||
|
||||
These JSON files anchor `xenia-rs check` digest output for Project Sylpheed.
|
||||
|
||||
## Files
|
||||
|
||||
| File | -n | Mode | Captures |
|
||||
|------|----|------|----------|
|
||||
| `sylpheed_n2m.json` | 2_000_000 | full digest | early boot (swaps=0, no rendering) |
|
||||
| `sylpheed_n50m.json` | 50_000_000 | stable-digest | first VdSwap pair (swaps=2 post-Phase-A) |
|
||||
|
||||
## Stable-digest mode
|
||||
|
||||
`sylpheed_n50m.json` is captured with `--stable-digest`, which omits
|
||||
timing-sensitive counters: `packets` (±2–8% lockstep noise from a GPU thread
|
||||
race), `resolves`, `interrupts_delivered`, `interrupts_dropped`,
|
||||
`texture_decodes`. The remaining fields are byte-identical across repeated
|
||||
lockstep runs at a fixed -n.
|
||||
|
||||
`sylpheed_n2m.json` predates the stable-digest flag and uses full-digest
|
||||
compare. It still works because at -n 2M the GPU pipeline has not produced any
|
||||
packets yet — `packets=0` is trivially deterministic.
|
||||
|
||||
## Circularity hazard
|
||||
|
||||
Per ORACBUG-001/002/003, these goldens were captured by running the same code
|
||||
they validate. They detect **regression** from a known-good snapshot, not
|
||||
**correctness**. When a planned fix intentionally moves the digest (e.g. a
|
||||
shader fix landing `draws > 0` for the first time), re-baseline the golden as
|
||||
a separate commit and reference the audit ID in the message.
|
||||
|
||||
## Re-baselining
|
||||
|
||||
```sh
|
||||
cargo build --release -p xenia-app
|
||||
target/release/xenia-rs check \
|
||||
"$SYLPHEED_ISO" \
|
||||
-n 50000000 \
|
||||
--stable-digest \
|
||||
--out crates/xenia-app/tests/golden/sylpheed_n50m.json
|
||||
```
|
||||
|
||||
## Running the goldens
|
||||
|
||||
```sh
|
||||
cargo test --release -p xenia-app --test sylpheed_oracles -- --ignored --nocapture
|
||||
```
|
||||
|
||||
The tests are `#[ignore]`-gated because each run takes a few seconds, which is
|
||||
unacceptable in the default `cargo test` cycle. The ISO path defaults to the
|
||||
contributor's local `~/RE Project Sylpheed/Project Sylpheed*.iso` and can be
|
||||
overridden via `SYLPHEED_ISO=/path/to/sylpheed.iso`.
|
||||
|
||||
## n4b canonical-invocation regression anchor (deferred)
|
||||
|
||||
The audit's recommended next sprint also called for a `sylpheed_n4b.json`
|
||||
golden capturing the canonical reference invocation
|
||||
`xenia-rs check sylpheed.iso -n 4_000_000_000 --parallel --reservations-table`.
|
||||
This is **deferred** because:
|
||||
|
||||
1. The `--parallel --reservations-table` combination is pathologically slow in
   practice: a single run at -n 100M already takes more than 32 minutes (per
   the audit memory). At -n 4B a run would take many hours, not the
   single-session-friendly 5–15 min the original plan estimated.
|
||||
2. Each phase that intentionally moves rendering counters (C, D, E, F) would
|
||||
need a re-baseline of n4b — a significant time cost compounding over the
|
||||
sprint.
|
||||
|
||||
Once the renderer-unblock phases (C+D+E) land and `draws > 0` is confirmed at
|
||||
-n 100M lockstep, an n4b artifact may be captured one-shot and stored under
|
||||
`audit-runs/post-fix/` (not as a test golden) as a manual regression anchor for
|
||||
the canonical invocation.
|
||||
10
crates/xenia-app/tests/golden/sylpheed_n2m.json
Normal file
10
crates/xenia-app/tests/golden/sylpheed_n2m.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"instructions": 2000005,
|
||||
"imports": 5635,
|
||||
"unimpl": 0,
|
||||
"draws": 0,
|
||||
"swaps": 0,
|
||||
"unique_render_targets": 0,
|
||||
"shader_blobs_live": 0,
|
||||
"texture_cache_entries": 0
|
||||
}
|
||||
10
crates/xenia-app/tests/golden/sylpheed_n50m.json
Normal file
10
crates/xenia-app/tests/golden/sylpheed_n50m.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"instructions": 50000000,
|
||||
"imports": 339766,
|
||||
"unimpl": 0,
|
||||
"draws": 0,
|
||||
"swaps": 2,
|
||||
"unique_render_targets": 0,
|
||||
"shader_blobs_live": 0,
|
||||
"texture_cache_entries": 0
|
||||
}
|
||||
111
crates/xenia-app/tests/parallel_stress.rs
Normal file
111
crates/xenia-app/tests/parallel_stress.rs
Normal file
@@ -0,0 +1,111 @@
|
||||
//! M3 real-parallelism stress harness.
|
||||
//!
|
||||
//! Runs `xenia-rs exec <iso> -n <N> --parallel --halt-on-deadlock --quiet`
//! many times back-to-back to surface lost-wakeups, lock-order
//! inversions, and ABA hazards that a single run wouldn't reliably
//! reproduce. Failed runs dump their stdout/stderr to
//! `target/parallel-stress-<label>-NNN.{stdout,stderr}` for post-mortem.
|
||||
//!
|
||||
//! Two configurations, both `#[ignore]`-gated (opt-in):
//! - `parallel_stress_short`: 20 runs at -n 5_000_000. Quick smoke
//!   check — runs in a few minutes on the current substrate.
//! - `parallel_stress_long`: 100 runs at -n 50_000_000. The full gate
//!   from the master plan; expected runtime is hours until the perf gap
//!   (Step 05's deferred parking fix) closes.
//!
//! Because both tests are ignored by default, `--ignored` is always
//! required. Name the test to run only one of them, e.g. the short variant:
//! `cargo test --release -p xenia-app --test parallel_stress
//! -- --ignored --nocapture parallel_stress_short`. Omitting the test name
//! runs both configurations, including the full 100x gate.
|
||||
|
||||
use std::process::Command;
|
||||
use std::time::Instant;
|
||||
|
||||
const ISO_DEFAULT: &str = "/home/fabi/RE Project Sylpheed/Project Sylpheed - Arc of Deception (USA, Europe) (En,Ja).iso";
|
||||
|
||||
fn iso_path() -> String {
|
||||
std::env::var("SYLPHEED_ISO").unwrap_or_else(|_| ISO_DEFAULT.to_string())
|
||||
}
|
||||
|
||||
fn run_stress(label: &str, runs: u32, max_instr: u64) {
|
||||
let bin = env!("CARGO_BIN_EXE_xenia-rs");
|
||||
let iso = iso_path();
|
||||
if !std::path::Path::new(&iso).exists() {
|
||||
eprintln!("{label}: iso not found at {iso}; set SYLPHEED_ISO to override. SKIPPING.");
|
||||
return;
|
||||
}
|
||||
std::fs::create_dir_all("target").ok();
|
||||
let mut failures: u32 = 0;
|
||||
let mut wall_ms: Vec<u128> = Vec::with_capacity(runs as usize);
|
||||
let max_instr_str = max_instr.to_string();
|
||||
for run in 1..=runs {
|
||||
let t0 = Instant::now();
|
||||
let out = Command::new(bin)
|
||||
.args([
|
||||
"exec",
|
||||
&iso,
|
||||
"-n",
|
||||
&max_instr_str,
|
||||
"--parallel",
|
||||
"--halt-on-deadlock",
|
||||
"--quiet",
|
||||
])
|
||||
.output()
|
||||
.expect("failed to spawn xenia-rs");
|
||||
let dt = t0.elapsed().as_millis();
|
||||
wall_ms.push(dt);
|
||||
let exit_ok = out.status.success();
|
||||
let vdswap2 = String::from_utf8_lossy(&out.stderr).contains("VdSwap")
|
||||
|| String::from_utf8_lossy(&out.stdout).contains("VdSwap");
|
||||
let _ = vdswap2; // VdSwap=2 not required at -n 5M; tracked for diagnostic only.
|
||||
if !exit_ok {
|
||||
failures += 1;
|
||||
std::fs::write(
|
||||
format!("target/parallel-stress-{label}-{run:03}.stdout"),
|
||||
&out.stdout,
|
||||
)
|
||||
.ok();
|
||||
std::fs::write(
|
||||
format!("target/parallel-stress-{label}-{run:03}.stderr"),
|
||||
&out.stderr,
|
||||
)
|
||||
.ok();
|
||||
eprintln!(
|
||||
"{label}: run {run}/{runs} FAILED (wall={}ms, exit={:?})",
|
||||
dt,
|
||||
out.status.code()
|
||||
);
|
||||
} else {
|
||||
eprintln!("{label}: run {run}/{runs} ok (wall={dt}ms)");
|
||||
}
|
||||
}
|
||||
wall_ms.sort();
|
||||
let p50 = wall_ms[wall_ms.len() / 2];
|
||||
let p95_idx = ((wall_ms.len() - 1) * 95) / 100;
|
||||
let p95 = wall_ms[p95_idx];
|
||||
let max = *wall_ms.last().unwrap();
|
||||
eprintln!(
|
||||
"{label} summary: runs={runs} ok={} failed={failures} p50={p50}ms p95={p95}ms max={max}ms",
|
||||
runs - failures,
|
||||
);
|
||||
assert_eq!(failures, 0, "{label}: {failures} of {runs} stress runs failed");
|
||||
}
|
||||
|
||||
/// Short configuration: 20 runs at -n 5M. Session-feasible (~10 minutes at
/// the current perf level). Meant to surface lost-wakeup / lock-order /
/// phaser-timeout bugs that a single run wouldn't reproduce.
#[test]
#[ignore = "stress test; run via `cargo test ... -- --ignored parallel_stress_short`"]
fn parallel_stress_short() {
    let (runs, budget) = (20, 5_000_000);
    run_stress("short", runs, budget);
}
|
||||
|
||||
/// Long configuration: 100 runs at -n 50M. This is the full M3 follow-up gate
/// per the master plan; expected runtime is hours until the perf gap closes.
#[test]
#[ignore = "full stress test; run via `cargo test ... -- --ignored parallel_stress_long`"]
fn parallel_stress_long() {
    let (runs, budget) = (100, 50_000_000);
    run_stress("long", runs, budget);
}
|
||||
85
crates/xenia-app/tests/sylpheed_oracles.rs
Normal file
85
crates/xenia-app/tests/sylpheed_oracles.rs
Normal file
@@ -0,0 +1,85 @@
|
||||
//! Sylpheed boot-sequence regression oracles.
|
||||
//!
|
||||
//! These goldens trigger `xenia-rs check` against the Project Sylpheed ISO and
|
||||
//! compare the resulting digest to a checked-in JSON file via `--stable-digest`,
|
||||
//! which excludes timing-sensitive counters (`packets`, `interrupts_*`,
|
||||
//! `resolves`, `texture_decodes`). The remaining fields are deterministic in
|
||||
//! lockstep at a fixed instruction budget — verified empirically across 3
|
||||
//! consecutive runs.
|
||||
//!
|
||||
//! Goldens are CIRCULAR per ORACBUG-001/002/003: they were captured by running
|
||||
//! the same code they validate. Treat them as **regression anchors** (catch
|
||||
//! drift from a known-good snapshot) not **correctness anchors** (no claim
|
||||
//! about absolute behavior). When a planned fix intentionally moves the
|
||||
//! digest (e.g. swap fix → `swaps` increments; renderer fix → `draws` becomes
|
||||
//! non-zero), re-baseline the golden as a separate commit.
|
||||
//!
|
||||
//! Tests are `#[ignore]`-gated because the runs take ~4 seconds each, which
|
||||
//! is unacceptable for the default `cargo test` cycle. Run explicitly:
|
||||
//! cargo test --release -p xenia-app --test sylpheed_oracles -- --ignored --nocapture
|
||||
//!
|
||||
//! ISO path is read from the `SYLPHEED_ISO` env var, falling back to the
|
||||
//! hard-coded absolute path in `ISO_DEFAULT`. CI/contributors without the ISO
|
||||
//! will see the test return early (it is reported as passed, not skipped).
|
||||
|
||||
use std::process::Command;
|
||||
|
||||
const ISO_DEFAULT: &str = "/home/fabi/RE Project Sylpheed/Project Sylpheed - Arc of Deception (USA, Europe) (En,Ja).iso";
|
||||
|
||||
fn iso_path() -> String {
|
||||
std::env::var("SYLPHEED_ISO").unwrap_or_else(|_| ISO_DEFAULT.to_string())
|
||||
}
|
||||
|
||||
fn run_oracle(label: &str, max_instr: u64, golden_rel: &str) {
|
||||
let bin = env!("CARGO_BIN_EXE_xenia-rs");
|
||||
let iso = iso_path();
|
||||
if !std::path::Path::new(&iso).exists() {
|
||||
eprintln!("{label}: iso not found at {iso}; set SYLPHEED_ISO to override. SKIPPING.");
|
||||
return;
|
||||
}
|
||||
|
||||
// Resolve the golden path relative to the test's CARGO_MANIFEST_DIR so the
|
||||
// test runs correctly from any cwd.
|
||||
let manifest_dir = env!("CARGO_MANIFEST_DIR");
|
||||
let golden = std::path::Path::new(manifest_dir).join(golden_rel);
|
||||
assert!(
|
||||
golden.exists(),
|
||||
"{label}: golden file missing at {}",
|
||||
golden.display()
|
||||
);
|
||||
|
||||
let max_instr_str = max_instr.to_string();
|
||||
let golden_str = golden.to_string_lossy().to_string();
|
||||
|
||||
let out = Command::new(bin)
|
||||
.args([
|
||||
"check",
|
||||
&iso,
|
||||
"-n",
|
||||
&max_instr_str,
|
||||
"--stable-digest",
|
||||
"--expect",
|
||||
&golden_str,
|
||||
])
|
||||
.output()
|
||||
.expect("failed to spawn xenia-rs");
|
||||
|
||||
if !out.status.success() {
|
||||
eprintln!(
|
||||
"{label}: STDOUT:\n{}\nSTDERR:\n{}",
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
);
|
||||
panic!("{label}: digest mismatch (exit {:?})", out.status.code());
|
||||
}
|
||||
}
|
||||
|
||||
/// Oracle for the Sylpheed boot up to the first VdSwap pair, captured at
/// `-n 50M` in lockstep.
///
/// Per the original notes it guards against regressions in addi/addic
/// semantics, kernel HLE on the VdSwap path, thread spawning, and file I/O
/// for sound/config. With Phase A's swap fix landed the golden is expected
/// to show `swaps` = 2 and `draws` = 0 (Phase E gates draws>0).
#[test]
#[ignore = "long-running; run via `cargo test ... -- --ignored sylpheed_n50m`"]
fn sylpheed_n50m() {
    const LABEL: &str = "sylpheed_n50m";
    const INSTR_BUDGET: u64 = 50_000_000;
    const GOLDEN: &str = "tests/golden/sylpheed_n50m.json";
    run_oracle(LABEL, INSTR_BUDGET, GOLDEN);
}
|
||||
@@ -10,3 +10,11 @@ xenia-memory = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
bitflags = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
|
||||
[[bench]]
|
||||
name = "interpreter"
|
||||
harness = false
|
||||
|
||||
194
crates/xenia-cpu/benches/interpreter.rs
Normal file
194
crates/xenia-cpu/benches/interpreter.rs
Normal file
@@ -0,0 +1,194 @@
|
||||
//! Interpreter throughput micro-benchmarks.
|
||||
//!
|
||||
//! Custom `harness = false` main — no extra dev-deps. Run via
|
||||
//! `cargo bench -p xenia-cpu` (or `cargo run --release --bench interpreter`).
|
||||
//!
|
||||
//! Three workloads, each measuring `step_cached` throughput in MIPS:
|
||||
//!
|
||||
//! - `tight_alu_loop` — pure dispatch + ALU + decode-cache hit.
|
||||
//! - `loadstore_loop` — alternating `lwz`/`stw` against main RAM. Stresses
|
||||
//! every load/store path and `find_mmio` dispatch.
|
||||
//! - `mmio_storm` — same shape as `loadstore_loop` but the address is
|
||||
//! in a registered MMIO aperture. Sanity-checks that
|
||||
//! MMIO writes still dispatch correctly.
|
||||
//!
|
||||
//! These are not statistically rigorous — no warmup, no variance — they're
|
||||
//! just enough to detect 2x-class wins or regressions on the perf-track
|
||||
//! changes (MMIO fast-reject, threaded dispatch, block cache). Numbers go
|
||||
//! into commit messages; there is no automated baseline file.
|
||||
|
||||
use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use xenia_cpu::context::PpcContext;
|
||||
use xenia_cpu::decoder::DecodeCache;
|
||||
use xenia_cpu::interpreter::{step_cached, StepResult};
|
||||
use xenia_memory::{GuestMemory, MemoryAccess, MmioRegion};
|
||||
use xenia_memory::page_table::MemoryProtect;
|
||||
|
||||
// PPC instruction encoders — minimal subset needed by the benches.
|
||||
|
||||
/// Pack a D-form PPC instruction word: primary opcode shifted to bit 26,
/// the first register field at bit 21, the second at bit 16, and the
/// 16-bit immediate (taken as its raw two's-complement bits) in the low
/// half-word. Register operands are not range-checked.
#[inline]
fn enc_d_form(primary: u32, rt: u32, ra: u32, imm: i16) -> u32 {
    let imm_bits = u32::from(imm as u16);
    (primary << 26) | (rt << 21) | (ra << 16) | imm_bits
}

/// Encode `addi rd, ra, simm` (primary opcode 14).
#[inline]
fn enc_addi(rd: u32, ra: u32, simm: i16) -> u32 {
    enc_d_form(14, rd, ra, simm)
}

/// Encode `lwz rd, d(ra)` (primary opcode 32).
#[inline]
fn enc_lwz(rd: u32, ra: u32, d: i16) -> u32 {
    enc_d_form(32, rd, ra, d)
}

/// Encode `stw rs, d(ra)` (primary opcode 36).
#[inline]
fn enc_stw(rs: u32, ra: u32, d: i16) -> u32 {
    enc_d_form(36, rs, ra, d)
}
|
||||
|
||||
/// Set up a `GuestMemory` with one writable region for code+data.
|
||||
fn make_mem(code_base: u32, code_size: u32) -> GuestMemory {
|
||||
let mut mem = GuestMemory::new().expect("reserve 4GB");
|
||||
mem.alloc(code_base, code_size, MemoryProtect::READ | MemoryProtect::WRITE)
|
||||
.expect("alloc bench region");
|
||||
mem
|
||||
}
|
||||
|
||||
/// Write a sequence of raw PPC instructions starting at `base`.
|
||||
fn write_program(mem: &GuestMemory, base: u32, instrs: &[u32]) {
|
||||
for (i, &raw) in instrs.iter().enumerate() {
|
||||
mem.write_u32(base + (i as u32 * 4), raw);
|
||||
}
|
||||
}
|
||||
|
||||
/// Run `total_instrs` interpreter steps over a program of length `n`,
|
||||
/// wrapping PC back to `base` whenever it falls off the end. Returns the
|
||||
/// elapsed wall time.
|
||||
fn run_loop(
|
||||
ctx: &mut PpcContext,
|
||||
mem: &GuestMemory,
|
||||
cache: &mut DecodeCache,
|
||||
base: u32,
|
||||
n: u32,
|
||||
total_instrs: u64,
|
||||
) -> std::time::Duration {
|
||||
let end = base + n * 4;
|
||||
ctx.pc = base;
|
||||
let t0 = Instant::now();
|
||||
for _ in 0..total_instrs {
|
||||
let pv = mem.page_version(ctx.pc);
|
||||
let r = step_cached(ctx, mem, cache, pv);
|
||||
debug_assert!(matches!(r, StepResult::Continue));
|
||||
if ctx.pc >= end {
|
||||
ctx.pc = base;
|
||||
}
|
||||
}
|
||||
t0.elapsed()
|
||||
}
|
||||
|
||||
/// Print one result row to stdout: label, instruction count, elapsed
/// seconds, and throughput in MIPS (instructions / seconds / 1e6).
fn report(label: &str, total_instrs: u64, elapsed: std::time::Duration) {
    let secs = elapsed.as_secs_f64();
    let instrs_per_sec = (total_instrs as f64) / secs;
    let mips = instrs_per_sec / 1.0e6;
    println!(
        "{:<24} {:>12} instrs in {:>7.3}s = {:>7.2} MIPS",
        label, total_instrs, secs, mips
    );
}
|
||||
|
||||
fn bench_tight_alu_loop() {
|
||||
const BASE: u32 = 0x1000;
|
||||
const N: u32 = 256;
|
||||
const TOTAL: u64 = 50_000_000;
|
||||
|
||||
let mut mem = make_mem(BASE, 0x1000);
|
||||
// 256 × `addi r3, r3, 1` — pure register-register, no memory touch
|
||||
// beyond instruction fetch.
|
||||
let prog: Vec<u32> = (0..N).map(|_| enc_addi(3, 3, 1)).collect();
|
||||
write_program(&mut mem, BASE, &prog);
|
||||
|
||||
let mut ctx = PpcContext::new();
|
||||
let mut cache = DecodeCache::new();
|
||||
|
||||
let elapsed = run_loop(&mut ctx, &mut mem, &mut cache, BASE, N, TOTAL);
|
||||
report("tight_alu_loop", TOTAL, elapsed);
|
||||
}
|
||||
|
||||
fn bench_loadstore_loop() {
|
||||
const CODE_BASE: u32 = 0x1000;
|
||||
const DATA_BASE: u32 = 0x2000;
|
||||
const N: u32 = 256;
|
||||
const TOTAL: u64 = 30_000_000;
|
||||
|
||||
let mut mem = make_mem(CODE_BASE, 0x2000);
|
||||
// 128 pairs of `stw r3, 0(r4); lwz r5, 0(r4)` — exercises every
|
||||
// load/store path through `read_u32`/`write_u32` (incl. `find_mmio`).
|
||||
let mut prog = Vec::with_capacity(N as usize);
|
||||
for _ in 0..(N / 2) {
|
||||
prog.push(enc_stw(3, 4, 0));
|
||||
prog.push(enc_lwz(5, 4, 0));
|
||||
}
|
||||
write_program(&mut mem, CODE_BASE, &prog);
|
||||
|
||||
let mut ctx = PpcContext::new();
|
||||
ctx.gpr[3] = 0xDEAD_BEEF;
|
||||
ctx.gpr[4] = DATA_BASE as u64;
|
||||
let mut cache = DecodeCache::new();
|
||||
|
||||
let elapsed = run_loop(&mut ctx, &mut mem, &mut cache, CODE_BASE, N, TOTAL);
|
||||
report("loadstore_loop", TOTAL, elapsed);
|
||||
}
|
||||
|
||||
fn bench_mmio_storm() {
|
||||
const CODE_BASE: u32 = 0x1000;
|
||||
const MMIO_BASE: u32 = 0xEA00_0000;
|
||||
const N: u32 = 64;
|
||||
// MMIO is slower per access — keep total smaller so the bench stays
|
||||
// under a few seconds.
|
||||
const TOTAL: u64 = 2_000_000;
|
||||
|
||||
let mut mem = make_mem(CODE_BASE, 0x1000);
|
||||
|
||||
let writes = Arc::new(AtomicU64::new(0));
|
||||
let reads = Arc::new(AtomicU32::new(0));
|
||||
let writes_clone = writes.clone();
|
||||
let reads_clone = reads.clone();
|
||||
mem.add_mmio_region(MmioRegion {
|
||||
base_address: MMIO_BASE,
|
||||
mask: 0xFFFF_0000,
|
||||
size: 0x0001_0000,
|
||||
read_callback: Box::new(move |_a| {
|
||||
reads_clone.fetch_add(1, Ordering::Relaxed);
|
||||
0
|
||||
}),
|
||||
write_callback: Box::new(move |_a, _v| {
|
||||
writes_clone.fetch_add(1, Ordering::Relaxed);
|
||||
}),
|
||||
});
|
||||
|
||||
let mut prog = Vec::with_capacity(N as usize);
|
||||
for _ in 0..(N / 2) {
|
||||
prog.push(enc_stw(3, 4, 0));
|
||||
prog.push(enc_lwz(5, 4, 0));
|
||||
}
|
||||
write_program(&mut mem, CODE_BASE, &prog);
|
||||
|
||||
let mut ctx = PpcContext::new();
|
||||
ctx.gpr[3] = 0x1234_5678;
|
||||
ctx.gpr[4] = MMIO_BASE as u64;
|
||||
let mut cache = DecodeCache::new();
|
||||
|
||||
let elapsed = run_loop(&mut ctx, &mut mem, &mut cache, CODE_BASE, N, TOTAL);
|
||||
report("mmio_storm", TOTAL, elapsed);
|
||||
|
||||
// Sanity assertions — silently catch a refactor that breaks MMIO dispatch.
|
||||
let w = writes.load(Ordering::Relaxed);
|
||||
let r = reads.load(Ordering::Relaxed);
|
||||
assert_eq!(w, TOTAL / 2, "expected MMIO writes to be dispatched");
|
||||
assert_eq!(r as u64, TOTAL / 2, "expected MMIO reads to be dispatched");
|
||||
}
|
||||
|
||||
fn main() {
|
||||
println!("xenia-cpu interpreter bench");
|
||||
println!(" build: {}", if cfg!(debug_assertions) { "debug" } else { "release" });
|
||||
bench_tight_alu_loop();
|
||||
bench_loadstore_loop();
|
||||
bench_mmio_storm();
|
||||
}
|
||||
423
crates/xenia-cpu/src/block_cache.rs
Normal file
423
crates/xenia-cpu/src/block_cache.rs
Normal file
@@ -0,0 +1,423 @@
|
||||
//! Tier-4 perf — basic-block cache for the PPC interpreter.
|
||||
//!
|
||||
//! `DecodeCache` (in [`crate::decoder`]) caches one decoded instruction
|
||||
//! per slot, indexed by PC. The hot loop still pays the per-instruction
|
||||
//! cost of fetching the raw word, hashing the PC into a slot, and
|
||||
//! comparing tags. For straight-line code — common in the asset/inflate
|
||||
//! loops where Sylpheed boot is currently CPU-bound — the savings of
|
||||
//! batching N decoded instructions per slot lookup are linear in block
|
||||
//! length.
|
||||
//!
|
||||
//! ## Shape
|
||||
//!
|
||||
//! A `DecodedBlock` is a contiguous run of decoded instructions starting
|
||||
//! at `start_pc`, ending at the first *block terminator* (any branch,
|
||||
//! `sc`, trap, or `Invalid`) or at one of two safety limits:
|
||||
//!
|
||||
//! - [`MAX_BLOCK_INSTRS`] caps memory growth and re-build cost.
|
||||
//! - 4 KiB page boundary stop. A block is fully contained inside a
|
||||
//! single 4 KiB guest page; that means `mem.page_version(start_pc)`
|
||||
//! is sufficient to detect any code-page rewrite that should
|
||||
//! invalidate the block. Without this rule the cache would have to
|
||||
//! walk every spanned page on every hit, which would erase the win.
|
||||
//!
|
||||
//! ## Invalidation
|
||||
//!
|
||||
//! Each block stamps the page version at build time. On lookup, if
|
||||
//! `mem.page_version(start_pc)` differs from `block.page_version`, the
|
||||
//! slot is rebuilt. Same mechanism `DecodeCache` uses, just at
|
||||
//! block granularity.
|
||||
//!
|
||||
//! ## Debugger semantics
|
||||
//!
|
||||
//! Block dispatch is **opt-in** by the caller. The hot loop in
|
||||
//! `xenia-app/src/main.rs` selects the per-instruction path whenever
|
||||
//! `Debugger::wants_hooks()` is true or any `--trace-*` flag is set.
|
||||
//! That's how single-step, breakpoints, in-memory trace, instruction
|
||||
//! trace, and branch trace continue to observe every PC: the block
|
||||
//! cache simply never runs in those modes.
|
||||
|
||||
use crate::decoder::{decode, DecodedInstr};
|
||||
use xenia_memory::MemoryAccess;
|
||||
|
||||
/// Number of index bits used to pick a block-cache slot.
const BLOCK_CACHE_BITS: u32 = 16;

/// Slot count of the direct-mapped block cache: 64 K entries, the same
/// shape as [`crate::decoder::DECODE_CACHE_SIZE`]. A slot is selected by
/// the low 16 bits of `start_pc >> 2`. Per the original notes, collisions
/// are negligible on Sylpheed-class workloads.
const BLOCK_CACHE_SIZE: usize = 1 << BLOCK_CACHE_BITS;

/// Mask applied to `start_pc >> 2` to obtain the slot index.
const BLOCK_CACHE_MASK: u32 = (BLOCK_CACHE_SIZE - 1) as u32;

/// Upper bound on the number of instructions held by one block. Bounds
/// both the worst-case memory footprint and the cost of rebuilding a block
/// after its code page changes. Per the original notes 32 is generous:
/// the observed average across Sylpheed boot is ~6 instructions between
/// branches.
pub const MAX_BLOCK_INSTRS: usize = 32;

/// Guest page size in bytes. Duplicated here rather than imported so that
/// `xenia-cpu` does not depend on `xenia-memory::heap` internals; it must
/// stay equal to the memory crate's value. Both describe the 4 KiB
/// PowerPC page granule.
const GUEST_PAGE_SIZE: u32 = 4096;

/// Mask that clears the in-page offset bits of a guest address.
const GUEST_PAGE_MASK: u32 = !(GUEST_PAGE_SIZE - 1);
|
||||
|
||||
/// One cached basic block. Owned by [`BlockCache`]; a `&DecodedBlock`
|
||||
/// is handed to the interpreter via [`BlockCache::lookup_or_build`] and
|
||||
/// stays valid until the next `lookup_or_build` on the same slot.
|
||||
#[derive(Debug)]
|
||||
pub struct DecodedBlock {
|
||||
/// Guest PC at which this block starts. Used as the slot tag.
|
||||
pub start_pc: u32,
|
||||
/// Guest PC immediately after the last instruction in `instrs`.
|
||||
/// Equal to `instrs.last().addr + 4` whether or not the block
|
||||
/// ended on a terminator. Useful for tracing / disassembly.
|
||||
pub end_pc: u32,
|
||||
/// `mem.page_version(start_pc)` at build time. Mismatch on lookup
|
||||
/// invalidates the block. Single value because every block is
|
||||
/// page-bounded by construction.
|
||||
pub page_version: u64,
|
||||
/// Decoded instructions in execution order. Always non-empty after
|
||||
/// a successful build (`MAX_BLOCK_INSTRS >= 1` and the build walk
|
||||
/// pushes the first decoded word unconditionally).
|
||||
pub instrs: Vec<DecodedInstr>,
|
||||
}
|
||||
|
||||
/// Outcome of probing one slot in `lookup_or_build`. Private to this module.
enum CacheStatus {
    /// The slot holds a block tagged with the requested `pc`, and its
    /// recorded page version equals `mem.page_version(pc)`. Served as-is.
    Hit,
    /// The slot holds a block tagged with the requested `pc`, but the page
    /// version has changed since it was built. The block is rebuilt and
    /// both `invalidations` and `misses` are incremented.
    Stale,
    /// The slot is empty or holds a block for a different `start_pc`. A
    /// new block is built and `misses` is incremented.
    Miss,
}
|
||||
|
||||
/// Direct-mapped block cache. One instance shared across all HW slots
|
||||
/// (block contents are PC-only and read-only after fill). Not
|
||||
/// thread-safe — owner is the single scheduler thread, same as
|
||||
/// `DecodeCache`.
|
||||
pub struct BlockCache {
|
||||
slots: Box<[Option<Box<DecodedBlock>>]>,
|
||||
hits: u64,
|
||||
misses: u64,
|
||||
invalidations: u64,
|
||||
}
|
||||
|
||||
impl Default for BlockCache {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl BlockCache {
|
||||
pub fn new() -> Self {
|
||||
// `Option<Box<T>>` is a niche-optimized 8-byte slot; 64 K of
|
||||
// them cost ~512 KiB of cold storage. Live blocks beyond that
|
||||
// sit on the heap.
|
||||
let mut v: Vec<Option<Box<DecodedBlock>>> = Vec::with_capacity(BLOCK_CACHE_SIZE);
|
||||
v.resize_with(BLOCK_CACHE_SIZE, || None);
|
||||
Self {
|
||||
slots: v.into_boxed_slice(),
|
||||
hits: 0,
|
||||
misses: 0,
|
||||
invalidations: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn hits(&self) -> u64 {
|
||||
self.hits
|
||||
}
|
||||
pub fn misses(&self) -> u64 {
|
||||
self.misses
|
||||
}
|
||||
pub fn invalidations(&self) -> u64 {
|
||||
self.invalidations
|
||||
}
|
||||
|
||||
/// Return the cached block starting at `pc`, building it if absent
|
||||
/// or stale. The returned reference is borrowed from the cache and
|
||||
/// stays valid until the next `lookup_or_build` call.
|
||||
pub fn lookup_or_build(&mut self, pc: u32, mem: &dyn MemoryAccess) -> &DecodedBlock {
|
||||
let idx = ((pc >> 2) & BLOCK_CACHE_MASK) as usize;
|
||||
let cur_pv = mem.page_version(pc);
|
||||
|
||||
// Phase 1: classify the slot. Borrow ends before fill so the
|
||||
// mutable update below doesn't conflict.
|
||||
let status = match &self.slots[idx] {
|
||||
Some(b) if b.start_pc == pc && b.page_version == cur_pv => CacheStatus::Hit,
|
||||
Some(b) if b.start_pc == pc => CacheStatus::Stale,
|
||||
_ => CacheStatus::Miss,
|
||||
};
|
||||
|
||||
// Phase 2: fill on miss/stale, account.
|
||||
match status {
|
||||
CacheStatus::Hit => {
|
||||
self.hits += 1;
|
||||
}
|
||||
CacheStatus::Stale => {
|
||||
self.invalidations += 1;
|
||||
self.misses += 1;
|
||||
let block = build_block(pc, mem, cur_pv);
|
||||
self.slots[idx] = Some(Box::new(block));
|
||||
}
|
||||
CacheStatus::Miss => {
|
||||
self.misses += 1;
|
||||
let block = build_block(pc, mem, cur_pv);
|
||||
self.slots[idx] = Some(Box::new(block));
|
||||
}
|
||||
}
|
||||
|
||||
// Slot is guaranteed populated at this point — Hit returned a
|
||||
// pre-existing block, Miss/Stale just wrote a new one.
|
||||
self.slots[idx]
|
||||
.as_deref()
|
||||
.expect("block freshly built or hit")
|
||||
}
|
||||
}
|
||||
|
||||
/// Walk forward from `pc`, decoding instructions and collecting them
|
||||
/// into a `DecodedBlock`. The walk stops on the first of:
|
||||
/// - a [`PpcOpcode::terminates_block`] true (the terminator IS
|
||||
/// included as the last instruction),
|
||||
/// - reaching [`MAX_BLOCK_INSTRS`],
|
||||
/// - the next PC would cross a 4 KiB guest page boundary.
|
||||
fn build_block(start_pc: u32, mem: &dyn MemoryAccess, page_version: u64) -> DecodedBlock {
|
||||
let mut instrs: Vec<DecodedInstr> = Vec::with_capacity(8);
|
||||
let page_base = start_pc & GUEST_PAGE_MASK;
|
||||
let mut cur = start_pc;
|
||||
|
||||
loop {
|
||||
let raw = mem.read_u32(cur);
|
||||
let decoded = decode(raw, cur);
|
||||
let terminates = decoded.opcode.terminates_block();
|
||||
instrs.push(decoded);
|
||||
|
||||
if terminates {
|
||||
break;
|
||||
}
|
||||
if instrs.len() >= MAX_BLOCK_INSTRS {
|
||||
break;
|
||||
}
|
||||
let next = cur.wrapping_add(4);
|
||||
if (next & GUEST_PAGE_MASK) != page_base {
|
||||
break;
|
||||
}
|
||||
cur = next;
|
||||
}
|
||||
|
||||
let last = instrs.last().expect("build pushes at least one instruction");
|
||||
let end_pc = last.addr.wrapping_add(4);
|
||||
|
||||
DecodedBlock {
|
||||
start_pc,
|
||||
end_pc,
|
||||
page_version,
|
||||
instrs,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::opcode::PpcOpcode;

    use std::cell::Cell;

    /// Flat 64 KiB byte store with big-endian multi-byte accesses. Kept
    /// local (rather than reusing `interpreter::tests::TestMem`) so these
    /// tests do not depend on interpreter internals.
    struct BlockTestMem {
        data: Box<[Cell<u8>]>,
        version_a: u64,
        version_b: u64,
        // Base address of the one page that reports `version_b`; every
        // other page reports `version_a`. Lets a test simulate a page
        // version bump without performing a write.
        bumped_page: Cell<Option<u32>>,
    }

    impl BlockTestMem {
        fn new() -> Self {
            Self {
                data: std::iter::repeat_with(|| Cell::new(0)).take(0x10000).collect(),
                version_a: 1,
                version_b: 2,
                bumped_page: Cell::new(None),
            }
        }

        /// Fold `width` bytes starting at `addr` into a big-endian integer.
        fn load_be(&self, addr: u32, width: usize) -> u64 {
            let start = addr as usize;
            self.data[start..start + width]
                .iter()
                .fold(0u64, |acc, cell| (acc << 8) | u64::from(cell.get()))
        }

        /// Copy `bytes` into the store starting at `addr`.
        fn store(&self, addr: u32, bytes: &[u8]) {
            let start = addr as usize;
            let window = &self.data[start..start + bytes.len()];
            for (cell, &byte) in window.iter().zip(bytes) {
                cell.set(byte);
            }
        }

        /// Place one big-endian instruction word at `addr`.
        fn put(&self, addr: u32, raw: u32) {
            self.store(addr, &raw.to_be_bytes());
        }
    }

    impl MemoryAccess for BlockTestMem {
        fn read_u8(&self, a: u32) -> u8 {
            self.data[a as usize].get()
        }
        fn read_u16(&self, a: u32) -> u16 {
            self.load_be(a, 2) as u16
        }
        fn read_u32(&self, a: u32) -> u32 {
            self.load_be(a, 4) as u32
        }
        fn read_u64(&self, a: u32) -> u64 {
            self.load_be(a, 8)
        }
        fn write_u8(&self, a: u32, v: u8) {
            self.data[a as usize].set(v);
        }
        fn write_u16(&self, a: u32, v: u16) {
            self.store(a, &v.to_be_bytes());
        }
        fn write_u32(&self, a: u32, v: u32) {
            self.store(a, &v.to_be_bytes());
        }
        fn write_u64(&self, a: u32, v: u64) {
            self.store(a, &v.to_be_bytes());
        }
        fn translate(&self, _: u32) -> Option<*const u8> {
            None
        }
        fn translate_mut(&self, _: u32) -> Option<*mut u8> {
            None
        }
        fn page_version(&self, addr: u32) -> u64 {
            match self.bumped_page.get() {
                Some(page) if page == addr & GUEST_PAGE_MASK => self.version_b,
                _ => self.version_a,
            }
        }
    }

    // Minimal PPC encoders for these tests.

    /// `addi rd, ra, simm` (primary opcode 14).
    fn enc_addi(rd: u32, ra: u32, simm: i16) -> u32 {
        let imm = u32::from(simm as u16);
        (14 << 26) | (rd << 21) | (ra << 16) | imm
    }

    /// `b 0` — branch to self: primary opcode 18 with LI=0, AA=0, LK=0.
    fn enc_b_self() -> u32 {
        18 << 26
    }

    /// All-zero word; the tests below expect the decoder to report it as
    /// `PpcOpcode::Invalid`.
    fn enc_unimplemented() -> u32 {
        0
    }

    #[test]
    fn block_built_to_terminator() {
        let mem = BlockTestMem::new();
        for addr in [0x100u32, 0x104, 0x108] {
            mem.put(addr, enc_addi(3, 3, 1));
        }
        mem.put(0x10C, enc_b_self()); // terminator

        let mut cache = BlockCache::new();
        let block = cache.lookup_or_build(0x100, &mem);
        assert_eq!(block.start_pc, 0x100);
        assert_eq!(block.instrs.len(), 4);
        assert_eq!(block.instrs.last().unwrap().opcode, PpcOpcode::bx);
        assert_eq!(block.end_pc, 0x110);
    }

    #[test]
    fn block_stops_at_page_boundary() {
        // Starting at 0x1FFC, the following PC (0x2000) belongs to the
        // next 4 KiB page, so the block holds exactly one instruction.
        let mem = BlockTestMem::new();
        mem.put(0x1FFC, enc_addi(3, 3, 1));
        mem.put(0x2000, enc_addi(3, 3, 1));

        let mut cache = BlockCache::new();
        let block = cache.lookup_or_build(0x1FFC, &mem);
        assert_eq!(block.instrs.len(), 1);
        assert_eq!(block.end_pc, 0x2000);
    }

    #[test]
    fn block_stops_at_max_len() {
        // 64 non-terminating instructions within one page: the block is
        // cut off at MAX_BLOCK_INSTRS.
        let mem = BlockTestMem::new();
        (0..64u32).for_each(|i| mem.put(0x100 + i * 4, enc_addi(3, 3, 1)));

        let mut cache = BlockCache::new();
        let block = cache.lookup_or_build(0x100, &mem);
        assert_eq!(block.instrs.len(), MAX_BLOCK_INSTRS);
        assert_eq!(block.end_pc, 0x100 + (MAX_BLOCK_INSTRS as u32) * 4);
    }

    #[test]
    fn block_stops_at_invalid_opcode() {
        // `Invalid` ends the block, which keeps the per-instruction
        // Unimplemented path intact.
        let mem = BlockTestMem::new();
        mem.put(0x100, enc_addi(3, 3, 1));
        mem.put(0x104, enc_unimplemented());
        mem.put(0x108, enc_addi(3, 3, 1));

        let mut cache = BlockCache::new();
        let block = cache.lookup_or_build(0x100, &mem);
        assert_eq!(block.instrs.len(), 2);
        assert_eq!(block.instrs.last().unwrap().opcode, PpcOpcode::Invalid);
    }

    #[test]
    fn block_invalidates_on_page_version_bump() {
        let mem = BlockTestMem::new();
        mem.put(0x100, enc_addi(3, 3, 1));
        mem.put(0x104, enc_b_self());

        let mut cache = BlockCache::new();

        // First lookup builds the block.
        let _ = cache.lookup_or_build(0x100, &mem);
        assert_eq!(cache.misses(), 1);
        assert_eq!(cache.hits(), 0);

        // Second lookup with nothing changed is a hit.
        let _ = cache.lookup_or_build(0x100, &mem);
        assert_eq!(cache.hits(), 1);
        assert_eq!(cache.invalidations(), 0);

        // Change the reported version of the page holding 0x100; the next
        // lookup has to invalidate and rebuild.
        mem.bumped_page.set(Some(0x100 & GUEST_PAGE_MASK));
        let _ = cache.lookup_or_build(0x100, &mem);
        assert_eq!(cache.invalidations(), 1);
        assert_eq!(cache.misses(), 2);
    }

    #[test]
    fn block_hit_returns_same_contents() {
        // A hit must return the block decoded from the ORIGINAL bytes even
        // if the bytes are overwritten without a page-version change. No
        // real workload would do that; it just proves the cached data is
        // served rather than re-read.
        let mem = BlockTestMem::new();
        mem.put(0x100, enc_addi(3, 3, 7));
        mem.put(0x104, enc_b_self());

        let mut cache = BlockCache::new();
        let first_simm = cache.lookup_or_build(0x100, &mem).instrs[0].simm16();

        // Overwrite the first word; the reported page version is unchanged.
        mem.store(0x100, &enc_addi(3, 3, 99).to_be_bytes());

        let cached_simm = cache.lookup_or_build(0x100, &mem).instrs[0].simm16();
        assert_eq!(first_simm, 7);
        assert_eq!(cached_simm, 7, "cache must serve original decoded form");
    }
}
|
||||
@@ -29,16 +29,37 @@ pub mod spr {
|
||||
pub const XER: u32 = 1;
|
||||
pub const LR: u32 = 8;
|
||||
pub const CTR: u32 = 9;
|
||||
pub const TBL: u32 = 268;
|
||||
pub const TBU: u32 = 269;
|
||||
pub const DSISR: u32 = 18;
|
||||
pub const DAR: u32 = 19;
|
||||
/// Decrementer (hypervisor-visible, 32-bit down-counter).
|
||||
pub const DEC: u32 = 22;
|
||||
pub const TBL: u32 = 268; // Read (user)
|
||||
pub const TBU: u32 = 269; // Read (user)
|
||||
/// Time-base write (supervisor). Separate SPR number from TBL (268) for
|
||||
/// access-control reasons.
|
||||
pub const TBL_WRITE: u32 = 284;
|
||||
pub const TBU_WRITE: u32 = 285;
|
||||
pub const SPRG0: u32 = 272;
|
||||
pub const SPRG1: u32 = 273;
|
||||
pub const SPRG2: u32 = 274;
|
||||
pub const SPRG3: u32 = 275;
|
||||
pub const VRSAVE: u32 = 256;
|
||||
pub const PVR: u32 = 287;
|
||||
pub const HID0: u32 = 1008;
|
||||
pub const HID1: u32 = 1009;
|
||||
pub const PIR: u32 = 1023;
|
||||
}
|
||||
|
||||
/// LR halt sentinel. When `bclr` returns to this address, the interpreter
|
||||
/// loop halts cleanly (matches the "entry returned" convention).
|
||||
pub const LR_HALT_SENTINEL: u64 = 0xBCBC_BCBC;
|
||||
|
||||
/// VSCR NJ (Non-Java mode) bit. Stored in word 3 at bit 16 (mask 0x0001_0000).
|
||||
/// Set at startup; when set, denormals are flushed to zero, and when clear they are handled per IEEE-754.
|
||||
pub const VSCR_NJ_MASK: u32 = 0x0001_0000;
|
||||
/// VSCR SAT (saturation sticky) bit. Stored in word 3 at bit 31 (mask 0x0000_0001).
|
||||
pub const VSCR_SAT_MASK: u32 = 0x0000_0001;
|
||||
|
||||
/// PowerPC processor context. Holds all register state for one guest thread.
|
||||
/// Mirrors PPCContext from ppc_context.h, minus JIT-specific fields.
|
||||
#[repr(C, align(64))]
|
||||
@@ -64,15 +85,49 @@ pub struct PpcContext {
|
||||
pub xer_ca: u8,
|
||||
pub xer_ov: u8,
|
||||
pub xer_so: u8,
|
||||
// Altivec VSCR saturation bit
|
||||
pub vscr_sat: u8,
|
||||
/// XER[25:31] string-byte count (`TBC`). Read/written by `mtspr XER`,
|
||||
/// consumed by `lswx`/`stswx`. Per PPCBUG-123/124/161: was previously
|
||||
/// unmodelled, making `lswx`/`stswx` a permanent no-op.
|
||||
pub xer_tbc: u8,
|
||||
// Altivec VSCR. Only bits 16 (NJ) and 31 (SAT) of word 3 are meaningful.
|
||||
pub vscr: Vec128,
|
||||
// VRSAVE (SPR 256). Bitmask of which VRs need saving across context switches.
|
||||
pub vrsave: u32,
|
||||
|
||||
// Program counter
|
||||
pub pc: u32,
|
||||
// Reservation address/value for lwarx/stwcx
|
||||
pub reserved_addr: u32,
|
||||
// Reservation for lwarx/ldarx/stwcx/stdcx. Xenon's reservation granule is
|
||||
// one L2 cache line (128 bytes) — `reserved_line` is stored as the base
|
||||
// address of that line (`ea & !0x7F`). `has_reservation` gates the
|
||||
// validity; stwcx./stdcx. check that both match before committing.
|
||||
// `reserved_val` is retained for possible future use by a coherency
|
||||
// observer; the store-conditional logic itself does not compare it.
|
||||
pub reserved_line: u32,
|
||||
pub reserved_val: u64,
|
||||
pub has_reservation: bool,
|
||||
/// PPCBUG-151 — width of the active reservation: 4 = `lwarx` (word),
|
||||
/// 8 = `ldarx` (doubleword), 0 = no reservation. `stwcx.` requires
|
||||
/// width==4; `stdcx.` requires width==8. Cross-width pairs fail
|
||||
/// deterministically with CR0.EQ=0. Cleared alongside `has_reservation`
|
||||
/// on every `stwcx.`/`stdcx.` exit (success or failure).
|
||||
pub reservation_width: u8,
|
||||
/// M3.7 — generation stamp returned by [`crate::ReservationTable::reserve`]
|
||||
/// at the most recent `lwarx`/`ldarx`. Paired with `reserved_line`;
|
||||
/// `stwcx.`/`stdcx.` pass this back to `try_commit`. Meaningful only
|
||||
/// when `reservation_table` is `Some` and the table is enabled.
|
||||
pub reserved_generation: u32,
|
||||
/// M3.7 — optional handle to the inter-thread reservation table.
|
||||
/// When `Some(table)` *and* `table.is_enabled()`, the interpreter's
|
||||
/// `lwarx`/`stwcx.`/`ldarx`/`stdcx.` arms route through the table;
|
||||
/// otherwise they use the legacy per-`PpcContext` fields above. The
|
||||
/// scheduler populates this when it spawns a thread under a kernel
|
||||
/// that has `reservations` set.
|
||||
pub reservation_table: Option<std::sync::Arc<crate::ReservationTable>>,
|
||||
/// M3.7 — emulated HW slot ID this thread is bound to. Used as the
|
||||
/// reservation table's `hw_id` discriminator so two threads on
|
||||
/// different slots can't accidentally commit each other's
|
||||
/// reservations. Populated by the scheduler at spawn / migration.
|
||||
pub hw_id: u8,
|
||||
|
||||
// Thread ID (for kernel use)
|
||||
pub thread_id: u32,
|
||||
@@ -82,6 +137,12 @@ pub struct PpcContext {
|
||||
|
||||
// Time base (incremented each instruction for debugging)
|
||||
pub timebase: u64,
|
||||
|
||||
// Decrementer (SPR 22): 32-bit down-counter that fires an external
|
||||
// interrupt at underflow on real hw. Xenia-rs doesn't dispatch DEC
|
||||
// interrupts to the guest; this value is maintained so that mfspr DEC
|
||||
// returns something coherent.
|
||||
pub dec: u32,
|
||||
}
|
||||
|
||||
impl PpcContext {
|
||||
@@ -89,7 +150,9 @@ impl PpcContext {
|
||||
Self {
|
||||
gpr: [0; 32],
|
||||
ctr: 0,
|
||||
lr: 0,
|
||||
// Canary sets LR to the halt sentinel at thread start so `blr`
|
||||
// from the top-level entry falls out of the interpreter loop.
|
||||
lr: LR_HALT_SENTINEL,
|
||||
msr: 0,
|
||||
fpr: [0.0; 32],
|
||||
vr: [Vec128::ZERO; 128],
|
||||
@@ -98,14 +161,23 @@ impl PpcContext {
|
||||
xer_ca: 0,
|
||||
xer_ov: 0,
|
||||
xer_so: 0,
|
||||
vscr_sat: 0,
|
||||
xer_tbc: 0,
|
||||
// VSCR starts with NJ bit set (denormals flushed) — matches canary
|
||||
// thread_state.cc initialization.
|
||||
vscr: Vec128::from_u32x4(0, 0, 0, VSCR_NJ_MASK),
|
||||
vrsave: 0xFFFF_FFFF,
|
||||
pc: 0,
|
||||
reserved_addr: 0,
|
||||
reserved_line: 0,
|
||||
reserved_val: 0,
|
||||
has_reservation: false,
|
||||
reservation_width: 0,
|
||||
reserved_generation: 0,
|
||||
reservation_table: None,
|
||||
hw_id: 0,
|
||||
thread_id: 0,
|
||||
cycle_count: 0,
|
||||
timebase: 0,
|
||||
dec: 0,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -173,7 +245,10 @@ impl PpcContext {
|
||||
|
||||
/// Get the full XER register value.
|
||||
pub fn xer(&self) -> u32 {
|
||||
((self.xer_so as u32) << 31) | ((self.xer_ov as u32) << 30) | ((self.xer_ca as u32) << 29)
|
||||
((self.xer_so as u32) << 31)
|
||||
| ((self.xer_ov as u32) << 30)
|
||||
| ((self.xer_ca as u32) << 29)
|
||||
| (self.xer_tbc as u32) // PPCBUG-123/566: bits 0-6 (TBC).
|
||||
}
|
||||
|
||||
/// Set XER from a full 32-bit value.
|
||||
@@ -181,6 +256,28 @@ impl PpcContext {
|
||||
self.xer_so = ((val >> 31) & 1) as u8;
|
||||
self.xer_ov = ((val >> 30) & 1) as u8;
|
||||
self.xer_ca = ((val >> 29) & 1) as u8;
|
||||
self.xer_tbc = (val & 0x7F) as u8; // PPCBUG-124.
|
||||
}
|
||||
|
||||
/// Read the VSCR SAT (sticky saturation) bit.
|
||||
pub fn vscr_sat(&self) -> bool {
|
||||
(self.vscr.u32x4(3) & VSCR_SAT_MASK) != 0
|
||||
}
|
||||
|
||||
/// Set or clear VSCR SAT. Preserves the NJ bit (and any other word-3 bits).
|
||||
pub fn set_vscr_sat(&mut self, v: bool) {
|
||||
let mut w = self.vscr.u32x4(3);
|
||||
if v {
|
||||
w |= VSCR_SAT_MASK;
|
||||
} else {
|
||||
w &= !VSCR_SAT_MASK;
|
||||
}
|
||||
self.vscr.set_u32x4(3, w);
|
||||
}
|
||||
|
||||
/// Read the VSCR NJ (non-Java mode / flush-denormals) bit.
|
||||
pub fn vscr_nj(&self) -> bool {
|
||||
(self.vscr.u32x4(3) & VSCR_NJ_MASK) != 0
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -74,9 +74,24 @@ impl DecodedInstr {
|
||||
/// Rc bit (bit 31) - record CR0
|
||||
#[inline] pub fn rc_bit(&self) -> bool { self.raw & 1 != 0 }
|
||||
|
||||
/// Rc for VC-form vector compare instructions — PPC bit 21 = host bit 10.
|
||||
#[inline] pub fn vc_rc_bit(&self) -> bool { (self.raw >> 10) & 1 != 0 }
|
||||
/// Rc for VX128_R-form vector compare instructions — PPC bit 27 = host bit 4.
|
||||
/// VX128_R Rc bit — PPC bit 25 (host bit 6) per canary's FormatVX128_R
|
||||
/// bitfield layout. PPCBUG-700.
|
||||
#[inline] pub fn vx128r_rc_bit(&self) -> bool { (self.raw >> 6) & 1 != 0 }
|
||||
|
||||
/// IMM field for VX128_4-form instructions (vrlimi128) — 5-bit blend mask at PPC bits 11-15.
|
||||
#[inline] pub fn vx128_4_imm(&self) -> u32 { extract_bits(self.raw, 11, 15) }
|
||||
/// z field for VX128_4-form instructions (vrlimi128) — 2-bit rotation index at PPC bits 24-25.
|
||||
#[inline] pub fn vx128_4_z(&self) -> u32 { extract_bits(self.raw, 24, 25) }
|
||||
|
||||
/// OE bit (bit 21) - overflow enable
|
||||
#[inline] pub fn oe(&self) -> bool { extract_bits(self.raw, 21, 21) != 0 }
|
||||
|
||||
/// TO field (bits 6-10) for tw/twi/td/tdi trap instructions.
|
||||
#[inline] pub fn to(&self) -> u32 { extract_bits(self.raw, 6, 10) }
|
||||
|
||||
/// MB, ME fields for rotate instructions
|
||||
#[inline] pub fn mb(&self) -> u32 { extract_bits(self.raw, 21, 25) }
|
||||
#[inline] pub fn me(&self) -> u32 { extract_bits(self.raw, 26, 30) }
|
||||
@@ -86,7 +101,13 @@ impl DecodedInstr {
|
||||
|
||||
/// SH field for 64-bit shifts (bits 16-20 + bit 30)
|
||||
#[inline] pub fn sh64(&self) -> u32 {
|
||||
(extract_bits(self.raw, 16, 20) << 1) | extract_bits(self.raw, 30, 30)
|
||||
(extract_bits(self.raw, 30, 30) << 5) | extract_bits(self.raw, 16, 20)
|
||||
}
|
||||
|
||||
/// MB/ME field for MD-form and MDS-form instructions (6-bit field, split encoding).
|
||||
/// MB[4:0] at PPC bits 21-25; MB[5] at PPC bit 26.
|
||||
#[inline] pub fn mb_md(&self) -> u32 {
|
||||
extract_bits(self.raw, 21, 25) | (extract_bits(self.raw, 26, 26) << 5)
|
||||
}
|
||||
|
||||
/// SPR field (bits 11-20, swapped halves)
|
||||
@@ -114,32 +135,67 @@ impl DecodedInstr {
|
||||
/// crbB (bits 16-20)
|
||||
#[inline] pub fn crbb(&self) -> u32 { extract_bits(self.raw, 16, 20) }
|
||||
|
||||
// VMX128 field extractors
|
||||
// VMX128 field extractors — bit positions match canary's
|
||||
// FormatVX128/VX128_2/VX128_4/VX128_5/VX128_R bitfield layout
|
||||
// (xenia-canary `ppc_decode_data.h:484-663`, LSB-first packed). PPCBUG-700.
|
||||
|
||||
/// VA128 (bits 6-10, plus bit from 29)
|
||||
/// VA128 = VA128l(5) | VA128h(1) << 5 | VA128H(1) << 6.
|
||||
/// Canonical 7-bit register selector: PPC 11-15 (low), PPC 26 (mid), PPC 21 (high).
|
||||
#[inline] pub fn va128(&self) -> usize {
|
||||
(extract_bits(self.raw, 6, 10) | (extract_bits(self.raw, 29, 29) << 5)) as usize
|
||||
(extract_bits(self.raw, 11, 15)
|
||||
| (extract_bits(self.raw, 26, 26) << 5)
|
||||
| (extract_bits(self.raw, 21, 21) << 6)) as usize
|
||||
}
|
||||
|
||||
/// VB128 (bits 16-20, plus bits from 28, 30)
|
||||
/// VB128 = VB128l(5) | VB128h(2) << 5. Canary's VB128h is a 2-bit
|
||||
/// contiguous field at PPC 30-31 (host bits 0-1).
|
||||
#[inline] pub fn vb128(&self) -> usize {
|
||||
(extract_bits(self.raw, 16, 20)
|
||||
| (extract_bits(self.raw, 28, 28) << 5)
|
||||
| (extract_bits(self.raw, 30, 30) << 6)) as usize
|
||||
| (extract_bits(self.raw, 30, 31) << 5)) as usize
|
||||
}
|
||||
|
||||
/// VD128 (bits 6-10, plus bits from 21, 22)
|
||||
/// VD128 = VD128l(5) | VD128h(2) << 5. Canary's VD128h is a 2-bit
|
||||
/// contiguous field at PPC 28-29 (host bits 2-3).
|
||||
#[inline] pub fn vd128(&self) -> usize {
|
||||
(extract_bits(self.raw, 6, 10)
|
||||
| (extract_bits(self.raw, 21, 21) << 5)
|
||||
| (extract_bits(self.raw, 22, 22) << 6)) as usize
|
||||
| (extract_bits(self.raw, 28, 29) << 5)) as usize
|
||||
}
|
||||
|
||||
/// VS128 - same encoding as VD128
|
||||
#[inline] pub fn vs128(&self) -> usize { self.vd128() }
|
||||
|
||||
/// VC register for VX128_2-form instructions (vperm128) — 3-bit at PPC bits 23-25.
|
||||
#[inline] pub fn vc128_2(&self) -> usize { extract_bits(self.raw, 23, 25) as usize }
|
||||
|
||||
/// NB field (bits 16-20) for lswi/stswi
|
||||
#[inline] pub fn nb(&self) -> u32 { extract_bits(self.raw, 16, 20) }
|
||||
|
||||
/// PERM field for VX128_P-form instructions (vpermwi128) — 8-bit split encoding.
|
||||
/// PERMl (5 bits) at PPC bits 11-15; PERMh (3 bits) at PPC bits 23-25.
|
||||
#[inline] pub fn vx128_p_perm(&self) -> u32 {
|
||||
extract_bits(self.raw, 11, 15) | (extract_bits(self.raw, 23, 25) << 5)
|
||||
}
|
||||
|
||||
/// SH field for VX128_5-form instructions (vsldoi128) — 4-bit shift at PPC bits 22-25.
|
||||
#[inline] pub fn vx128_5_sh(&self) -> u32 { extract_bits(self.raw, 22, 25) }
|
||||
}
|
||||
|
||||
/// Extract the 5-bit `UIMM` (`VX128_3`) / `IMM` (`VX128_4`) field. Canary
|
||||
/// packs both formats with LSB-bits 16-20 holding the field, which is
|
||||
/// MSB bits 11-15 in our `extract_bits` convention. For `vpkd3d128` /
|
||||
/// `vupkd3d128` the decoded selector is `type = UIMM >> 2` (3 bits; valid
|
||||
/// values 0-6 per [`crate::vmx::D3dPackType`], 7 is undocumented /
|
||||
/// undefined in canary) and `pack = UIMM & 0x3` (output-slot layout for
|
||||
/// `vpkd3d128` only, `vupkd3d128` ignores it).
|
||||
///
|
||||
/// First-Pixels M3: the interpreter previously used a hand-rolled
|
||||
/// `(instr.raw >> 6) & 0x7` that was **LSB-numbered** and extracted
|
||||
/// bits from a completely different part of the word (the
|
||||
/// secondary-opcode region). Centralizing the extractor here matches
|
||||
/// canary's `FormatVX128_{3,4}::{UIMM,IMM}` field semantics exactly.
|
||||
#[inline]
|
||||
pub fn extract_vx128_uimm5(raw: u32) -> u32 {
|
||||
extract_bits(raw, 11, 15)
|
||||
}
|
||||
|
||||
/// Decode a 32-bit PPC instruction into its opcode.
|
||||
@@ -149,6 +205,123 @@ pub fn decode(raw: u32, addr: u32) -> DecodedInstr {
|
||||
DecodedInstr { opcode, raw, addr }
|
||||
}
|
||||
|
||||
// Perf tier-2 — direct-mapped PC-keyed decode cache.
|
||||
//
|
||||
// The interpreter hot path spends ~15-25% of its time in `decode()`
|
||||
// parsing the raw u32 and walking the primary+secondary opcode tables.
|
||||
// For non-self-modifying guest code — the common case past the XEX
|
||||
// loader — `decode(raw, pc)` is purely a function of `(raw, pc)` and
|
||||
// the output is `Copy + 16B`. A direct-mapped cache indexed by
|
||||
// `(pc >> 2) & MASK` gives the interpreter a 1-comparison fast path,
|
||||
// at the cost of one branch and a 1.5 MiB region of memory.
|
||||
//
|
||||
// Invalidation piggybacks on `xenia_memory::GuestMemory::page_version`
|
||||
// (P5 texture-cache invalidation): every cache entry carries the page
|
||||
// version that was active at decode time; on lookup we compare against
|
||||
// the current version of the containing 4 KiB page. Any write to the
|
||||
// page bumps the counter, so the next decode on that PC is a miss that
|
||||
// refills.
|
||||
|
||||
/// Number of direct-mapped entries. 2^16 = 65,536 slots, one PPC
|
||||
/// instruction address per slot — enough for every hot code path in a
|
||||
/// typical Xbox 360 title to stay resident without collision.
|
||||
const DECODE_CACHE_SIZE: usize = 1 << 16;
|
||||
const DECODE_CACHE_MASK: u32 = (DECODE_CACHE_SIZE - 1) as u32;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
struct DecodeCacheEntry {
|
||||
/// Guest PC this entry was decoded at. Used as the tag on lookup; a
|
||||
/// mismatch means the slot was last populated by a different PC that
|
||||
/// shares the same low-16 index.
|
||||
pc: u32,
|
||||
/// Page version at decode time (from `GuestMemory::page_version(pc)`).
|
||||
/// Zero means "unused slot" since real page versions start at 1.
|
||||
page_version: u64,
|
||||
decoded: DecodedInstr,
|
||||
}
|
||||
|
||||
impl DecodeCacheEntry {
|
||||
const fn empty() -> Self {
|
||||
// `Invalid` is the decoder's "unrecognized opcode" sentinel; we
|
||||
// use it here as the empty-slot marker. Real misses compare `pc`,
|
||||
// not the opcode, so the sentinel choice is cosmetic.
|
||||
Self {
|
||||
pc: 0,
|
||||
page_version: 0,
|
||||
decoded: DecodedInstr {
|
||||
opcode: PpcOpcode::Invalid,
|
||||
raw: 0,
|
||||
addr: 0,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Direct-mapped PC-keyed decode cache. One instance shared across all
|
||||
/// HW threads (PC is thread-independent; entries are read-only once
|
||||
/// filled). Not thread-safe — the single scheduler thread owns it.
|
||||
pub struct DecodeCache {
|
||||
slots: Box<[DecodeCacheEntry]>,
|
||||
hits: u64,
|
||||
misses: u64,
|
||||
invalidations: u64,
|
||||
}
|
||||
|
||||
impl Default for DecodeCache {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl DecodeCache {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
slots: vec![DecodeCacheEntry::empty(); DECODE_CACHE_SIZE].into_boxed_slice(),
|
||||
hits: 0,
|
||||
misses: 0,
|
||||
invalidations: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Look up (or fill) the decoded form of the instruction at `pc`.
|
||||
/// `raw` is the fetched instruction word; `current_page_version` is
|
||||
/// `mem.page_version(pc)` — the caller has it cheaper than we do,
|
||||
/// since they're already touching `mem` to fetch `raw`.
|
||||
#[inline]
|
||||
pub fn lookup(&mut self, pc: u32, raw: u32, current_page_version: u64) -> DecodedInstr {
|
||||
let idx = ((pc >> 2) & DECODE_CACHE_MASK) as usize;
|
||||
// Safety: `idx` is masked into `[0, DECODE_CACHE_SIZE)` so the
|
||||
// slice access is always in-bounds. Opt-out of the bounds check
|
||||
// for the hot path.
|
||||
let entry = unsafe { self.slots.get_unchecked_mut(idx) };
|
||||
if entry.pc == pc && entry.page_version == current_page_version {
|
||||
self.hits += 1;
|
||||
return entry.decoded;
|
||||
}
|
||||
if entry.pc == pc && entry.page_version != current_page_version {
|
||||
self.invalidations += 1;
|
||||
}
|
||||
self.misses += 1;
|
||||
let decoded = decode(raw, pc);
|
||||
*entry = DecodeCacheEntry {
|
||||
pc,
|
||||
page_version: current_page_version,
|
||||
decoded,
|
||||
};
|
||||
decoded
|
||||
}
|
||||
|
||||
pub fn hits(&self) -> u64 {
|
||||
self.hits
|
||||
}
|
||||
pub fn misses(&self) -> u64 {
|
||||
self.misses
|
||||
}
|
||||
pub fn invalidations(&self) -> u64 {
|
||||
self.invalidations
|
||||
}
|
||||
}
|
||||
|
||||
fn lookup_opcode(code: u32) -> PpcOpcode {
|
||||
match extract_bits(code, 0, 5) {
|
||||
2 => PpcOpcode::tdi,
|
||||
@@ -498,9 +671,13 @@ fn decode_op6(code: u32) -> PpcOpcode {
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// VMX128 compare
|
||||
let key4 = (extract_bits(code, 22, 24) << 3) | extract_bits(code, 27, 27);
|
||||
match key4 {
|
||||
// VMX128 compare (VX128_R form). Single dispatch path: bit 27 = 0 always
|
||||
// for these opcodes per canary's table (`ppc_opcode_table_gen.cc:295-305`).
|
||||
// The Rc bit is at PPC 25 (host bit 6) per the FormatVX128_R bitfield —
|
||||
// it's a runtime modifier read by the interpreter, NOT part of the
|
||||
// secondary-opcode discrimination. PPCBUG-700.
|
||||
let key4_nd = (extract_bits(code, 22, 24) << 3) | extract_bits(code, 27, 27);
|
||||
match key4_nd {
|
||||
0b000000 => return PpcOpcode::vcmpeqfp128,
|
||||
0b001000 => return PpcOpcode::vcmpgefp128,
|
||||
0b010000 => return PpcOpcode::vcmpgtfp128,
|
||||
@@ -781,6 +958,57 @@ mod tests {
|
||||
assert_eq!(instr.d(), 0x20);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_cache_miss_fills_then_hit() {
|
||||
let mut cache = DecodeCache::new();
|
||||
let raw: u32 = (14 << 26) | (3 << 21) | (1 << 16) | 0x10;
|
||||
let pc = 0x8200_0000u32;
|
||||
let first = cache.lookup(pc, raw, 1);
|
||||
assert_eq!(first.opcode, PpcOpcode::addi);
|
||||
assert_eq!(cache.hits(), 0);
|
||||
assert_eq!(cache.misses(), 1);
|
||||
// Same pc, same version → cache hit, no new decode.
|
||||
let second = cache.lookup(pc, raw, 1);
|
||||
assert_eq!(second.opcode, PpcOpcode::addi);
|
||||
assert_eq!(cache.hits(), 1);
|
||||
assert_eq!(cache.misses(), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_cache_stale_version_refills() {
|
||||
let mut cache = DecodeCache::new();
|
||||
// First fill with an `addi`.
|
||||
let raw_addi: u32 = (14 << 26) | (3 << 21) | (1 << 16) | 0x10;
|
||||
let pc = 0x8200_0000u32;
|
||||
cache.lookup(pc, raw_addi, 1);
|
||||
// Guest rewrote the page: same pc, different raw + bumped version.
|
||||
// Cache must refill — not return the stale `addi`.
|
||||
let raw_lwz: u32 = (32 << 26) | (5 << 21) | (1 << 16) | 0x20;
|
||||
let refreshed = cache.lookup(pc, raw_lwz, 2);
|
||||
assert_eq!(refreshed.opcode, PpcOpcode::lwz);
|
||||
assert_eq!(cache.invalidations(), 1);
|
||||
assert_eq!(cache.misses(), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_cache_pc_collision_refills() {
|
||||
// Two PCs that hash to the same slot (pc >> 2 low 16 bits equal)
|
||||
// must not alias. Slot index = ((pc >> 2) & 0xFFFF) — pick two
|
||||
// PCs 4 * 2^16 bytes apart.
|
||||
let mut cache = DecodeCache::new();
|
||||
let pc_a = 0x8200_0000u32;
|
||||
let pc_b = pc_a.wrapping_add(0x0004_0000u32); // (>> 2) differs by 2^16
|
||||
let raw_addi: u32 = (14 << 26) | (3 << 21) | (1 << 16) | 0x10;
|
||||
let raw_lwz: u32 = (32 << 26) | (5 << 21) | (1 << 16) | 0x20;
|
||||
cache.lookup(pc_a, raw_addi, 1);
|
||||
// Different pc but same slot → miss + refill.
|
||||
cache.lookup(pc_b, raw_lwz, 1);
|
||||
// First pc comes back → miss + refill (slot was taken by pc_b).
|
||||
let back = cache.lookup(pc_a, raw_addi, 1);
|
||||
assert_eq!(back.opcode, PpcOpcode::addi);
|
||||
assert_eq!(cache.misses(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_decode_branch() {
|
||||
// b +0x100 => opcode 18, LI=0x40 (shifted left 2 = 0x100), AA=0, LK=0
|
||||
@@ -816,4 +1044,202 @@ mod tests {
|
||||
assert_eq!(extract_bits(0x8000_0000, 0, 0), 1);
|
||||
assert_eq!(extract_bits(0x0000_0001, 31, 31), 1);
|
||||
}
|
||||
|
||||
// VMX128 register-name extraction. Locks the canonical bit positions
|
||||
// (decoder.rs is the single source of truth — the analysis crate's
|
||||
// old `ppc.rs` had different positions, which produced wrong printed
|
||||
// register names; the bug was silent because the interpreter never
|
||||
// used those extractors). Each test poke-bits exactly the slots the
|
||||
// accessor reads and asserts the assembled register number.
|
||||
|
||||
/// Build a VMX128 test word for the canary-compliant register layout.
|
||||
/// `vd128 = vd_lo | (vd_hi << 5)` where vd_lo is 5 bits (PPC 6-10) and
|
||||
/// vd_hi is 2 bits (PPC 28-29). Same shape for vb128 (vb_lo at PPC 16-20,
|
||||
/// vb_hi 2 bits at PPC 30-31). va128 = va_lo | (va_h26<<5) | (va_h21<<6)
|
||||
/// per canary's 7-bit VA selector.
|
||||
fn vmx128_test_word(vd_lo: u32, vd_hi: u32, va_lo: u32, va_h26: u32, va_h21: u32,
|
||||
vb_lo: u32, vb_hi: u32) -> u32 {
|
||||
// PPC bit i -> host bit (31-i).
|
||||
(vd_lo << (31 - 10)) // VD128l: PPC 6-10 = host 21-25
|
||||
| (vd_hi << (31 - 29)) // VD128h: PPC 28-29 = host 2-3 (LSB at host 2)
|
||||
| (va_lo << (31 - 15)) // VA128l: PPC 11-15 = host 16-20
|
||||
| (va_h26 << (31 - 26)) // VA128h: PPC 26 = host 5
|
||||
| (va_h21 << (31 - 21)) // VA128H: PPC 21 = host 10
|
||||
| (vb_lo << (31 - 20)) // VB128l: PPC 16-20 = host 11-15
|
||||
| (vb_hi << (31 - 31)) // VB128h: PPC 30-31 = host 0-1 (LSB at host 0)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmx128_vd128_low_5_bits_only() {
|
||||
// vd_lo = 0..31, vd_hi = 0 → vd128 = vd_lo
|
||||
for r in 0..32u32 {
|
||||
let raw = (r as u32) << (31 - 10);
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vd128(), r as usize, "vd_lo={r}");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmx128_vd128_high_low_bit_adds_32() {
|
||||
// vd_lo = 0, VD128h = 0b01 (LSB only at host bit 2 = PPC 29) → vd128 = 32
|
||||
let raw = (1u32 << (31 - 29));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vd128(), 32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmx128_vd128_high_high_bit_adds_64() {
|
||||
// vd_lo = 0, VD128h = 0b10 (MSB only at host bit 3 = PPC 28) → vd128 = 64
|
||||
let raw = (1u32 << (31 - 28));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vd128(), 64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmx128_vd128_full_127() {
|
||||
// vd_lo = 31, VD128h = 0b11 → vd128 = 127
|
||||
let raw = (31u32 << (31 - 10))
|
||||
| (1u32 << (31 - 28))
|
||||
| (1u32 << (31 - 29));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vd128(), 127);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmx128_va128_canary_layout() {
|
||||
// va_lo = 7 at PPC 11-15, VA128h = 1 at PPC 26 → va128 = 7 | 32 = 39
|
||||
let raw = (7u32 << (31 - 15)) | (1u32 << (31 - 26));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.va128(), 39);
|
||||
// VA128H = 1 at PPC 21 → va128 += 64 = 103
|
||||
let raw = raw | (1u32 << (31 - 21));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.va128(), 7 | 32 | 64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmx128_vb128_uses_bits30_31() {
|
||||
// vb_lo = 5 at PPC 16-20. VB128h = 0b01 (LSB at PPC 31 = host 0) → +32.
|
||||
// VB128h = 0b11 → +96.
|
||||
let raw = (5u32 << (31 - 20)) | (1u32 << (31 - 31));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vb128(), 5 | 32);
|
||||
let raw = raw | (1u32 << (31 - 30));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vb128(), 5 | 32 | 64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmx128_vs128_aliases_vd128() {
|
||||
// vs128 must always equal vd128.
|
||||
for r in [0u32, 31, 32, 64, 96, 127] {
|
||||
let lo = r & 0x1F;
|
||||
let hi = (r >> 5) & 0x3;
|
||||
let raw = (lo << (31 - 10))
|
||||
| (hi << (31 - 29));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vd128(), r as usize, "vd128 mismatch for r={r}");
|
||||
assert_eq!(d.vs128(), r as usize, "vs128 mismatch for r={r}");
|
||||
assert_eq!(d.vd128(), d.vs128());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn _vmx128_test_word_helper_compiles() {
    // Keep the helper validated against the real accessors. Every field
    // group is checked so a wrong shift in the helper cannot go unnoticed.

    // vd_lo=5, vd_hi=0b11 → vd128 = 5 | 96 = 101; VA/VB stay zero.
    let raw = vmx128_test_word(5, 3, 0, 0, 0, 0, 0);
    let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
    assert_eq!(d.vd128(), 5 | 32 | 64);
    assert_eq!(d.va128(), 0);
    assert_eq!(d.vb128(), 0);

    // va_lo=7, VA128h=1, VA128H=1 → va128 = 7 | 32 | 64 = 103; VD/VB stay zero.
    let raw = vmx128_test_word(0, 0, 7, 1, 1, 0, 0);
    let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
    assert_eq!(d.va128(), 7 | 32 | 64);
    assert_eq!(d.vd128(), 0);
    assert_eq!(d.vb128(), 0);

    // vb_lo=9, vb_hi=0b10 → vb128 = 9 | 64 = 73; VD/VA stay zero.
    let raw = vmx128_test_word(0, 0, 0, 0, 0, 9, 2);
    let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
    assert_eq!(d.vb128(), 9 | 64);
    assert_eq!(d.vd128(), 0);
    assert_eq!(d.va128(), 0);

    // All fields populated at once: the groups must not bleed into each other.
    let raw = vmx128_test_word(5, 3, 7, 1, 1, 9, 2);
    let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
    assert_eq!(d.vd128(), 5 | 32 | 64);
    assert_eq!(d.va128(), 7 | 32 | 64);
    assert_eq!(d.vb128(), 9 | 64);
}
|
||||
|
||||
#[test]
|
||||
fn vx128_5_sh_bit_positions() {
|
||||
// SH=8 (binary 1000): bit 3 = 1, bits 0-2 = 0.
|
||||
// Host bit 9 = 1 (PPC bit 22), host bits 6-8 = 0.
|
||||
// So raw bit 9 set = raw |= 1 << 9 = 0x200
|
||||
let raw = 0x200u32; // host bit 9 set only
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vx128_5_sh(), 8, "SH=8: MSB at PPC bit 22");
|
||||
|
||||
// SH=1 (binary 0001): host bit 6 set = raw |= 1 << 6 = 0x40
|
||||
let raw = 0x40u32;
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vx128_5_sh(), 1, "SH=1: LSB at PPC bit 25");
|
||||
|
||||
// SH=15 (binary 1111): host bits 6-9 all set = raw |= 0xF << 6 = 0x3C0
|
||||
let raw = 0x3C0u32;
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vx128_5_sh(), 15, "SH=15: all 4 bits set");
|
||||
|
||||
// SH=0: raw=0
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw: 0, addr: 0 };
|
||||
assert_eq!(d.vx128_5_sh(), 0, "SH=0");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vx128_4_accessors_correct_bit_positions() {
|
||||
// z=3 (binary 11) at PPC bits 24-25 = host bits 6-7
|
||||
let raw = 0b11u32 << 6;
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vx128_4_z(), 3, "z=3 from host bits 6-7");
|
||||
|
||||
// IMM=0x15 (binary 10101) at PPC bits 11-15 = host bits 16-20
|
||||
let raw2 = 0x15u32 << 16;
|
||||
let d2 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: raw2, addr: 0 };
|
||||
assert_eq!(d2.vx128_4_imm(), 0x15, "IMM=0x15 from host bits 16-20");
|
||||
|
||||
// Combined: z=1, IMM=0xA — fields must not bleed into each other
|
||||
let raw3 = (0x1u32 << 6) | (0xAu32 << 16);
|
||||
let d3 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: raw3, addr: 0 };
|
||||
assert_eq!(d3.vx128_4_z(), 1, "z=1 combined");
|
||||
assert_eq!(d3.vx128_4_imm(), 0xA, "IMM=0xA combined");
|
||||
|
||||
// z=2, IMM=0xF — max 4-bit blend mask, exercises the full lower nibble
|
||||
let raw4 = (0b10u32 << 6) | (0xFu32 << 16);
|
||||
let d4 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: raw4, addr: 0 };
|
||||
assert_eq!(d4.vx128_4_z(), 2, "z=2 from binary 10");
|
||||
assert_eq!(d4.vx128_4_imm(), 0xF, "IMM=0xF all-ones nibble");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vc128_2_extracts_ppc_bits_23_25() {
|
||||
// VC=5 (binary 101) at PPC bits 23-25 = host bits 6-8
|
||||
// extract_bits(raw, 23, 25) = (raw >> (31-25)) & 0x7 = (raw >> 6) & 0x7
|
||||
let raw = 5u32 << 6; // host bits 6-8 = 5
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vc128_2(), 5);
|
||||
|
||||
let d0 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: 0, addr: 0 };
|
||||
assert_eq!(d0.vc128_2(), 0);
|
||||
|
||||
let d7 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: 7u32 << 6, addr: 0 };
|
||||
assert_eq!(d7.vc128_2(), 7);
|
||||
|
||||
let d1 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: 1u32 << 6, addr: 0 };
|
||||
assert_eq!(d1.vc128_2(), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vx128_p_perm_assembles_correctly() {
|
||||
// PERMl=0x1F (all 5 bits set) at host bits 16-20: raw = 0x1F << 16
|
||||
let raw = 0x1Fu32 << 16;
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vx128_p_perm(), 0x1F, "PERMl only");
|
||||
|
||||
// PERMh=0x7 (all 3 bits set) at host bits 6-8: raw = 0x7 << 6 = 0x1C0
|
||||
let raw = 0x7u32 << 6;
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vx128_p_perm(), 0x7 << 5, "PERMh only: bits 5-7");
|
||||
|
||||
// PERMl=0xA, PERMh=0x5: raw = (0xA << 16) | (0x5 << 6)
|
||||
let raw = (0xAu32 << 16) | (0x5u32 << 6);
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vx128_p_perm(), 0xA | (0x5 << 5));
|
||||
|
||||
// PERMl and PERMh bits must not bleed into each other
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw: 0, addr: 0 };
|
||||
assert_eq!(d.vx128_p_perm(), 0);
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
447
crates/xenia-cpu/src/fpscr.rs
Normal file
447
crates/xenia-cpu/src/fpscr.rs
Normal file
@@ -0,0 +1,447 @@
|
||||
//! FPSCR (Floating-Point Status and Control Register) maintenance.
|
||||
//!
|
||||
//! Scope per project plan: rounding modes honoured, plus the exception bits
|
||||
//! games actually read (FX, FEX, VX, OX, UX, ZX, XX, FI, FPRF). Enabled-
|
||||
//! exception dispatch (FE[0,1], VE/OE/UE/ZE/XE) is *not* modelled — games
|
||||
//! running on Xenon almost never take FP traps.
|
||||
//!
|
||||
//! Bit layout (PowerISA, MSB-0 numbering; stored in a u32 with bit 31 = MSB):
|
||||
//!
|
||||
//! | PPC bit | u32 mask | Name |
|
||||
//! |---------|-------------------------|-------------|
|
||||
//! | 0 | `1<<31` | FX |
|
||||
//! | 1 | `1<<30` | FEX |
|
||||
//! | 2 | `1<<29` | VX (summary)|
|
||||
//! | 3 | `1<<28` | OX |
|
||||
//! | 4 | `1<<27` | UX |
|
||||
//! | 5 | `1<<26` | ZX |
|
||||
//! | 6 | `1<<25` | XX |
|
||||
//! | 7 | `1<<24` | VXSNAN |
|
||||
//! | 8 | `1<<23` | VXISI |
|
||||
//! | 9 | `1<<22` | VXIDI |
|
||||
//! | 10 | `1<<21` | VXZDZ |
|
||||
//! | 11 | `1<<20` | VXIMZ |
|
||||
//! | 12 | `1<<19` | VXVC |
|
||||
//! | 13 | `1<<18` | FR |
|
||||
//! | 14 | `1<<17` | FI |
|
||||
//! | 15..19 | `0x1F<<12` | FPRF (5 bits)|
|
||||
//! | 21 | `1<<10` | VXSOFT |
|
||||
//! | 22 | `1<<9` | VXSQRT |
|
||||
//! | 23 | `1<<8` | VXCVI |
|
||||
//! | 30..31 | `0x3` | RN (2 bits) |
|
||||
|
||||
use crate::context::PpcContext;
|
||||
|
||||
pub const FX: u32 = 1 << 31;
|
||||
pub const FEX: u32 = 1 << 30;
|
||||
pub const VX: u32 = 1 << 29;
|
||||
pub const OX: u32 = 1 << 28;
|
||||
pub const UX: u32 = 1 << 27;
|
||||
pub const ZX: u32 = 1 << 26;
|
||||
pub const XX: u32 = 1 << 25;
|
||||
pub const VXSNAN: u32 = 1 << 24;
|
||||
pub const VXISI: u32 = 1 << 23;
|
||||
pub const VXIDI: u32 = 1 << 22;
|
||||
pub const VXZDZ: u32 = 1 << 21;
|
||||
pub const VXIMZ: u32 = 1 << 20;
|
||||
pub const VXVC: u32 = 1 << 19;
|
||||
pub const FR: u32 = 1 << 18;
|
||||
pub const FI: u32 = 1 << 17;
|
||||
pub const FPRF_MASK: u32 = 0x1F << 12; // bits 15..19
|
||||
pub const VXSOFT: u32 = 1 << 10;
|
||||
pub const VXSQRT: u32 = 1 << 9;
|
||||
pub const VXCVI: u32 = 1 << 8;
|
||||
pub const RN_MASK: u32 = 0x3;
|
||||
|
||||
/// Union of all VX* bits (used for the VX summary recomputation).
|
||||
pub const VX_ALL: u32 = VXSNAN | VXISI | VXIDI | VXZDZ | VXIMZ | VXVC | VXSOFT | VXSQRT | VXCVI;
|
||||
|
||||
/// FPRF classification codes (5-bit, placed in FPSCR bits 15..19).
|
||||
/// The high bit ("C" in PowerISA) is set for -zero/±denormal/QNaN and clear for +zero/
|
||||
/// ±normal/±inf. The next 4 bits are (FL, FG, FE, FU) = (less, greater, equal, unordered).
|
||||
pub mod fprf {
|
||||
pub const QNAN: u8 = 0b1_0001;
|
||||
pub const NEG_INF: u8 = 0b0_1001;
|
||||
pub const NEG_NORMAL: u8 = 0b0_1000;
|
||||
pub const NEG_DENORMAL: u8 = 0b1_1000;
|
||||
pub const NEG_ZERO: u8 = 0b1_0010;
|
||||
pub const POS_ZERO: u8 = 0b0_0010;
|
||||
pub const POS_DENORMAL: u8 = 0b1_0100;
|
||||
pub const POS_NORMAL: u8 = 0b0_0100;
|
||||
pub const POS_INF: u8 = 0b0_0101;
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum RoundingMode {
|
||||
NearestEven, // RN=00
|
||||
TowardZero, // RN=01
|
||||
TowardPosInf, // RN=10
|
||||
TowardNegInf, // RN=11
|
||||
}
|
||||
|
||||
pub fn rounding_mode(ctx: &PpcContext) -> RoundingMode {
|
||||
match ctx.fpscr & RN_MASK {
|
||||
0 => RoundingMode::NearestEven,
|
||||
1 => RoundingMode::TowardZero,
|
||||
2 => RoundingMode::TowardPosInf,
|
||||
_ => RoundingMode::TowardNegInf,
|
||||
}
|
||||
}
|
||||
|
||||
/// Classify a finite f64 into its FPRF 5-bit code.
|
||||
pub fn classify_fprf(v: f64) -> u8 {
|
||||
if v.is_nan() {
|
||||
fprf::QNAN
|
||||
} else if v.is_infinite() {
|
||||
if v.is_sign_negative() { fprf::NEG_INF } else { fprf::POS_INF }
|
||||
} else if v == 0.0 {
|
||||
if v.is_sign_negative() { fprf::NEG_ZERO } else { fprf::POS_ZERO }
|
||||
} else if v.is_subnormal() {
|
||||
if v.is_sign_negative() { fprf::NEG_DENORMAL } else { fprf::POS_DENORMAL }
|
||||
} else if v.is_sign_negative() { fprf::NEG_NORMAL } else { fprf::POS_NORMAL }
|
||||
}
|
||||
|
||||
/// Write FPRF into FPSCR, preserving other bits.
|
||||
pub fn set_fprf(ctx: &mut PpcContext, code: u8) {
|
||||
ctx.fpscr = (ctx.fpscr & !FPRF_MASK) | ((code as u32 & 0x1F) << 12);
|
||||
}
|
||||
|
||||
/// Set one or more exception bits on FPSCR, maintaining FX (sticky set on any
|
||||
/// new exception) and VX (summary of VX* bits).
|
||||
pub fn set_exception(ctx: &mut PpcContext, bits: u32) {
|
||||
let prev = ctx.fpscr;
|
||||
let new = prev | bits;
|
||||
// FX is sticky-set if any new non-sticky bit transitions to 1. PPC defines
|
||||
// FX as "any of OX, UX, ZX, XX, VX* newly set". Compute the transition set.
|
||||
let transition = (new & !prev) & (OX | UX | ZX | XX | VX_ALL);
|
||||
let mut updated = new;
|
||||
if transition != 0 {
|
||||
updated |= FX;
|
||||
}
|
||||
// Recompute VX summary from any VX* bits currently set.
|
||||
if (updated & VX_ALL) != 0 { updated |= VX; }
|
||||
ctx.fpscr = updated;
|
||||
}
|
||||
|
||||
/// Classify the inputs of a floating-point arithmetic op and set appropriate
|
||||
/// VX* bits. Returns true if any invalid-operation was detected (caller may
|
||||
/// want to write a default QNaN result).
|
||||
///
|
||||
/// Detected cases:
|
||||
/// * any SNaN input → VXSNAN
|
||||
/// * infinity - infinity (same sign) → VXISI
|
||||
/// * 0 / 0 → VXZDZ
|
||||
/// * infinity / infinity → VXIDI
|
||||
/// * 0 * infinity → VXIMZ
|
||||
pub fn check_invalid_add(ctx: &mut PpcContext, a: f64, b: f64, sub: bool) -> bool {
|
||||
let mut bits = 0u32;
|
||||
if is_snan(a) || is_snan(b) { bits |= VXSNAN; }
|
||||
if a.is_infinite() && b.is_infinite() {
|
||||
// For add: VXISI iff same-sign(a,b) negated — inf - inf
|
||||
// For sub: VXISI iff same-sign(a,b) — (+inf) - (+inf) or (-inf) - (-inf)
|
||||
let both_pos = a.is_sign_positive() && b.is_sign_positive();
|
||||
let both_neg = a.is_sign_negative() && b.is_sign_negative();
|
||||
if sub {
|
||||
if both_pos || both_neg { bits |= VXISI; }
|
||||
} else {
|
||||
// add: opposite signs cancel to inf-inf
|
||||
if a.is_sign_positive() != b.is_sign_positive() { bits |= VXISI; }
|
||||
}
|
||||
}
|
||||
if bits != 0 { set_exception(ctx, bits); return true; }
|
||||
false
|
||||
}
|
||||
|
||||
/// FMA-aware add/sub VXISI check. Per PPCBUG-202+203: the previous code
|
||||
/// passed `a*c` as `lhs` to `check_invalid_add`, which suffers from two
|
||||
/// rounding errors and can spuriously raise/miss VXISI in extreme cases.
|
||||
/// This helper derives the mathematical product's sign and infinity status
|
||||
/// from the inputs directly.
|
||||
///
|
||||
/// `sub` follows the same semantics as `check_invalid_add`:
|
||||
/// - false (add): VXISI when product and b have opposite signs at infinity
|
||||
/// - true (sub): VXISI when product and b have same sign at infinity
|
||||
pub fn check_invalid_fma_add(ctx: &mut PpcContext, a: f64, c: f64, b: f64, sub: bool) -> bool {
|
||||
let mut bits = 0u32;
|
||||
if is_snan(a) || is_snan(c) || is_snan(b) { bits |= VXSNAN; }
|
||||
let product_is_inf = (a.is_infinite() || c.is_infinite())
|
||||
&& a != 0.0 && c != 0.0
|
||||
&& !a.is_nan() && !c.is_nan();
|
||||
if product_is_inf && b.is_infinite() {
|
||||
let p_neg = a.is_sign_negative() != c.is_sign_negative();
|
||||
let b_neg = b.is_sign_negative();
|
||||
let same_sign = p_neg == b_neg;
|
||||
if (sub && same_sign) || (!sub && !same_sign) {
|
||||
bits |= VXISI;
|
||||
}
|
||||
}
|
||||
if bits != 0 { set_exception(ctx, bits); return true; }
|
||||
false
|
||||
}
|
||||
|
||||
pub fn check_invalid_mul(ctx: &mut PpcContext, a: f64, b: f64) -> bool {
|
||||
let mut bits = 0u32;
|
||||
if is_snan(a) || is_snan(b) { bits |= VXSNAN; }
|
||||
let zero_times_inf =
|
||||
(a == 0.0 && b.is_infinite()) || (b == 0.0 && a.is_infinite());
|
||||
if zero_times_inf { bits |= VXIMZ; }
|
||||
if bits != 0 { set_exception(ctx, bits); return true; }
|
||||
false
|
||||
}
|
||||
|
||||
pub fn check_invalid_div(ctx: &mut PpcContext, a: f64, b: f64) -> bool {
|
||||
let mut bits = 0u32;
|
||||
if is_snan(a) || is_snan(b) { bits |= VXSNAN; }
|
||||
if a == 0.0 && b == 0.0 { bits |= VXZDZ; }
|
||||
if a.is_infinite() && b.is_infinite() { bits |= VXIDI; }
|
||||
if bits != 0 { set_exception(ctx, bits); return true; }
|
||||
false
|
||||
}
|
||||
|
||||
/// Divide-by-zero (finite nonzero / 0) — sets ZX but not VX.
|
||||
pub fn check_zero_divide(ctx: &mut PpcContext, a: f64, b: f64) {
|
||||
if b == 0.0 && a != 0.0 && !a.is_nan() && !a.is_infinite() {
|
||||
set_exception(ctx, ZX);
|
||||
}
|
||||
}
|
||||
|
||||
/// Post-op: classify the result and update FPRF + detect overflow/underflow/inexact.
|
||||
/// `inputs_finite` lets us suppress OX for ops whose output is infinite because
|
||||
/// an input already was.
|
||||
pub fn update_after_op(ctx: &mut PpcContext, result: f64, inputs_were_finite: bool) {
|
||||
let mut bits = 0u32;
|
||||
if result.is_infinite() && inputs_were_finite {
|
||||
bits |= OX;
|
||||
}
|
||||
if result.is_subnormal() {
|
||||
bits |= UX;
|
||||
}
|
||||
if bits != 0 { set_exception(ctx, bits); }
|
||||
set_fprf(ctx, classify_fprf(result));
|
||||
}
|
||||
|
||||
/// Report whether `x` is a signalling NaN.
///
/// For binary64 the top mantissa bit (bit 51) distinguishes the two NaN
/// kinds: set means quiet, clear means signalling. Non-NaN values always
/// yield false.
pub fn is_snan(x: f64) -> bool {
    const QUIET_BIT: u64 = 1u64 << 51;
    // A NaN always has a nonzero mantissa, so only the quiet bit needs testing.
    x.is_nan() && (x.to_bits() & QUIET_BIT) == 0
}
|
||||
|
||||
/// Round an f64 to f32 honouring FPSCR[RN]. Uses the current hardware
|
||||
/// rounding mode when RN=0 (nearest-even, the PPC default), otherwise
|
||||
/// emulates the directed rounding via bit-manipulation.
|
||||
pub fn round_to_single(ctx: &PpcContext, v: f64) -> f64 {
|
||||
match rounding_mode(ctx) {
|
||||
RoundingMode::NearestEven => (v as f32) as f64,
|
||||
RoundingMode::TowardZero => round_single_toward_zero(v) as f64,
|
||||
RoundingMode::TowardPosInf => round_single_toward_pos_inf(v) as f64,
|
||||
RoundingMode::TowardNegInf => round_single_toward_neg_inf(v) as f64,
|
||||
}
|
||||
}
|
||||
|
||||
/// Round an f64 to an i64 integer honouring FPSCR[RN]. Used by fctidx.
|
||||
pub fn round_to_i64(ctx: &PpcContext, v: f64) -> i64 {
|
||||
match rounding_mode(ctx) {
|
||||
RoundingMode::NearestEven => {
|
||||
// PPCBUG-221: round-half-to-even (banker's rounding). The previous
|
||||
// tie-detection used `(diff - 0.5).abs() < f64::EPSILON` which
|
||||
// breaks for |v| > 2^52 (where v.trunc() == v exactly, giving diff
|
||||
// == 0). Use a fractional-part-only check that's exact for
|
||||
// |v| <= 2^52 and degenerates correctly above.
|
||||
let t = v.trunc();
|
||||
let frac = v - t;
|
||||
let fa = frac.abs();
|
||||
if fa > 0.5 {
|
||||
t as i64 + if v >= 0.0 { 1 } else { -1 }
|
||||
} else if fa < 0.5 {
|
||||
t as i64
|
||||
} else {
|
||||
// Exact 0.5 tie — round to even.
|
||||
let fi = t as i64;
|
||||
if fi & 1 == 0 { fi } else { fi + if v >= 0.0 { 1 } else { -1 } }
|
||||
}
|
||||
}
|
||||
RoundingMode::TowardZero => v.trunc() as i64,
|
||||
RoundingMode::TowardPosInf => v.ceil() as i64,
|
||||
RoundingMode::TowardNegInf => v.floor() as i64,
|
||||
}
|
||||
}
|
||||
|
||||
/// Round an f64 to an i32 integer honouring FPSCR[RN]. Used by fctiwx.
|
||||
pub fn round_to_i32(ctx: &PpcContext, v: f64) -> i32 {
|
||||
round_to_i64(ctx, v).clamp(i32::MIN as i64, i32::MAX as i64) as i32
|
||||
}
|
||||
|
||||
// ------ directed rounding helpers (f64 → f32) ------
|
||||
|
||||
/// Convert `v` to f32, rounding toward zero (truncation).
///
/// Starts from the default round-to-nearest-even cast and, if that result
/// is larger in magnitude than `v`, steps one ULP back toward zero.
///
/// A finite `v` beyond the f32 range casts to ±infinity under
/// round-to-nearest; truncation must give ±`f32::MAX` instead. The same
/// one-ULP step handles that case, because the bit pattern just below
/// infinity is the largest finite value. An infinite `v` stays infinite.
fn round_single_toward_zero(v: f64) -> f32 {
    let rn = v as f32;
    if rn.is_nan() || rn == 0.0 {
        return rn;
    }
    // Exact, or already rounded toward zero. Also covers infinite `v`
    // (inf <= inf), so only finite-overflow infinities fall through.
    if (rn.abs() as f64) <= v.abs() {
        return rn;
    }
    // `rn` is nonzero and non-NaN, so its magnitude bits are >= 1 and the
    // decrement shrinks the magnitude by one ULP without touching the sign.
    f32::from_bits(rn.to_bits() - 1)
}
|
||||
|
||||
/// Convert `v` to f32, rounding toward +infinity.
///
/// Starts from the default round-to-nearest-even cast and, if that result
/// is below `v`, steps one ULP upward.
///
/// A large finite negative `v` casts to -infinity under round-to-nearest;
/// rounding upward must give `-f32::MAX` instead. The one-ULP step covers
/// that case, so infinity is not special-cased. +infinity and an infinite
/// `v` already satisfy `rn >= v` and are returned unchanged.
fn round_single_toward_pos_inf(v: f64) -> f32 {
    let rn = v as f32;
    if rn.is_nan() || (rn as f64) >= v {
        return rn;
    }
    // rn < v: move one ULP in the + direction. For a negative value that
    // means shrinking the magnitude, for a non-negative one growing it.
    let bits = rn.to_bits();
    let bumped = if rn.is_sign_negative() { bits - 1 } else { bits + 1 };
    f32::from_bits(bumped)
}
|
||||
|
||||
/// Convert `v` to f32, rounding toward -infinity.
///
/// Starts from the default round-to-nearest-even cast and, if that result
/// is above `v`, steps one ULP downward.
///
/// A large finite positive `v` casts to +infinity under round-to-nearest;
/// rounding downward must give `f32::MAX` instead. The one-ULP step covers
/// that case, so infinity is not special-cased. -infinity and an infinite
/// `v` already satisfy `rn <= v` and are returned unchanged.
fn round_single_toward_neg_inf(v: f64) -> f32 {
    let rn = v as f32;
    if rn.is_nan() || (rn as f64) <= v {
        return rn;
    }
    // rn > v: move one ULP in the - direction. For a negative value that
    // means growing the magnitude, for a positive one shrinking it.
    let bits = rn.to_bits();
    let bumped = if rn.is_sign_negative() { bits + 1 } else { bits - 1 };
    f32::from_bits(bumped)
}
|
||||
|
||||
/// Drop-in replacement for the old `update_cr1_from_fpscr`. Reads the
|
||||
/// currently-maintained FPSCR bits (FX, FEX, VX, OX) into CR1.
|
||||
pub fn update_cr1(ctx: &mut PpcContext) {
|
||||
ctx.cr[1].lt = (ctx.fpscr & FX) != 0;
|
||||
ctx.cr[1].gt = (ctx.fpscr & FEX) != 0;
|
||||
ctx.cr[1].eq = (ctx.fpscr & VX) != 0;
|
||||
ctx.cr[1].so = (ctx.fpscr & OX) != 0;
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Fresh context as produced by `PpcContext::new()`.
    fn ctx() -> PpcContext {
        PpcContext::new()
    }

    #[test]
    fn rn_default_is_nearest() {
        let c = ctx();
        assert_eq!(rounding_mode(&c), RoundingMode::NearestEven);
    }

    #[test]
    fn rn_bits_decode() {
        let table = [
            (0x1, RoundingMode::TowardZero),
            (0x2, RoundingMode::TowardPosInf),
            (0x3, RoundingMode::TowardNegInf),
        ];
        let mut c = ctx();
        for (raw, expected) in table {
            c.fpscr = raw;
            assert_eq!(rounding_mode(&c), expected);
        }
    }

    #[test]
    fn fprf_classifies_correctly() {
        let table = [
            (1.0, fprf::POS_NORMAL),
            (-1.0, fprf::NEG_NORMAL),
            (0.0, fprf::POS_ZERO),
            (-0.0, fprf::NEG_ZERO),
            (f64::INFINITY, fprf::POS_INF),
            (f64::NEG_INFINITY, fprf::NEG_INF),
            (f64::NAN, fprf::QNAN),
            (f64::from_bits(1), fprf::POS_DENORMAL),
        ];
        for (value, expected) in table {
            assert_eq!(classify_fprf(value), expected);
        }
    }

    #[test]
    fn fx_is_sticky_on_new_exception() {
        let mut c = ctx();
        set_exception(&mut c, OX);
        assert!(c.fpscr & FX != 0);
        // Drop FX and OX by hand, then raise OX again: FX must latch anew.
        c.fpscr &= !(FX | OX);
        set_exception(&mut c, OX);
        assert!(c.fpscr & FX != 0);
    }

    #[test]
    fn vx_summary_set_on_any_vx_bit() {
        let mut c = ctx();
        set_exception(&mut c, VXSNAN);
        assert!(c.fpscr & VX != 0);
        assert!(c.fpscr & VXSNAN != 0);
    }

    #[test]
    fn round_to_single_nearest_is_identity_on_representable() {
        let c = ctx();
        let one = 1.0_f64;
        assert_eq!(round_to_single(&c, one), one);
    }

    #[test]
    fn round_to_i32_clamps_out_of_range() {
        let c = ctx();
        for (input, expected) in [(1e20_f64, i32::MAX), (-1e20_f64, i32::MIN)] {
            assert_eq!(round_to_i32(&c, input), expected);
        }
    }

    #[test]
    fn round_to_i64_nearest_even_on_tie() {
        let c = ctx();
        let ties = [
            (0.5_f64, 0),
            (1.5, 2),
            (2.5, 2),
            (3.5, 4),
            (-0.5, 0),
            (-1.5, -2),
            (-2.5, -2),
        ];
        for (input, expected) in ties {
            assert_eq!(round_to_i64(&c, input), expected);
        }
    }

    #[test]
    fn round_to_i64_non_tie_cases() {
        // PPCBUG-221 regression: non-tie fractions must round to nearest.
        let c = ctx();
        let cases = [(0.4_f64, 0), (0.6, 1), (-0.4, 0), (-0.6, -1)];
        for (input, expected) in cases {
            assert_eq!(round_to_i64(&c, input), expected);
        }
    }

    #[test]
    fn round_to_i32_nearest_even_on_tie() {
        // PPCBUG-227: round_to_i32 inherits round_to_i64's tie semantics.
        let c = ctx();
        let ties = [(0.5_f64, 0), (1.5, 2), (2.5, 2), (-1.5, -2)];
        for (input, expected) in ties {
            assert_eq!(round_to_i32(&c, input), expected);
        }
    }

    #[test]
    fn check_invalid_add_detects_inf_minus_inf() {
        let mut c = ctx();
        let flagged = check_invalid_add(&mut c, f64::INFINITY, f64::INFINITY, true);
        assert!(flagged);
        assert!(c.fpscr & VXISI != 0);
    }

    #[test]
    fn check_invalid_div_detects_zero_over_zero() {
        let mut c = ctx();
        let flagged = check_invalid_div(&mut c, 0.0, 0.0);
        assert!(flagged);
        assert!(c.fpscr & VXZDZ != 0);
    }

    #[test]
    fn snan_detection() {
        // Binary64 SNaN: sign 0, exponent all ones, nonzero mantissa, bit 51 clear.
        let signalling = f64::from_bits(0x7FF0_0000_0000_0001);
        assert!(is_snan(signalling));
        assert!(!is_snan(f64::NAN));
    }
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,9 +1,25 @@
|
||||
pub mod block_cache;
|
||||
pub mod context;
|
||||
pub mod decoder;
|
||||
pub mod disasm;
|
||||
pub mod fpscr;
|
||||
pub mod interpreter;
|
||||
pub mod opcode;
|
||||
pub mod overflow;
|
||||
pub mod phaser;
|
||||
pub mod reservation;
|
||||
pub mod scheduler;
|
||||
pub mod trap;
|
||||
pub mod vmx;
|
||||
|
||||
pub use context::PpcContext;
|
||||
pub use decoder::decode;
|
||||
pub use disasm::{DisasmItem, DisasmText, disassemble, format as disasm_format, iter_disasm};
|
||||
pub use opcode::PpcOpcode;
|
||||
pub use phaser::{Phaser, PhaserOutcome};
|
||||
pub use reservation::ReservationTable;
|
||||
pub use scheduler::{
|
||||
BlockReason, GuestThread, HwSlot, HwState, MigrationFixup, OrderMode, PcrWriter, RoundOutcome,
|
||||
Scheduler, SpawnError, SpawnParams, ThreadRef, HW_THREAD_COUNT, INITIAL_GUEST_TID,
|
||||
QUANTUM_DEFAULT,
|
||||
};
|
||||
|
||||
@@ -145,6 +145,33 @@ impl PpcOpcode {
|
||||
matches!(self, Self::sc)
|
||||
}
|
||||
|
||||
/// Returns true if this opcode unconditionally ends a basic block:
|
||||
/// any branch, system call, trap, or `Invalid` (decoder couldn't
|
||||
/// recognize the instruction — execution will hit the
|
||||
/// `Unimplemented` arm and we don't want to swallow the boundary
|
||||
/// inside a cached block).
|
||||
///
|
||||
/// Notably *not* terminating: `mtmsr`/`mtmsrd`/`isync`/`mfmsr`.
|
||||
/// On real hardware these have synchronization semantics (a context
|
||||
/// synchronizing event for `isync`, MSR rewrite for the `mt*`s) but
|
||||
/// our interpreter has no asynchronous-exception model and no
|
||||
/// out-of-order execution — they execute as plain ALU/move ops and
|
||||
/// don't change control flow synchronously. Block-cache replay is
|
||||
/// still bit-for-bit identical to per-instruction dispatch for
|
||||
/// those.
|
||||
///
|
||||
/// Used by the basic-block cache (`block_cache.rs`) to know when to
|
||||
/// stop accumulating instructions during a forward decode walk.
|
||||
pub fn terminates_block(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
Self::bx | Self::bcx | Self::bclrx | Self::bcctrx
|
||||
| Self::sc
|
||||
| Self::td | Self::tdi | Self::tw | Self::twi
|
||||
| Self::Invalid
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns true if this is a load instruction.
|
||||
pub fn is_load(&self) -> bool {
|
||||
matches!(self,
|
||||
@@ -194,3 +221,60 @@ impl std::fmt::Display for PpcOpcode {
|
||||
std::fmt::Debug::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn terminates_block_includes_all_branches() {
        let branches = [
            PpcOpcode::bx,
            PpcOpcode::bcx,
            PpcOpcode::bclrx,
            PpcOpcode::bcctrx,
        ];
        for op in branches {
            assert!(op.terminates_block());
        }
    }

    #[test]
    fn terminates_block_includes_sc_and_traps() {
        let enders = [
            PpcOpcode::sc,
            PpcOpcode::td,
            PpcOpcode::tdi,
            PpcOpcode::tw,
            PpcOpcode::twi,
        ];
        for op in enders {
            assert!(op.terminates_block());
        }
    }

    #[test]
    fn terminates_block_includes_invalid() {
        // A decoder failure has to close the block; otherwise the unknown
        // opcode would be replayed from a cached block and bypass the
        // per-instruction Unimplemented path.
        let op = PpcOpcode::Invalid;
        assert!(op.terminates_block());
    }

    #[test]
    fn terminates_block_excludes_straight_line_ops() {
        // Everyday ALU and load/store ops must NOT terminate a block.
        let straight_line = [
            PpcOpcode::addi,
            PpcOpcode::addis,
            PpcOpcode::addx,
            PpcOpcode::cmpi,
            PpcOpcode::cmp,
            PpcOpcode::lwz,
            PpcOpcode::stw,
            PpcOpcode::lbzx,
            PpcOpcode::ori,
            PpcOpcode::oris,
            PpcOpcode::rlwinmx,
        ];
        for op in straight_line {
            assert!(!op.terminates_block());
        }
    }

    #[test]
    fn terminates_block_excludes_msr_and_sync_ops() {
        // Documented decision: synchronizing ops run as ALU ops inside a
        // block because the interpreter has no async-exception model.
        let sync_like = [
            PpcOpcode::mtmsr,
            PpcOpcode::mtmsrd,
            PpcOpcode::isync,
            PpcOpcode::sync,
            PpcOpcode::mfmsr,
        ];
        for op in sync_like {
            assert!(!op.terminates_block());
        }
    }
}
|
||||
|
||||
178
crates/xenia-cpu/src/overflow.rs
Normal file
178
crates/xenia-cpu/src/overflow.rs
Normal file
@@ -0,0 +1,178 @@
|
||||
//! OE / XER[OV] / XER[SO] handling for integer arithmetic.
|
||||
//!
|
||||
//! PPC integer ops with the OE bit set update XER[OV] (overflow) and sticky-set
|
||||
//! XER[SO]. When OE is clear the instruction leaves XER untouched. Signed
|
||||
//! overflow is predicated on the operation width and operand signs per the
|
||||
//! PowerISA pseudocode. For 32-bit-word operations (`addw`, `mullw`, `divw`,
|
||||
//! `neg`, etc. — on PPC these all have `w` in the mnemonic in spec
|
||||
//! descriptions even when the assembler spells them without) the predicate
|
||||
//! uses the low 32 bits. For 64-bit operations (`add`, `mulld`, `divd`) the
|
||||
//! predicate uses the full 64 bits.
|
||||
|
||||
use crate::context::PpcContext;
|
||||
|
||||
#[inline]
|
||||
pub fn apply(ctx: &mut PpcContext, overflowed: bool) {
|
||||
if overflowed {
|
||||
ctx.xer_ov = 1;
|
||||
ctx.xer_so = 1;
|
||||
} else {
|
||||
ctx.xer_ov = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// Signed overflow test for a 64-bit addition `a + b = result`.
///
/// Overflow occurred exactly when `a` and `b` share a sign bit and `result`
/// has the opposite one. Subtraction callers must first express the
/// operation as an addition.
#[inline]
pub fn add_ov_64(a: u64, b: u64, result: u64) -> bool {
    const SIGN: u64 = 1u64 << 63;
    let inputs_share_sign = (a ^ b) & SIGN == 0;
    let result_sign_flipped = (a ^ result) & SIGN != 0;
    inputs_share_sign && result_sign_flipped
}
|
||||
|
||||
/// General signed-overflow test for 64-bit arithmetic.
///
/// The caller supplies the exact signed sum as an i128 together with the
/// 64-bit value that was stored. Overflow is reported when the stored
/// value, read as i64, differs from the exact sum, meaning the true value
/// did not fit in i64.
///
/// Intended for multi-term chains (`adde`, `addme`, `addze`, `subfe`,
/// `subfme`, `subfze`) where a carry-in makes a pure sign-bit predicate
/// awkward.
#[inline]
pub fn sum_overflow_64(true_sum: i128, result: u64) -> bool {
    let stored = result as i64;
    i128::from(stored) != true_sum
}
|
||||
|
||||
/// Signed overflow test for a 64-bit subtraction `result = b - a`.
///
/// Overflow occurred exactly when `a` and `b` have different sign bits and
/// the sign bit of `result` differs from that of `b`.
#[inline]
pub fn sub_ov_64(a: u64, b: u64, result: u64) -> bool {
    let negative = |x: u64| x >> 63 == 1;
    let minuend_neg = negative(b);
    negative(a) != minuend_neg && negative(result) != minuend_neg
}
|
||||
|
||||
/// Signed overflow test for the `addc`/`adde` family.
///
/// Applies the same sign rule as a plain 64-bit add: overflow when `a` and
/// `b` share a sign bit that `result` does not. The carry-in needs no
/// separate term because it is already part of `result`.
#[inline]
pub fn adde_ov_64(a: u64, b: u64, result: u64) -> bool {
    let same_sign_inputs = !(a ^ b);
    let result_differs = a ^ result;
    (same_sign_inputs & result_differs) >> 63 == 1
}
|
||||
|
||||
/// Signed 32-bit multiply overflow (`mullwo`).
///
/// `product` is the full 64-bit product. Overflow is reported when it cannot
/// be represented as an i32, i.e. when sign-extending its low 32 bits does
/// not give back the same value.
#[inline]
pub fn mullw_ov(product: i64) -> bool {
    i32::try_from(product).is_err()
}
|
||||
|
||||
/// Signed 64-bit multiply overflow (`mulldo`): true when `a * b` does not
/// fit in an i64.
#[inline]
pub fn mulld_ov(a: i64, b: i64) -> bool {
    let (_wrapped, overflowed) = a.overflowing_mul(b);
    overflowed
}
|
||||
|
||||
/// Overflow test for signed 32-bit division `ra / rb`.
///
/// The divide-with-OE instructions raise OV in two situations:
///   * the divisor is zero, or
///   * a signed `INT_MIN / -1`, whose quotient does not fit.
///
/// Both are exactly the cases where `checked_div` yields `None`.
#[inline]
pub fn divw_ov_signed(ra: i32, rb: i32) -> bool {
    ra.checked_div(rb).is_none()
}
|
||||
|
||||
/// Overflow test for unsigned 32-bit division: true only for a zero divisor.
#[inline]
pub fn divw_ov_unsigned(rb: u32) -> bool {
    matches!(rb, 0)
}
|
||||
|
||||
/// Overflow test for signed 64-bit division `ra / rb`: true for a zero
/// divisor or for `i64::MIN / -1`, the cases where `checked_div` is `None`.
#[inline]
pub fn divd_ov_signed(ra: i64, rb: i64) -> bool {
    ra.checked_div(rb).is_none()
}
|
||||
|
||||
/// Overflow test for unsigned 64-bit division: true only for a zero divisor.
#[inline]
pub fn divd_ov_unsigned(rb: u64) -> bool {
    matches!(rb, 0)
}
|
||||
|
||||
/// `negx` overflow at 64-bit width: RT = -(RA) overflows only when RA holds
/// the most negative i64, whose negation is not representable.
#[inline]
pub fn neg_ov_64(ra: u64) -> bool {
    ra as i64 == i64::MIN
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn add_no_overflow() {
        for (a, b, sum) in [(1u64, 2u64, 3u64), (u64::MAX, 0, u64::MAX)] {
            assert!(!add_ov_64(a, b, sum));
        }
    }

    #[test]
    fn add_positive_overflow() {
        // INT64_MAX + 1 wraps to INT64_MIN: signed overflow.
        let lhs = i64::MAX as u64;
        let rhs = 1u64;
        assert!(add_ov_64(lhs, rhs, lhs.wrapping_add(rhs)));
    }

    #[test]
    fn add_negative_overflow() {
        // INT64_MIN + -1 wraps to INT64_MAX: signed overflow.
        let lhs = i64::MIN as u64;
        let rhs = (-1i64) as u64;
        assert!(add_ov_64(lhs, rhs, lhs.wrapping_add(rhs)));
    }

    #[test]
    fn sub_overflow_min_minus_pos() {
        // INT64_MIN - 1 overflows.
        let minuend = i64::MIN as u64;
        let subtrahend = 1u64;
        let diff = minuend.wrapping_sub(subtrahend);
        assert!(sub_ov_64(subtrahend, minuend, diff));
    }

    #[test]
    fn sub_no_overflow() {
        let minuend = 5u64;
        let subtrahend = 2u64;
        let diff = minuend.wrapping_sub(subtrahend);
        assert!(!sub_ov_64(subtrahend, minuend, diff));
    }

    #[test]
    fn mullw_fits_32_bits() {
        let products = [(i32::MAX as i64) * 1, -1i64];
        for product in products {
            assert!(!mullw_ov(product));
        }
    }

    #[test]
    fn mullw_overflows_32_bits() {
        let doubled = (i32::MAX as i64) * 2;
        assert!(mullw_ov(doubled));
    }

    #[test]
    fn mulld_overflows() {
        assert!(mulld_ov(i64::MAX, 2));
        assert!(!mulld_ov(i64::MAX, 1));
        // PPCBUG-022: INT_MIN * -1 overflows because -INT_MIN exceeds INT_MAX.
        assert!(mulld_ov(i64::MIN, -1), "INT_MIN * -1 overflows i64");
        assert!(!mulld_ov(i64::MIN, 1));
        assert!(!mulld_ov(i64::MIN + 1, -1), "INT_MIN+1 * -1 = INT_MAX, no overflow");
    }

    #[test]
    fn neg_ov_only_at_min() {
        assert!(neg_ov_64(i64::MIN as u64));
        for ra in [0u64, 1u64] {
            assert!(!neg_ov_64(ra));
        }
    }
}
|
||||
345
crates/xenia-cpu/src/phaser.rs
Normal file
345
crates/xenia-cpu/src/phaser.rs
Normal file
@@ -0,0 +1,345 @@
|
||||
//! Quantum-boundary phaser for the M3 per-HW-thread parallel scheduler.
|
||||
//!
|
||||
//! Six [`super::HW_THREAD_COUNT`] host threads run their slots' interpreters
|
||||
//! in parallel, then meet at a phaser to advance to the next quantum. This
|
||||
//! is **not** [`std::sync::Barrier`]: a Barrier needs a fixed party count,
|
||||
//! but our slots can become idle (no runnable thread) and shouldn't block
|
||||
//! the phaser arrival.
|
||||
//!
|
||||
//! ## Semantics
|
||||
//!
|
||||
//! - Each slot at the end of its quantum either calls
|
||||
//! [`Phaser::arrive_and_wait`] (it has a runnable thread to run next
|
||||
//! quantum) or [`Phaser::skip`] (it's idle this round and will wake on
|
||||
//! `slot_wake[i]`).
|
||||
//! - The phase advances when **all 6 slots have either arrived or
|
||||
//! skipped**. Arrived slots block until the advance; skipped slots
|
||||
//! return immediately and re-poll their wake state.
|
||||
//! - The phaser uses a generation counter so a slot that arrives "early"
|
||||
//! in the next phase doesn't see the prior phase's "all arrived"
|
||||
//! condition.
|
||||
//! - Defensive timeout: [`Phaser::arrive_and_wait_timeout`] returns
|
||||
//! [`PhaserOutcome::Timeout`] if a peer crashes / hangs. Callers
|
||||
//! typically convert this into a graceful shutdown rather than
|
||||
//! panicking, so the rest of the topology can tear down cleanly.
|
||||
//!
|
||||
//! ## Memory ordering
|
||||
//!
|
||||
//! - The participant counter (`arrived_or_skipped`) is only read and
|
||||
//! written while holding the `inner` mutex, so the last-to-arrive thread
|
||||
//! sees a consistent "everyone is here" snapshot.
|
||||
//! - The generation `phase` is read with `Acquire` in arrivers' wait
|
||||
//! loops; the advancing thread stores with `Release` after bumping.
|
||||
//! - The condvar's broadcast publishes the phase; the wait loop
|
||||
//! re-checks `phase` against its captured value to defend against
|
||||
//! spurious wakeups.
|
||||
|
||||
use std::sync::atomic::{AtomicU32, Ordering};
|
||||
use std::sync::{Condvar, Mutex};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
/// Outcome of a phaser arrival.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PhaserOutcome {
    /// All participants arrived/skipped — phase advanced. Caller proceeds
    /// into the next quantum.
    Advanced,
    /// Defensive timeout fired before all peers arrived. Caller should
    /// log + initiate shutdown rather than retry.
    Timeout,
    /// Phaser was shut down via [`Phaser::shutdown`]; all waiters are
    /// woken and return this. Caller exits cleanly.
    Shutdown,
}

/// Custom barrier-with-skip primitive. Construct once with the number of
/// participating slots; share via `Arc` across host threads.
pub struct Phaser {
    /// Total participant count (constant after construction). For our
    /// scheduler this is `HW_THREAD_COUNT = 6`.
    party_count: u32,
    /// Phase counter, incremented every time the phase advances. Used as a
    /// generation marker so a slot that wakes "into" the next phase doesn't
    /// observe the old "everyone arrived" state.
    phase: AtomicU32,
    /// Inner state guarded by the condvar's mutex.
    inner: Mutex<Inner>,
    /// Notified when a phase advances or shutdown fires.
    cv: Condvar,
}

/// Mutex-protected bookkeeping for [`Phaser`].
#[derive(Debug)]
struct Inner {
    /// Number of participants that have arrived or skipped in the current
    /// phase. Reset to zero when the phase advances.
    arrived_or_skipped: u32,
    /// Set once by [`Phaser::shutdown`]; never cleared.
    shutdown: bool,
}

impl Phaser {
    /// Create a phaser with `party_count` participants.
    ///
    /// # Panics
    ///
    /// Panics if `party_count == 0`.
    pub fn new(party_count: u32) -> Self {
        assert!(party_count > 0, "phaser party_count must be > 0");
        Self {
            party_count,
            phase: AtomicU32::new(0),
            inner: Mutex::new(Inner {
                arrived_or_skipped: 0,
                shutdown: false,
            }),
            cv: Condvar::new(),
        }
    }

    /// Get the current phase number. Useful for tests and observability.
    pub fn current_phase(&self) -> u32 {
        self.phase.load(Ordering::Acquire)
    }

    /// Mark this slot as not participating in the current phase. Counts
    /// toward the advance threshold but does not block. Used when a slot
    /// has no runnable thread and is parked waiting on
    /// `slot_wake[i].unpark()`.
    ///
    /// `_slot_id` is informational (not stored); the parameter exists so
    /// call sites stay greppable.
    pub fn skip(&self, _slot_id: u8) {
        self.contribute_advance();
    }

    /// Block until the phase advances or the defensive 5-second timeout
    /// fires. Returns [`PhaserOutcome::Advanced`] on a clean phase
    /// transition, [`PhaserOutcome::Timeout`] if a peer hung, and
    /// [`PhaserOutcome::Shutdown`] on tear-down.
    ///
    /// `_slot_id` is informational (see [`Self::skip`]).
    pub fn arrive_and_wait(&self, _slot_id: u8) -> PhaserOutcome {
        self.arrive_and_wait_timeout(Duration::from_secs(5))
    }

    /// Same as [`Self::arrive_and_wait`] with a caller-supplied timeout.
    ///
    /// A timeout too large to be represented as a deadline (for example
    /// `Duration::MAX`) is treated as "no deadline": the call then returns
    /// only on phase advance or shutdown.
    ///
    /// When shutdown and a phase advance are both visible, `Shutdown` wins.
    ///
    /// On `Timeout` this call's arrival stays counted for the current phase.
    pub fn arrive_and_wait_timeout(&self, timeout: Duration) -> PhaserOutcome {
        // Capture the generation before contributing, so that an advance
        // triggered by our own arrival is seen as a change.
        let pre_phase = self.phase.load(Ordering::Acquire);
        self.contribute_advance();

        // `Instant + Duration` panics on overflow; `checked_add` yields
        // `None` instead, which we interpret as "wait without a deadline".
        let deadline = Instant::now().checked_add(timeout);

        let mut guard = self.inner.lock().unwrap();
        loop {
            // The predicate is re-evaluated after every wakeup, which covers
            // spurious wakeups and timed-out waits alike.
            if guard.shutdown {
                return PhaserOutcome::Shutdown;
            }
            if self.phase.load(Ordering::Acquire) != pre_phase {
                return PhaserOutcome::Advanced;
            }
            guard = match deadline {
                Some(deadline) => {
                    let now = Instant::now();
                    if now >= deadline {
                        return PhaserOutcome::Timeout;
                    }
                    self.cv.wait_timeout(guard, deadline - now).unwrap().0
                }
                None => self.cv.wait(guard).unwrap(),
            };
        }
    }

    /// Wake every parked arriver and signal shutdown. After this, all
    /// future and outstanding `arrive_and_wait_*` calls return
    /// [`PhaserOutcome::Shutdown`].
    pub fn shutdown(&self) {
        let mut guard = self.inner.lock().unwrap();
        guard.shutdown = true;
        self.cv.notify_all();
    }

    /// Common path for both arrive-and-wait and skip: bump the
    /// participant counter, and if we were the last one in, advance the
    /// phase + broadcast.
    fn contribute_advance(&self) {
        let mut guard = self.inner.lock().unwrap();
        guard.arrived_or_skipped += 1;
        if guard.arrived_or_skipped >= self.party_count {
            // Last one in. Reset the counter, bump the phase, broadcast.
            guard.arrived_or_skipped = 0;
            // `Release` on the phase update pairs with `Acquire` reads in
            // arriving slots' wait-loop predicates.
            self.phase.fetch_add(1, Ordering::Release);
            self.cv.notify_all();
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::AtomicU32;
    use std::sync::Arc;
    use std::thread;

    /// With all N parties arriving, every arriver gets `Advanced` and the
    /// phase counter reads 1 afterwards.
    #[test]
    fn n_arrivers_all_advance() {
        const N: u32 = 6;
        let phaser = Arc::new(Phaser::new(N));
        let workers: Vec<_> = (0..N)
            .map(|slot| {
                let phaser = Arc::clone(&phaser);
                thread::Builder::new()
                    .name(format!("phaser-test-{slot}"))
                    .spawn(move || phaser.arrive_and_wait(slot as u8))
                    .unwrap()
            })
            .collect();
        for worker in workers {
            assert_eq!(worker.join().unwrap(), PhaserOutcome::Advanced);
        }
        assert_eq!(phaser.current_phase(), 1);
    }

    /// N-1 arrivals plus one `skip` complete the phase; the arrivers all
    /// report `Advanced`.
    #[test]
    fn skip_counts_toward_advance() {
        const N: u32 = 6;
        let phaser = Arc::new(Phaser::new(N));
        let workers: Vec<_> = (0..(N - 1))
            .map(|slot| {
                let phaser = Arc::clone(&phaser);
                thread::Builder::new()
                    .name(format!("phaser-arrive-{slot}"))
                    .spawn(move || phaser.arrive_and_wait(slot as u8))
                    .unwrap()
            })
            .collect();
        // Give the arrivers a moment to park so that the skip is (most
        // likely) the call that releases them.
        thread::sleep(Duration::from_millis(20));
        phaser.skip((N - 1) as u8);
        for worker in workers {
            assert_eq!(worker.join().unwrap(), PhaserOutcome::Advanced);
        }
        assert_eq!(phaser.current_phase(), 1);
    }

    /// `shutdown` releases parked arrivers, which then report `Shutdown`.
    #[test]
    fn shutdown_wakes_arrivers() {
        const N: u32 = 6;
        let phaser = Arc::new(Phaser::new(N));
        // One party short on purpose: the phase cannot complete by itself.
        let workers: Vec<_> = (0..(N - 1))
            .map(|slot| {
                let phaser = Arc::clone(&phaser);
                thread::Builder::new()
                    .name(format!("phaser-arrive-shutdown-{slot}"))
                    .spawn(move || phaser.arrive_and_wait(slot as u8))
                    .unwrap()
            })
            .collect();
        thread::sleep(Duration::from_millis(20));
        phaser.shutdown();
        for worker in workers {
            assert_eq!(worker.join().unwrap(), PhaserOutcome::Shutdown);
        }
    }

    /// When some parties never show up, the timed wait gives up with
    /// `Timeout` instead of blocking indefinitely.
    #[test]
    fn timeout_fires_when_peer_hangs() {
        const N: u32 = 4;
        let phaser = Arc::new(Phaser::new(N));
        // Two of the four parties arrive; the remaining two "hang".
        let waiters: Vec<_> = (0..2)
            .map(|_| {
                let phaser = Arc::clone(&phaser);
                thread::spawn(move || {
                    phaser.arrive_and_wait_timeout(Duration::from_millis(50))
                })
            })
            .collect();
        for waiter in waiters {
            assert_eq!(waiter.join().unwrap(), PhaserOutcome::Timeout);
        }
    }

    /// Stress run over many phases: each party calls `arrive_and_wait` K
    /// times in a tight loop and must see `Advanced` every time; afterwards
    /// the phase counter equals K. Guards against counter/phase resync bugs.
    #[test]
    fn multi_phase_progress() {
        const N: u32 = 6;
        const K: u32 = 1000;
        let phaser = Arc::new(Phaser::new(N));
        let finished = Arc::new(AtomicU32::new(0));
        let workers: Vec<_> = (0..N)
            .map(|slot| {
                let phaser = Arc::clone(&phaser);
                let finished = Arc::clone(&finished);
                thread::Builder::new()
                    .name(format!("phaser-multi-{slot}"))
                    .spawn(move || {
                        for _ in 0..K {
                            assert_eq!(
                                phaser.arrive_and_wait(slot as u8),
                                PhaserOutcome::Advanced
                            );
                        }
                        finished.fetch_add(1, Ordering::Relaxed);
                    })
                    .unwrap()
            })
            .collect();
        for worker in workers {
            worker.join().unwrap();
        }
        assert_eq!(finished.load(Ordering::Relaxed), N);
        assert_eq!(phaser.current_phase(), K);
    }

    /// Interleaves `skip` and `arrive_and_wait` over many phases, mimicking
    /// a scheduler whose slots go idle for some quanta.
    #[test]
    fn mixed_skip_and_arrive_random() {
        const N: u32 = 6;
        const K: u32 = 200;
        let phaser = Arc::new(Phaser::new(N));
        let workers: Vec<_> = (0..N)
            .map(|slot| {
                let phaser = Arc::clone(&phaser);
                thread::Builder::new()
                    .name(format!("phaser-mixed-{slot}"))
                    .spawn(move || {
                        // Deterministic pseudo-random stream seeded by the
                        // slot and stirred with the phase index.
                        let mut rng: u32 = 0x9E37_79B9u32.wrapping_add(slot);
                        for phase in 0..K {
                            rng = rng.wrapping_mul(0x6C8E_9CF7).wrapping_add(phase);
                            let skip_this_round = rng & 0xF == 0;
                            if skip_this_round {
                                phaser.skip(slot as u8);
                            } else {
                                let _ = phaser.arrive_and_wait(slot as u8);
                            }
                        }
                    })
                    .unwrap()
            })
            .collect();
        for worker in workers {
            worker.join().unwrap();
        }
        // Every party contributes exactly one arrive-or-skip per iteration,
        // so K iterations of N contributions yield K phase advances.
        assert_eq!(phaser.current_phase(), K);
    }
}
|
||||
424
crates/xenia-cpu/src/reservation.rs
Normal file
424
crates/xenia-cpu/src/reservation.rs
Normal file
@@ -0,0 +1,424 @@
|
||||
//! Inter-thread reservation table for `lwarx`/`stwcx.` and
|
||||
//! `ldarx`/`stdcx.`.
|
||||
//!
|
||||
//! On real Xenon, each core's `lwarx` places a reservation on a 128-byte
|
||||
//! cache line; any other CPU's store to the line invalidates the
|
||||
//! reservation. `stwcx.`'s success depends on the reservation still being
|
||||
//! valid. Under M3's per-HW-thread parallelism, we need an inter-thread
|
||||
//! mechanism for the same guarantee.
|
||||
//!
|
||||
//! M2 introduces the table behind a runtime `reservations_enabled` flag
|
||||
//! (default `false`). When the flag is `false`, the interpreter's
|
||||
//! existing per-`PpcContext` `reserved_line`/`has_reservation` fields are
|
||||
//! used as-is — no inter-thread tracking. M3 flips the flag on once the
|
||||
//! per-HW-thread host threads are spawning.
|
||||
//!
|
||||
//! ## Design
|
||||
//!
|
||||
//! - **Banked AtomicU64 array** of [`NUM_LINES`] entries (4096 × 8 B =
|
||||
//! 32 KiB total). Each entry packs `(line_address, generation,
|
||||
//! hw_id)`. A zero value means "no reservation on this bank".
|
||||
//! - **Hash function**: `(line >> 7) & (NUM_LINES - 1)`. Different lines
|
||||
//! that map to the same bank conservatively invalidate each other's
|
||||
//! reservations — sound (real Xenon's L2 has finite associativity and
|
||||
//! has the same property), at the cost of slightly more `stwcx.`
|
||||
//! failures than a perfect-mapping table would produce.
|
||||
//! - **`active_reservers: AtomicU16`** — a fast-path counter of occupied
//!   banks. It goes up when an `lwarx` claims an empty bank and down when
//!   a bank is cleared by a commit, a release, or an invalidating store;
//!   an `lwarx` that displaces an existing entry leaves it unchanged, so
//!   the value is an upper bound on live reservations. `write_u32` checks
//!   this with a single `Relaxed` load; when zero (the common case in code
//!   that doesn't use atomics), the invalidation hook is a
//!   one-instruction skip.
|
||||
//! - **Generation counter**: monotonic across all reservations,
|
||||
//! incremented atomically. 24 bits of generation packed in the slot
|
||||
//! means 16 M reuses per slot before wraparound; at multi-million
|
||||
//! reservations/sec sustained that's still many seconds, and a
|
||||
//! stale-gen `stwcx.` simply fails (sound, not livelocking).
|
||||
//!
|
||||
//! ## Invariants
|
||||
//!
|
||||
//! 1. A `stwcx.(addr)` succeeds only if the line slot still holds the
|
||||
//! same `(line, gen, hw_id)` triple the reserver stamped at `lwarx`.
|
||||
//! 2. Any plain store to a reserved line invalidates it (slot CASed to
//!    zero). A store to a *different* line that hashes to the same bank
//!    leaves the entry alone. Only another `lwarx` on a colliding line
//!    displaces it; the displaced guest then observes a `stwcx.` failure
//!    and simply retries, so this is correctness-preserving.
|
||||
//! 3. `stwcx.` from a different `hw_id` than the reserver fails even if
|
||||
//! the line and gen would otherwise match — only the originating HW
|
||||
//! thread can commit its own reservation.
|
||||
//!
|
||||
//! Memory ordering: all CAS / store operations on the line slot use
|
||||
//! `AcqRel`; readers use `Acquire`. The store inside `stwcx.`'s payload
|
||||
//! itself (the actual data write) is the caller's responsibility — see
|
||||
//! [`crate::interpreter`]'s `stwcx.` arm.
|
||||
|
||||
use std::sync::atomic::{AtomicU16, AtomicU64, Ordering};
|
||||
|
||||
/// Size in bytes of the granule one reservation covers (a Xenon L2 cache
/// line).
pub const LINE_BYTES: u32 = 0x80;
/// AND-mask that rounds an address down to the start of its cache line.
pub const LINE_MASK: u32 = !(LINE_BYTES - 1);
/// Number of banks in the reservation table. A power of two, so the bank
/// index is a single AND; at 8 B per entry the table is 32 KiB.
pub const NUM_LINES: usize = 4096;
const HASH_MASK: u32 = (NUM_LINES as u32) - 1;

/// Encode `(line_addr, generation, hw_id)` as one `u64`:
///
/// | bits   | content                                   |
/// |--------|-------------------------------------------|
/// | 63..32 | line address (line-aligned, low 7 bits 0) |
/// | 31..8  | 24-bit generation                         |
/// | 7..0   | 8-bit `hw_id`                             |
///
/// The all-zero word is reserved to mean "no reservation".
///
/// In debug builds this asserts that `line_addr` is line-aligned and that
/// `generation` fits in 24 bits; release builds silently keep only the low
/// 24 bits of `generation`.
#[inline]
pub fn pack(line_addr: u32, generation: u32, hw_id: u8) -> u64 {
    debug_assert!(line_addr & !LINE_MASK == 0, "line_addr must be line-aligned");
    debug_assert!(generation < (1 << 24), "generation must fit in 24 bits");
    let line_bits = u64::from(line_addr) << 32;
    let gen_bits = (u64::from(generation) & 0xFF_FFFF) << 8;
    line_bits | gen_bits | u64::from(hw_id)
}

/// Decode a word produced by [`pack`] into `(line, generation, hw_id)`.
/// The zero sentinel (no reservation) decodes to `None`.
#[inline]
pub fn unpack(raw: u64) -> Option<(u32, u32, u8)> {
    (raw != 0).then(|| {
        let hw_id = (raw & 0xFF) as u8;
        let generation = ((raw >> 8) & 0xFF_FFFF) as u32;
        let line = (raw >> 32) as u32;
        (line, generation, hw_id)
    })
}

/// Bank index for a line address: the line number modulo [`NUM_LINES`].
#[inline]
fn hash(line_addr: u32) -> usize {
    let line_number = line_addr / LINE_BYTES;
    (line_number & HASH_MASK) as usize
}

/// Round `addr` down to the first byte of its cache line.
#[inline]
fn align_to_line(addr: u32) -> u32 {
    addr - (addr % LINE_BYTES)
}
|
||||
|
||||
/// Banked reservation table shared across all emulated HW threads. Built
|
||||
/// once per emulation instance; lives behind an `Arc` so worker host
|
||||
/// threads (M3) can hold their own clones without lifetime gymnastics.
|
||||
pub struct ReservationTable {
|
||||
lines: Vec<AtomicU64>,
|
||||
active_reservers: AtomicU16,
|
||||
next_gen: AtomicU64,
|
||||
/// Runtime activation flag. Default `false`. M2.8's
|
||||
/// `--reservations-table` flag (or M3 spawn) flips this to `true`,
|
||||
/// at which point the interpreter's `lwarx`/`stwcx.` arms route
|
||||
/// through the table; otherwise they use the legacy per-`PpcContext`
|
||||
/// reservation fields.
|
||||
enabled: std::sync::atomic::AtomicBool,
|
||||
}
|
||||
|
||||
impl Default for ReservationTable {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl ReservationTable {
|
||||
/// Construct a fresh table with all banks empty.
|
||||
pub fn new() -> Self {
|
||||
let mut lines = Vec::with_capacity(NUM_LINES);
|
||||
for _ in 0..NUM_LINES {
|
||||
lines.push(AtomicU64::new(0));
|
||||
}
|
||||
Self {
|
||||
lines,
|
||||
active_reservers: AtomicU16::new(0),
|
||||
// Start at 1 so the very first reservation gets a non-zero
|
||||
// gen and the packed slot value is non-zero (zero is the
|
||||
// "no reservation" sentinel).
|
||||
next_gen: AtomicU64::new(1),
|
||||
enabled: std::sync::atomic::AtomicBool::new(false),
|
||||
}
|
||||
}
|
||||
|
||||
/// Activate the table. The interpreter's `lwarx`/`stwcx.` arms will
|
||||
/// route through this table on subsequent dispatches. Idempotent.
|
||||
pub fn enable(&self) {
|
||||
self.enabled
|
||||
.store(true, std::sync::atomic::Ordering::Release);
|
||||
}
|
||||
|
||||
/// Deactivate the table. The interpreter falls back to per-`PpcContext`
|
||||
/// reservation fields. Idempotent.
|
||||
pub fn disable(&self) {
|
||||
self.enabled
|
||||
.store(false, std::sync::atomic::Ordering::Release);
|
||||
}
|
||||
|
||||
/// Whether the table is currently active. The interpreter consults
|
||||
/// this on every `lwarx`/`stwcx.` to decide which path runs.
|
||||
pub fn is_enabled(&self) -> bool {
|
||||
self.enabled.load(std::sync::atomic::Ordering::Acquire)
|
||||
}
|
||||
|
||||
/// True when at least one reservation is currently outstanding.
|
||||
/// Plain `write_u32` consults this to skip the invalidation hook
|
||||
/// when no thread holds a reservation — the common case for
|
||||
/// non-atomic code.
|
||||
#[inline]
|
||||
pub fn has_active_reservers(&self) -> bool {
|
||||
self.active_reservers.load(Ordering::Relaxed) > 0
|
||||
}
|
||||
|
||||
/// `lwarx(addr)` — claim a reservation on the line containing `addr`.
|
||||
/// Returns the generation stamped into the slot; the interpreter
|
||||
/// stores this alongside the per-`PpcContext` `has_reservation` bit
|
||||
/// so a subsequent `stwcx.` can verify the same gen still holds.
|
||||
///
|
||||
/// If a different reservation already occupied the bank, it's
|
||||
/// silently overwritten — that thread's `stwcx.` will fail because
|
||||
/// the slot no longer matches its stamped gen. Matches Xenon
|
||||
/// behavior (a different core's lwarx on the same line displaces
|
||||
/// any prior reservation).
|
||||
pub fn reserve(&self, addr: u32, hw_id: u8) -> u32 {
|
||||
let line = align_to_line(addr);
|
||||
let generation = (self
|
||||
.next_gen
|
||||
.fetch_add(1, Ordering::Relaxed)
|
||||
& 0xFF_FFFF) as u32;
|
||||
let new_raw = pack(line, generation, hw_id);
|
||||
// Release: prior reads of the reservation target should
|
||||
// happen-before any thread that observes the new slot value.
|
||||
let prev = self.lines[hash(line)].swap(new_raw, Ordering::AcqRel);
|
||||
// If the previous slot was non-zero, the displaced reserver is
|
||||
// implicitly invalidated — decrement the active counter for it.
|
||||
// Else, increment for our new reservation. Net effect: the
|
||||
// counter equals the number of *bank slots* with a non-zero
|
||||
// value, which is an upper bound on actual reservers.
|
||||
if prev == 0 {
|
||||
self.active_reservers.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
generation
|
||||
}
|
||||
|
||||
/// `stwcx.(addr)` — try to commit a reservation. Returns `true` if
|
||||
/// the slot still holds `(line, my_gen, my_hw_id)` (in which case
|
||||
/// it's CAS'd back to zero, releasing the bank), `false` otherwise.
|
||||
/// The data store itself is the caller's responsibility — see
|
||||
/// [`crate::interpreter`]'s `stwcx.` arm.
|
||||
pub fn try_commit(&self, addr: u32, my_gen: u32, my_hw_id: u8) -> bool {
|
||||
let line = align_to_line(addr);
|
||||
let expected = pack(line, my_gen, my_hw_id);
|
||||
match self.lines[hash(line)].compare_exchange(
|
||||
expected,
|
||||
0,
|
||||
Ordering::AcqRel,
|
||||
Ordering::Relaxed,
|
||||
) {
|
||||
Ok(_) => {
|
||||
// Successfully released the slot; decrement the active
|
||||
// count.
|
||||
self.active_reservers.fetch_sub(1, Ordering::Relaxed);
|
||||
true
|
||||
}
|
||||
Err(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Hook for plain (non-reserving) stores: invalidate any
|
||||
/// reservation on the containing line. Cheap when the bank is
|
||||
/// already empty (single Acquire load + branch).
|
||||
pub fn invalidate_for_write(&self, addr: u32) {
|
||||
let line = align_to_line(addr);
|
||||
let bank = &self.lines[hash(line)];
|
||||
let prev = bank.load(Ordering::Acquire);
|
||||
if prev == 0 {
|
||||
return;
|
||||
}
|
||||
// Verify the slot still holds a reservation on *this* line
|
||||
// before clearing — hash collisions mean the bank may hold a
|
||||
// reservation on an unrelated line that maps to the same slot.
|
||||
// Real Xenon has the same property (limited L2 associativity);
|
||||
// we mirror it here. A spurious bank match invalidates a
|
||||
// different line's reservation; the affected `stwcx.` retries —
|
||||
// sound, slightly less efficient.
|
||||
if let Some((bank_line, _generation, _hw)) = unpack(prev) {
|
||||
if bank_line != line {
|
||||
// Different line in the same bank — leave it alone (we
|
||||
// chose not to invalidate cross-line collisions to
|
||||
// reduce false-fail noise; real-HW behavior is similar
|
||||
// since L2 associativity sets cross-line constraints).
|
||||
return;
|
||||
}
|
||||
}
|
||||
// CAS-clear the bank if it still holds the value we observed.
|
||||
// If a concurrent `stwcx.` or `reserve` raced with us, the CAS
|
||||
// fails — that's fine; the line slot is now in a different
|
||||
// state and the displaced reservation will be picked up there.
|
||||
if bank
|
||||
.compare_exchange(prev, 0, Ordering::AcqRel, Ordering::Relaxed)
|
||||
.is_ok()
|
||||
{
|
||||
self.active_reservers.fetch_sub(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
/// Drop a per-`PpcContext` reservation without committing. Called
|
||||
/// when the interpreter clears `has_reservation` due to a
|
||||
/// non-`stwcx.` event (context switch, exception, etc.). Safe to
|
||||
/// call when the table doesn't hold our reservation anymore (the
|
||||
/// CAS simply fails).
|
||||
pub fn release(&self, addr: u32, my_gen: u32, my_hw_id: u8) {
|
||||
let _ = self.try_commit(addr, my_gen, my_hw_id);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Arc;
    use std::thread;

    /// `unpack` recovers exactly the triple given to `pack`.
    #[test]
    fn pack_unpack_roundtrip() {
        let raw = pack(0x1000_0000, 42, 5);
        assert_eq!(unpack(raw), Some((0x1000_0000, 42, 5)));
    }

    /// The zero word is the "no reservation" sentinel.
    #[test]
    fn unpack_zero_is_none() {
        assert!(unpack(0).is_none());
    }

    #[test]
    fn reserve_then_commit_succeeds() {
        let t = ReservationTable::new();
        let gn = t.reserve(0x1234, 0);
        assert!(t.try_commit(0x1234, gn, 0));
        // Already released — second commit fails.
        assert!(!t.try_commit(0x1234, gn, 0));
    }

    #[test]
    fn other_hw_id_cannot_commit() {
        let t = ReservationTable::new();
        let gn = t.reserve(0x1234, 0);
        assert!(
            !t.try_commit(0x1234, gn, 1),
            "stwcx. from a different hw_id must fail"
        );
        // Original owner can still commit.
        assert!(t.try_commit(0x1234, gn, 0));
    }

    #[test]
    fn lwarx_displaces_prior_reservation() {
        let t = ReservationTable::new();
        let g0 = t.reserve(0x1234, 0);
        // Different HW thread's lwarx on the same line.
        let g1 = t.reserve(0x1234, 1);
        // Original reserver's stwcx. fails because the slot changed.
        assert!(!t.try_commit(0x1234, g0, 0));
        // New reserver's stwcx. succeeds.
        assert!(t.try_commit(0x1234, g1, 1));
    }

    /// Displacing an entry must not change the occupied-bank count, and
    /// the count must return to zero once the surviving entry commits.
    #[test]
    fn displacement_keeps_counter_balanced() {
        let t = ReservationTable::new();
        let g0 = t.reserve(0x1234, 0);
        let g1 = t.reserve(0x1234, 1);
        assert_eq!(t.active_reservers.load(Ordering::Relaxed), 1);
        assert!(!t.try_commit(0x1234, g0, 0));
        assert_eq!(t.active_reservers.load(Ordering::Relaxed), 1);
        assert!(t.try_commit(0x1234, g1, 1));
        assert_eq!(t.active_reservers.load(Ordering::Relaxed), 0);
    }

    #[test]
    fn invalidate_clears_matching_reservation() {
        let t = ReservationTable::new();
        let gn = t.reserve(0x1234, 0);
        t.invalidate_for_write(0x1238); // same line as 0x1234
        assert!(!t.try_commit(0x1234, gn, 0));
        assert_eq!(t.active_reservers.load(Ordering::Relaxed), 0);
    }

    #[test]
    fn invalidate_different_line_in_same_bank_is_noop() {
        let t = ReservationTable::new();
        // Force a hash collision: two distinct line addresses that map to
        // the same bank.
        let line_a = 0x0000_1000;
        let line_b = line_a + ((NUM_LINES as u32) << 7); // +0x80000 → same hash
        assert_eq!(hash(line_a), hash(line_b));
        let gn = t.reserve(line_a, 0);
        // Invalidating line_b must NOT clear line_a's reservation.
        t.invalidate_for_write(line_b);
        assert!(t.try_commit(line_a, gn, 0));
    }

    /// `release` drops the reservation, and calling it again (when the
    /// table no longer holds the entry) changes nothing.
    #[test]
    fn release_is_idempotent() {
        let t = ReservationTable::new();
        let gn = t.reserve(0x4000, 2);
        t.release(0x4000, gn, 2);
        assert!(!t.has_active_reservers());
        t.release(0x4000, gn, 2);
        assert_eq!(t.active_reservers.load(Ordering::Relaxed), 0);
        assert!(!t.try_commit(0x4000, gn, 2));
    }

    #[test]
    fn has_active_reservers_tracks_count() {
        let t = ReservationTable::new();
        assert!(!t.has_active_reservers());
        let g0 = t.reserve(0x1000, 0);
        assert!(t.has_active_reservers());
        let g1 = t.reserve(0x2000, 1);
        assert!(t.has_active_reservers());
        assert!(t.try_commit(0x1000, g0, 0));
        assert!(t.has_active_reservers());
        assert!(t.try_commit(0x2000, g1, 1));
        assert!(!t.has_active_reservers());
    }

    /// Stress test: 8 host threads each loop `reserve` + `try_commit` on
    /// the same line. A commit fails whenever another thread's `reserve`
    /// displaced the entry in between, so the number of successful commits
    /// is not fixed. The test therefore checks two things only: forward
    /// progress (more than 10% of attempts commit) and that the
    /// occupied-bank counter is back at zero once every thread is done.
    #[test]
    fn concurrent_lwarx_stwcx_serializes() {
        let t = Arc::new(ReservationTable::new());
        const ROUNDS: u32 = 1000;
        const THREADS: u8 = 8;
        let total_successes = Arc::new(AtomicU64::new(0));

        let handles: Vec<_> = (0..THREADS)
            .map(|hw_id| {
                let table = Arc::clone(&t);
                let successes = Arc::clone(&total_successes);
                thread::Builder::new()
                    .name(format!("res-stress-{hw_id}"))
                    .spawn(move || {
                        let mut wins = 0u64;
                        for _ in 0..ROUNDS {
                            let gn = table.reserve(0x1234_5678, hw_id);
                            if table.try_commit(0x1234_5678, gn, hw_id) {
                                wins += 1;
                            }
                        }
                        successes.fetch_add(wins, Ordering::Relaxed);
                    })
                    .expect("spawn")
            })
            .collect();
        for h in handles {
            h.join().expect("join");
        }

        let total = total_successes.load(Ordering::Relaxed);
        let attempts = ROUNDS as u64 * THREADS as u64;
        assert!(
            total > attempts / 10,
            "expected at least 10% successful commits, got {total}/{attempts}"
        );
        assert_eq!(
            t.active_reservers.load(Ordering::Relaxed),
            0,
            "all reservations should have been resolved"
        );
    }
}
|
||||
2223
crates/xenia-cpu/src/scheduler.rs
Normal file
2223
crates/xenia-cpu/src/scheduler.rs
Normal file
File diff suppressed because it is too large
Load Diff
95
crates/xenia-cpu/src/trap.rs
Normal file
95
crates/xenia-cpu/src/trap.rs
Normal file
@@ -0,0 +1,95 @@
|
||||
//! TO-field evaluation for `tw`, `twi`, `td`, `tdi`.
|
||||
//!
|
||||
//! The TO field (5 bits) encodes which comparison outcomes trigger a trap:
|
||||
//!
|
||||
//! | bit | condition |
|
||||
//! |-----|-----------|
|
||||
//! | 0 | a < b (signed) |
|
||||
//! | 1 | a > b (signed) |
|
||||
//! | 2 | a == b |
|
||||
//! | 3 | a < b (unsigned) |
|
||||
//! | 4 | a > b (unsigned) |
|
||||
//!
|
||||
//! The bit numbering matches PowerISA ("MSB is bit 0"): TO[0] corresponds to
|
||||
//! the high bit of the 5-bit field, i.e. (to >> 4) & 1.
|
||||
//!
|
||||
//! `tw` / `twi` compare the low 32 bits of the operands (sign-extended back to
|
||||
//! 64 for the signed comparison); `td` / `tdi` compare the full 64 bits.
|
||||
|
||||
/// Operand width used by the trap comparison.
#[derive(Clone, Copy, Debug)]
pub enum TrapWidth {
    /// `tw`, `twi`: compare the low 32 bits.
    Word,
    /// `td`, `tdi`: compare all 64 bits.
    Doubleword,
}

// TO-field masks. PowerISA numbers TO[0] as the most significant of the
// five bits, hence the descending shifts.
const TO_SLT: u32 = 1 << 4; // a < b signed
const TO_SGT: u32 = 1 << 3; // a > b signed
const TO_EQ: u32 = 1 << 2; // a == b
const TO_ULT: u32 = 1 << 1; // a < b unsigned
const TO_UGT: u32 = 1 << 0; // a > b unsigned

/// Decide whether a trap instruction fires.
///
/// `to` is the 5-bit TO field, `a` and `b` the two operands and `width`
/// selects 32- or 64-bit comparison. Returns `true` when at least one
/// condition enabled in `to` holds for the operands. Bits of `to` above
/// the low five are ignored.
pub fn evaluate(to: u32, a: u64, b: u64, width: TrapWidth) -> bool {
    // Signed and unsigned views of both operands at the requested width.
    let (signed_a, signed_b, unsigned_a, unsigned_b): (i64, i64, u64, u64) = match width {
        TrapWidth::Word => (
            i64::from(a as i32),
            i64::from(b as i32),
            u64::from(a as u32),
            u64::from(b as u32),
        ),
        TrapWidth::Doubleword => (a as i64, b as i64, a, b),
    };

    let conditions = [
        (TO_SLT, signed_a < signed_b),
        (TO_SGT, signed_a > signed_b),
        (TO_EQ, unsigned_a == unsigned_b),
        (TO_ULT, unsigned_a < unsigned_b),
        (TO_UGT, unsigned_a > unsigned_b),
    ];
    conditions
        .iter()
        .any(|&(mask, holds)| holds && (to & mask) != 0)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// An all-zero TO field enables no condition, so nothing fires.
    #[test]
    fn to_zero_never_traps() {
        for &(a, b) in &[(0u64, 0u64), (5, 3), (!0, 0)] {
            assert!(!evaluate(0, a, b, TrapWidth::Doubleword));
        }
    }

    /// TO = 31 (0b11111) enables every condition; any operand pair
    /// satisfies at least one of them.
    #[test]
    fn to_31_always_traps_when_any_condition_holds() {
        let cases: [(u64, u64); 3] = [
            (1, 2), // slt + ult
            (2, 1), // sgt + ugt
            (7, 7), // eq
        ];
        for &(a, b) in &cases {
            assert!(evaluate(31, a, b, TrapWidth::Doubleword));
        }
    }

    /// TO[2] alone (0b00100 = 4) fires only on equality.
    #[test]
    fn to_eq_only() {
        assert!(evaluate(4, 5, 5, TrapWidth::Doubleword));
        assert!(!evaluate(4, 5, 6, TrapWidth::Doubleword));
    }

    /// The same bit pattern orders differently when read as signed or
    /// unsigned.
    #[test]
    fn to_signed_vs_unsigned_on_negative() {
        // -1 as u64 is all-ones.
        let minus_one = (-1i64) as u64;
        // TO[0] = slt = 0b10000 = 16. Signed: -1 < 0 holds.
        assert!(evaluate(16, minus_one, 0, TrapWidth::Doubleword));
        // TO[3] = ult = 0b00010 = 2. Unsigned: all-ones < 0 does not hold.
        assert!(!evaluate(2, minus_one, 0, TrapWidth::Doubleword));
    }

    /// Word comparisons look at the low 32 bits only.
    #[test]
    fn word_width_ignores_high_32_bits() {
        // Low word is 1, high word is noise; the other operand is 1.
        let noisy = 0xDEAD_BEEF_0000_0001u64;
        // As words the operands are equal, so TO = eq traps.
        assert!(evaluate(4, noisy, 1, TrapWidth::Word));
        // As doublewords they differ.
        assert!(!evaluate(4, noisy, 1, TrapWidth::Doubleword));
    }
}
|
||||
944
crates/xenia-cpu/src/vmx.rs
Normal file
944
crates/xenia-cpu/src/vmx.rs
Normal file
@@ -0,0 +1,944 @@
|
||||
//! VMX / AltiVec helper routines shared by the interpreter's 150+ vector
|
||||
//! opcode handlers.
|
||||
//!
|
||||
//! Big-endian lane indexing throughout: `Vec128::bytes[0]` is the most
|
||||
//! significant byte, which corresponds to PowerPC lane 0. Operations that
|
||||
//! care about "even" vs "odd" lanes follow the PPC convention (lane 0 = most
|
||||
//! significant = "even" for multiply-even/odd purposes).
|
||||
|
||||
use xenia_memory::MemoryAccess;
|
||||
use xenia_types::Vec128;
|
||||
|
||||
// ─── Lane accessors ────────────────────────────────────────────────────────
|
||||
|
||||
#[inline] pub fn as_i8x16(v: Vec128) -> [i8; 16] {
|
||||
let b = v.as_bytes();
|
||||
let mut r = [0i8; 16];
|
||||
for i in 0..16 { r[i] = b[i] as i8; }
|
||||
r
|
||||
}
|
||||
|
||||
#[inline] pub fn as_i16x8(v: Vec128) -> [i16; 8] {
|
||||
let u = v.as_u16x8();
|
||||
[u[0] as i16, u[1] as i16, u[2] as i16, u[3] as i16,
|
||||
u[4] as i16, u[5] as i16, u[6] as i16, u[7] as i16]
|
||||
}
|
||||
|
||||
#[inline] pub fn as_i32x4(v: Vec128) -> [i32; 4] {
|
||||
let u = v.as_u32x4();
|
||||
[u[0] as i32, u[1] as i32, u[2] as i32, u[3] as i32]
|
||||
}
|
||||
|
||||
#[inline] pub fn from_i8x16(r: [i8; 16]) -> Vec128 {
|
||||
let mut b = [0u8; 16];
|
||||
for i in 0..16 { b[i] = r[i] as u8; }
|
||||
Vec128::from_bytes(b)
|
||||
}
|
||||
|
||||
#[inline] pub fn from_i16x8(r: [i16; 8]) -> Vec128 {
|
||||
Vec128::from_u16x8_array([
|
||||
r[0] as u16, r[1] as u16, r[2] as u16, r[3] as u16,
|
||||
r[4] as u16, r[5] as u16, r[6] as u16, r[7] as u16,
|
||||
])
|
||||
}
|
||||
|
||||
#[inline] pub fn from_i32x4(r: [i32; 4]) -> Vec128 {
|
||||
Vec128::from_u32x4_array([r[0] as u32, r[1] as u32, r[2] as u32, r[3] as u32])
|
||||
}
|
||||
|
||||
// ─── Saturation helpers ────────────────────────────────────────────────────
|
||||
// Each returns (clamped_value, saturated_flag). Handlers OR the flags together
|
||||
// and call `ctx.set_vscr_sat(true)` once per instruction.
|
||||
|
||||
/// Unsigned 8-bit add clamped to `u8::MAX`; the flag reports clamping.
#[inline]
pub fn sat_add_u8(a: u8, b: u8) -> (u8, bool) {
    (a.saturating_add(b), a.checked_add(b).is_none())
}

/// Unsigned 8-bit subtract clamped to 0; the flag reports clamping.
#[inline]
pub fn sat_sub_u8(a: u8, b: u8) -> (u8, bool) {
    (a.saturating_sub(b), a.checked_sub(b).is_none())
}

/// Signed 8-bit add clamped to `i8::MIN..=i8::MAX`; the flag reports
/// clamping.
#[inline]
pub fn sat_add_i8(a: i8, b: i8) -> (i8, bool) {
    (a.saturating_add(b), a.checked_add(b).is_none())
}

/// Signed 8-bit subtract clamped to `i8::MIN..=i8::MAX`; the flag reports
/// clamping.
#[inline]
pub fn sat_sub_i8(a: i8, b: i8) -> (i8, bool) {
    (a.saturating_sub(b), a.checked_sub(b).is_none())
}
|
||||
|
||||
/// Unsigned 16-bit add clamped to `u16::MAX`; the flag reports clamping.
#[inline]
pub fn sat_add_u16(a: u16, b: u16) -> (u16, bool) {
    (a.saturating_add(b), a.checked_add(b).is_none())
}

/// Unsigned 16-bit subtract clamped to 0; the flag reports clamping.
#[inline]
pub fn sat_sub_u16(a: u16, b: u16) -> (u16, bool) {
    (a.saturating_sub(b), a.checked_sub(b).is_none())
}

/// Signed 16-bit add clamped to `i16::MIN..=i16::MAX`; the flag reports
/// clamping.
#[inline]
pub fn sat_add_i16(a: i16, b: i16) -> (i16, bool) {
    (a.saturating_add(b), a.checked_add(b).is_none())
}

/// Signed 16-bit subtract clamped to `i16::MIN..=i16::MAX`; the flag
/// reports clamping.
#[inline]
pub fn sat_sub_i16(a: i16, b: i16) -> (i16, bool) {
    (a.saturating_sub(b), a.checked_sub(b).is_none())
}
|
||||
|
||||
/// Unsigned 32-bit add clamped to `u32::MAX`; the flag reports clamping.
#[inline]
pub fn sat_add_u32(a: u32, b: u32) -> (u32, bool) {
    (a.saturating_add(b), a.checked_add(b).is_none())
}

/// Unsigned 32-bit subtract clamped to 0; the flag reports clamping.
#[inline]
pub fn sat_sub_u32(a: u32, b: u32) -> (u32, bool) {
    (a.saturating_sub(b), a.checked_sub(b).is_none())
}

/// Signed 32-bit add clamped to `i32::MIN..=i32::MAX`; the flag reports
/// clamping.
#[inline]
pub fn sat_add_i32(a: i32, b: i32) -> (i32, bool) {
    (a.saturating_add(b), a.checked_add(b).is_none())
}

/// Signed 32-bit subtract clamped to `i32::MIN..=i32::MAX`; the flag
/// reports clamping.
#[inline]
pub fn sat_sub_i32(a: i32, b: i32) -> (i32, bool) {
    (a.saturating_sub(b), a.checked_sub(b).is_none())
}
|
||||
|
||||
// Pack-with-saturation helpers — clamp a wider integer to the narrower type.
|
||||
/// Narrow `i16` to `i8`, clamping out-of-range values; the flag reports
/// clamping.
#[inline]
pub fn sat_i16_to_i8(v: i16) -> (i8, bool) {
    let clamped = v.clamp(i16::from(i8::MIN), i16::from(i8::MAX));
    (clamped as i8, clamped != v)
}

/// Narrow `i16` to `u8`, clamping to `0..=u8::MAX`; the flag reports
/// clamping.
#[inline]
pub fn sat_i16_to_u8(v: i16) -> (u8, bool) {
    let clamped = v.clamp(0, i16::from(u8::MAX));
    (clamped as u8, clamped != v)
}

/// Narrow `u16` to `u8`, clamping to `u8::MAX`; the flag reports clamping.
#[inline]
pub fn sat_u16_to_u8(v: u16) -> (u8, bool) {
    let clamped = v.min(u16::from(u8::MAX));
    (clamped as u8, clamped != v)
}

/// Narrow `i32` to `i16`, clamping out-of-range values; the flag reports
/// clamping.
#[inline]
pub fn sat_i32_to_i16(v: i32) -> (i16, bool) {
    let clamped = v.clamp(i32::from(i16::MIN), i32::from(i16::MAX));
    (clamped as i16, clamped != v)
}

/// Narrow `i32` to `u16`, clamping to `0..=u16::MAX`; the flag reports
/// clamping.
#[inline]
pub fn sat_i32_to_u16(v: i32) -> (u16, bool) {
    let clamped = v.clamp(0, i32::from(u16::MAX));
    (clamped as u16, clamped != v)
}

/// Narrow `u32` to `u16`, clamping to `u16::MAX`; the flag reports
/// clamping.
#[inline]
pub fn sat_u32_to_u16(v: u32) -> (u16, bool) {
    let clamped = v.min(u32::from(u16::MAX));
    (clamped as u16, clamped != v)
}

/// Narrow `i64` to `i32`, clamping out-of-range values; the flag reports
/// clamping.
#[inline]
pub fn sat_i64_to_i32(v: i64) -> (i32, bool) {
    let clamped = v.clamp(i64::from(i32::MIN), i64::from(i32::MAX));
    (clamped as i32, clamped != v)
}

/// Narrow `i64` to `u32`, clamping to `0..=u32::MAX`; the flag reports
/// clamping.
#[inline]
pub fn sat_i64_to_u32(v: i64) -> (u32, bool) {
    let clamped = v.clamp(0, i64::from(u32::MAX));
    (clamped as u32, clamped != v)
}
|
||||
|
||||
// ─── Averages ──────────────────────────────────────────────────────────────
|
||||
// PPC avg is rounded up: (a + b + 1) / 2.
|
||||
/// Rounded-up average of two `u8` values, computed without overflow.
#[inline]
pub fn avg_u8(a: u8, b: u8) -> u8 {
    let sum = u16::from(a) + u16::from(b) + 1;
    (sum / 2) as u8
}

/// Rounded-up average of two `u16` values, computed without overflow.
#[inline]
pub fn avg_u16(a: u16, b: u16) -> u16 {
    let sum = u32::from(a) + u32::from(b) + 1;
    (sum / 2) as u16
}

/// Rounded-up average of two `u32` values, computed without overflow.
#[inline]
pub fn avg_u32(a: u32, b: u32) -> u32 {
    let sum = u64::from(a) + u64::from(b) + 1;
    (sum / 2) as u32
}

/// `floor((a + b + 1) / 2)` for `i8`, computed in a wider type.
#[inline]
pub fn avg_i8(a: i8, b: i8) -> i8 {
    let sum = i32::from(a) + i32::from(b) + 1;
    // Euclidean division by 2 floors, like an arithmetic right shift.
    sum.div_euclid(2) as i8
}

/// `floor((a + b + 1) / 2)` for `i16`, computed in a wider type.
#[inline]
pub fn avg_i16(a: i16, b: i16) -> i16 {
    let sum = i32::from(a) + i32::from(b) + 1;
    sum.div_euclid(2) as i16
}

/// `floor((a + b + 1) / 2)` for `i32`, computed in a wider type.
#[inline]
pub fn avg_i32(a: i32, b: i32) -> i32 {
    let sum = i64::from(a) + i64::from(b) + 1;
    sum.div_euclid(2) as i32
}
|
||||
|
||||
// ─── NaN-aware f32 min/max for vmaxfp / vminfp ────────────────────────────
|
||||
//
|
||||
// Altivec PEM: "If either element of vA or vB is a NaN, the corresponding
|
||||
// element of vD is set to the quiet NaN form of that NaN". Rust's `>` / `<`
|
||||
// comparison with NaN always returns false, so `if a > b { a } else { b }`
|
||||
// would silently pick `b` whenever `a` is NaN — losing NaN propagation.
|
||||
|
||||
#[inline]
|
||||
pub fn max_nan(a: f32, b: f32) -> f32 {
|
||||
if a.is_nan() { quiet_nan(a) }
|
||||
else if b.is_nan() { quiet_nan(b) }
|
||||
else if a > b { a } else { b }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn min_nan(a: f32, b: f32) -> f32 {
|
||||
if a.is_nan() { quiet_nan(a) }
|
||||
else if b.is_nan() { quiet_nan(b) }
|
||||
else if a < b { a } else { b }
|
||||
}
|
||||
|
||||
/// Turn a signalling NaN into the matching quiet NaN.
///
/// Sets the most significant mantissa bit (`0x0040_0000`) of any NaN input,
/// which leaves an already-quiet NaN unchanged. Non-NaN inputs are returned
/// as-is.
#[inline]
pub fn quiet_nan(x: f32) -> f32 {
    const QUIET_BIT: u32 = 0x0040_0000;
    if x.is_nan() {
        f32::from_bits(x.to_bits() | QUIET_BIT)
    } else {
        x
    }
}
|
||||
|
||||
/// Replace a subnormal `f32` with a zero of the same sign.
///
/// Normal numbers, zeros, infinities and NaNs pass through untouched. Used
/// by the vmaddfp family, vctsxs / vctuxs, and any instruction whose AltiVec
/// definition specifies input-side denormal flushing regardless of VSCR[NJ].
#[inline]
pub fn flush_denorm(x: f32) -> f32 {
    const SIGN_ONLY: u32 = 0x8000_0000;
    if !x.is_subnormal() {
        return x;
    }
    // Keeping only the sign bit yields +0.0 or -0.0.
    f32::from_bits(x.to_bits() & SIGN_ONLY)
}
|
||||
|
||||
// ─── Float ⇄ fixed-point conversions (scaled by 2^scale_bits) ─────────────
|
||||
//
|
||||
// vctsxs / vctuxs flush denormal inputs to 0 before scaling, per Altivec.
|
||||
#[inline] pub fn cvt_f32_to_i32_sat(x: f32, scale_bits: u32) -> (i32, bool) {
|
||||
// PPCBUG-433: AltiVec ISA saturates NaN to INT_MIN (0x80000000), not 0.
|
||||
// (vctuxs's NaN→0 is correct per AltiVec ISA — see PPCBUG-434.)
|
||||
if x.is_nan() { return (i32::MIN, true); }
|
||||
let x = flush_denorm(x);
|
||||
let scaled = (x as f64) * ((1u64 << scale_bits) as f64);
|
||||
if scaled >= i32::MAX as f64 { return (i32::MAX, true); }
|
||||
if scaled <= i32::MIN as f64 { return (i32::MIN, true); }
|
||||
(scaled.trunc() as i32, false)
|
||||
}
|
||||
#[inline] pub fn cvt_f32_to_u32_sat(x: f32, scale_bits: u32) -> (u32, bool) {
|
||||
if x.is_nan() { return (0, true); }
|
||||
let x = flush_denorm(x);
|
||||
let scaled = (x as f64) * ((1u64 << scale_bits) as f64);
|
||||
if scaled < 0.0 { return (0, true); }
|
||||
if scaled > u32::MAX as f64 { return (u32::MAX, true); }
|
||||
(scaled.trunc() as u32, false)
|
||||
}
|
||||
/// Convert a signed fixed-point value with `scale_bits` fractional bits to
/// `f32`, i.e. `v / 2^scale_bits`.
///
/// The division is carried out in `f64` and rounded to `f32` at the end.
#[inline]
pub fn cvt_i32_to_f32(v: i32, scale_bits: u32) -> f32 {
    let divisor = (1u64 << scale_bits) as f64;
    (f64::from(v) / divisor) as f32
}
|
||||
/// Convert an unsigned fixed-point value with `scale_bits` fractional bits
/// to `f32`, i.e. `v / 2^scale_bits`.
///
/// The division is carried out in `f64` and rounded to `f32` at the end.
#[inline]
pub fn cvt_u32_to_f32(v: u32, scale_bits: u32) -> f32 {
    let divisor = (1u64 << scale_bits) as f64;
    (f64::from(v) / divisor) as f32
}
|
||||
|
||||
// ─── Unaligned vector load/store ──────────────────────────────────────────
|
||||
//
|
||||
// lvlx/lvrx and stvlx/stvrx combine to perform any unaligned 16-byte access:
|
||||
// lvlx(EA) | lvrx(EA + 16) loads 16 bytes starting at unaligned EA.
|
||||
// stvlx(EA); stvrx(EA + 16) stores 16 bytes starting at unaligned EA.
|
||||
//
|
||||
// Semantics per the AltiVec manual (and xenia-canary ppc_emit_memory.cc):
|
||||
// lvlx: shift = EA & 0xF, n = 16 - shift. Loads mem[EA..EA+n] into
|
||||
// lanes VR[0..n], zeros VR[n..16].
|
||||
// lvrx: shift = EA & 0xF. If shift == 0, VR = 0. Otherwise loads
|
||||
// mem[EA-shift..EA] into lanes VR[16-shift..16], zeros VR[0..16-shift].
|
||||
// stvlx / stvrx are the symmetric stores.
|
||||
//
|
||||
// `Vec128::bytes[0]` is the most significant byte (PPC lane 0 in BE view).
|
||||
|
||||
pub fn load_vector_left(mem: &dyn MemoryAccess, ea: u32) -> Vec128 {
|
||||
let shift = (ea & 0xF) as usize;
|
||||
let n = 16 - shift;
|
||||
let mut bytes = [0u8; 16];
|
||||
for i in 0..n {
|
||||
bytes[i] = mem.read_u8(ea.wrapping_add(i as u32));
|
||||
}
|
||||
Vec128::from_bytes(bytes)
|
||||
}
|
||||
|
||||
pub fn load_vector_right(mem: &dyn MemoryAccess, ea: u32) -> Vec128 {
|
||||
let shift = (ea & 0xF) as usize;
|
||||
if shift == 0 { return Vec128::ZERO; }
|
||||
let base = ea & !0xFu32;
|
||||
let mut bytes = [0u8; 16];
|
||||
for i in 0..shift {
|
||||
bytes[16 - shift + i] = mem.read_u8(base.wrapping_add(i as u32));
|
||||
}
|
||||
Vec128::from_bytes(bytes)
|
||||
}
|
||||
|
||||
/// `stvlx`: store the leading lanes of `v` from `ea` to the end of its
/// 16-byte line.
///
/// With `shift = ea & 0xF`, lanes `0..16 - shift` are written to
/// `ea..ea + 16 - shift`, one byte at a time in ascending address order.
pub fn store_vector_left(mem: &dyn MemoryAccess, ea: u32, v: Vec128) {
    let misalign = (ea & 0xF) as usize;
    let lanes = v.as_bytes();
    for (offset, &byte) in lanes.iter().take(16 - misalign).enumerate() {
        mem.write_u8(ea.wrapping_add(offset as u32), byte);
    }
}
|
||||
|
||||
/// `stvrx`: store the trailing lanes of `v` into the bytes that precede
/// `ea` within its 16-byte line.
///
/// With `shift = ea & 0xF`: an aligned `ea` writes nothing; otherwise lanes
/// `16 - shift..16` are written starting at the line base (`ea & !0xF`).
pub fn store_vector_right(mem: &dyn MemoryAccess, ea: u32, v: Vec128) {
    let misalign = (ea & 0xF) as usize;
    if misalign == 0 {
        return;
    }
    let line_start = ea & !0xFu32;
    let lanes = v.as_bytes();
    for (offset, &byte) in lanes[16 - misalign..].iter().enumerate() {
        mem.write_u8(line_start.wrapping_add(offset as u32), byte);
    }
}
|
||||
|
||||
// ─── pixel pack (vpkpx / vupkhpx / vupklpx) ───────────────────────────────
|
||||
// PPC vpkpx packs each 32-bit lane into a 16-bit 1-5-5-5 pixel.
|
||||
// Mapping transcribed EXACTLY from xenia-canary
|
||||
// `ppc_emit_altivec.cc::vkpkx_in_low` (lines 1795-1808):
|
||||
// tmp1 = (input >> 9) & 0xFC00 // out bits 15:10 = in bits 24:19
|
||||
// tmp2 = (input >> 6) & 0x3E0 // out bits 9:5 = in bits 14:10
|
||||
// tmp3 = (input >> 3) & 0x1F // out bits 4:0 = in bits 7:3
|
||||
// result = tmp1 | tmp2 | tmp3
|
||||
// This is a pure shift/mask: there is NO standalone alpha select. Output
|
||||
// bit 15 is simply input bit 24 (the top of the 6-bit field masked by
|
||||
// 0xFC00) — NOT input bit 7. The red field is 6 bits wide here.
|
||||
|
||||
/// Pack one 32-bit lane into a 16-bit pixel for `vpkpx`.
///
/// Pure shift-and-mask, one `(shift, mask)` pair per output field:
/// out bits 15:10 come from in bits 24:19, out bits 9:5 from in bits 14:10,
/// and out bits 4:0 from in bits 7:3. There is no separate alpha select.
#[inline]
pub fn pack_pixel_555(input: u32) -> u16 {
    const FIELDS: [(u32, u32); 3] = [(9, 0xFC00), (6, 0x03E0), (3, 0x001F)];
    let packed = FIELDS
        .iter()
        .fold(0u32, |acc, &(shift, mask)| acc | ((input >> shift) & mask));
    packed as u16
}
|
||||
|
||||
/// Unpack a 16-bit 1-5-5-5 pixel into a 32-bit 8-8-8-8 lane for
/// `vupkhpx` / `vupklpx`.
///
/// * Bit 15 is sign-extended to fill the top byte (`0x00` or `0xFF`).
/// * Each 5-bit colour field (bits 14:10, 9:5, 4:0) is *zero-extended* into
///   its own byte, so a full-intensity channel unpacks to `0x1F`, not
///   `0xFF`. The instruction does not replicate colour bits to widen the
///   range.
#[inline]
pub fn unpack_pixel_555(input: u16) -> u32 {
    let px = u32::from(input);
    let alpha: u32 = if px & 0x8000 != 0 { 0xFF } else { 0x00 };
    let red = (px >> 10) & 0x1F;
    let green = (px >> 5) & 0x1F;
    let blue = px & 0x1F;
    (alpha << 24) | (red << 16) | (green << 8) | blue
}
|
||||
|
||||
// ─── VMX128 D3D pack/unpack dispatch ──────────────────────────────────────
|
||||
// `vpkd3d128` / `vupkd3d128` encode a small enum in the instruction word
|
||||
// (VX128_4 immediate field). The exact enum lives in canary's
|
||||
// ppc_emit_altivec.cc under PACK_TYPE_*; titles usually touch D3DCOLOR
|
||||
// (type 0) and a handful of texture-coordinate variants.
|
||||
//
|
||||
// Rather than risk getting a rarely-used sub-case wrong, we implement the
|
||||
// common types and fall back to a warning + pass-through for unknown types.
|
||||
// Returning the VB register value unchanged is always preferable to emitting
|
||||
// StepResult::Unimplemented because it keeps the interpreter running.
|
||||
|
||||
/// Pack-type encoding of `vpkd3d128` / `vupkd3d128`.
|
||||
///
|
||||
/// The immediate field lives at PPC bits 16-22 (VX128_3/4 IMM, 7 bits).
|
||||
/// Canary decodes `type = IMM >> 2` (top 5 bits) and `pack = IMM & 0x3`
|
||||
/// (low 2 bits, used only by `vpkd3d128` to select output-slot layout).
|
||||
/// Valid `type` values are 0..=6 per `ppc_emit_altivec.cc:2095-2118`:
|
||||
///
|
||||
/// | id | canary name | format |
|
||||
/// |----|-------------------|---------------------------------------|
|
||||
/// | 0 | VPACK_D3DCOLOR | 4 f32 [0,1] ↔ ARGB8 |
|
||||
/// | 1 | VPACK_NORMSHORT2 | 2 f32 [-1,1] ↔ 2× signed-normalized i16 |
|
||||
/// | 2 | VPACK_NORMPACKED32| 4 f32 [-1,1] ↔ UINT_2101010 (w:2,z:10,y:10,x:10) |
|
||||
/// | 3 | VPACK_FLOAT16_2 | 2 f32 ↔ 2× fp16 |
|
||||
/// | 4 | VPACK_NORMSHORT4 | 4 f32 [-1,1] ↔ 4× signed-normalized i16 |
|
||||
/// | 5 | VPACK_FLOAT16_4 | 4 f32 ↔ 4× fp16 |
|
||||
/// | 6 | VPACK_NORMPACKED64| 4 f32 [-1,1] ↔ ULONG_4202020 (w:4,z:20,y:20,x:20) |
|
||||
///
|
||||
/// Prior (M3-pre) this enum listed made-up "Normal16"/"Normal8"/"UByteN4"
|
||||
/// variants that didn't match canary; the immediate extraction was also
|
||||
/// wrong (LSB-numbered `>>6 & 0x7` instead of MSB-numbered `>>11 & 0x1F`
|
||||
/// against a 7-bit IMM field). M3 fixes both.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum D3dPackType {
    /// Type 0: four floats in [0, 1] ↔ ARGB8.
    D3dColor,
    /// Type 1: two floats in [-1, 1] ↔ two signed-normalized 16-bit values.
    NormShort2,
    /// Type 2: four floats in [-1, 1] ↔ 2-10-10-10 packed word.
    NormPacked32,
    /// Type 3: two floats ↔ two half-floats.
    Float16_2,
    /// Type 4: four floats in [-1, 1] ↔ four signed-normalized 16-bit values.
    NormShort4,
    /// Type 5: four floats ↔ four half-floats.
    Float16_4,
    /// Type 6: four floats in [-1, 1] ↔ 4-20-20-20 packed doubleword.
    NormPacked64,
    /// Any type value outside 0..=6, carried verbatim so callers can warn
    /// and fall back.
    Other(u32),
}

impl D3dPackType {
    /// Decode the `type` bits extracted from the VX128_3/4 IMM field via
    /// canary's `IMM >> 2` convention (i.e. the caller has already divided
    /// out the 2-bit `pack` subfield).
    ///
    /// Values 0..=6 map to the named variants; anything else is returned as
    /// [`D3dPackType::Other`] holding the raw value.
    pub fn from_immediate(type_bits: u32) -> Self {
        match type_bits {
            0 => Self::D3dColor,
            1 => Self::NormShort2,
            2 => Self::NormPacked32,
            3 => Self::Float16_2,
            4 => Self::NormShort4,
            5 => Self::Float16_4,
            6 => Self::NormPacked64,
            other => Self::Other(other),
        }
    }
}
|
||||
|
||||
/// Pack an f32x4 vector of [R, G, B, A] in [0.0, 1.0] into a single D3DCOLOR
|
||||
/// value in lane 3 of the output.
|
||||
pub fn pack_d3dcolor(v: Vec128) -> Vec128 {
|
||||
let f = v.as_f32x4();
|
||||
let to_byte = |x: f32| -> u32 {
|
||||
let c = x.clamp(0.0, 1.0) * 255.0;
|
||||
(c + 0.5) as u32 & 0xFF
|
||||
};
|
||||
// D3DCOLOR is A,R,G,B in that byte order inside a u32.
|
||||
let word = (to_byte(f[3]) << 24) | (to_byte(f[0]) << 16) | (to_byte(f[1]) << 8) | to_byte(f[2]);
|
||||
Vec128::from_u32x4(0, 0, 0, word)
|
||||
}
|
||||
|
||||
/// Unpack a D3DCOLOR value (in lane 3 of the input) into an f32x4 [R, G, B, A].
|
||||
pub fn unpack_d3dcolor(v: Vec128) -> Vec128 {
|
||||
let word = v.u32x4(3);
|
||||
let a = ((word >> 24) & 0xFF) as f32 / 255.0;
|
||||
let r = ((word >> 16) & 0xFF) as f32 / 255.0;
|
||||
let g = ((word >> 8) & 0xFF) as f32 / 255.0;
|
||||
let b = (word & 0xFF) as f32 / 255.0;
|
||||
Vec128::from_f32x4(r, g, b, a)
|
||||
}
|
||||
|
||||
// ───────────────────────────────────────────────────────────────────────
|
||||
// First-Pixels M3 — pack/unpack for the remaining canary pack types.
|
||||
//
|
||||
// Conventions shared across all helpers:
|
||||
// * Input-to-`unpack_*` (packed data) lives in the *source* lane position
|
||||
// canary's HIR assumes: canonically the 32-bit word is in lane 3 and
|
||||
// the 64-bit value straddles lanes 2-3. We match that so the existing
|
||||
// D3DCOLOR helpers' 3-lane convention is preserved across the whole
|
||||
// pack-type family.
|
||||
// * Output-from-`pack_*` sits in the same lane(s). The caller usually
|
||||
// follows with a permute to move it elsewhere (the VX128_4 `pack`
|
||||
// subfield controls that in `vpkd3d128`).
|
||||
//   * Range semantics match canary: signed-normalized N-bit types scale by
//     `max = (1 << (N - 1)) - 1`, and inputs are clamped before rounding.
|
||||
// ───────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Quantize a float to a signed-normalized 16-bit value.
///
/// The input is clamped to [-1, 1], scaled by 32767, then rounded half away
/// from zero by adding ±0.5 before the truncating cast. The final clamp
/// keeps the result inside the `i16` range. NaN produces 0.
#[inline]
fn norm_to_i16(x: f32) -> i16 {
    let scaled = x.clamp(-1.0, 1.0) * 32767.0;
    let bias = if scaled >= 0.0 { 0.5 } else { -0.5 };
    let rounded = (scaled + bias) as i32;
    rounded.clamp(-32768, 32767) as i16
}
|
||||
|
||||
/// Expand a signed-normalized 16-bit value to a float by dividing by 32767.
///
/// `i16::MIN` therefore maps slightly below -1.0.
#[inline]
fn i16_to_norm(s: i16) -> f32 {
    f32::from(s) / 32767.0
}
|
||||
|
||||
/// **NORMSHORT2** — 2 f32s in [-1, 1] → two 16-bit signed-normalized
|
||||
/// shorts packed as `(x << 16) | y` in lane 3 (high 32 bits of the word
|
||||
/// hold X; low 16 hold Y). Output lanes 0..=2 are zero-filled.
|
||||
pub fn pack_normshort2(v: Vec128) -> Vec128 {
|
||||
let f = v.as_f32x4();
|
||||
let x = norm_to_i16(f[0]) as u16 as u32;
|
||||
let y = norm_to_i16(f[1]) as u16 as u32;
|
||||
Vec128::from_u32x4(0, 0, 0, (x << 16) | y)
|
||||
}
|
||||
|
||||
pub fn unpack_normshort2(v: Vec128) -> Vec128 {
|
||||
let word = v.u32x4(3);
|
||||
let x = i16_to_norm((word >> 16) as i16);
|
||||
let y = i16_to_norm(word as i16);
|
||||
Vec128::from_f32x4(x, y, 0.0, 1.0)
|
||||
}
|
||||
|
||||
/// **NORMSHORT4** — 4 f32s in [-1, 1] → four 16-bit signed-normalized
|
||||
/// shorts packed across lanes 2-3 (big-endian dword order: X in the
|
||||
/// high word of lane 2, Y low of lane 2, Z high of lane 3, W low of lane
|
||||
/// 3).
|
||||
pub fn pack_normshort4(v: Vec128) -> Vec128 {
|
||||
let f = v.as_f32x4();
|
||||
let x = norm_to_i16(f[0]) as u16 as u32;
|
||||
let y = norm_to_i16(f[1]) as u16 as u32;
|
||||
let z = norm_to_i16(f[2]) as u16 as u32;
|
||||
let w = norm_to_i16(f[3]) as u16 as u32;
|
||||
Vec128::from_u32x4(0, 0, (x << 16) | y, (z << 16) | w)
|
||||
}
|
||||
|
||||
pub fn unpack_normshort4(v: Vec128) -> Vec128 {
|
||||
let hi = v.u32x4(2);
|
||||
let lo = v.u32x4(3);
|
||||
let x = i16_to_norm((hi >> 16) as i16);
|
||||
let y = i16_to_norm(hi as i16);
|
||||
let z = i16_to_norm((lo >> 16) as i16);
|
||||
let w = i16_to_norm(lo as i16);
|
||||
Vec128::from_f32x4(x, y, z, w)
|
||||
}
|
||||
|
||||
/// **NORMPACKED32** — UINT_2101010 layout, 4 f32s in [-1, 1] packed into
|
||||
/// 32 bits in lane 3. Per canary's comment `2_10_10_10 w_z_y_x`: the
|
||||
/// high 2 bits hold W (signed 2-bit, -2..=1), then Z/Y/X each use 10
|
||||
/// signed-normalized bits.
|
||||
pub fn pack_normpacked32(v: Vec128) -> Vec128 {
|
||||
let f = v.as_f32x4();
|
||||
#[inline]
|
||||
fn n10(x: f32) -> u32 {
|
||||
let c = x.clamp(-1.0, 1.0) * 511.0;
|
||||
let r = if c >= 0.0 { (c + 0.5) as i32 } else { (c - 0.5) as i32 };
|
||||
(r.clamp(-512, 511) as i32 as u32) & 0x3FF
|
||||
}
|
||||
#[inline]
|
||||
fn n2(x: f32) -> u32 {
|
||||
let c = x.clamp(-1.0, 1.0) * 1.0;
|
||||
let r = if c >= 0.0 { (c + 0.5) as i32 } else { (c - 0.5) as i32 };
|
||||
(r.clamp(-2, 1) as i32 as u32) & 0x3
|
||||
}
|
||||
let x = n10(f[0]);
|
||||
let y = n10(f[1]);
|
||||
let z = n10(f[2]);
|
||||
let w = n2(f[3]);
|
||||
let word = (w << 30) | (z << 20) | (y << 10) | x;
|
||||
Vec128::from_u32x4(0, 0, 0, word)
|
||||
}
|
||||
|
||||
pub fn unpack_normpacked32(v: Vec128) -> Vec128 {
|
||||
let word = v.u32x4(3);
|
||||
#[inline]
|
||||
fn u10_to_norm(bits: u32) -> f32 {
|
||||
// Sign-extend the 10-bit field then normalize.
|
||||
let s = ((bits & 0x3FF) as i32) << 22 >> 22;
|
||||
(s as f32) / 511.0
|
||||
}
|
||||
#[inline]
|
||||
fn u2_to_norm(bits: u32) -> f32 {
|
||||
let s = ((bits & 0x3) as i32) << 30 >> 30;
|
||||
(s as f32).clamp(-1.0, 1.0)
|
||||
}
|
||||
let x = u10_to_norm(word);
|
||||
let y = u10_to_norm(word >> 10);
|
||||
let z = u10_to_norm(word >> 20);
|
||||
let w = u2_to_norm(word >> 30);
|
||||
Vec128::from_f32x4(x, y, z, w)
|
||||
}
|
||||
|
||||
/// **NORMPACKED64** — ULONG_4202020, 4 f32s in [-1, 1] packed into 64
|
||||
/// bits across lanes 2-3. Per canary's comment `4_20_20_20 w_z_y_x`:
|
||||
/// the high 4 bits of the dword hold W (signed 4-bit); the remaining 60
|
||||
/// bits hold 3× 20-bit signed-normalized Z/Y/X. Rare outside very few
|
||||
/// titles (canary notes 54540829).
|
||||
pub fn pack_normpacked64(v: Vec128) -> Vec128 {
|
||||
let f = v.as_f32x4();
|
||||
#[inline]
|
||||
fn n20(x: f32) -> u64 {
|
||||
let c = x.clamp(-1.0, 1.0) * 524287.0; // 2^19 - 1
|
||||
let r = if c >= 0.0 { (c + 0.5) as i64 } else { (c - 0.5) as i64 };
|
||||
(r.clamp(-524288, 524287) as i64 as u64) & 0xF_FFFF
|
||||
}
|
||||
#[inline]
|
||||
fn n4(x: f32) -> u64 {
|
||||
let c = x.clamp(-1.0, 1.0) * 7.0;
|
||||
let r = if c >= 0.0 { (c + 0.5) as i64 } else { (c - 0.5) as i64 };
|
||||
(r.clamp(-8, 7) as i64 as u64) & 0xF
|
||||
}
|
||||
let x = n20(f[0]);
|
||||
let y = n20(f[1]);
|
||||
let z = n20(f[2]);
|
||||
let w = n4(f[3]);
|
||||
let dw: u64 = (w << 60) | (z << 40) | (y << 20) | x;
|
||||
Vec128::from_u32x4(0, 0, (dw >> 32) as u32, dw as u32)
|
||||
}
|
||||
|
||||
pub fn unpack_normpacked64(v: Vec128) -> Vec128 {
|
||||
let hi = v.u32x4(2) as u64;
|
||||
let lo = v.u32x4(3) as u64;
|
||||
let dw = (hi << 32) | lo;
|
||||
#[inline]
|
||||
fn u20_to_norm(bits: u64) -> f32 {
|
||||
let s = ((bits & 0xF_FFFF) as i64) << 44 >> 44;
|
||||
(s as f32) / 524287.0
|
||||
}
|
||||
#[inline]
|
||||
fn u4_to_norm(bits: u64) -> f32 {
|
||||
let s = ((bits & 0xF) as i64) << 60 >> 60;
|
||||
(s as f32) / 7.0
|
||||
}
|
||||
let x = u20_to_norm(dw);
|
||||
let y = u20_to_norm(dw >> 20);
|
||||
let z = u20_to_norm(dw >> 40);
|
||||
let w = u4_to_norm(dw >> 60);
|
||||
Vec128::from_f32x4(x, y, z, w)
|
||||
}
|
||||
|
||||
/// Convert an `f32` to half-precision bits — shared by FLOAT16_2 and
/// FLOAT16_4. Done with manual bit manipulation on `f32::to_bits` because a
/// stable `f16` type is not available.
///
/// * NaN becomes a quiet half NaN (mantissa `0x200`); infinity stays
///   infinity; the sign is always preserved.
/// * Unbiased exponents of 16 or more overflow to infinity.
/// * Unbiased exponents in -24..=-15 produce a half subnormal; anything
///   smaller (including f32 zeros and subnormals) becomes signed zero.
/// * Mantissa bits that do not fit are truncated, not rounded.
#[inline]
fn f32_to_f16_bits(f: f32) -> u16 {
    const HALF_INF: u16 = 0x1F << 10;

    let raw = f.to_bits();
    let sign_bit = ((raw >> 31) as u16) << 15;
    let biased = ((raw >> 23) & 0xFF) as i32;
    let frac = raw & 0x007F_FFFF;

    if biased == 0xFF {
        // Infinity keeps an empty mantissa; every NaN collapses to one quiet pattern.
        let payload: u16 = if frac != 0 { 0x200 } else { 0 };
        return sign_bit | HALF_INF | payload;
    }

    match biased - 127 {
        // Too large for a half: overflow to infinity.
        e if e >= 16 => sign_bit | HALF_INF,
        // Too small even for a half subnormal: signed zero.
        e if e < -24 => sign_bit,
        // Half subnormal: shift the 24-bit significand (implicit 1 restored)
        // down by the exponent deficit plus the 13 dropped mantissa bits.
        e if e <= -15 => {
            let shift = (-14 - e) + 13;
            sign_bit | ((0x0080_0000 | frac) >> shift) as u16
        }
        // Normal half: rebias the exponent and keep the top 10 mantissa bits.
        e => {
            let half_exp = ((e + 15) as u16) & 0x1F;
            sign_bit | (half_exp << 10) | (frac >> 13) as u16
        }
    }
}
|
||||
|
||||
/// Expand half-precision bits to an `f32`.
///
/// Infinity and signed zero map to themselves, any half NaN becomes an f32
/// quiet NaN with mantissa `0x400000`, half subnormals are renormalized
/// into f32 normals, and normal values are rebiased with the mantissa
/// shifted up by 13 bits. The sign bit is always carried over.
#[inline]
fn f16_bits_to_f32(h: u16) -> f32 {
    let sign_bit = u32::from(h >> 15) << 31;
    let biased = i32::from((h >> 10) & 0x1F);
    let frac = u32::from(h & 0x3FF);

    let magnitude = match (biased, frac) {
        (0x1F, 0) => 0xFF << 23,
        (0x1F, _) => (0xFF << 23) | 0x0040_0000,
        (0, 0) => 0,
        (0, _) => {
            // Shift the highest set mantissa bit up to bit 10 (the implicit
            // leading 1), lowering the exponent once per shift from -14.
            let shifts = frac.leading_zeros() - 21;
            let exponent = -14 - shifts as i32;
            let normalized = frac << shifts;
            ((((exponent + 127) as u32) & 0xFF) << 23) | ((normalized & 0x3FF) << 13)
        }
        _ => ((((biased - 15 + 127) as u32) & 0xFF) << 23) | (frac << 13),
    };
    f32::from_bits(sign_bit | magnitude)
}
|
||||
|
||||
/// **FLOAT16_2** — two 32-bit floats → two half-floats packed into one
|
||||
/// 32-bit word (X in high 16 bits of lane 3, Y in low 16).
|
||||
pub fn pack_float16_2(v: Vec128) -> Vec128 {
|
||||
let f = v.as_f32x4();
|
||||
let x = f32_to_f16_bits(f[0]) as u32;
|
||||
let y = f32_to_f16_bits(f[1]) as u32;
|
||||
Vec128::from_u32x4(0, 0, 0, (x << 16) | y)
|
||||
}
|
||||
|
||||
pub fn unpack_float16_2(v: Vec128) -> Vec128 {
|
||||
let word = v.u32x4(3);
|
||||
let x = f16_bits_to_f32((word >> 16) as u16);
|
||||
let y = f16_bits_to_f32(word as u16);
|
||||
Vec128::from_f32x4(x, y, 0.0, 1.0)
|
||||
}
|
||||
|
||||
/// **FLOAT16_4** — four 32-bit floats → four half-floats packed across
|
||||
/// 64 bits (lanes 2-3).
|
||||
pub fn pack_float16_4(v: Vec128) -> Vec128 {
|
||||
let f = v.as_f32x4();
|
||||
let x = f32_to_f16_bits(f[0]) as u32;
|
||||
let y = f32_to_f16_bits(f[1]) as u32;
|
||||
let z = f32_to_f16_bits(f[2]) as u32;
|
||||
let w = f32_to_f16_bits(f[3]) as u32;
|
||||
Vec128::from_u32x4(0, 0, (x << 16) | y, (z << 16) | w)
|
||||
}
|
||||
|
||||
pub fn unpack_float16_4(v: Vec128) -> Vec128 {
|
||||
let hi = v.u32x4(2);
|
||||
let lo = v.u32x4(3);
|
||||
let x = f16_bits_to_f32((hi >> 16) as u16);
|
||||
let y = f16_bits_to_f32(hi as u16);
|
||||
let z = f16_bits_to_f32((lo >> 16) as u16);
|
||||
let w = f16_bits_to_f32(lo as u16);
|
||||
Vec128::from_f32x4(x, y, z, w)
|
||||
}
|
||||
|
||||
// ─── CR6 helpers used by integer compares ─────────────────────────────────
|
||||
// vcmp*. (record-form) updates CR6 in a compressed form:
|
||||
// CR6 = {all-true, 0, all-false, 0}
|
||||
// where each bit reflects the per-lane mask across the whole register.
|
||||
|
||||
#[inline] pub fn cr6_flags_from_mask(mask: Vec128) -> (bool, bool) {
|
||||
let b = mask.as_bytes();
|
||||
let mut any_set = false;
|
||||
let mut any_clear = false;
|
||||
for &byte in b.iter() {
|
||||
if byte != 0 { any_set = true; }
|
||||
if byte != 0xFF { any_clear = true; }
|
||||
}
|
||||
let all_true = !any_clear;
|
||||
let all_false = !any_set;
|
||||
(all_true, all_false)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    use std::cell::Cell;

    /// Flat big-endian byte store implementing `MemoryAccess` for the tests.
    /// `Cell` provides the interior mutability the `&self` write methods need.
    struct TestMem {
        data: Box<[Cell<u8>]>,
    }

    impl TestMem {
        /// Zero-filled memory of `size` bytes.
        fn new(size: usize) -> Self {
            Self { data: std::iter::repeat_with(|| Cell::new(0)).take(size).collect() }
        }

        /// Copy `out.len()` bytes starting at `addr` into `out`.
        fn fetch(&self, addr: u32, out: &mut [u8]) {
            let base = addr as usize;
            for (slot, cell) in out.iter_mut().zip(&self.data[base..base + out.len()]) {
                *slot = cell.get();
            }
        }

        /// Write `src` to memory starting at `addr`.
        fn stash(&self, addr: u32, src: &[u8]) {
            let base = addr as usize;
            for (cell, &byte) in self.data[base..base + src.len()].iter().zip(src) {
                cell.set(byte);
            }
        }
    }

    impl MemoryAccess for TestMem {
        fn read_u8(&self, a: u32) -> u8 {
            self.data[a as usize].get()
        }
        fn read_u16(&self, a: u32) -> u16 {
            let mut raw = [0u8; 2];
            self.fetch(a, &mut raw);
            u16::from_be_bytes(raw)
        }
        fn read_u32(&self, a: u32) -> u32 {
            let mut raw = [0u8; 4];
            self.fetch(a, &mut raw);
            u32::from_be_bytes(raw)
        }
        fn read_u64(&self, a: u32) -> u64 {
            let mut raw = [0u8; 8];
            self.fetch(a, &mut raw);
            u64::from_be_bytes(raw)
        }
        fn write_u8(&self, a: u32, v: u8) {
            self.data[a as usize].set(v);
        }
        fn write_u16(&self, a: u32, v: u16) {
            self.stash(a, &v.to_be_bytes());
        }
        fn write_u32(&self, a: u32, v: u32) {
            self.stash(a, &v.to_be_bytes());
        }
        fn write_u64(&self, a: u32, v: u64) {
            self.stash(a, &v.to_be_bytes());
        }
        fn translate(&self, _a: u32) -> Option<*const u8> {
            None
        }
        fn translate_mut(&self, _a: u32) -> Option<*mut u8> {
            None
        }
    }

    #[test]
    fn lvlx_lvrx_round_trip() {
        let m = TestMem::new(0x40);
        for (i, cell) in m.data.iter().take(0x30).enumerate() {
            cell.set((i as u8).wrapping_add(0x10));
        }
        // An unaligned load from 0x13 is lvlx(0x13) | lvrx(0x23).
        let left = load_vector_left(&m, 0x13);
        let right = load_vector_right(&m, 0x23);
        let left_bytes = left.as_bytes();
        let right_bytes = right.as_bytes();
        for lane in 0..16 {
            let merged = left_bytes[lane] | right_bytes[lane];
            assert_eq!(merged, m.data[0x13 + lane].get(), "lane {}", lane);
        }
    }

    #[test]
    fn lvlx_aligned_is_full_load() {
        let m = TestMem::new(0x20);
        for (i, cell) in m.data.iter().enumerate() {
            cell.set(i as u8);
        }
        let v = load_vector_left(&m, 0x10);
        let loaded = v.as_bytes();
        for (lane, &byte) in loaded.iter().enumerate() {
            assert_eq!(byte, 0x10 + lane as u8);
        }
    }

    #[test]
    fn lvrx_aligned_is_zero() {
        let m = TestMem::new(0x20);
        let v = load_vector_right(&m, 0x10);
        assert_eq!(v.as_bytes(), [0u8; 16]);
    }

    #[test]
    fn sat_add_signed_overflow() {
        assert_eq!(sat_add_i8(120, 10), (127, true));
        assert_eq!(sat_add_i8(-120, -10), (-128, true));
        assert_eq!(sat_add_i8(1, 2), (3, false));
    }

    #[test]
    fn sat_sub_unsigned_underflow() {
        assert_eq!(sat_sub_u8(5, 10), (0, true));
        assert_eq!(sat_sub_u8(10, 5), (5, false));
    }

    #[test]
    fn pack_pixel_555_matches_canary() {
        // Mapping (canary ppc_emit_altivec.cc::vkpkx_in_low):
        //   out[15:10] = in[24:19], out[9:5] = in[14:10], out[4:0] = in[7:3]
        // Pure shift/mask, no standalone alpha bit.
        let cases: [(u32, u16); 5] = [
            // All three colour fields: 0xC400 | 0x100 | 0x18.
            (0x0188_44C0, 0xC518),
            // Low byte 0xF8 has bit 7 set, but output bit 15 comes from
            // input bit 24 (clear here), so the high bit stays clear.
            (0x80F8_F8F8, 0x7FFF),
            // A lone source bit 7 lands in the blue field, not in bit 15.
            (0x0000_0080, 0x0010),
            // Output bit 15 is sourced from input bit 24.
            (0x0100_0000, 0x8000),
            // Saturated input sets every field bit.
            (0xFFFF_FFFF, 0xFFFF),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(pack_pixel_555(input), expected);
        }
        assert_eq!(pack_pixel_555(0x80F8_F8F8) & 0x8000, 0);
    }

    #[test]
    fn unpack_pixel_555_roundtrip() {
        // vupkhpx/vupklpx are NOTIMPLEMENTED in canary; only sanity-check
        // that a set alpha bit fills the top byte.
        let unpacked = unpack_pixel_555(0x8000 | (0x1F << 10) | (0x1F << 5) | 0x1F);
        assert_eq!(unpacked & 0xFF000000, 0xFF000000);
    }

    // ─── First-Pixels M3 pack/unpack roundtrip tests ───

    /// Quantization step of an N-bit signed-normalized value:
    /// `1.0 / ((1 << (bits - 1)) - 1)`.
    fn tol_normalized(bits: u32) -> f32 {
        let max = (1u32 << (bits - 1)) - 1;
        1.0 / max as f32
    }

    #[test]
    fn normshort2_roundtrip() {
        let packed = pack_normshort2(Vec128::from_f32x4(0.5, -0.75, 0.0, 0.0));
        let restored = unpack_normshort2(packed).as_f32x4();
        let tol = tol_normalized(16);
        assert!((restored[0] - 0.5).abs() < tol, "x got {}", restored[0]);
        assert!((restored[1] - -0.75).abs() < tol, "y got {}", restored[1]);
        assert_eq!(restored[2], 0.0);
        assert_eq!(restored[3], 1.0);
    }

    #[test]
    fn normshort4_roundtrip_extremes() {
        let expected = [1.0f32, -1.0, 0.0, 0.25];
        let packed =
            pack_normshort4(Vec128::from_f32x4(expected[0], expected[1], expected[2], expected[3]));
        let restored = unpack_normshort4(packed).as_f32x4();
        let tol = tol_normalized(16);
        for lane in 0..4 {
            assert!((restored[lane] - expected[lane]).abs() < tol);
        }
    }

    #[test]
    fn normpacked32_roundtrip() {
        let packed = pack_normpacked32(Vec128::from_f32x4(0.5, -0.5, 0.9, -1.0));
        let restored = unpack_normpacked32(packed).as_f32x4();
        let tol10 = tol_normalized(10);
        let tol2 = tol_normalized(2);
        assert!((restored[0] - 0.5).abs() < tol10, "x got {}", restored[0]);
        assert!((restored[1] - -0.5).abs() < tol10, "y got {}", restored[1]);
        assert!((restored[2] - 0.9).abs() < tol10, "z got {}", restored[2]);
        // The 2-bit W field is very coarse, so allow twice the quantization
        // step.
        assert!((restored[3] - -1.0).abs() < 2.0 * tol2, "w got {}", restored[3]);
    }

    #[test]
    fn normpacked64_roundtrip() {
        let packed = pack_normpacked64(Vec128::from_f32x4(0.5, -0.25, 0.75, 0.5));
        let restored = unpack_normpacked64(packed).as_f32x4();
        let tol20 = tol_normalized(20);
        let tol4 = tol_normalized(4);
        assert!((restored[0] - 0.5).abs() < tol20, "x got {}", restored[0]);
        assert!((restored[1] - -0.25).abs() < tol20, "y got {}", restored[1]);
        assert!((restored[2] - 0.75).abs() < tol20, "z got {}", restored[2]);
        assert!((restored[3] - 0.5).abs() < tol4, "w got {}", restored[3]);
    }

    #[test]
    fn float16_2_roundtrip_normals() {
        // Half has ~3 decimal digits of precision, so use values that are
        // exactly representable: powers of 2 and simple fractions.
        let packed = pack_float16_2(Vec128::from_f32x4(1.0, -2.5, 0.0, 0.0));
        let restored = unpack_float16_2(packed).as_f32x4();
        assert_eq!(restored[0], 1.0);
        assert_eq!(restored[1], -2.5);
        assert_eq!(restored[2], 0.0);
        assert_eq!(restored[3], 1.0);
    }

    #[test]
    fn float16_4_roundtrip_normals() {
        let expected = [0.5f32, -3.0, 16.0, -0.125];
        let packed =
            pack_float16_4(Vec128::from_f32x4(expected[0], expected[1], expected[2], expected[3]));
        let restored = unpack_float16_4(packed).as_f32x4();
        for lane in 0..4 {
            assert_eq!(restored[lane], expected[lane]);
        }
    }

    #[test]
    fn float16_handles_zero_and_infinity() {
        let round_trip = |x: f32| f16_bits_to_f32(f32_to_f16_bits(x));
        // Both zeros survive, sign included.
        assert_eq!(round_trip(0.0), 0.0);
        assert_eq!(round_trip(-0.0).to_bits(), (-0.0f32).to_bits());
        // +inf stays +inf.
        let inf_back = round_trip(f32::INFINITY);
        assert!(inf_back.is_infinite() && inf_back > 0.0);
        // A value beyond the half range overflows to infinity.
        assert!(round_trip(65536.0).is_infinite());
    }

    #[test]
    fn pack_type_enum_maps_canary_values() {
        use D3dPackType::*;
        assert!(matches!(D3dPackType::from_immediate(0), D3dColor));
        assert!(matches!(D3dPackType::from_immediate(1), NormShort2));
        assert!(matches!(D3dPackType::from_immediate(2), NormPacked32));
        assert!(matches!(D3dPackType::from_immediate(3), Float16_2));
        assert!(matches!(D3dPackType::from_immediate(4), NormShort4));
        assert!(matches!(D3dPackType::from_immediate(5), Float16_4));
        assert!(matches!(D3dPackType::from_immediate(6), NormPacked64));
        assert!(matches!(D3dPackType::from_immediate(7), Other(7)));
    }
}
|
||||
550
crates/xenia-cpu/tests/disasm_goldens.rs
Normal file
550
crates/xenia-cpu/tests/disasm_goldens.rs
Normal file
@@ -0,0 +1,550 @@
|
||||
//! Assert-based goldens for the PPC disassembler.
|
||||
//!
|
||||
//! Each test owns an inline list of `(raw, addr, label)` cases. On a
|
||||
//! normal run, the test reads the corresponding fixture JSON and asserts
|
||||
//! that `format(decode(raw, addr))` reproduces every field exactly. On
|
||||
//! first creation (fixture file missing) or with `REGEN_GOLDENS=1` set,
|
||||
//! the test (re)writes the fixture from `format()` output.
|
||||
//!
|
||||
//! Workflow:
|
||||
//! ```sh
|
||||
//! cargo test -p xenia-cpu --test disasm_goldens # assert
|
||||
//! REGEN_GOLDENS=1 cargo test -p xenia-cpu --test disasm_goldens # regen
|
||||
//! ```
|
||||
//!
|
||||
//! The hand-encoded test cases below cover the silent-bug regression
|
||||
//! cases that lived in the old println-based `disasm_audit.rs` harness
|
||||
//! (now deleted).
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use xenia_cpu::decoder::{DecodedInstr, decode};
|
||||
use xenia_cpu::disasm::format;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
|
||||
struct GoldenRow {
|
||||
label: String,
|
||||
raw: String,
|
||||
addr: String,
|
||||
mnemonic: String,
|
||||
operands: String,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
ext_mnemonic: Option<String>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
ext_operands: Option<String>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
branch_target: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
struct GoldenFile {
|
||||
rows: Vec<GoldenRow>,
|
||||
}
|
||||
|
||||
fn fixture_path(name: &str) -> PathBuf {
|
||||
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.join("tests")
|
||||
.join("golden")
|
||||
.join(name)
|
||||
}
|
||||
|
||||
/// Build a VMX128 VX128-form (or VX128_R/_2) instruction word using canary's
/// 7-bit register layout.
///
/// Host-bit placement (PPC bit numbers in parentheses):
/// * `op6` — low 6 bits at host 26-31.
/// * `vd`  — bits 0-4 at host 21-25 (PPC 6-10), bits 5-6 at host 2-3 (PPC 28-29).
/// * `va`  — bits 0-4 at host 16-20 (PPC 11-15), bit 5 at host 5 (PPC 26),
///   bit 6 at host 10 (PPC 21).
/// * `vb`  — bits 0-4 at host 11-15 (PPC 16-20), bits 5-6 at host 0-1 (PPC 30-31).
///
/// `secondary_bits` is OR-ed in unmasked and carries whatever secondary
/// opcode, VC, Rc, or key bits the caller needs.
fn encode_vx128(op6: u32, vd: u32, va: u32, vb: u32, secondary_bits: u32) -> u32 {
    let opcode = (op6 & 0x3F) << 26;
    let vd_field = ((vd & 0x1F) << 21) | (((vd >> 5) & 0x3) << 2);
    let va_field = ((va & 0x1F) << 16) | (((va >> 5) & 0x1) << 5) | (((va >> 6) & 0x1) << 10);
    let vb_field = ((vb & 0x1F) << 11) | ((vb >> 5) & 0x3);
    opcode | vd_field | va_field | vb_field | secondary_bits
}
|
||||
|
||||
fn build_rows(cases: &[(u32, u32, &str)]) -> Vec<GoldenRow> {
|
||||
cases
|
||||
.iter()
|
||||
.map(|&(raw, addr, label)| {
|
||||
let d = decode(raw, addr);
|
||||
let t = format(&d);
|
||||
GoldenRow {
|
||||
label: label.to_string(),
|
||||
raw: format!("0x{raw:08X}"),
|
||||
addr: format!("0x{addr:08X}"),
|
||||
mnemonic: t.mnemonic,
|
||||
operands: t.operands,
|
||||
ext_mnemonic: t.ext_mnemonic,
|
||||
ext_operands: t.ext_operands,
|
||||
branch_target: t.branch_target.map(|t| format!("0x{t:08X}")),
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Compare what `format()` produces against the committed JSON snapshot.
|
||||
/// Set `REGEN_GOLDENS=1` to overwrite the snapshot from current output.
|
||||
/// Missing snapshot is treated as "first creation": writes and panics so
|
||||
/// CI can't accidentally accept blank goldens.
|
||||
fn assert_or_regen(fixture_name: &str, cases: &[(u32, u32, &str)]) {
|
||||
let rows = build_rows(cases);
|
||||
let path = fixture_path(fixture_name);
|
||||
let regen = std::env::var("REGEN_GOLDENS").is_ok();
|
||||
|
||||
if regen || !path.exists() {
|
||||
if let Some(parent) = path.parent() {
|
||||
std::fs::create_dir_all(parent).unwrap();
|
||||
}
|
||||
let serialized = serde_json::to_string_pretty(&GoldenFile { rows }).unwrap();
|
||||
std::fs::write(&path, serialized + "\n").unwrap();
|
||||
if !regen {
|
||||
panic!(
|
||||
"Generated fixture {} (was missing). Inspect, commit, then re-run.",
|
||||
path.display()
|
||||
);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
let src = std::fs::read_to_string(&path).unwrap();
|
||||
let golden: GoldenFile = serde_json::from_str(&src).unwrap();
|
||||
assert_eq!(
|
||||
rows.len(),
|
||||
golden.rows.len(),
|
||||
"row count differs from {} (live={}, fixture={}). Run with REGEN_GOLDENS=1 if the test cases changed intentionally.",
|
||||
path.display(),
|
||||
rows.len(),
|
||||
golden.rows.len()
|
||||
);
|
||||
for (i, (got, expected)) in rows.iter().zip(golden.rows.iter()).enumerate() {
|
||||
assert_eq!(
|
||||
got, expected,
|
||||
"row {} ({}) differs in {}\n live: {got:#?}\n fixture: {expected:#?}",
|
||||
i,
|
||||
expected.label,
|
||||
path.display()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Encoding helpers ────────────────────────────────────────────────────────
|
||||
// PPC bit numbering: bit 0 is MSB, bit 31 is LSB. Most helpers below emit
|
||||
// instructions in canonical hand-readable form: opcode << 26 | <fields>.
|
||||
|
||||
/// Encode an opcode-31 XO-form word: `rd`, `ra`, `rb` register fields, the
/// `oe` flag at host bit 10, the extended opcode `xo` from host bit 1 up, and
/// `rc` in host bit 0. Fields are OR-ed in unmasked.
#[allow(clippy::too_many_arguments)]
fn xform_xo3(rd: u32, ra: u32, rb: u32, oe: u32, xo: u32, rc: u32) -> u32 {
    let primary = 31 << 26;
    let registers = (rd << 21) | (ra << 16) | (rb << 11);
    let opcode_ext = (oe << 10) | (xo << 1);
    primary | registers | opcode_ext | rc
}
|
||||
|
||||
/// Encode an opcode-31 X-form word with `rs` at host 21, `ra` at host 16,
/// `rb` at host 11, extended opcode `xo` from host bit 1 up, and `rc` in
/// host bit 0. Fields are OR-ed in unmasked.
fn xform_logic(rs: u32, ra: u32, rb: u32, xo: u32, rc: u32) -> u32 {
    let primary = 31 << 26;
    let registers = (rs << 21) | (ra << 16) | (rb << 11);
    primary | registers | (xo << 1) | rc
}
|
||||
|
||||
/// Encode a D-form word: primary opcode `op`, `rt` at host 21, `ra` at host
/// 16, and the signed 16-bit immediate stored as its two's-complement bit
/// pattern in the low half-word. `op`, `rt`, and `ra` are not masked.
fn dform(op: u32, rt: u32, ra: u32, imm: i16) -> u32 {
    let low_half = u32::from(imm as u16);
    let high_half = (op << 26) | (rt << 21) | (ra << 16);
    high_half | low_half
}
|
||||
|
||||
/// Encode an I-form branch: opcode 18 | LI<<2 | AA<<1 | LK.
///
/// `target_disp` is a byte displacement; only bits 2-25 of its
/// two's-complement pattern are kept as the LI field.
fn iform_b(target_disp: i32, aa: u32, lk: u32) -> u32 {
    const LI_MASK: u32 = 0x03FF_FFFC;
    let flags = (aa << 1) | lk;
    (18 << 26) | ((target_disp as u32) & LI_MASK) | flags
}
|
||||
|
||||
/// Encode a B-form conditional branch:
/// opcode 16 | BO<<21 | BI<<16 | BD<<2 | AA<<1 | LK.
///
/// `target_disp` is a byte displacement; only bits 2-15 of its
/// two's-complement pattern are kept as the BD field.
fn bform_bc(bo: u32, bi: u32, target_disp: i32, aa: u32, lk: u32) -> u32 {
    const BD_MASK: u32 = 0x0000_FFFC;
    let condition = (bo << 21) | (bi << 16);
    let flags = (aa << 1) | lk;
    (16 << 26) | condition | ((target_disp as u32) & BD_MASK) | flags
}
|
||||
|
||||
/// Encode `bclr[l]` in XL-form: opcode 19 | BO<<21 | BI<<16 | XO=16<<1 | LK.
fn xlform_bclr(bo: u32, bi: u32, lk: u32) -> u32 {
    const XO_BCLR: u32 = 16;
    let condition = (bo << 21) | (bi << 16);
    (19 << 26) | condition | (XO_BCLR << 1) | lk
}
|
||||
|
||||
/// Encode `bcctr[l]` in XL-form: opcode 19 | BO<<21 | BI<<16 | XO=528<<1 | LK.
fn xlform_bcctr(bo: u32, bi: u32, lk: u32) -> u32 {
    const XO_BCCTR: u32 = 528;
    let condition = (bo << 21) | (bi << 16);
    (19 << 26) | condition | (XO_BCCTR << 1) | lk
}
|
||||
|
||||
/// Encode `rlwinm[.]`: opcode 21 with `rs` at host 21, `ra` at host 16, shift
/// `sh` at host 11, mask begin `mb` at host 6, mask end `me` at host 1, and
/// `rc` in host bit 0. Fields are OR-ed in unmasked.
fn rlwinm(rs: u32, ra: u32, sh: u32, mb: u32, me: u32, rc: u32) -> u32 {
    let registers = (rs << 21) | (ra << 16);
    let rotate_and_mask = (sh << 11) | (mb << 6) | (me << 1);
    (21 << 26) | registers | rotate_and_mask | rc
}
|
||||
|
||||
/// Encode `rldicl[.]` (opcode 30, MD-form, extended opcode 0).
///
/// The 6-bit shift and mask-begin values are split across the word:
/// * sh[4:0] at PPC bits 16-20 (host 11-15), sh[5] at PPC bit 30 (host 1);
/// * mb[4:0] at PPC bits 21-25 (host 6-10), mb[5] at PPC bit 26 (host 5).
///
/// `rs`, `ra`, and `rc` are OR-ed in unmasked.
fn rldicl(rs: u32, ra: u32, sh: u32, mb: u32, rc: u32) -> u32 {
    let (sh_low, sh_top) = (sh & 0x1F, (sh >> 5) & 1);
    let (mb_low, mb_top) = (mb & 0x1F, (mb >> 5) & 1);

    let header = (30 << 26) | (rs << 21) | (ra << 16);
    let shift = (sh_low << 11) | (sh_top << 1);
    let mask_begin = (mb_low << 6) | (mb_top << 5);
    // The extended-opcode field at host bits 2-4 stays zero for rldicl.
    header | shift | mask_begin | rc
}
|
||||
|
||||
/// Encode `mfspr rd, spr` (opcode 31, extended opcode 339).
///
/// The 10-bit SPR number is stored with its two 5-bit halves swapped: the
/// low half lands in the upper five bits of the field and vice versa.
fn mfspr(rd: u32, spr: u32) -> u32 {
    let low_half = spr & 0x1F;
    let high_half = (spr >> 5) & 0x1F;
    let spr_field = (low_half << 5) | high_half;
    (31 << 26) | (rd << 21) | (spr_field << 11) | (339 << 1)
}
|
||||
|
||||
/// Encode `mtspr spr, rs` (opcode 31, extended opcode 467).
///
/// The 10-bit SPR number is stored with its two 5-bit halves swapped: the
/// low half lands in the upper five bits of the field and vice versa.
fn mtspr(rs: u32, spr: u32) -> u32 {
    let low_half = spr & 0x1F;
    let high_half = (spr >> 5) & 0x1F;
    let spr_field = (low_half << 5) | high_half;
    (31 << 26) | (rs << 21) | (spr_field << 11) | (467 << 1)
}
|
||||
|
||||
// ── Tests ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Golden check for instructions that should render with their base mnemonic.
///
/// Every row is `(raw word, address, label)`. Labels are stored in the
/// fixture and compared, so they are part of the expected data.
#[test]
fn base_mnemonics() {
    // All rows are decoded at the same address.
    const PC: u32 = 0x82000000;

    let cases: &[(u32, u32, &str)] = &[
        // X-form ALU (Rc and OE bits)
        (xform_xo3(3, 4, 5, 0, 266, 0), PC, "add r3,r4,r5"),
        (xform_xo3(3, 4, 5, 0, 266, 1), PC, "add. r3,r4,r5"),
        (xform_xo3(3, 4, 5, 1, 266, 0), PC, "addo r3,r4,r5"),
        (xform_xo3(3, 4, 5, 1, 266, 1), PC, "addo. r3,r4,r5"),
        (xform_xo3(3, 4, 0, 0, 104, 0), PC, "neg r3,r4"),
        (xform_xo3(3, 4, 5, 0, 235, 0), PC, "mullw r3,r4,r5"),
        (xform_xo3(3, 4, 5, 0, 491, 0), PC, "divw r3,r4,r5"),
        (xform_xo3(3, 4, 5, 0, 75, 1), PC, "mulhw. r3,r4,r5"),
        (xform_xo3(3, 4, 5, 0, 11, 1), PC, "mulhwu. r3,r4,r5"),
        (xform_xo3(3, 4, 5, 0, 233, 0), PC, "mulld r3,r4,r5"),
        // X-form logical
        (xform_logic(4, 3, 5, 28, 0), PC, "and r3,r4,r5"),
        (xform_logic(4, 3, 5, 444, 0), PC, "or r3,r4,r5 (non-mr: rs!=rb)"),
        (xform_logic(4, 3, 5, 316, 0), PC, "xor r3,r4,r5"),
        (xform_logic(4, 3, 5, 124, 0), PC, "nor r3,r4,r5"),
        (xform_logic(4, 3, 5, 476, 0), PC, "nand r3,r4,r5"),
        (xform_logic(4, 3, 5, 284, 0), PC, "eqv r3,r4,r5"),
        (xform_logic(4, 3, 5, 60, 0), PC, "andc r3,r4,r5"),
        (xform_logic(4, 3, 5, 412, 0), PC, "orc r3,r4,r5"),
        // X-form shift
        (xform_logic(4, 3, 5, 24, 0), PC, "slw r3,r4,r5"),
        (xform_logic(4, 3, 5, 536, 0), PC, "srw r3,r4,r5"),
        (xform_logic(4, 3, 5, 792, 0), PC, "sraw r3,r4,r5"),
        (xform_logic(4, 3, 5, 27, 0), PC, "sld r3,r4,r5"),
        (xform_logic(4, 3, 5, 539, 0), PC, "srd r3,r4,r5"),
        // srawi / sradi (immediate shifts)
        ((31 << 26) | (4 << 21) | (3 << 16) | (16 << 11) | (824 << 1), PC, "srawi r3,r4,16"),
        // Atomics
        ((31 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1, PC, "stwcx. r3,r4,r5"),
        ((31 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (214 << 1) | 1, PC, "stdcx. r3,r4,r5"),
        ((31 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1), PC, "lwarx r3,r4,r5"),
        ((31 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (84 << 1), PC, "ldarx r3,r4,r5"),
        // Compares
        (dform(11, 0, 3, 16), PC, "cmpwi cr0, r3, 16"),
        (dform(11, 2 << 2, 3, 16), PC, "cmpwi cr2, r3, 16"),
        (dform(10, 0, 3, 16), PC, "cmplwi cr0, r3, 16"),
        ((31 << 26) | (3 << 16) | (4 << 11), PC, "cmpw r3,r4 in cr0"),
        ((31 << 26) | (1 << 21) | (3 << 16) | (4 << 11), PC, "cmpd r3,r4"),
        ((31 << 26) | (3 << 16) | (4 << 11) | (32 << 1), PC, "cmplw r3,r4"),
        // D-form ALU/load/store
        (dform(14, 3, 1, 16), PC, "addi r3, r1, 16"),
        (dform(15, 3, 1, 0x100), PC, "addis r3, r1, 0x100 (ra!=0)"),
        (dform(7, 3, 4, 5), PC, "mulli r3, r4, 5"),
        (dform(8, 3, 4, 5), PC, "subfic r3, r4, 5"),
        (dform(12, 3, 4, 16), PC, "addic r3, r4, 16"),
        (dform(13, 3, 4, 16), PC, "addic. r3, r4, 16"),
        (dform(24, 3, 4, 0x10), PC, "ori r4, r3, 0x10 (non-nop)"),
        (dform(25, 3, 4, 0x10), PC, "oris r4, r3, 0x10"),
        (dform(26, 3, 4, 0x10), PC, "xori r4, r3, 0x10"),
        (dform(28, 3, 4, 0x10), PC, "andi. r4, r3, 0x10"),
        // Loads/stores D-form
        (dform(32, 5, 1, 0x20), PC, "lwz r5, 0x20(r1)"),
        (dform(36, 5, 1, 0x20), PC, "stw r5, 0x20(r1)"),
        (dform(34, 5, 1, 0x20), PC, "lbz r5, 0x20(r1)"),
        (dform(40, 5, 1, 0x20), PC, "lhz r5, 0x20(r1)"),
        (dform(48, 5, 1, 0x20), PC, "lfs f5, 0x20(r1)"),
        (dform(50, 5, 1, 0x20), PC, "lfd f5, 0x20(r1)"),
        (dform(54, 5, 1, 0x20), PC, "stfd f5, 0x20(r1)"),
        // DS-form 64-bit loads
        ((58u32 << 26) | (5 << 21) | (1 << 16) | 0x20, PC, "ld r5, 0x20(r1)"),
        ((62u32 << 26) | (5 << 21) | (1 << 16) | 0x20, PC, "std r5, 0x20(r1)"),
        // Sync / barrier (parameterless)
        ((31 << 26) | (598 << 1), PC, "sync 0 (extends to sync)"),
        ((19 << 26) | (150 << 1), PC, "isync"),
        ((31 << 26) | (854 << 1), PC, "eieio"),
        // Cache hints
        ((31 << 26) | (1 << 16) | (2 << 11) | (54 << 1), PC, "dcbst r1, r2"),
        ((31 << 26) | (1 << 16) | (2 << 11) | (86 << 1), PC, "dcbf r1, r2"),
        ((31 << 26) | (1 << 16) | (2 << 11) | (278 << 1), PC, "dcbt r1, r2"),
        ((31 << 26) | (1 << 16) | (2 << 11) | (1014 << 1), PC, "dcbz r1, r2"),
        ((31 << 26) | (1 << 21) | (1 << 16) | (2 << 11) | (1014 << 1), PC, "dcbz128 r1, r2"),
        // CR logical (without simplification triggers)
        ((19 << 26) | (4 << 21) | (5 << 16) | (6 << 11) | (33 << 1), PC, "crnor 4,5,6 (no simplify)"),
        ((19 << 26) | (4 << 21) | (5 << 16) | (6 << 11) | (257 << 1), PC, "crand 4,5,6"),
        ((19 << 26) | (4 << 21) | (5 << 16) | (6 << 11) | (449 << 1), PC, "cror 4,5,6 (no simplify)"),
        // Trap (no simplification: TO=11 doesn't match the table)
        ((31 << 26) | (11 << 21) | (3 << 16) | (4 << 11) | (4 << 1), PC, "tw 11, r3, r4 (uncommon TO)"),
        ((2u32 << 26) | (11 << 21) | (3 << 16) | (123u32 & 0xFFFF), PC, "tdi 11, r3, 123"),
        // mtcr (extended): mtcrf 0xFF, r5
        ((31 << 26) | (5 << 21) | (0xFF << 12) | (144 << 1), PC, "mtcrf 0xFF, r5 → mtcr"),
        // mfcr / mfmsr / mtmsr / mtmsrd
        ((31 << 26) | (5 << 21) | (19 << 1), PC, "mfcr r5"),
        ((31 << 26) | (5 << 21) | (83 << 1), PC, "mfmsr r5"),
        ((31 << 26) | (5 << 21) | (146 << 1), PC, "mtmsr r5"),
        ((31 << 26) | (5 << 21) | (178 << 1), PC, "mtmsrd r5"),
        // FPU base
        ((63u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (21 << 1), PC, "fadd f3, f4, f5"),
        ((63u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1), PC, "fsub f3, f4, f5"),
        ((63u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (18 << 1), PC, "fdiv f3, f4, f5"),
        // NOTE(review): `(3 << 21) | (5 << 21)` ORs to 7 in the same field, so
        // this word does not carry the f3 destination the label names. The
        // raw value is pinned by the committed fixture; fixing it needs a
        // regen — confirm the intended encoding.
        ((63u32 << 26) | (3 << 21) | (5 << 21) | (5 << 11) | (25 << 1), PC, "fmul f3, f0, f5 (encoded)"),
        ((63u32 << 26) | (3 << 21) | (4 << 16) | (40 << 1), PC, "fneg f3, f4"),
        ((63u32 << 26) | (3 << 21) | (4 << 16) | (72 << 1), PC, "fmr f3, f4"),
        // mtfsf — XFL form (Fix 1). FM at LSB bits 17-24 (PPC bits 7-14).
        // Encoding: opcode 63 | FM<<17 | frB<<11 | XO=711<<1 | Rc.
        ((63u32 << 26) | (0xFF << 17) | (5 << 11) | (711 << 1), PC, "mtfsf 0xFF, f5 (Rc=0)"),
        ((63u32 << 26) | (0xFF << 17) | (5 << 11) | (711 << 1) | 1, PC, "mtfsf. 0xFF, f5 (Rc=1)"),
    ];
    assert_or_regen("base_mnemonics.json", cases);
}
|
||||
|
||||
/// Golden check for instructions that should render with an extended
/// (simplified) mnemonic in addition to the base form.
///
/// Every row is `(raw word, address, label)`. Labels are stored in the
/// fixture and compared, so they are part of the expected data.
#[test]
fn extended_mnemonics() {
    let cases: &[(u32, u32, &str)] = &[
        // ori r0, r0, 0 → nop
        (dform(24, 0, 0, 0), 0x82000000, "nop"),
        // addi r3, r0, imm → li
        (dform(14, 3, 0, 16), 0x82000000, "li r3, 16"),
        (dform(14, 3, 0, -1), 0x82000000, "li r3, -1"),
        // addi r3, r4, neg → subi
        (dform(14, 3, 4, -16), 0x82000000, "subi r3, r4, 16"),
        // addis r3, r0, imm → lis
        (dform(15, 3, 0, 0x1234), 0x82000000, "lis r3, 0x1234"),
        // addis r3, r4, neg → subis
        (dform(15, 3, 4, -1), 0x82000000, "subis r3, r4, 0xFFFF"),
        // or rA, rS, rS → mr
        (xform_logic(4, 3, 4, 444, 0), 0x82000000, "mr r3, r4"),
        (xform_logic(4, 3, 4, 444, 1), 0x82000000, "mr. r3, r4"),
        // and rA, rS, rS → mr (also)
        (xform_logic(4, 3, 4, 28, 0), 0x82000000, "mr (via and)"),
        // nor rA, rS, rS → not
        (xform_logic(4, 3, 4, 124, 0), 0x82000000, "not r3, r4"),
        // subf → sub (operand swap)
        (xform_xo3(3, 4, 5, 0, 40, 0), 0x82000000, "subf → sub r3, r5, r4"),
        // rlwinm simplifications
        (rlwinm(4, 3, 4, 0, 31 - 4, 0), 0x82000000, "slwi r3, r4, 4"),
        (rlwinm(4, 3, 32 - 4, 4, 31, 0), 0x82000000, "srwi r3, r4, 4"),
        (rlwinm(4, 3, 8, 0, 31, 0), 0x82000000, "rotlwi r3, r4, 8"),
        (rlwinm(4, 3, 0, 4, 31, 0), 0x82000000, "clrlwi r3, r4, 4"),
        (rlwinm(4, 3, 0, 0, 27, 0), 0x82000000, "clrrwi r3, r4, 4"),
        (rlwinm(4, 3, 8, 0, 7, 0), 0x82000000, "extlwi r3, r4, 8, 8"),
        // rlwinm with Rc
        (rlwinm(4, 3, 4, 0, 31 - 4, 1), 0x82000000, "slwi. r3, r4, 4"),
        // rlwinm Sylpheed regression
        (rlwinm(11, 11, 0, 31, 31, 1), 0x82000000, "rlwinm. r11,r11,0,31,31 (no simplify)"),
        // rldicl simplifications
        (rldicl(4, 3, 0, 32, 0), 0x82000000, "clrldi r3, r4, 32"),
        (rldicl(4, 3, 64u32 - 8, 8, 0), 0x82000000, "srdi r3, r4, 8"),
        (rldicl(4, 3, 8, 0, 0), 0x82000000, "rotldi r3, r4, 8"),
        // cmpi / cmpli → cmpwi/cmpdi/cmplwi/cmpldi
        (dform(11, 0, 3, 16), 0x82000000, "cmpwi cr0, r3, 16"),
        // L=1 is the low bit of the 5-bit field at host 21, so rt=1 sets it.
        // The previous form passed pre-shifted bits as `rt`, which were all
        // shifted out of the word; the encoded value (0x2C230010) is unchanged.
        (dform(11, 1, 3, 16), 0x82000000, "cmpdi (L=1) variant"),
        // bclr 20, 0 → blr
        (xlform_bclr(20, 0, 0), 0x82000000, "blr"),
        (xlform_bclr(20, 0, 1), 0x82000000, "blrl"),
        // bcctr 20, 0 → bctr
        (xlform_bcctr(20, 0, 0), 0x82000000, "bctr"),
        (xlform_bcctr(20, 0, 1), 0x82000000, "bctrl"),
        // bclr conditional
        (xlform_bclr(12, 2, 0), 0x82000000, "beqlr (BO=12, BI=2 → cr0.eq true)"),
        (xlform_bclr(4, 2, 0), 0x82000000, "bnelr"),
        // bc with full BO/BI: branch always (BO=20)
        (bform_bc(20, 0, 0x40, 0, 0), 0x82000000, "bc → b 0x82000040"),
        (bform_bc(20, 0, 0x40, 0, 1), 0x82000000, "bc l → bl 0x82000040"),
        // Conditional bc → beq/bne/etc
        (bform_bc(12, 2, 0x40, 0, 0), 0x82000000, "bc 12,cr0.eq → beq 0x82000040"),
        (bform_bc(4, 2, 0x40, 0, 0), 0x82000000, "bc 4,cr0.eq → bne 0x82000040"),
        (bform_bc(12, 0, 0x40, 0, 0), 0x82000000, "bc 12,cr0.lt → blt 0x82000040"),
        (bform_bc(4, 0, 0x40, 0, 0), 0x82000000, "bc 4,cr0.lt → bge 0x82000040"),
        (bform_bc(12, 1, 0x40, 0, 0), 0x82000000, "bc 12,cr0.gt → bgt 0x82000040"),
        (bform_bc(4, 1, 0x40, 0, 0), 0x82000000, "bc 4,cr0.gt → ble 0x82000040"),
        // Conditional with non-zero CR field
        (bform_bc(12, 2 + 8, 0x40, 0, 0), 0x82000000, "bc 12, cr2.eq → beq cr2, 0x...040"),
        // bdnz / bdz (decrement-CTR branches)
        (bform_bc(16, 0, 0x40, 0, 0), 0x82000000, "bdnz 0x82000040"),
        (bform_bc(18, 0, 0x40, 0, 0), 0x82000000, "bdz 0x82000040"),
        // I-form branches
        (iform_b(0x40, 0, 0), 0x82000000, "b +0x40 → 0x82000040"),
        (iform_b(0x40, 0, 1), 0x82000000, "bl +0x40 → 0x82000040"),
        (iform_b(0x40, 1, 0), 0x82000000, "ba 0x40 absolute"),
        (iform_b(0x40, 1, 1), 0x82000000, "bla 0x40 absolute"),
        // Trap immediate simplifications
        ((2u32 << 26) | (4 << 21) | (3 << 16) | (123u32 & 0xFFFF), 0x82000000, "tdeqi r3, 123"),
        ((3u32 << 26) | (16 << 21) | (3 << 16) | (123u32 & 0xFFFF), 0x82000000, "twlti r3, 123"),
        // mfspr → mflr / mfctr / mfxer
        (mfspr(3, 8), 0x82000000, "mflr r3"),
        (mfspr(3, 9), 0x82000000, "mfctr r3"),
        (mfspr(3, 1), 0x82000000, "mfxer r3"),
        // mtspr → mtlr / mtctr / mtxer
        (mtspr(3, 8), 0x82000000, "mtlr r3"),
        (mtspr(3, 9), 0x82000000, "mtctr r3"),
        (mtspr(3, 1), 0x82000000, "mtxer r3"),
        // crnor with same source bits → crnot
        ((19 << 26) | (4 << 21) | (5 << 16) | (5 << 11) | (33 << 1), 0x82000000, "crnot 4, 5"),
        // crxor with all same → crclr
        ((19 << 26) | (4 << 21) | (4 << 16) | (4 << 11) | (193 << 1), 0x82000000, "crclr 4"),
        // creqv with all same → crset
        ((19 << 26) | (4 << 21) | (4 << 16) | (4 << 11) | (289 << 1), 0x82000000, "crset 4"),
        // cror with same source bits → crmove
        ((19 << 26) | (4 << 21) | (5 << 16) | (5 << 11) | (449 << 1), 0x82000000, "crmove 4, 5"),
        // sync L=1 → lwsync
        ((31 << 26) | (1 << 21) | (598 << 1), 0x82000000, "lwsync"),
        // tw 31, 0, 0 → trap
        ((31 << 26) | (31 << 21) | (4 << 1), 0x82000000, "trap"),
        // Fix 2: bclr/bcctr with BO=20 and BI≠0 still emits blr/bctr ext.
        // BO=20 ignores both CTR test and CR test, so BI is don't-care.
        (xlform_bclr(20, 4, 0), 0x82000000, "blr (BO=20, BI=4 — BI is don't-care)"),
        (xlform_bclr(20, 7, 1), 0x82000000, "blrl (BO=20, BI=7)"),
        (xlform_bcctr(20, 4, 0), 0x82000000, "bctr (BO=20, BI=4)"),
        // Fix 3: trap unsigned simplified mnemonics (TO=1, 2, 5, 6 — logical
        // compare conditions). Register form (tw/td) and immediate (twi/tdi).
        ((31u32 << 26) | (2 << 21) | (3 << 16) | (4 << 11) | (4 << 1), 0x82000000, "twllt r3, r4 (TO=2)"),
        ((31u32 << 26) | (1 << 21) | (3 << 16) | (4 << 11) | (4 << 1), 0x82000000, "twlgt r3, r4 (TO=1)"),
        ((31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (68 << 1), 0x82000000, "tdlge r3, r4 (TO=5)"),
        ((31u32 << 26) | (6 << 21) | (3 << 16) | (4 << 11) | (4 << 1), 0x82000000, "twlle r3, r4 (TO=6)"),
        ((3u32 << 26) | (2 << 21) | (3 << 16) | (16u32 & 0xFFFF), 0x82000000, "twllti r3, 16"),
        ((2u32 << 26) | (5 << 21) | (3 << 16) | (16u32 & 0xFFFF), 0x82000000, "tdlgei r3, 16"),
    ];
    assert_or_regen("extended_mnemonics.json", cases);
}
|
||||
|
||||
/// Golden check for vector register rendering: standard 5-bit VMX registers
/// plus VMX128 encodings that reach the full 0-127 register range.
#[test]
fn vmx128_registers() {
    // All rows are decoded at the same address.
    const PC: u32 = 0x82000000;

    // vsrw128 secondary: 0x000001D0 (decode_op6 key5 = 0b011101).
    fn vsrw128(vd: u32, vb: u32) -> u32 {
        encode_vx128(6, vd, 0, vb, 0x000001D0)
    }

    // vpermwi128: PERMl at PPC 11-15, PERMh at PPC 23-25, key1 sets bit 22 + bit 27.
    fn vpermwi128(vd: u32, vb: u32, perm: u32) -> u32 {
        let perm_lo = perm & 0x1F;
        let perm_hi = (perm >> 5) & 0x7;
        let word = (6u32 << 26)
            | ((vd & 0x1F) << 21)
            | (((vd >> 5) & 0x3) << 2) // VD128h
            | (perm_lo << 16)
            | ((vb & 0x1F) << 11)
            | ((vb >> 5) & 0x3) // VB128h
            | (perm_hi << 6) // PERMh at PPC 23-25
            | (1 << 9) // bit 22 (key1 high)
            | (1 << 4); // bit 27 (key1 low)
        // PPC 21 = 0 for vpermwi128
        word & !(1 << 10)
    }

    // vrlimi128: IMM at PPC 11-15, z at PPC 24-25, key2 = 0b1110001 over
    // bits 21-23 + 26-27 → bits 21,22,23 = 1, bit 26 = 0, bit 27 = 1.
    fn vrlimi128(vd: u32, vb: u32, imm: u32, z: u32) -> u32 {
        let key_bits = (1 << 10) | (1 << 9) | (1 << 8) | (1 << 4); // PPC 21, 22, 23, 27
        (6u32 << 26)
            | ((vd & 0x1F) << 21)
            | (((vd >> 5) & 0x3) << 2) // VD128h
            | ((imm & 0x1F) << 16)
            | ((vb & 0x1F) << 11)
            | ((vb >> 5) & 0x3) // VB128h
            | ((z & 0x3) << 6) // z at PPC 24-25 = host 6-7
            | key_bits
    }

    // Standard VMX (op=4) — 5-bit registers v0..v31. Verifies that the
    // low-register path renders correctly through the new formatter.
    // Most rows share vd=3, va=4, vb=5 and differ only in the low key bits.
    let op4 = 4u32 << 26;
    let v3_v4_v5 = op4 | (3 << 21) | (4 << 16) | (5 << 11);
    let std_vmx = [
        // 3-op key=0
        (v3_v4_v5, PC, "vaddubm v3, v4, v5"),
        // vx=10
        (v3_v4_v5 | 10, PC, "vaddfp v3, v4, v5"),
        // vx=1028
        (v3_v4_v5 | 1028, PC, "vand v3, v4, v5"),
        // vx=1156
        (v3_v4_v5 | 1156, PC, "vor v3, v4, v5"),
        // vx=1220
        (v3_v4_v5 | 1220, PC, "vxor v3, v4, v5"),
        // va_key=42 (4-op), vc=6
        (v3_v4_v5 | (6 << 6) | 42, PC, "vsel v3,v4,v5,v6"),
        // va_key=43
        (v3_v4_v5 | (6 << 6) | 43, PC, "vperm v3,v4,v5,v6"),
        // va_key=46 (operand swap: vd, va, vc, vb)
        (v3_v4_v5 | (6 << 6) | 46, PC, "vmaddfp v3, v4, v6, v5 (swap)"),
        // vx=1540
        (op4 | (3 << 21) | 1540, PC, "mfvscr v3"),
        // vx=1604, vb=v5
        (op4 | (5 << 11) | 1604, PC, "mtvscr v5"),
    ];

    // VMX128 op=5: vperm128 v3, v4, v5, vc=0. Canary FormatVX128: VD low
    // at PPC 6-10, VA low at PPC 11-15, VB low at PPC 16-20, VC at PPC 23-25.
    // key1 = (bit22<<5)|bit27 = 0 selects vperm128.
    let vmx128_op5 = [(encode_vx128(5, 3, 4, 5, 0), PC, "vperm128 v3, v4, v5, 0 (canary)")];

    // VMX128 op=6 — exercise full 0-127 vd128 range under canary's layout.
    // VD128h is at PPC 28-29 (host 2-3): no overlap with secondary opcode key,
    // so vd can be freely 0-127 for any op6 instruction.
    let vmx128_high = [
        (vsrw128(0, 12), PC, "vsrw128 v0, v0, v12 (canary, vd_hi=00)"),
        (vsrw128(32, 12), PC, "vsrw128 v32, v0, v12 (canary, VD128h=01)"),
        (vpermwi128(64, 12, 0xE4), PC, "vpermwi128 v64, v12, 0xE4 (canary, VD128h=10)"),
        (vrlimi128(96, 12, 4, 3), PC, "vrlimi128 v96, v12, 4, 3 (canary, VD128h=11)"),
        (vrlimi128(127, 95, 4, 3), PC, "vrlimi128 v127, v95, 4, 3 (canary)"),
    ];

    // Fix 4: VMX128 multiply-add 4-operand layouts. Per canary, the addend
    // is the VD register re-used; operand order differs between the three
    // mnemonics. All three use vd=3, va=35, vb=5: va's bit 5 is what
    // `encode_vx128` places at PPC 26, so VA differs from VD and the layout
    // can't trivially alias VD. The secondary bits select the mnemonic; the
    // resulting words are 0x146328F0, 0x14632930 and 0x14632970.
    //
    // layout (canary):
    //   vmaddfp128   VD, VA, VB, VD     → "v3, v35, v5, v3"
    //   vmaddcfp128  VD, VA, VD, VB     → "v3, v35, v3, v5"
    //   vnmsubfp128  VD, VA, VD, VB     → "v3, v35, v3, v5"
    let vmx128_4op = [
        (encode_vx128(5, 3, 35, 5, 0x0D0), PC, "vmaddfp128 v3, v35, v5, v3"),
        (encode_vx128(5, 3, 35, 5, 0x110), PC, "vmaddcfp128 v3, v35, v3, v5"),
        (encode_vx128(5, 3, 35, 5, 0x150), PC, "vnmsubfp128 v3, v35, v3, v5"),
    ];

    // Fixture order: standard VMX, op5, op6 high registers, then 4-operand.
    let all: Vec<(u32, u32, &str)> = std_vmx
        .iter()
        .chain(&vmx128_op5)
        .chain(&vmx128_high)
        .chain(&vmx128_4op)
        .copied()
        .collect();
    assert_or_regen("vmx128_registers.json", &all);
}
|
||||
|
||||
#[test]
fn sradi_shift_32_decodes_to_32() {
    // A 6-bit shift of 32 splits into sh[4:0]=0 and sh[5]=1.
    // After the PPCBUG-040 fix, sh64() must return 32, not 1.
    // The word is built with the rldicl helper (mb=63): it is not a sradi,
    // but it carries the same split shift field that sh64() extracts.
    let raw = rldicl(3, 4, 32, 63, 0);
    let decoded: DecodedInstr = decode(raw, 0);
    assert_eq!(decoded.sh64(), 32, "sh64 must return 32 for sh=32 (sh5=1, sh_lo=0)");
}
|
||||
|
||||
#[test]
fn sh64_shift_1_decodes_correctly() {
    // sh=1 lives entirely in the low field: sh[4:0]=1, sh[5]=0.
    let raw = rldicl(3, 4, 1, 0, 0);
    let decoded: DecodedInstr = decode(raw, 0);
    assert_eq!(decoded.sh64(), 1, "sh64 must return 1 for sh=1");
}
|
||||
|
||||
#[test]
fn sh64_shift_63_decodes_correctly() {
    // sh=63 sets every shift bit: sh[4:0]=31 (0x1F) and sh[5]=1.
    let raw = rldicl(3, 4, 63, 0, 0);
    let decoded: DecodedInstr = decode(raw, 0);
    assert_eq!(decoded.sh64(), 63, "sh64 must return 63 for sh=63");
}
|
||||
571
crates/xenia-cpu/tests/golden/base_mnemonics.json
Normal file
571
crates/xenia-cpu/tests/golden/base_mnemonics.json
Normal file
@@ -0,0 +1,571 @@
|
||||
{
|
||||
"rows": [
|
||||
{
|
||||
"label": "add r3,r4,r5",
|
||||
"raw": "0x7C642A14",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "add",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "add. r3,r4,r5",
|
||||
"raw": "0x7C642A15",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "add.",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "addo r3,r4,r5",
|
||||
"raw": "0x7C642E14",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "addo",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "addo. r3,r4,r5",
|
||||
"raw": "0x7C642E15",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "addo.",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "neg r3,r4",
|
||||
"raw": "0x7C6400D0",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "neg",
|
||||
"operands": "r3, r4"
|
||||
},
|
||||
{
|
||||
"label": "mullw r3,r4,r5",
|
||||
"raw": "0x7C6429D6",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mullw",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "divw r3,r4,r5",
|
||||
"raw": "0x7C642BD6",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "divw",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "mulhw. r3,r4,r5",
|
||||
"raw": "0x7C642897",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mulhw.",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "mulhwu. r3,r4,r5",
|
||||
"raw": "0x7C642817",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mulhwu.",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "mulld r3,r4,r5",
|
||||
"raw": "0x7C6429D2",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mulld",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "and r3,r4,r5",
|
||||
"raw": "0x7C832838",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "and",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "or r3,r4,r5 (non-mr: rs!=rb)",
|
||||
"raw": "0x7C832B78",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "or",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "xor r3,r4,r5",
|
||||
"raw": "0x7C832A78",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "xor",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "nor r3,r4,r5",
|
||||
"raw": "0x7C8328F8",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "nor",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "nand r3,r4,r5",
|
||||
"raw": "0x7C832BB8",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "nand",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "eqv r3,r4,r5",
|
||||
"raw": "0x7C832A38",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "eqv",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "andc r3,r4,r5",
|
||||
"raw": "0x7C832878",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "andc",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "orc r3,r4,r5",
|
||||
"raw": "0x7C832B38",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "orc",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "slw r3,r4,r5",
|
||||
"raw": "0x7C832830",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "slw",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "srw r3,r4,r5",
|
||||
"raw": "0x7C832C30",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "srw",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "sraw r3,r4,r5",
|
||||
"raw": "0x7C832E30",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "sraw",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "sld r3,r4,r5",
|
||||
"raw": "0x7C832836",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "sld",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "srd r3,r4,r5",
|
||||
"raw": "0x7C832C36",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "srd",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "srawi r3,r4,16",
|
||||
"raw": "0x7C838670",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "srawi",
|
||||
"operands": "r3, r4, 16"
|
||||
},
|
||||
{
|
||||
"label": "stwcx. r3,r4,r5",
|
||||
"raw": "0x7C64292D",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "stwcx.",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "stdcx. r3,r4,r5",
|
||||
"raw": "0x7C6429AD",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "stdcx.",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "lwarx r3,r4,r5",
|
||||
"raw": "0x7C642828",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "lwarx",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "ldarx r3,r4,r5",
|
||||
"raw": "0x7C6428A8",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "ldarx",
|
||||
"operands": "r3, r4, r5"
|
||||
},
|
||||
{
|
||||
"label": "cmpwi cr0, r3, 16",
|
||||
"raw": "0x2C030010",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "cmpi",
|
||||
"operands": "0, r3, 16",
|
||||
"ext_mnemonic": "cmpwi",
|
||||
"ext_operands": "r3, 16"
|
||||
},
|
||||
{
|
||||
"label": "cmpwi cr2, r3, 16",
|
||||
"raw": "0x2D030010",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "cmpi",
|
||||
"operands": "cr2, 0, r3, 16",
|
||||
"ext_mnemonic": "cmpwi",
|
||||
"ext_operands": "cr2, r3, 16"
|
||||
},
|
||||
{
|
||||
"label": "cmplwi cr0, r3, 16",
|
||||
"raw": "0x28030010",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "cmpli",
|
||||
"operands": "0, r3, 0x10",
|
||||
"ext_mnemonic": "cmplwi",
|
||||
"ext_operands": "r3, 0x10"
|
||||
},
|
||||
{
|
||||
"label": "cmpw r3,r4 in cr0",
|
||||
"raw": "0x7C032000",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "cmp",
|
||||
"operands": "0, r3, r4",
|
||||
"ext_mnemonic": "cmpw",
|
||||
"ext_operands": "r3, r4"
|
||||
},
|
||||
{
|
||||
"label": "cmpd r3,r4",
|
||||
"raw": "0x7C232000",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "cmp",
|
||||
"operands": "1, r3, r4",
|
||||
"ext_mnemonic": "cmpd",
|
||||
"ext_operands": "r3, r4"
|
||||
},
|
||||
{
|
||||
"label": "cmplw r3,r4",
|
||||
"raw": "0x7C032040",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "cmpl",
|
||||
"operands": "0, r3, r4",
|
||||
"ext_mnemonic": "cmplw",
|
||||
"ext_operands": "r3, r4"
|
||||
},
|
||||
{
|
||||
"label": "addi r3, r1, 16",
|
||||
"raw": "0x38610010",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "addi",
|
||||
"operands": "r3, r1, 16"
|
||||
},
|
||||
{
|
||||
"label": "addis r3, r1, 0x100 (ra!=0)",
|
||||
"raw": "0x3C610100",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "addis",
|
||||
"operands": "r3, r1, 0x100"
|
||||
},
|
||||
{
|
||||
"label": "mulli r3, r4, 5",
|
||||
"raw": "0x1C640005",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mulli",
|
||||
"operands": "r3, r4, 5"
|
||||
},
|
||||
{
|
||||
"label": "subfic r3, r4, 5",
|
||||
"raw": "0x20640005",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "subfic",
|
||||
"operands": "r3, r4, 5"
|
||||
},
|
||||
{
|
||||
"label": "addic r3, r4, 16",
|
||||
"raw": "0x30640010",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "addic",
|
||||
"operands": "r3, r4, 16"
|
||||
},
|
||||
{
|
||||
"label": "addic. r3, r4, 16",
|
||||
"raw": "0x34640010",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "addic.",
|
||||
"operands": "r3, r4, 16"
|
||||
},
|
||||
{
|
||||
"label": "ori r4, r3, 0x10 (non-nop)",
|
||||
"raw": "0x60640010",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "ori",
|
||||
"operands": "r4, r3, 0x10"
|
||||
},
|
||||
{
|
||||
"label": "oris r4, r3, 0x10",
|
||||
"raw": "0x64640010",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "oris",
|
||||
"operands": "r4, r3, 0x10"
|
||||
},
|
||||
{
|
||||
"label": "xori r4, r3, 0x10",
|
||||
"raw": "0x68640010",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "xori",
|
||||
"operands": "r4, r3, 0x10"
|
||||
},
|
||||
{
|
||||
"label": "andi. r4, r3, 0x10",
|
||||
"raw": "0x70640010",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "andi.",
|
||||
"operands": "r4, r3, 0x10"
|
||||
},
|
||||
{
|
||||
"label": "lwz r5, 0x20(r1)",
|
||||
"raw": "0x80A10020",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "lwz",
|
||||
"operands": "r5, 32(r1)"
|
||||
},
|
||||
{
|
||||
"label": "stw r5, 0x20(r1)",
|
||||
"raw": "0x90A10020",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "stw",
|
||||
"operands": "r5, 32(r1)"
|
||||
},
|
||||
{
|
||||
"label": "lbz r5, 0x20(r1)",
|
||||
"raw": "0x88A10020",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "lbz",
|
||||
"operands": "r5, 32(r1)"
|
||||
},
|
||||
{
|
||||
"label": "lhz r5, 0x20(r1)",
|
||||
"raw": "0xA0A10020",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "lhz",
|
||||
"operands": "r5, 32(r1)"
|
||||
},
|
||||
{
|
||||
"label": "lfs f5, 0x20(r1)",
|
||||
"raw": "0xC0A10020",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "lfs",
|
||||
"operands": "f5, 32(r1)"
|
||||
},
|
||||
{
|
||||
"label": "lfd f5, 0x20(r1)",
|
||||
"raw": "0xC8A10020",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "lfd",
|
||||
"operands": "f5, 32(r1)"
|
||||
},
|
||||
{
|
||||
"label": "stfd f5, 0x20(r1)",
|
||||
"raw": "0xD8A10020",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "stfd",
|
||||
"operands": "f5, 32(r1)"
|
||||
},
|
||||
{
|
||||
"label": "ld r5, 0x20(r1)",
|
||||
"raw": "0xE8A10020",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "ld",
|
||||
"operands": "r5, 32(r1)"
|
||||
},
|
||||
{
|
||||
"label": "std r5, 0x20(r1)",
|
||||
"raw": "0xF8A10020",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "std",
|
||||
"operands": "r5, 32(r1)"
|
||||
},
|
||||
{
|
||||
"label": "sync 0 (extends to sync)",
|
||||
"raw": "0x7C0004AC",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "sync",
|
||||
"operands": ""
|
||||
},
|
||||
{
|
||||
"label": "isync",
|
||||
"raw": "0x4C00012C",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "isync",
|
||||
"operands": ""
|
||||
},
|
||||
{
|
||||
"label": "eieio",
|
||||
"raw": "0x7C0006AC",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "eieio",
|
||||
"operands": ""
|
||||
},
|
||||
{
|
||||
"label": "dcbst r1, r2",
|
||||
"raw": "0x7C01106C",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "dcbst",
|
||||
"operands": "r1, r2"
|
||||
},
|
||||
{
|
||||
"label": "dcbf r1, r2",
|
||||
"raw": "0x7C0110AC",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "dcbf",
|
||||
"operands": "r1, r2"
|
||||
},
|
||||
{
|
||||
"label": "dcbt r1, r2",
|
||||
"raw": "0x7C01122C",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "dcbt",
|
||||
"operands": "r1, r2"
|
||||
},
|
||||
{
|
||||
"label": "dcbz r1, r2",
|
||||
"raw": "0x7C0117EC",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "dcbz",
|
||||
"operands": "r1, r2"
|
||||
},
|
||||
{
|
||||
"label": "dcbz128 r1, r2",
|
||||
"raw": "0x7C2117EC",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "dcbz128",
|
||||
"operands": "r1, r2"
|
||||
},
|
||||
{
|
||||
"label": "crnor 4,5,6 (no simplify)",
|
||||
"raw": "0x4C853042",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "crnor",
|
||||
"operands": "4*cr1+lt, 4*cr1+gt, 4*cr1+eq"
|
||||
},
|
||||
{
|
||||
"label": "crand 4,5,6",
|
||||
"raw": "0x4C853202",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "crand",
|
||||
"operands": "4*cr1+lt, 4*cr1+gt, 4*cr1+eq"
|
||||
},
|
||||
{
|
||||
"label": "cror 4,5,6 (no simplify)",
|
||||
"raw": "0x4C853382",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "cror",
|
||||
"operands": "4*cr1+lt, 4*cr1+gt, 4*cr1+eq"
|
||||
},
|
||||
{
|
||||
"label": "tw 11, r3, r4 (uncommon TO)",
|
||||
"raw": "0x7D632008",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "tw",
|
||||
"operands": "11, r3, r4"
|
||||
},
|
||||
{
|
||||
"label": "tdi 11, r3, 123",
|
||||
"raw": "0x0963007B",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "tdi",
|
||||
"operands": "11, r3, 123"
|
||||
},
|
||||
{
|
||||
"label": "mtcrf 0xFF, r5 → mtcr",
|
||||
"raw": "0x7CAFF120",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mtcrf",
|
||||
"operands": "0xFF, r5",
|
||||
"ext_mnemonic": "mtcr",
|
||||
"ext_operands": "r5"
|
||||
},
|
||||
{
|
||||
"label": "mfcr r5",
|
||||
"raw": "0x7CA00026",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mfcr",
|
||||
"operands": "r5"
|
||||
},
|
||||
{
|
||||
"label": "mfmsr r5",
|
||||
"raw": "0x7CA000A6",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mfmsr",
|
||||
"operands": "r5"
|
||||
},
|
||||
{
|
||||
"label": "mtmsr r5",
|
||||
"raw": "0x7CA00124",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mtmsr",
|
||||
"operands": "r5"
|
||||
},
|
||||
{
|
||||
"label": "mtmsrd r5",
|
||||
"raw": "0x7CA00164",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mtmsrd",
|
||||
"operands": "r5"
|
||||
},
|
||||
{
|
||||
"label": "fadd f3, f4, f5",
|
||||
"raw": "0xFC64282A",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "fadd",
|
||||
"operands": "f3, f4, f5"
|
||||
},
|
||||
{
|
||||
"label": "fsub f3, f4, f5",
|
||||
"raw": "0xFC642828",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "fsub",
|
||||
"operands": "f3, f4, f5"
|
||||
},
|
||||
{
|
||||
"label": "fdiv f3, f4, f5",
|
||||
"raw": "0xFC642824",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "fdiv",
|
||||
"operands": "f3, f4, f5"
|
||||
},
|
||||
{
|
||||
"label": "fmul f3, f0, f5 (encoded)",
|
||||
"raw": "0xFCE02832",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "fmul",
|
||||
"operands": "f7, f0, f0"
|
||||
},
|
||||
{
|
||||
"label": "fneg f3, f4",
|
||||
"raw": "0xFC640050",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "fneg",
|
||||
"operands": "f3, f0"
|
||||
},
|
||||
{
|
||||
"label": "fmr f3, f4",
|
||||
"raw": "0xFC640090",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "fmr",
|
||||
"operands": "f3, f0"
|
||||
},
|
||||
{
|
||||
"label": "mtfsf 0xFF, f5 (Rc=0)",
|
||||
"raw": "0xFDFE2D8E",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mtfsf",
|
||||
"operands": "0xFF, f5"
|
||||
},
|
||||
{
|
||||
"label": "mtfsf. 0xFF, f5 (Rc=1)",
|
||||
"raw": "0xFDFE2D8F",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mtfsf.",
|
||||
"operands": "0xFF, f5"
|
||||
}
|
||||
]
|
||||
}
|
||||
623
crates/xenia-cpu/tests/golden/extended_mnemonics.json
Normal file
623
crates/xenia-cpu/tests/golden/extended_mnemonics.json
Normal file
@@ -0,0 +1,623 @@
|
||||
{
|
||||
"rows": [
|
||||
{
|
||||
"label": "nop",
|
||||
"raw": "0x60000000",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "ori",
|
||||
"operands": "r0, r0, 0x0",
|
||||
"ext_mnemonic": "nop",
|
||||
"ext_operands": ""
|
||||
},
|
||||
{
|
||||
"label": "li r3, 16",
|
||||
"raw": "0x38600010",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "addi",
|
||||
"operands": "r3, r0, 16",
|
||||
"ext_mnemonic": "li",
|
||||
"ext_operands": "r3, 16"
|
||||
},
|
||||
{
|
||||
"label": "li r3, -1",
|
||||
"raw": "0x3860FFFF",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "addi",
|
||||
"operands": "r3, r0, -1",
|
||||
"ext_mnemonic": "li",
|
||||
"ext_operands": "r3, -1"
|
||||
},
|
||||
{
|
||||
"label": "subi r3, r4, 16",
|
||||
"raw": "0x3864FFF0",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "addi",
|
||||
"operands": "r3, r4, -16",
|
||||
"ext_mnemonic": "subi",
|
||||
"ext_operands": "r3, r4, 16"
|
||||
},
|
||||
{
|
||||
"label": "lis r3, 0x1234",
|
||||
"raw": "0x3C601234",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "addis",
|
||||
"operands": "r3, r0, 0x1234",
|
||||
"ext_mnemonic": "lis",
|
||||
"ext_operands": "r3, 0x1234"
|
||||
},
|
||||
{
|
||||
"label": "subis r3, r4, 0xFFFF",
|
||||
"raw": "0x3C64FFFF",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "addis",
|
||||
"operands": "r3, r4, 0xFFFF",
|
||||
"ext_mnemonic": "subis",
|
||||
"ext_operands": "r3, r4, 0x1"
|
||||
},
|
||||
{
|
||||
"label": "mr r3, r4",
|
||||
"raw": "0x7C832378",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "or",
|
||||
"operands": "r3, r4, r4",
|
||||
"ext_mnemonic": "mr",
|
||||
"ext_operands": "r3, r4"
|
||||
},
|
||||
{
|
||||
"label": "mr. r3, r4",
|
||||
"raw": "0x7C832379",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "or.",
|
||||
"operands": "r3, r4, r4",
|
||||
"ext_mnemonic": "mr.",
|
||||
"ext_operands": "r3, r4"
|
||||
},
|
||||
{
|
||||
"label": "mr (via and)",
|
||||
"raw": "0x7C832038",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "and",
|
||||
"operands": "r3, r4, r4",
|
||||
"ext_mnemonic": "mr",
|
||||
"ext_operands": "r3, r4"
|
||||
},
|
||||
{
|
||||
"label": "not r3, r4",
|
||||
"raw": "0x7C8320F8",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "nor",
|
||||
"operands": "r3, r4, r4",
|
||||
"ext_mnemonic": "not",
|
||||
"ext_operands": "r3, r4"
|
||||
},
|
||||
{
|
||||
"label": "subf → sub r3, r5, r4",
|
||||
"raw": "0x7C642850",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "subf",
|
||||
"operands": "r3, r4, r5",
|
||||
"ext_mnemonic": "sub",
|
||||
"ext_operands": "r3, r5, r4"
|
||||
},
|
||||
{
|
||||
"label": "slwi r3, r4, 4",
|
||||
"raw": "0x54832036",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "rlwinm",
|
||||
"operands": "r3, r4, 4, 0, 27",
|
||||
"ext_mnemonic": "slwi",
|
||||
"ext_operands": "r3, r4, 4"
|
||||
},
|
||||
{
|
||||
"label": "srwi r3, r4, 4",
|
||||
"raw": "0x5483E13E",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "rlwinm",
|
||||
"operands": "r3, r4, 28, 4, 31",
|
||||
"ext_mnemonic": "srwi",
|
||||
"ext_operands": "r3, r4, 4"
|
||||
},
|
||||
{
|
||||
"label": "rotlwi r3, r4, 8",
|
||||
"raw": "0x5483403E",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "rlwinm",
|
||||
"operands": "r3, r4, 8, 0, 31",
|
||||
"ext_mnemonic": "rotlwi",
|
||||
"ext_operands": "r3, r4, 8"
|
||||
},
|
||||
{
|
||||
"label": "clrlwi r3, r4, 4",
|
||||
"raw": "0x5483013E",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "rlwinm",
|
||||
"operands": "r3, r4, 0, 4, 31",
|
||||
"ext_mnemonic": "clrlwi",
|
||||
"ext_operands": "r3, r4, 4"
|
||||
},
|
||||
{
|
||||
"label": "clrrwi r3, r4, 4",
|
||||
"raw": "0x54830036",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "rlwinm",
|
||||
"operands": "r3, r4, 0, 0, 27",
|
||||
"ext_mnemonic": "clrrwi",
|
||||
"ext_operands": "r3, r4, 4"
|
||||
},
|
||||
{
|
||||
"label": "extlwi r3, r4, 8, 8",
|
||||
"raw": "0x5483400E",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "rlwinm",
|
||||
"operands": "r3, r4, 8, 0, 7",
|
||||
"ext_mnemonic": "extlwi",
|
||||
"ext_operands": "r3, r4, 8, 8"
|
||||
},
|
||||
{
|
||||
"label": "slwi. r3, r4, 4",
|
||||
"raw": "0x54832037",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "rlwinm.",
|
||||
"operands": "r3, r4, 4, 0, 27",
|
||||
"ext_mnemonic": "slwi.",
|
||||
"ext_operands": "r3, r4, 4"
|
||||
},
|
||||
{
|
||||
"label": "rlwinm. r11,r11,0,31,31 (no simplify)",
|
||||
"raw": "0x556B07FF",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "rlwinm.",
|
||||
"operands": "r11, r11, 0, 31, 31",
|
||||
"ext_mnemonic": "clrlwi.",
|
||||
"ext_operands": "r11, r11, 31"
|
||||
},
|
||||
{
|
||||
"label": "clrldi r3, r4, 32",
|
||||
"raw": "0x78830020",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "rldicl",
|
||||
"operands": "r3, r4, 0, 32",
|
||||
"ext_mnemonic": "clrldi",
|
||||
"ext_operands": "r3, r4, 32"
|
||||
},
|
||||
{
|
||||
"label": "srdi r3, r4, 8",
|
||||
"raw": "0x7883C202",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "rldicl",
|
||||
"operands": "r3, r4, 56, 8",
|
||||
"ext_mnemonic": "srdi",
|
||||
"ext_operands": "r3, r4, 8"
|
||||
},
|
||||
{
|
||||
"label": "rotldi r3, r4, 8",
|
||||
"raw": "0x78834000",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "rldicl",
|
||||
"operands": "r3, r4, 8, 0",
|
||||
"ext_mnemonic": "rotldi",
|
||||
"ext_operands": "r3, r4, 8"
|
||||
},
|
||||
{
|
||||
"label": "cmpwi cr0, r3, 16",
|
||||
"raw": "0x2C030010",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "cmpi",
|
||||
"operands": "0, r3, 16",
|
||||
"ext_mnemonic": "cmpwi",
|
||||
"ext_operands": "r3, 16"
|
||||
},
|
||||
{
|
||||
"label": "cmpdi (L=1) variant",
|
||||
"raw": "0x2C230010",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "cmpi",
|
||||
"operands": "1, r3, 16",
|
||||
"ext_mnemonic": "cmpdi",
|
||||
"ext_operands": "r3, 16"
|
||||
},
|
||||
{
|
||||
"label": "blr",
|
||||
"raw": "0x4E800020",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bclr",
|
||||
"operands": "20, lt",
|
||||
"ext_mnemonic": "blr",
|
||||
"ext_operands": ""
|
||||
},
|
||||
{
|
||||
"label": "blrl",
|
||||
"raw": "0x4E800021",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bclrl",
|
||||
"operands": "20, lt",
|
||||
"ext_mnemonic": "blrl",
|
||||
"ext_operands": ""
|
||||
},
|
||||
{
|
||||
"label": "bctr",
|
||||
"raw": "0x4E800420",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bcctr",
|
||||
"operands": "20, lt",
|
||||
"ext_mnemonic": "bctr",
|
||||
"ext_operands": ""
|
||||
},
|
||||
{
|
||||
"label": "bctrl",
|
||||
"raw": "0x4E800421",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bcctrl",
|
||||
"operands": "20, lt",
|
||||
"ext_mnemonic": "bctrl",
|
||||
"ext_operands": ""
|
||||
},
|
||||
{
|
||||
"label": "beqlr (BO=12, BI=2 → cr0.eq true)",
|
||||
"raw": "0x4D820020",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bclr",
|
||||
"operands": "12, eq",
|
||||
"ext_mnemonic": "beqlr",
|
||||
"ext_operands": ""
|
||||
},
|
||||
{
|
||||
"label": "bnelr",
|
||||
"raw": "0x4C820020",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bclr",
|
||||
"operands": "4, eq",
|
||||
"ext_mnemonic": "bnelr",
|
||||
"ext_operands": ""
|
||||
},
|
||||
{
|
||||
"label": "bc → b 0x82000040",
|
||||
"raw": "0x42800040",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bc",
|
||||
"operands": "20, lt, 0x82000040",
|
||||
"ext_mnemonic": "b",
|
||||
"ext_operands": "0x82000040",
|
||||
"branch_target": "0x82000040"
|
||||
},
|
||||
{
|
||||
"label": "bc l → bl 0x82000040",
|
||||
"raw": "0x42800041",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bcl",
|
||||
"operands": "20, lt, 0x82000040",
|
||||
"ext_mnemonic": "bl",
|
||||
"ext_operands": "0x82000040",
|
||||
"branch_target": "0x82000040"
|
||||
},
|
||||
{
|
||||
"label": "bc 12,cr0.eq → beq 0x82000040",
|
||||
"raw": "0x41820040",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bc",
|
||||
"operands": "12, eq, 0x82000040",
|
||||
"ext_mnemonic": "beq",
|
||||
"ext_operands": "0x82000040",
|
||||
"branch_target": "0x82000040"
|
||||
},
|
||||
{
|
||||
"label": "bc 4,cr0.eq → bne 0x82000040",
|
||||
"raw": "0x40820040",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bc",
|
||||
"operands": "4, eq, 0x82000040",
|
||||
"ext_mnemonic": "bne",
|
||||
"ext_operands": "0x82000040",
|
||||
"branch_target": "0x82000040"
|
||||
},
|
||||
{
|
||||
"label": "bc 12,cr0.lt → blt 0x82000040",
|
||||
"raw": "0x41800040",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bc",
|
||||
"operands": "12, lt, 0x82000040",
|
||||
"ext_mnemonic": "blt",
|
||||
"ext_operands": "0x82000040",
|
||||
"branch_target": "0x82000040"
|
||||
},
|
||||
{
|
||||
"label": "bc 4,cr0.lt → bge 0x82000040",
|
||||
"raw": "0x40800040",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bc",
|
||||
"operands": "4, lt, 0x82000040",
|
||||
"ext_mnemonic": "bge",
|
||||
"ext_operands": "0x82000040",
|
||||
"branch_target": "0x82000040"
|
||||
},
|
||||
{
|
||||
"label": "bc 12,cr0.gt → bgt 0x82000040",
|
||||
"raw": "0x41810040",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bc",
|
||||
"operands": "12, gt, 0x82000040",
|
||||
"ext_mnemonic": "bgt",
|
||||
"ext_operands": "0x82000040",
|
||||
"branch_target": "0x82000040"
|
||||
},
|
||||
{
|
||||
"label": "bc 4,cr0.gt → ble 0x82000040",
|
||||
"raw": "0x40810040",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bc",
|
||||
"operands": "4, gt, 0x82000040",
|
||||
"ext_mnemonic": "ble",
|
||||
"ext_operands": "0x82000040",
|
||||
"branch_target": "0x82000040"
|
||||
},
|
||||
{
|
||||
"label": "bc 12, cr2.eq → beq cr2, 0x...040",
|
||||
"raw": "0x418A0040",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bc",
|
||||
"operands": "12, 4*cr2+eq, 0x82000040",
|
||||
"ext_mnemonic": "beq",
|
||||
"ext_operands": "cr2, 0x82000040",
|
||||
"branch_target": "0x82000040"
|
||||
},
|
||||
{
|
||||
"label": "bdnz 0x82000040",
|
||||
"raw": "0x42000040",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bc",
|
||||
"operands": "16, lt, 0x82000040",
|
||||
"ext_mnemonic": "bdnz",
|
||||
"ext_operands": "0x82000040",
|
||||
"branch_target": "0x82000040"
|
||||
},
|
||||
{
|
||||
"label": "bdz 0x82000040",
|
||||
"raw": "0x42400040",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bc",
|
||||
"operands": "18, lt, 0x82000040",
|
||||
"ext_mnemonic": "bdz",
|
||||
"ext_operands": "0x82000040",
|
||||
"branch_target": "0x82000040"
|
||||
},
|
||||
{
|
||||
"label": "b +0x40 → 0x82000040",
|
||||
"raw": "0x48000040",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "b",
|
||||
"operands": "0x82000040",
|
||||
"branch_target": "0x82000040"
|
||||
},
|
||||
{
|
||||
"label": "bl +0x40 → 0x82000040",
|
||||
"raw": "0x48000041",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bl",
|
||||
"operands": "0x82000040",
|
||||
"branch_target": "0x82000040"
|
||||
},
|
||||
{
|
||||
"label": "ba 0x40 absolute",
|
||||
"raw": "0x48000042",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "ba",
|
||||
"operands": "0x00000040",
|
||||
"branch_target": "0x00000040"
|
||||
},
|
||||
{
|
||||
"label": "bla 0x40 absolute",
|
||||
"raw": "0x48000043",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bla",
|
||||
"operands": "0x00000040",
|
||||
"branch_target": "0x00000040"
|
||||
},
|
||||
{
|
||||
"label": "tdeqi r3, 123",
|
||||
"raw": "0x0883007B",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "tdi",
|
||||
"operands": "4, r3, 123",
|
||||
"ext_mnemonic": "tdeqi",
|
||||
"ext_operands": "r3, 123"
|
||||
},
|
||||
{
|
||||
"label": "twlti r3, 123",
|
||||
"raw": "0x0E03007B",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "twi",
|
||||
"operands": "16, r3, 123",
|
||||
"ext_mnemonic": "twlti",
|
||||
"ext_operands": "r3, 123"
|
||||
},
|
||||
{
|
||||
"label": "mflr r3",
|
||||
"raw": "0x7C6802A6",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mfspr",
|
||||
"operands": "r3, LR",
|
||||
"ext_mnemonic": "mflr",
|
||||
"ext_operands": "r3"
|
||||
},
|
||||
{
|
||||
"label": "mfctr r3",
|
||||
"raw": "0x7C6902A6",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mfspr",
|
||||
"operands": "r3, CTR",
|
||||
"ext_mnemonic": "mfctr",
|
||||
"ext_operands": "r3"
|
||||
},
|
||||
{
|
||||
"label": "mfxer r3",
|
||||
"raw": "0x7C6102A6",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mfspr",
|
||||
"operands": "r3, XER",
|
||||
"ext_mnemonic": "mfxer",
|
||||
"ext_operands": "r3"
|
||||
},
|
||||
{
|
||||
"label": "mtlr r3",
|
||||
"raw": "0x7C6803A6",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mtspr",
|
||||
"operands": "LR, r3",
|
||||
"ext_mnemonic": "mtlr",
|
||||
"ext_operands": "r3"
|
||||
},
|
||||
{
|
||||
"label": "mtctr r3",
|
||||
"raw": "0x7C6903A6",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mtspr",
|
||||
"operands": "CTR, r3",
|
||||
"ext_mnemonic": "mtctr",
|
||||
"ext_operands": "r3"
|
||||
},
|
||||
{
|
||||
"label": "mtxer r3",
|
||||
"raw": "0x7C6103A6",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mtspr",
|
||||
"operands": "XER, r3",
|
||||
"ext_mnemonic": "mtxer",
|
||||
"ext_operands": "r3"
|
||||
},
|
||||
{
|
||||
"label": "crnot 4, 5",
|
||||
"raw": "0x4C852842",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "crnor",
|
||||
"operands": "4*cr1+lt, 4*cr1+gt, 4*cr1+gt",
|
||||
"ext_mnemonic": "crnot",
|
||||
"ext_operands": "4*cr1+lt, 4*cr1+gt"
|
||||
},
|
||||
{
|
||||
"label": "crclr 4",
|
||||
"raw": "0x4C842182",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "crxor",
|
||||
"operands": "4*cr1+lt, 4*cr1+lt, 4*cr1+lt",
|
||||
"ext_mnemonic": "crclr",
|
||||
"ext_operands": "4*cr1+lt"
|
||||
},
|
||||
{
|
||||
"label": "crset 4",
|
||||
"raw": "0x4C842242",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "creqv",
|
||||
"operands": "4*cr1+lt, 4*cr1+lt, 4*cr1+lt",
|
||||
"ext_mnemonic": "crset",
|
||||
"ext_operands": "4*cr1+lt"
|
||||
},
|
||||
{
|
||||
"label": "crmove 4, 5",
|
||||
"raw": "0x4C852B82",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "cror",
|
||||
"operands": "4*cr1+lt, 4*cr1+gt, 4*cr1+gt",
|
||||
"ext_mnemonic": "crmove",
|
||||
"ext_operands": "4*cr1+lt, 4*cr1+gt"
|
||||
},
|
||||
{
|
||||
"label": "lwsync",
|
||||
"raw": "0x7C2004AC",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "sync",
|
||||
"operands": "",
|
||||
"ext_mnemonic": "lwsync",
|
||||
"ext_operands": ""
|
||||
},
|
||||
{
|
||||
"label": "trap",
|
||||
"raw": "0x7FE00008",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "tw",
|
||||
"operands": "31, r0, r0",
|
||||
"ext_mnemonic": "trap",
|
||||
"ext_operands": ""
|
||||
},
|
||||
{
|
||||
"label": "blr (BO=20, BI=4 — BI is don't-care)",
|
||||
"raw": "0x4E840020",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bclr",
|
||||
"operands": "20, 4*cr1+lt",
|
||||
"ext_mnemonic": "blr",
|
||||
"ext_operands": ""
|
||||
},
|
||||
{
|
||||
"label": "blrl (BO=20, BI=7)",
|
||||
"raw": "0x4E870021",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bclrl",
|
||||
"operands": "20, 4*cr1+so",
|
||||
"ext_mnemonic": "blrl",
|
||||
"ext_operands": ""
|
||||
},
|
||||
{
|
||||
"label": "bctr (BO=20, BI=4)",
|
||||
"raw": "0x4E840420",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bcctr",
|
||||
"operands": "20, 4*cr1+lt",
|
||||
"ext_mnemonic": "bctr",
|
||||
"ext_operands": ""
|
||||
},
|
||||
{
|
||||
"label": "twllt r3, r4 (TO=2)",
|
||||
"raw": "0x7C432008",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "tw",
|
||||
"operands": "2, r3, r4",
|
||||
"ext_mnemonic": "twllt",
|
||||
"ext_operands": "r3, r4"
|
||||
},
|
||||
{
|
||||
"label": "twlgt r3, r4 (TO=1)",
|
||||
"raw": "0x7C232008",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "tw",
|
||||
"operands": "1, r3, r4",
|
||||
"ext_mnemonic": "twlgt",
|
||||
"ext_operands": "r3, r4"
|
||||
},
|
||||
{
|
||||
"label": "tdlge r3, r4 (TO=5)",
|
||||
"raw": "0x7CA32088",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "td",
|
||||
"operands": "5, r3, r4",
|
||||
"ext_mnemonic": "tdlge",
|
||||
"ext_operands": "r3, r4"
|
||||
},
|
||||
{
|
||||
"label": "twlle r3, r4 (TO=6)",
|
||||
"raw": "0x7CC32008",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "tw",
|
||||
"operands": "6, r3, r4",
|
||||
"ext_mnemonic": "twlle",
|
||||
"ext_operands": "r3, r4"
|
||||
},
|
||||
{
|
||||
"label": "twllti r3, 16",
|
||||
"raw": "0x0C430010",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "twi",
|
||||
"operands": "2, r3, 16",
|
||||
"ext_mnemonic": "twllti",
|
||||
"ext_operands": "r3, 16"
|
||||
},
|
||||
{
|
||||
"label": "tdlgei r3, 16",
|
||||
"raw": "0x08A30010",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "tdi",
|
||||
"operands": "5, r3, 16",
|
||||
"ext_mnemonic": "tdlgei",
|
||||
"ext_operands": "r3, 16"
|
||||
}
|
||||
]
|
||||
}
|
||||
137
crates/xenia-cpu/tests/golden/vmx128_registers.json
Normal file
137
crates/xenia-cpu/tests/golden/vmx128_registers.json
Normal file
@@ -0,0 +1,137 @@
|
||||
{
|
||||
"rows": [
|
||||
{
|
||||
"label": "vaddubm v3, v4, v5",
|
||||
"raw": "0x10642800",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vaddubm",
|
||||
"operands": "v3, v4, v5"
|
||||
},
|
||||
{
|
||||
"label": "vaddfp v3, v4, v5",
|
||||
"raw": "0x1064280A",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vaddfp",
|
||||
"operands": "v3, v4, v5"
|
||||
},
|
||||
{
|
||||
"label": "vand v3, v4, v5",
|
||||
"raw": "0x10642C04",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vand",
|
||||
"operands": "v3, v4, v5"
|
||||
},
|
||||
{
|
||||
"label": "vor v3, v4, v5",
|
||||
"raw": "0x10642C84",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vor",
|
||||
"operands": "v3, v4, v5"
|
||||
},
|
||||
{
|
||||
"label": "vxor v3, v4, v5",
|
||||
"raw": "0x10642CC4",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vxor",
|
||||
"operands": "v3, v4, v5"
|
||||
},
|
||||
{
|
||||
"label": "vsel v3,v4,v5,v6",
|
||||
"raw": "0x106429AA",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vsel",
|
||||
"operands": "v3, v4, v5, v6"
|
||||
},
|
||||
{
|
||||
"label": "vperm v3,v4,v5,v6",
|
||||
"raw": "0x106429AB",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vperm",
|
||||
"operands": "v3, v4, v5, v6"
|
||||
},
|
||||
{
|
||||
"label": "vmaddfp v3, v4, v6, v5 (swap)",
|
||||
"raw": "0x106429AE",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vmaddfp",
|
||||
"operands": "v3, v4, v6, v5"
|
||||
},
|
||||
{
|
||||
"label": "mfvscr v3",
|
||||
"raw": "0x10600604",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mfvscr",
|
||||
"operands": "v3"
|
||||
},
|
||||
{
|
||||
"label": "mtvscr v5",
|
||||
"raw": "0x10002E44",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "mtvscr",
|
||||
"operands": "v5"
|
||||
},
|
||||
{
|
||||
"label": "vperm128 v3, v4, v5, 0 (canary)",
|
||||
"raw": "0x14642800",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vperm128",
|
||||
"operands": "v3, v4, v5, 0"
|
||||
},
|
||||
{
|
||||
"label": "vsrw128 v0, v0, v12 (canary, vd_hi=00)",
|
||||
"raw": "0x180061D0",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vsrw128",
|
||||
"operands": "v0, v0, v12"
|
||||
},
|
||||
{
|
||||
"label": "vsrw128 v32, v0, v12 (canary, VD128h=01)",
|
||||
"raw": "0x180061D4",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vsrw128",
|
||||
"operands": "v32, v0, v12"
|
||||
},
|
||||
{
|
||||
"label": "vpermwi128 v64, v12, 0xE4 (canary, VD128h=10)",
|
||||
"raw": "0x180463D8",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vpermwi128",
|
||||
"operands": "v64, v12, 0xE4"
|
||||
},
|
||||
{
|
||||
"label": "vrlimi128 v96, v12, 4, 3 (canary, VD128h=11)",
|
||||
"raw": "0x180467DC",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vrlimi128",
|
||||
"operands": "v96, v12, 4, 3"
|
||||
},
|
||||
{
|
||||
"label": "vrlimi128 v127, v95, 4, 3 (canary)",
|
||||
"raw": "0x1BE4FFDE",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vrlimi128",
|
||||
"operands": "v127, v95, 4, 3"
|
||||
},
|
||||
{
|
||||
"label": "vmaddfp128 v3, v35, v5, v3",
|
||||
"raw": "0x146328F0",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vmaddfp128",
|
||||
"operands": "v3, v35, v5, v3"
|
||||
},
|
||||
{
|
||||
"label": "vmaddcfp128 v3, v35, v3, v5",
|
||||
"raw": "0x14632930",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vmaddcfp128",
|
||||
"operands": "v3, v35, v3, v5"
|
||||
},
|
||||
{
|
||||
"label": "vnmsubfp128 v3, v35, v3, v5",
|
||||
"raw": "0x14632970",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vnmsubfp128",
|
||||
"operands": "v3, v35, v3, v5"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -42,15 +42,30 @@ impl Debugger {
|
||||
}
|
||||
}
|
||||
|
||||
/// Tier-3 perf: single branch that the hot interpreter loop checks
|
||||
/// before dispatching to [`pre_step`]/[`post_step`]. When the
|
||||
/// debugger is in "cold run" mode (not paused, no break pending, no
|
||||
/// breakpoints, `StepMode::Run`, in-memory trace off), both hooks have
|
||||
/// no work to do and we can skip the HashMap lookup + step-mode match + Vec
|
||||
/// maintenance entirely. The CPU's branch predictor reliably learns the
|
||||
/// stable branch direction across millions of instructions.
|
||||
#[inline]
|
||||
pub fn wants_hooks(&self) -> bool {
|
||||
self.trace_enabled
|
||||
|| self.paused
|
||||
|| self.break_pending
|
||||
|| !matches!(self.step_mode, StepMode::Run)
|
||||
|| !self.breakpoints.is_empty()
|
||||
}
|
||||
|
||||
/// Called before each instruction executes.
|
||||
pub fn pre_step(&mut self, ctx: &PpcContext, _mem: &dyn MemoryAccess) {
|
||||
// Check breakpoints
|
||||
if let Some(bp) = self.breakpoints.get(&ctx.pc) {
|
||||
if bp.enabled {
|
||||
if let Some(bp) = self.breakpoints.get(&ctx.pc)
|
||||
&& bp.enabled {
|
||||
self.break_pending = true;
|
||||
tracing::info!("Breakpoint hit at {:#010x}", ctx.pc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Called after each instruction executes.
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user