From f75bc96d174ec4c0e9fa2104cc8ce372079d0df6 Mon Sep 17 00:00:00 2001
From: MechaCat02
Date: Sat, 13 Jun 2026 10:53:54 +0200
Subject: [PATCH] [iterate-2H] PPC spin/yield/sync hint-class audit: lock
 no-over-yield + barrier-decode invariants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Audited the full PowerPC spin/yield/sync/SMT-priority-hint instruction class
against the canary oracle (ppc_emit_alu.cc InstrEmit_orx /
ppc_emit_memory.cc sync/eieio/isync) and against what Project Sylpheed
actually executes (static scan of the extracted image + disasm of the spin
sites 0x824D1328 / 0x824C17AC / 0x824D3CF8).

Findings (no behavior change required — the class is already faithful):
  - or rX,rX,rX SMT priority hints: canary special-cases EXACTLY 0x7FFFFB78
    (db16cyc) -> DelayExecution; every OTHER or-self form -> Nop. Ours
    already matches (only 0x7FFFFB78 yields). Image scan: the documented
    priority hints or 1/2/3/6/26..30 do NOT appear in Sylpheed at all; the
    only SMT spin hint used is or 31,31,31 (db16cyc), already handled in
    de21c7a. The 854 `or 8,8,8` etc. are compiler register self-moves
    (plain no-ops), not spin hints.
  - sync / lwsync / ptesync share XO=598 -> all decode to PpcOpcode::sync
    (canary keys on XO only, identical); eieio (XO=854), isync (XO=150)
    decode correctly. All are value-neutral no-ops under the single-host
    model, matching canary MemoryBarrier/Nop. unimpl=0 in a 200M run
    confirms none trap. tlbsync is not implemented by canary either and is
    unused by Sylpheed.
  - mftb-based timed back-off (loop at 0x824D3CF8: mftb delta vs timeout,
    with db16cyc between polls and a timeout escape) relies on the
    already-landed db16cyc yield + coherent global-clock timebase; no
    deadlock, no new gap.
  - ori 0,0,0 canonical nop (140 sites) is value-neutral; matches canary
    Nop.
Lands two regression tests that lock the audited invariants so a future
change cannot over-yield on a benign priority hint (which would perturb the
deterministic schedule) or break the sync L-field decode:
  - test_smt_priority_hints_are_nops_not_yields
  - test_lwsync_ptesync_eieio_isync_decode_as_benign_noops

Determinism preserved (tests-only): two cold lockstep `check -n 5M` (no
persist) byte-identical; golden digest unchanged (no re-baseline). Full
workspace suite green. 200M cascade unchanged (packets~172M, draws=0,
shaders=0, swaps=1) — confirms the hint class is exhausted; the render gate
is now downstream (tid14 0x109c per-job completion event), not CPU semantics.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 crates/xenia-cpu/src/interpreter.rs | 66 +++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs
index 582c673..85fb181 100644
--- a/crates/xenia-cpu/src/interpreter.rs
+++ b/crates/xenia-cpu/src/interpreter.rs
@@ -5100,6 +5100,72 @@ mod tests {
         assert_eq!(r, StepResult::Continue, "non-db16cyc or-self stays Continue");
     }
 
+    #[test]
+    fn test_smt_priority_hints_are_nops_not_yields() {
+        // Locks the no-over-yield half of the iterate-2H spin/yield/sync
+        // hint-class audit. The SMT thread-priority hints `or 1,1,1`, `or 2,2,2`,
+        // `or 3,3,3`, `or 6,6,6` (and the db8cyc family `or 26..30`) are reserved
+        // no-op encodings. Canary's `InstrEmit_orx` emits `f.Nop()` for every
+        // `or rX,rX,rX` (RT==RB==RA && !Rc) except the exact db16cyc word
+        // 0x7FFFFB78, which alone gets `f.DelayExecution()`. Yielding on any form
+        // below would diverge from canary and perturb the deterministic schedule.
+        // (Audit evidence: none of these appear in Sylpheed's image; only
+        // `or 31,31,31` (db16cyc) is used as a spin hint.)
+        const HINT_REGS: [u32; 9] = [1, 2, 3, 6, 26, 27, 28, 29, 30];
+        // `or 0,0,0` with Rc=0: 31<<26 | 444<<1; the register fields are OR-ed in below.
+        const OR_BASE: u32 = (31 << 26) | (444 << 1);
+        let sentinel = 0xDEAD_BEEF_F00D_BA11;
+        for r in HINT_REGS {
+            // rN goes into all three register fields (shifted to bits 21, 16 and 11).
+            let raw = OR_BASE | (r << 21) | (r << 16) | (r << 11);
+            let mut ctx = PpcContext::new();
+            let mut mem = TestMem::new();
+            ctx.gpr[r as usize] = sentinel;
+            ctx.pc = 0;
+            write_instr(&mut mem, 0, raw);
+            let outcome = step(&mut ctx, &mut mem);
+            assert_eq!(ctx.gpr[r as usize], sentinel, "or {r},{r},{r} is value-neutral");
+            assert_eq!(ctx.pc, 4, "or {r},{r},{r} advances PC");
+            assert_eq!(
+                outcome,
+                StepResult::Continue,
+                "priority hint or {r},{r},{r} is a plain no-op (canary Nop), NOT a yield"
+            );
+        }
+    }
+
+    #[test]
+    fn test_lwsync_ptesync_eieio_isync_decode_as_benign_noops() {
+        // Barrier half of the audit. Canary keys `sync` on XO=598 alone, so
+        // the L-field variants sync (L=0), lwsync (L=1) and ptesync (L=2) all
+        // reach `InstrEmit_sync` -> `MemoryBarrier`; `eieio` is also a
+        // `MemoryBarrier` and `isync` a `Nop`. Under our single-host
+        // interpreter each one must decode (never trap as unknown), advance
+        // PC, and leave XER / FPSCR / the GPRs untouched.
+        // Instruction word from primary opcode, L field (bit 21) and XO (bit 1).
+        let encode = |primary: u32, l: u32, xo: u32| (primary << 26) | (l << 21) | (xo << 1);
+        let barriers = [
+            ("sync", encode(31, 0, 598)),    // 0x7C00_04AC
+            ("lwsync", encode(31, 1, 598)),  // 0x7C20_04AC
+            ("ptesync", encode(31, 2, 598)), // 0x7C40_04AC
+            ("eieio", encode(31, 0, 854)),   // 0x7C00_06AC
+            ("isync", encode(19, 0, 150)),   // 0x4C00_012C
+        ];
+        for (name, raw) in barriers {
+            let mut ctx = PpcContext::new();
+            let mut mem = TestMem::new();
+            let before = (ctx.xer(), ctx.fpscr, ctx.gpr);
+            write_instr(&mut mem, 0x200, raw);
+            ctx.pc = 0x200;
+            let outcome = step(&mut ctx, &mut mem);
+            assert_eq!(outcome, StepResult::Continue, "{name} continues");
+            assert_eq!(ctx.pc, 0x204, "{name} advances PC (decoded, did not trap)");
+            assert_eq!(ctx.xer(), before.0, "{name} leaves XER");
+            assert_eq!(ctx.fpscr, before.1, "{name} leaves FPSCR");
+            assert_eq!(ctx.gpr, before.2, "{name} leaves GPRs");
+        }
+    }
+
     #[test]
     fn test_fadd() {
         let mut ctx = PpcContext::new();