Files
xenia-rs/audit-runs/phase-c23-keWait-timeout-encoding/fix.diff
MechaCat02 ef93a4fa14 handoff: VSync/event-wedge fixes + iterate 2.A–2.BC research notes
Source changes (dormant parity infra, retained from iterate 2.AI/2.AO):
- xenia-kernel/exports.rs: nt_create_event manual_reset polarity +
  related event wiring
- xenia-gpu/mmio_region.rs: D1MODE_VBLANK_VLINE_STATUS hardcode parity

Also lands the audit-runs/ analysis notes (.md/.txt/.json digests) for the
iterate 2.x VSync/0x10e8/0x1004 wedge investigation. Raw trace dumps
(.jsonl/.gz/.csv/.stdout) and agent worktrees (.claude/) are gitignored as
regenerable local artifacts — see memory + HANDOFF for the running findings.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-05 07:19:08 +02:00

137 lines
7.1 KiB
Diff
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs
index 0e150e8..9101b54 100644
--- a/crates/xenia-cpu/src/interpreter.rs
+++ b/crates/xenia-cpu/src/interpreter.rs
@@ -117,17 +117,27 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
ctx.pc += 4;
}
PpcOpcode::addis => {
- // Xbox 360 user mode is 32-bit ABI (MSR.SF=0), so addis must
- // produce a value whose upper 32 bits don't pollute downstream
- // 64-bit arithmetic. The PPC ISA in 64-bit mode sign-extends
- // simm16 before the shift, producing 0xFFFFFFFF_xxxx0000 for
- // negative simm16 (high bit set). When this value flows into
- // a 64-bit subfc against a zero-extended lwz value, the unsigned
- // 64-bit comparison yields wrong CA. Truncate to 32 bits to
- // simulate 32-bit ABI behavior.
- let ra_val = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
- let result = ra_val.wrapping_add((instr.simm16() as i64 as u64) << 16);
- ctx.gpr[instr.rd()] = result as u32 as u64;
+ // Phase C+23: `addis` (and the `lis` simplified mnemonic) must
+ // sign-extend the shifted immediate to the full 64 bits before
+ // storing into the GPR, matching canary's HIR emitter
+ // (`InstrEmit_addis` in `ppc_emit_alu.cc`: `EXTS16(SI) << 16`
+ // as a 64-bit constant). Game code commonly builds a negative
+ // 32-bit value via `lis rN, 0xFFFB; ori rN, rN, 0x6C20`
+ // (yielding the i32 -300,000 for a 30ms `KeWait` timeout) and
+ // then stores it as a 64-bit doubleword via `std`. Without
+ // sign extension the high half on the wire was 0x00000000,
+ // turning the timeout into a positive ~4.3-billion-tick
+ // absolute deadline (~7 minutes) instead of a 30ms relative
+ // wait — surfacing as `wait.begin.timeout_ns=429466729600`
+ // on canary tid=12 → ours tid=7 idx=3 sister chain
+ // (cold-vs-cold C+22 baseline). Defensive 32-bit truncation
+ // for the arithmetic chain consumers (`subfcx`/`addex`/etc.)
+ // is already implemented at each consumer site (see PPCBUG-002/
+ // 007/etc.), so widening `addis` here does NOT regress them.
+ let ra_val = if instr.ra() == 0 { 0i64 } else { ctx.gpr[instr.ra()] as i64 };
+ let shifted = (instr.simm16() as i64) << 16;
+ let result = ra_val.wrapping_add(shifted);
+ ctx.gpr[instr.rd()] = result as u64;
ctx.pc += 4;
}
PpcOpcode::addic => {
@@ -4934,6 +4944,92 @@ mod tests {
assert_eq!(ctx.gpr[3], 0x10000);
}
+ /// Phase C+23 regression: `addis rD, 0, neg_simm` (the `lis` form
+ /// with a negative immediate) must sign-extend the result to the
+ /// full 64 bits, matching canary's HIR emitter. Without this fix,
+ /// game code that builds a 32-bit negative value via
+ /// `lis r11, 0xFFFB; ori r11, r11, 0x6C20` and then stores the
+ /// result as a 64-bit doubleword via `std` would put 0x00000000
+ /// in the high half instead of the correct 0xFFFFFFFF, turning a
+ /// 30 ms relative `KeWaitForSingleObject` timeout into a positive
+ /// absolute deadline ~7 minutes away. Anchored by the cold-vs-cold
+ /// sister chain canary tid=12 → ours tid=7 idx=3 divergence.
+ #[test]
+ fn addis_with_negative_simm_sign_extends_to_64_bits() {
+ let mut ctx = PpcContext::new();
+ let mut mem = TestMem::new();
+ // addis r11, r0, 0xFFFB (lis r11, 0xFFFB)
+ // op=15, rd=11, ra=0, simm=0xFFFB.
+ let raw = (15u32 << 26) | (11u32 << 21) | (0u32 << 16) | 0xFFFBu32;
+ write_instr(&mut mem, 0, raw);
+ ctx.pc = 0;
+ step(&mut ctx, &mut mem);
+ assert_eq!(
+ ctx.gpr[11], 0xFFFFFFFF_FFFB0000u64,
+ "addis with negative simm must sign-extend to 64 bits"
+ );
+ }
+
+ /// Phase C+23 regression: the full `lis + ori + std` sequence that
+ /// builds the -300,000 timeout tick count used by Sylpheed for its
+ /// 30 ms `KeWait` calls must produce 0xFFFFFFFFFFFB6C20 on the wire,
+ /// not 0x00000000FFFB6C20. This is the proximate cause of the
+ /// `wait.begin.timeout_ns = 429466729600` divergence on canary tid=12
+ /// → ours tid=7 idx=3 in the cold-vs-cold C+22 baseline.
+ #[test]
+ fn lis_ori_std_negative_timeout_writes_sign_extended_doubleword() {
+ let mut ctx = PpcContext::new();
+ let mut mem = TestMem::new();
+ // r1 = 0x100 (stack pointer surrogate). Storage slot at r1+8.
+ ctx.gpr[1] = 0x100;
+ // lis r11, 0xFFFB ; r11 = 0xFFFFFFFFFFFB0000
+ let lis = (15u32 << 26) | (11u32 << 21) | (0u32 << 16) | 0xFFFBu32;
+ // ori r11, r11, 0x6C20 ; r11 = 0xFFFFFFFFFFFB6C20
+ // op=24 (ori): D-form encoding | rs(11) | ra(11) | uimm.
+ let ori = (24u32 << 26) | (11u32 << 21) | (11u32 << 16) | 0x6C20u32;
+ // std r11, 8(r1) ; mem[0x108..0x110] = 0xFFFFFFFFFFFB6C20
+ // op=62, DS-form, ds_field=8>>2=2, xo=0.
+ let std_op = (62u32 << 26) | (11u32 << 21) | (1u32 << 16) | (8u32 & 0xFFFCu32);
+ write_instr(&mut mem, 0, lis);
+ write_instr(&mut mem, 4, ori);
+ write_instr(&mut mem, 8, std_op);
+ ctx.pc = 0;
+ step(&mut ctx, &mut mem); // lis
+ assert_eq!(ctx.gpr[11], 0xFFFFFFFF_FFFB0000u64);
+ step(&mut ctx, &mut mem); // ori
+ assert_eq!(ctx.gpr[11], 0xFFFFFFFF_FFFB6C20u64);
+ step(&mut ctx, &mut mem); // std
+ let stored = mem.read_u64(0x108);
+ assert_eq!(
+ stored, 0xFFFFFFFF_FFFB6C20u64,
+ "std must persist all 64 bits of the sign-extended GPR"
+ );
+ // Interpreting the stored doubleword as a 100ns NT TIMEOUT tick
+ // count: it must round-trip to -300,000 (30 ms relative wait),
+ // NOT to +4,294,667,296 (the C+22 broken value).
+ assert_eq!(stored as i64, -300_000i64);
+ assert_eq!((stored as i64).wrapping_mul(100), -30_000_000i64);
+ }
+
+ /// Phase C+23 regression: ensure `addis` against a non-zero rA still
+ /// performs the canonical Add with 64-bit semantics. Used by
+ /// arithmetic chains that combine a sign-extended `lis` high half
+ /// with a subsequent `addi` low half. Equivalent to canary's HIR
+ /// `Add(LoadGPR(rA), const_i64(simm << 16))`.
+ #[test]
+ fn addis_with_nonzero_ra_adds_in_64_bit() {
+ let mut ctx = PpcContext::new();
+ let mut mem = TestMem::new();
+ // r4 = 0x1234 already. addis r5, r4, 0xFFFE => r5 = r4 + (-2<<16)
+ // = 0x1234 + 0xFFFFFFFFFFFE0000
+ ctx.gpr[4] = 0x1234;
+ let raw = (15u32 << 26) | (5u32 << 21) | (4u32 << 16) | 0xFFFEu32;
+ write_instr(&mut mem, 0, raw);
+ ctx.pc = 0;
+ step(&mut ctx, &mut mem);
+ assert_eq!(ctx.gpr[5], 0xFFFFFFFF_FFFE1234u64);
+ }
+
#[test]
fn test_lwz_stw() {
let mut ctx = PpcContext::new();