diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs index 0e150e8..9101b54 100644 --- a/crates/xenia-cpu/src/interpreter.rs +++ b/crates/xenia-cpu/src/interpreter.rs @@ -117,17 +117,27 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - ctx.pc += 4; } PpcOpcode::addis => { - // Xbox 360 user mode is 32-bit ABI (MSR.SF=0), so addis must - // produce a value whose upper 32 bits don't pollute downstream - // 64-bit arithmetic. The PPC ISA in 64-bit mode sign-extends - // simm16 before the shift, producing 0xFFFFFFFF_xxxx0000 for - // negative simm16 (high bit set). When this value flows into - // a 64-bit subfc against a zero-extended lwz value, the unsigned - // 64-bit comparison yields wrong CA. Truncate to 32 bits to - // simulate 32-bit ABI behavior. - let ra_val = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; - let result = ra_val.wrapping_add((instr.simm16() as i64 as u64) << 16); - ctx.gpr[instr.rd()] = result as u32 as u64; + // Phase C+23: `addis` (and the `lis` simplified mnemonic) must + // sign-extend the shifted immediate to the full 64 bits before + // storing into the GPR, matching canary's HIR emitter + // (`InstrEmit_addis` in `ppc_emit_alu.cc`: `EXTS16(SI) << 16` + // as a 64-bit constant). Game code commonly builds a negative + // 32-bit value via `lis rN, 0xFFFB; ori rN, rN, 0x6C20` + // (yielding the i32 -300,000 for a 30ms `KeWait` timeout) and + // then stores it as a 64-bit doubleword via `std`. Without + // sign extension the high half on the wire was 0x00000000, + // turning the timeout into a positive ~4.3-billion-tick + // absolute deadline (~7 minutes) instead of a 30ms relative + // wait — surfacing as `wait.begin.timeout_ns=429466729600` + // on canary tid=12 → ours tid=7 idx=3 sister chain + // (cold-vs-cold C+22 baseline). Defensive 32-bit truncation + // for the arithmetic chain consumers (`subfcx`/`addex`/etc.) 
+ // is already implemented at each consumer site (see PPCBUG-002/ + // 007/etc.), so widening `addis` here does NOT regress them. + let ra_val = if instr.ra() == 0 { 0i64 } else { ctx.gpr[instr.ra()] as i64 }; + let shifted = (instr.simm16() as i64) << 16; + let result = ra_val.wrapping_add(shifted); + ctx.gpr[instr.rd()] = result as u64; ctx.pc += 4; } PpcOpcode::addic => { @@ -4934,6 +4944,92 @@ mod tests { assert_eq!(ctx.gpr[3], 0x10000); } + /// Phase C+23 regression: `addis rD, 0, neg_simm` (the `lis` form + /// with a negative immediate) must sign-extend the result to the + /// full 64 bits, matching canary's HIR emitter. Without this fix, + /// game code that builds a 32-bit negative value via + /// `lis r11, 0xFFFB; ori r11, r11, 0x6C20` and then stores the + /// result as a 64-bit doubleword via `std` would put 0x00000000 + /// in the high half instead of the correct 0xFFFFFFFF, turning a + /// 30 ms relative `KeWaitForSingleObject` timeout into a positive + /// absolute deadline ~7 minutes away. Anchored by the cold-vs-cold + /// sister chain canary tid=12 → ours tid=7 idx=3 divergence. + #[test] + fn addis_with_negative_simm_sign_extends_to_64_bits() { + let mut ctx = PpcContext::new(); + let mut mem = TestMem::new(); + // addis r11, 0, 0xFFFB (lis r11, 0xFFFB) + // op=15, rd=11, ra=0, simm=0xFFFB. + let raw = (15u32 << 26) | (11u32 << 21) | (0u32 << 16) | 0xFFFBu32; + write_instr(&mut mem, 0, raw); + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert_eq!( + ctx.gpr[11], 0xFFFFFFFF_FFFB0000u64, + "addis with negative simm must sign-extend to 64 bits" + ); + } + + /// Phase C+23 regression: the full `lis + ori + std` sequence that + /// builds the −300,000 timeout tick count used by Sylpheed for its + /// 30 ms `KeWait` calls must produce 0xFFFFFFFFFFFB6C20 on the wire, + /// not 0x00000000FFFB6C20. 
This is the proximate cause of the + /// `wait.begin.timeout_ns = 429466729600` divergence on canary tid=12 + /// → ours tid=7 idx=3 in the cold-vs-cold C+22 baseline. + #[test] + fn lis_ori_std_negative_timeout_writes_sign_extended_doubleword() { + let mut ctx = PpcContext::new(); + let mut mem = TestMem::new(); + // r1 = 0x100 (stack pointer surrogate). Storage slot at r1+8. + ctx.gpr[1] = 0x100; + // lis r11, 0xFFFB ; r11 = 0xFFFFFFFFFFFB0000 + let lis = (15u32 << 26) | (11u32 << 21) | (0u32 << 16) | 0xFFFBu32; + // ori r11, r11, 0x6C20 ; r11 = 0xFFFFFFFFFFFB6C20 + // op=24 (ori): D-form encoding | rs(11) | ra(11) | uimm. + let ori = (24u32 << 26) | (11u32 << 21) | (11u32 << 16) | 0x6C20u32; + // std r11, 8(r1) ; mem[0x108..0x110] = 0xFFFFFFFFFFFB6C20 + // op=62, DS-form, ds_field=8>>2=2, xo=0. + let std_op = (62u32 << 26) | (11u32 << 21) | (1u32 << 16) | (8u32 & 0xFFFCu32); + write_instr(&mut mem, 0, lis); + write_instr(&mut mem, 4, ori); + write_instr(&mut mem, 8, std_op); + ctx.pc = 0; + step(&mut ctx, &mut mem); // lis + assert_eq!(ctx.gpr[11], 0xFFFFFFFF_FFFB0000u64); + step(&mut ctx, &mut mem); // ori + assert_eq!(ctx.gpr[11], 0xFFFFFFFF_FFFB6C20u64); + step(&mut ctx, &mut mem); // std + let stored = mem.read_u64(0x108); + assert_eq!( + stored, 0xFFFFFFFF_FFFB6C20u64, + "std must persist all 64 bits of the sign-extended GPR" + ); + // Interpreting the stored doubleword as a 100ns NT TIMEOUT tick + // count: it must round-trip to −300,000 (30 ms relative wait), + // NOT to +4,294,667,296 (the C+22 broken value). + assert_eq!(stored as i64, -300_000i64); + assert_eq!((stored as i64).wrapping_mul(100), -30_000_000i64); + } + + /// Phase C+23 regression: ensure `addis` against a non-zero rA still + /// performs the canonical Add with 64-bit semantics. Used by + /// arithmetic chains that combine a sign-extended `lis` high half + /// with a subsequent `addi` low half. Equivalent to canary's HIR + /// `Add(LoadGPR(rA), const_i64(simm << 16))`. 
+ #[test] + fn addis_with_nonzero_ra_adds_in_64_bit() { + let mut ctx = PpcContext::new(); + let mut mem = TestMem::new(); + // r4 = 0x1234 already. addis r5, r4, 0xFFFE => r5 = r4 + (-2<<16) + // = 0x1234 + 0xFFFFFFFFFFFE0000 + ctx.gpr[4] = 0x1234; + let raw = (15u32 << 26) | (5u32 << 21) | (4u32 << 16) | 0xFFFEu32; + write_instr(&mut mem, 0, raw); + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert_eq!(ctx.gpr[5], 0xFFFFFFFF_FFFE1234u64); + } + #[test] fn test_lwz_stw() { let mut ctx = PpcContext::new();