fix(cpu): PPCBUG-001/002/003/004/005/007 4b immediate ALU truncation

Phase 4 batch 3: 6 PPCBUGs in the same-shape-as-addis (4b) sub-section. All share the pattern of computing on 64-bit values when the 32-bit ABI requires u32 arithmetic. - PPCBUG-001 addi: `li rT, -1` produced 0xFFFFFFFF_FFFFFFFF; now 0x00000000_FFFFFFFF. - PPCBUG-002 addic: writeback truncated + CA from u32 unsigned compare matching canary's `AddDidCarry`. - PPCBUG-003 addicx: same plus CR0 i32 view (regression vs. the frozen ppc-manual snapshot which had the correct form). - PPCBUG-004 mulli: 64-bit signed product now truncated to 32 bits. - PPCBUG-005 subficx: writeback + CA in u32 space; removes the bits-32-63 pollution from sign-extended negative SIMM. - PPCBUG-007 subfcx: defensive 32-bit truncation of CA compare. Same shape as the compare that broke addis (0x828F3F98 / 0x828F3F68 case). Tests: - addi_li_neg_one_zero_extends_upper (PPCBUG-001). - addic_carry_uses_32bit_compare (PPCBUG-002). - mulli_overflow_wraps_to_32 (PPCBUG-004). - subficx_neg_simm_zero_extends (PPCBUG-005). - subfcx_addis_incident_case (PPCBUG-007 — exact addis-incident case). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-02 11:41:49 +02:00
parent 145a7a4019
commit bf8208e88c
1 changed files with 131 additions and 27 deletions
--- a/crates/xenia-cpu/src/interpreter.rs
+++ b/crates/xenia-cpu/src/interpreter.rs
@@ -112,8 +112,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
    match instr.opcode {
        // ===== ALU: Immediate =====
        PpcOpcode::addi => {
            // PPCBUG-001: 32-bit ABI. `li rT, -1` (= addi rT, r0, -1) must produce
            // 0x00000000_FFFFFFFF, not 0xFFFFFFFF_FFFFFFFF (sign-extended simm16).
            let ra_val = if instr.ra() == 0 { 0 } else { ctx.gpr[instr.ra()] };
-            ctx.gpr[instr.rd()] = ra_val.wrapping_add(instr.simm16() as i64 as u64);
+            ctx.gpr[instr.rd()] = ra_val.wrapping_add(instr.simm16() as i64 as u64) as u32 as u64;
            ctx.pc += 4;
        }
        PpcOpcode::addis => {
@@ -131,35 +133,41 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
            ctx.pc += 4;
        }
        PpcOpcode::addic => {
-            let ra = ctx.gpr[instr.ra()];
+            // PPCBUG-002: 32-bit ABI. CA must be from a 32-bit unsigned compare;
-            let imm = instr.simm16() as i64 as u64;
+            // canary's `AddDidCarry` truncates both operands to int32 first.
-            let result = ra.wrapping_add(imm);
+            let ra32 = ctx.gpr[instr.ra()] as u32;
-            ctx.xer_ca = if result < ra { 1 } else { 0 };
+            let imm32 = instr.simm16() as i32 as u32;
-            ctx.gpr[instr.rd()] = result;
+            let result32 = ra32.wrapping_add(imm32);
            ctx.xer_ca = if result32 < ra32 { 1 } else { 0 };
            ctx.gpr[instr.rd()] = result32 as u64;
            ctx.pc += 4;
        }
        PpcOpcode::addicx => {
-            let ra = ctx.gpr[instr.ra()];
+            // PPCBUG-003: same fix as addic plus CR0 i32 view.
-            let imm = instr.simm16() as i64 as u64;
+            let ra32 = ctx.gpr[instr.ra()] as u32;
-            let result = ra.wrapping_add(imm);
+            let imm32 = instr.simm16() as i32 as u32;
-            ctx.xer_ca = if result < ra { 1 } else { 0 };
+            let result32 = ra32.wrapping_add(imm32);
-            ctx.gpr[instr.rd()] = result;
+            ctx.xer_ca = if result32 < ra32 { 1 } else { 0 };
-            // Update CR0
+            ctx.gpr[instr.rd()] = result32 as u64;
-            ctx.update_cr_signed(0, result as i64);
+            ctx.update_cr_signed(0, result32 as i32 as i64);
            ctx.pc += 4;
        }
        PpcOpcode::subficx => {
-            let ra = ctx.gpr[instr.ra()];
+            // PPCBUG-005: 32-bit ABI. Sign-extended imm has bits 32-63 set for
-            let imm = instr.simm16() as i64 as u64;
+            // negative SIMM, poisoning the writeback. Canary uses 32-bit form.
-            let result = imm.wrapping_sub(ra);
+            let ra32 = ctx.gpr[instr.ra()] as u32;
-            ctx.xer_ca = if imm >= ra { 1 } else { 0 };
+            let imm32 = instr.simm16() as i32 as u32;
-            ctx.gpr[instr.rd()] = result;
+            let result32 = imm32.wrapping_sub(ra32);
            ctx.xer_ca = if imm32 >= ra32 { 1 } else { 0 };
            ctx.gpr[instr.rd()] = result32 as u64;
            ctx.pc += 4;
        }
        PpcOpcode::mulli => {
-            let ra = ctx.gpr[instr.ra()] as i64;
+            // PPCBUG-004: 32-bit ABI. Read RA as i32 (low 32, sign-extended for
            // multiply), product fits in 32 bits per ISA (overflow wraps).
            let ra = ctx.gpr[instr.ra()] as i32 as i64;
            let imm = instr.simm16() as i64;
-            ctx.gpr[instr.rd()] = ra.wrapping_mul(imm) as u64;
+            ctx.gpr[instr.rd()] = (ra.wrapping_mul(imm) as u32) as u64;
            ctx.pc += 4;
        }
@@ -252,16 +260,20 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
            ctx.pc += 4;
        }
        PpcOpcode::subfcx => {
-            let ra = ctx.gpr[instr.ra()];
+            // PPCBUG-007: 32-bit ABI. The `rb >= ra` u64 unsigned compare is
-            let rb = ctx.gpr[instr.rb()];
+            // exactly the shape that broke addis. Defensive 32-bit truncation
-            let result = rb.wrapping_sub(ra);
+            // is required for correct CA even after upstream cleanup.
-            ctx.xer_ca = if rb >= ra { 1 } else { 0 };
+            let ra32 = ctx.gpr[instr.ra()] as u32;
-            ctx.gpr[instr.rd()] = result;
+            let rb32 = ctx.gpr[instr.rb()] as u32;
            let result32 = rb32.wrapping_sub(ra32);
            ctx.xer_ca = if rb32 >= ra32 { 1 } else { 0 };
            ctx.gpr[instr.rd()] = result32 as u64;
            if instr.oe() {
-                overflow::apply(ctx, overflow::sub_ov_64(ra, rb, result));
+                let true_diff = (rb32 as i32 as i128) - (ra32 as i32 as i128);
                overflow::apply(ctx, overflow::sum_overflow_64(true_diff, result32 as u64));
            }
            if instr.rc_bit() {
-                ctx.update_cr_signed(0, result as i64);
+                ctx.update_cr_signed(0, result32 as i32 as i64);
            }
            ctx.pc += 4;
        }
@@ -5186,6 +5198,98 @@ mod tests {
        assert_eq!(ctx.xer_ca, 1, "rb>=ra → CA=1 (10 > 5)");
    }
    #[test]
    fn addi_li_neg_one_zero_extends_upper() {
        // PPCBUG-001: `li r3, -1` (= addi r3, r0, -1) must produce
        // 0x00000000_FFFFFFFF, not 0xFFFFFFFF_FFFFFFFF.
        let mut ctx = PpcContext::new();
        let mut mem = TestMem::new();
        // addi r3, r0, -1: opcode 14, simm16 = 0xFFFF
        let raw = (14u32 << 26) | (3 << 21) | (0 << 16) | 0xFFFF;
        write_instr(&mut mem, 0, raw);
        ctx.pc = 0;
        step(&mut ctx, &mut mem);
        assert_eq!(ctx.gpr[3], 0x0000_0000_FFFF_FFFFu64);
    }
    #[test]
    fn addic_carry_uses_32bit_compare() {
        // PPCBUG-002: addic ra=0xFFFFFFFF_00000001, simm=-1 (0xFFFF).
        // 32-bit: 0x00000001 + 0xFFFFFFFF = 0x00000000 with CA=1.
        // 64-bit (buggy): result < ra → since 64-bit ra has high bits set,
        // the buggy form would compare against the polluted u64 and could
        // give wrong CA. Truncated form ignores upper 32 bits.
        let mut ctx = PpcContext::new();
        let mut mem = TestMem::new();
        ctx.gpr[3] = 0xFFFFFFFF_00000001u64;
        // addic r4, r3, -1: opcode 12
        let raw = (12u32 << 26) | (4 << 21) | (3 << 16) | 0xFFFF;
        write_instr(&mut mem, 0, raw);
        ctx.pc = 0;
        step(&mut ctx, &mut mem);
        // Result low 32: 0x00000001 + 0xFFFFFFFF = 0x00000000 with carry.
        assert_eq!(ctx.gpr[4], 0);
        assert_eq!(ctx.xer_ca, 1, "32-bit compare must see CA=1");
    }
    #[test]
    fn mulli_overflow_wraps_to_32() {
        // PPCBUG-004: mulli result must be truncated to 32 bits.
        // 0x10000 * 0x10000 = 0x1_00000000 — low 32 bits are 0.
        let mut ctx = PpcContext::new();
        let mut mem = TestMem::new();
        ctx.gpr[3] = 0x10000;
        // mulli r4, r3, 0x4000 (4 * 0x10000 = 0x40000, no overflow case for sanity)
        // Better case: 0x10000 * 0x4000 = 0x4000_0000 — fits in i32.
        // For overflow: ra=0x80000000 (i32::MIN), imm=2 → 0xFFFFFFFF_00000000, low32=0
        ctx.gpr[3] = 0x80000000u64;
        // mulli r4, r3, 2: opcode 7
        let raw = (7u32 << 26) | (4 << 21) | (3 << 16) | 2;
        write_instr(&mut mem, 0, raw);
        ctx.pc = 0;
        step(&mut ctx, &mut mem);
        // i32::MIN * 2 = 0xFFFFFFFF_00000000 in i64 view; low 32 = 0.
        assert_eq!(ctx.gpr[4], 0);
    }
    #[test]
    fn subficx_neg_simm_zero_extends() {
        // PPCBUG-005: subfic r4, r3, -1 with r3=5: imm-ra = 0xFFFFFFFF - 5 = 0xFFFFFFFA.
        // Buggy form: imm sign-extended to u64 0xFFFFFFFFFFFFFFFF - 5 = poisoned.
        let mut ctx = PpcContext::new();
        let mut mem = TestMem::new();
        ctx.gpr[3] = 5;
        // subfic r4, r3, -1: opcode 8, simm = 0xFFFF
        let raw = (8u32 << 26) | (4 << 21) | (3 << 16) | 0xFFFF;
        write_instr(&mut mem, 0, raw);
        ctx.pc = 0;
        step(&mut ctx, &mut mem);
        assert_eq!(ctx.gpr[4], 0x0000_0000_FFFF_FFFAu64);
        assert_eq!(ctx.xer_ca, 1, "0xFFFFFFFF >= 5 → CA=1");
    }
    #[test]
    fn subfcx_addis_incident_case() {
        // PPCBUG-007: regression for the exact case that revealed the addis bug.
        // After P1's addis fix this works coincidentally; P4 batch 3 makes
        // subfcx itself robust to 64-bit GPR pollution.
        let mut ctx = PpcContext::new();
        let mut mem = TestMem::new();
        // ra polluted in upper 32 bits, low 32 = 0x828F3F98
        ctx.gpr[3] = 0xFFFF_FFFF_828F_3F98u64;
        // rb clean low 32 = 0x828F3F68
        ctx.gpr[4] = 0x0000_0000_828F_3F68u64;
        // subfcx r5, r3, r4  (XO=8): result = rb - ra = 0xFFFFFFD0 (low 32)
        let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (8 << 1);
        write_instr(&mut mem, 0, raw);
        ctx.pc = 0;
        step(&mut ctx, &mut mem);
        // 32-bit unsigned: 0x828F3F68 < 0x828F3F98 → CA=0
        assert_eq!(ctx.xer_ca, 0, "32-bit unsigned: rb < ra → CA=0");
        // result = 0x828F3F68 - 0x828F3F98 = 0xFFFFFFD0 (low 32, upper 32 zero)
        assert_eq!(ctx.gpr[5], 0xFFFF_FFD0u64);
    }
    #[test]
    fn extsbx_negative_byte_zero_extends_upper() {
        // PPCBUG-034+036 coupled: extsb of 0x80 (negative byte) must produce