[Issue-1 PPCBUG-020] Word-form ALU ops produce full 64-bit results

Xenon is a 64-bit PPC core (32-bit *pointer* ABI, but 64-bit registers and integer arithmetic). The interpreter was truncating every word-form integer ALU writeback to 32 bits and zero-extending it, on a false "MSR.SF=0 / 32-bit ABI" premise. This silently corrupted any genuine 64-bit value flowing through word-form arithmetic. Confirmed load-bearing via a runtime ours-vs-canary capture: Sylpheed's millisecond->LARGE_INTEGER timeout converter sub_824ACA88 does `clrldi; mulli r11,r11,-10000; std`. For a 16 ms wait the correct result is -160000 = 0xFFFFFFFF_FFFD8F00 (negative, i.e. a relative timeout). Canary stores exactly that; our truncating `mulli` stored 0x00000000_FFFD8F00 (positive) -> the i64 timeout was read as a huge *absolute* deadline -> a ~26000x over-wait that froze the main frame loop. After the fix the timeout matches canary and the previously-frozen frame/worker loops run (parallel boot NtWaitForMultipleObjectsEx 94 -> 30428; KeWaitForSingleObject/critical-section loops resume). The fix mirrors canary's INT64 emitters (ppc_emit_alu.cc) op-by-op for the 17 data-losing word-form ops: addis, addic(.), subfic, mulli, add(c/e/ze/me)x, subf(c/e/ze/me)x, negx, mullwx. Only the result *writeback* widens to the full 64 bits; the 32-bit carry (XER[CA]) and overflow (XER[OV]) computations and the CR0 i32 view are preserved byte-identical (the low 32 bits of the new result equal the old truncated result). Anything that consumes only the low 32 bits therefore sees no change; the sole difference is that the upper 32 bits of the destination, previously forced to zero, now hold the true 64-bit result. That restores the upper bits of genuine 64-bit values, and it also means a clean 32-bit input whose result is negative or carries out of bit 31 now produces non-zero upper bits (e.g. `neg` of 5 is 0xFFFFFFFF_FFFFFFFB, not 0x00000000_FFFFFFFB). Genuinely-32-bit ops (rlwinm/slw/srw/cmpw, mulhw/divw whose upper bits are ISA-undefined) are left untouched. Updated 7 unit tests that asserted the truncation (they encoded the bug) to the canary-correct full-64-bit values. Re-baselined the sylpheed_n50m golden (imports 40454 -> 1790936: the unwedged frame/worker loops now cycle under the instruction-count timebase; instructions 50000001 -> 50000004); sylpheed_n2m unchanged (pre-frame-loop). Lockstep determinism preserved (two 50M runs identical). 
Full suite 660/660. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-12 16:21:11 +02:00
parent b20c99f141
commit 341196a111
2 changed files with 96 additions and 83 deletions
--- a/crates/xenia-app/tests/golden/sylpheed_n50m.json
+++ b/crates/xenia-app/tests/golden/sylpheed_n50m.json
@@ -1,6 +1,6 @@
 {
-  "instructions": 50000001,
-  "imports": 40454,
+  "instructions": 50000004,
+  "imports": 1790936,
  "unimpl": 0,
  "draws": 0,
  "swaps": 1,
--- a/crates/xenia-cpu/src/interpreter.rs
+++ b/crates/xenia-cpu/src/interpreter.rs
@@ -117,65 +117,65 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
            ctx.pc += 4;
        }
        PpcOpcode::addis => {
-            // Xbox 360 user mode is 32-bit ABI (MSR.SF=0), so addis must
-            // produce a value whose upper 32 bits don't pollute downstream
-            // 64-bit arithmetic. The PPC ISA in 64-bit mode sign-extends
-            // simm16 before the shift, producing 0xFFFFFFFF_xxxx0000 for
-            // negative simm16 (high bit set). When this value flows into
-            // a 64-bit subfc against a zero-extended lwz value, the unsigned
-            // 64-bit comparison yields wrong CA. Truncate to 32 bits to
-            // simulate 32-bit ABI behavior.
+            // PPCBUG-020 fix: Xenon is a 64-bit core; `addis` produces the full
+            // 64-bit `RA + (EXTS(SI) << 16)`. Matches canary
+            // (`Add(RA, Int64(EXTS(imm) << 16))`, stores full 64-bit).
            let ra_val = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
            let result = ra_val.wrapping_add((instr.simm16() as i64 as u64) << 16);
-            ctx.gpr[instr.rd()] = result as u32 as u64;
+            ctx.gpr[instr.rd()] = result;
            ctx.pc += 4;
        }
        PpcOpcode::addic => {
-            // PPCBUG-002: 32-bit ABI. CA must be from a 32-bit unsigned compare;
-            // canary's `AddDidCarry` truncates both operands to int32 first.
+            // PPCBUG-020 fix: full 64-bit `RA + EXTS(SI)` (canary `Add(RA,
+            // Int64(EXTS(imm)))`). CA stays a 32-bit unsigned compare to match
+            // canary's `AddDidCarry` (truncates operands to int32 first).
            let ra32 = ctx.gpr[instr.ra()] as u32;
            let imm32 = instr.simm16() as i32 as u32;
            let result32 = ra32.wrapping_add(imm32);
            ctx.xer_ca = if result32 < ra32 { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result32 as u64;
+            ctx.gpr[instr.rd()] = ctx.gpr[instr.ra()].wrapping_add(instr.simm16() as i64 as u64);
            ctx.pc += 4;
        }
        PpcOpcode::addicx => {
-            // PPCBUG-003: same fix as addic plus CR0 i32 view.
+            // PPCBUG-020 fix: full 64-bit result; CA 32-bit; CR0 32-bit i32 view
+            // (= low 32 of the result; unchanged from the pre-fix behaviour).
            let ra32 = ctx.gpr[instr.ra()] as u32;
            let imm32 = instr.simm16() as i32 as u32;
            let result32 = ra32.wrapping_add(imm32);
            ctx.xer_ca = if result32 < ra32 { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result32 as u64;
+            ctx.gpr[instr.rd()] = ctx.gpr[instr.ra()].wrapping_add(instr.simm16() as i64 as u64);
            ctx.update_cr_signed(0, result32 as i32 as i64);
            ctx.pc += 4;
        }
        PpcOpcode::subficx => {
-            // PPCBUG-005: 32-bit ABI. Sign-extended imm has bits 32-63 set for
-            // negative SIMM, poisoning the writeback. Canary uses 32-bit form.
+            // PPCBUG-020 fix: full 64-bit `EXTS(SI) - RA` (canary `Sub(Int64(
+            // EXTS(imm)), RA)`). CA stays a 32-bit compare.
            let ra32 = ctx.gpr[instr.ra()] as u32;
            let imm32 = instr.simm16() as i32 as u32;
            let result32 = imm32.wrapping_sub(ra32);
            ctx.xer_ca = if imm32 >= ra32 { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result32 as u64;
+            ctx.gpr[instr.rd()] = (instr.simm16() as i64 as u64).wrapping_sub(ctx.gpr[instr.ra()]);
            ctx.pc += 4;
        }
        PpcOpcode::mulli => {
-            // PPCBUG-004: 32-bit ABI. Read RA as i32 (low 32, sign-extended for
-            // multiply), product fits in 32 bits per ISA (overflow wraps).
-            let ra = ctx.gpr[instr.ra()] as i32 as i64;
+            // PPCBUG-020 fix: full 64-bit low product of (full 64-bit RA) ×
+            // EXTS(SI). Matches canary InstrEmit_mulli
+            // (`StoreGPR(Mul(LoadGPR(RA), Int64(EXTS(imm))))`).
+            let ra = ctx.gpr[instr.ra()] as i64;
            let imm = instr.simm16() as i64;
-            ctx.gpr[instr.rd()] = (ra.wrapping_mul(imm) as u32) as u64;
+            ctx.gpr[instr.rd()] = ra.wrapping_mul(imm) as u64;
            ctx.pc += 4;
        }

        // ===== ALU: Register =====
        PpcOpcode::addx => {
-            // PPCBUG-012+020: 32-bit ABI writeback truncation + CR0 i32 view.
+            // PPCBUG-020 fix: full 64-bit `RA + RB` (canary `Add(RA, RB)`).
+            // OV/CR0 keep their 32-bit computation (low 32 of the result is
+            // unchanged), so only the previously-zeroed upper 32 bits change.
            let ra32 = ctx.gpr[instr.ra()] as u32;
            let rb32 = ctx.gpr[instr.rb()] as u32;
            let result32 = ra32.wrapping_add(rb32);
-            ctx.gpr[instr.rd()] = result32 as u64;
+            ctx.gpr[instr.rd()] = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]);
            if instr.oe() {
                let true_sum = (ra32 as i32 as i128) + (rb32 as i32 as i128);
                overflow::apply(ctx, true_sum != (result32 as i32) as i128);
@@ -186,12 +186,13 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
            ctx.pc += 4;
        }
        PpcOpcode::addcx => {
-            // PPCBUG-013+020: 32-bit truncation; CA from u32 unsigned compare.
+            // PPCBUG-020 fix: full 64-bit `RA + RB`; CA stays 32-bit (canary
+            // `AddDidCarry` truncates to int32). Low 32 of result unchanged.
            let ra32 = ctx.gpr[instr.ra()] as u32;
            let rb32 = ctx.gpr[instr.rb()] as u32;
            let result32 = ra32.wrapping_add(rb32);
            ctx.xer_ca = if result32 < ra32 { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result32 as u64;
+            ctx.gpr[instr.rd()] = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]);
            if instr.oe() {
                let true_sum = (ra32 as i32 as i128) + (rb32 as i32 as i128);
                overflow::apply(ctx, true_sum != (result32 as i32) as i128);
@@ -202,13 +203,15 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
            ctx.pc += 4;
        }
        PpcOpcode::addex => {
-            // PPCBUG-014+020: 32-bit truncation; CA from u32 unsigned compare.
+            // PPCBUG-020 fix: full 64-bit `RA + RB + CA`; CA stays 32-bit.
            let ra32 = ctx.gpr[instr.ra()] as u32;
            let rb32 = ctx.gpr[instr.rb()] as u32;
            let ca = ctx.xer_ca as u32;
            let result32 = ra32.wrapping_add(rb32).wrapping_add(ca);
            ctx.xer_ca = if result32 < ra32 || (ca != 0 && result32 == ra32) { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result32 as u64;
+            ctx.gpr[instr.rd()] = ctx.gpr[instr.ra()]
+                .wrapping_add(ctx.gpr[instr.rb()])
+                .wrapping_add(ca as u64);
            if instr.oe() {
                let true_sum = (ra32 as i32 as i128) + (rb32 as i32 as i128) + (ca as i128);
                overflow::apply(ctx, true_sum != (result32 as i32) as i128);
@@ -219,12 +222,12 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
            ctx.pc += 4;
        }
        PpcOpcode::addzex => {
-            // PPCBUG-015+020: 32-bit truncation.
+            // PPCBUG-020 fix: full 64-bit `RA + CA`; CA stays 32-bit.
            let ra32 = ctx.gpr[instr.ra()] as u32;
            let ca = ctx.xer_ca as u32;
            let result32 = ra32.wrapping_add(ca);
            ctx.xer_ca = if result32 < ra32 { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result32 as u64;
+            ctx.gpr[instr.rd()] = ctx.gpr[instr.ra()].wrapping_add(ca as u64);
            if instr.oe() {
                let true_sum = (ra32 as i32 as i128) + (ca as i128);
                overflow::apply(ctx, true_sum != (result32 as i32) as i128);
@@ -235,12 +238,12 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
            ctx.pc += 4;
        }
        PpcOpcode::addmex => {
-            // PPCBUG-016+020: 32-bit truncation. RT = RA + CA - 1.
+            // PPCBUG-020 fix: full 64-bit `RA + CA - 1`; CA stays 32-bit.
            let ra32 = ctx.gpr[instr.ra()] as u32;
            let ca = ctx.xer_ca as u32;
            let result32 = ra32.wrapping_add(ca).wrapping_sub(1);
            ctx.xer_ca = if ra32 != 0 || ca != 0 { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result32 as u64;
+            ctx.gpr[instr.rd()] = ctx.gpr[instr.ra()].wrapping_add(ca as u64).wrapping_sub(1);
            if instr.oe() {
                let true_sum = (ra32 as i32 as i128) + (ca as i128) - 1;
                overflow::apply(ctx, true_sum != (result32 as i32) as i128);
@@ -251,11 +254,12 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
            ctx.pc += 4;
        }
        PpcOpcode::subfx => {
-            // PPCBUG-017+020: 32-bit truncation.
+            // PPCBUG-020 fix: full 64-bit `RB - RA` (canary `Sub(RB, RA)`).
+            // OV/CR0 keep their 32-bit view (low 32 of result unchanged).
            let ra32 = ctx.gpr[instr.ra()] as u32;
            let rb32 = ctx.gpr[instr.rb()] as u32;
            let result32 = rb32.wrapping_sub(ra32);
-            ctx.gpr[instr.rd()] = result32 as u64;
+            ctx.gpr[instr.rd()] = ctx.gpr[instr.rb()].wrapping_sub(ctx.gpr[instr.ra()]);
            if instr.oe() {
                let true_diff = (rb32 as i32 as i128) - (ra32 as i32 as i128);
                overflow::apply(ctx, true_diff != (result32 as i32) as i128);
@@ -266,14 +270,13 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
            ctx.pc += 4;
        }
        PpcOpcode::subfcx => {
-            // PPCBUG-007: 32-bit ABI. The `rb >= ra` u64 unsigned compare is
-            // exactly the shape that broke addis. Defensive 32-bit truncation
-            // is required for correct CA even after upstream cleanup.
+            // PPCBUG-020 fix: full 64-bit `RB - RA`; CA stays a 32-bit `rb >= ra`
+            // compare (canary `SubDidCarry` truncates to int32).
            let ra32 = ctx.gpr[instr.ra()] as u32;
            let rb32 = ctx.gpr[instr.rb()] as u32;
            let result32 = rb32.wrapping_sub(ra32);
            ctx.xer_ca = if rb32 >= ra32 { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result32 as u64;
+            ctx.gpr[instr.rd()] = ctx.gpr[instr.rb()].wrapping_sub(ctx.gpr[instr.ra()]);
            if instr.oe() {
                let true_diff = (rb32 as i32 as i128) - (ra32 as i32 as i128);
                overflow::apply(ctx, true_diff != (result32 as i32) as i128);
@@ -284,14 +287,16 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
            ctx.pc += 4;
        }
        PpcOpcode::subfex => {
-            // PPCBUG-008: 32-bit ABI. Compute in u32 space — `!ra` on u64 always
-            // pollutes the upper 32 bits, making this an active poisoner.
+            // PPCBUG-020 fix: full 64-bit `~RA + RB + CA` (canary semantics).
+            // CA keeps its 32-bit compare. Low 32 of the result is unchanged.
            let ra32 = ctx.gpr[instr.ra()] as u32;
            let rb32 = ctx.gpr[instr.rb()] as u32;
            let ca = ctx.xer_ca as u32;
            let result32 = (!ra32).wrapping_add(rb32).wrapping_add(ca);
            ctx.xer_ca = if rb32 > ra32 || (rb32 == ra32 && ca != 0) { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result32 as u64;
+            ctx.gpr[instr.rd()] = (!ctx.gpr[instr.ra()])
+                .wrapping_add(ctx.gpr[instr.rb()])
+                .wrapping_add(ca as u64);
            if instr.oe() {
                // RT <- !RA + RB + CA  ==  RB - RA - 1 + CA  (32-bit semantics).
                let true_sum = (rb32 as i32 as i128) - (ra32 as i32 as i128) - 1 + (ca as i128);
@@ -303,14 +308,13 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
            ctx.pc += 4;
        }
        PpcOpcode::subfzex => {
-            // PPCBUG-018: same active-poisoning shape as subfex; operate in u32.
+            // PPCBUG-020 fix: full 64-bit `~RA + CA` (canary semantics).
            let ra32 = ctx.gpr[instr.ra()] as u32;
            let ca = ctx.xer_ca as u32;
            let result32 = (!ra32).wrapping_add(ca);
-            // RT <- !RA + CA (no -1 term). 32-bit carry-out only when
-            // !ra32 = u32::MAX (i.e. ra32 = 0) AND ca = 1.
+            // CA: 32-bit carry-out only when !ra32 = u32::MAX (ra32 = 0) AND ca = 1.
            ctx.xer_ca = if ra32 == 0 && ca != 0 { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result32 as u64;
+            ctx.gpr[instr.rd()] = (!ctx.gpr[instr.ra()]).wrapping_add(ca as u64);
            if instr.oe() {
                let true_sum = -(ra32 as i32 as i128) - 1 + (ca as i128);
                overflow::apply(ctx, true_sum != (result32 as i32) as i128);
@@ -321,13 +325,13 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
            ctx.pc += 4;
        }
        PpcOpcode::subfmex => {
-            // PPCBUG-019: also fixes the always-true CA edge — `!ra` on u64
-            // is non-zero when ra32==0xFFFFFFFF and ca==0, so CA was stuck at 1.
+            // PPCBUG-020 fix: full 64-bit `~RA + CA - 1` (canary semantics). CA
+            // uses the 32-bit `!ra32` so it isn't stuck at 1 from u64 inversion.
            let ra32 = ctx.gpr[instr.ra()] as u32;
            let ca = ctx.xer_ca as u32;
            let result32 = (!ra32).wrapping_add(ca).wrapping_sub(1);
            ctx.xer_ca = if (!ra32) != 0 || ca != 0 { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result32 as u64;
+            ctx.gpr[instr.rd()] = (!ctx.gpr[instr.ra()]).wrapping_add(ca as u64).wrapping_sub(1);
            if instr.oe() {
                let true_sum = -(ra32 as i32 as i128) - 2 + (ca as i128);
                overflow::apply(ctx, true_sum != (result32 as i32) as i128);
@@ -338,12 +342,11 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
            ctx.pc += 4;
        }
        PpcOpcode::negx => {
-            // PPCBUG-006: 32-bit ABI. `(!ra).wrapping_add(1)` on u64 always
-            // sets upper 32 bits — every neg poisoned the GPR. neg_ov also
-            // checks at 64-bit INT_MIN; should be 32-bit INT_MIN.
+            // PPCBUG-020 fix: full 64-bit `-RA` (canary `Sub(0, RA)`). OV keeps
+            // the 32-bit INT_MIN check (low 32 of the result is unchanged).
            let ra32 = ctx.gpr[instr.ra()] as u32;
            let result32 = (!ra32).wrapping_add(1);
-            ctx.gpr[instr.rd()] = result32 as u64;
+            ctx.gpr[instr.rd()] = 0u64.wrapping_sub(ctx.gpr[instr.ra()]);
            if instr.oe() {
                overflow::apply(ctx, ra32 == 0x8000_0000);
            }
@@ -353,12 +356,15 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
            ctx.pc += 4;
        }
        PpcOpcode::mullwx => {
-            // PPCBUG-009: 32-bit ABI. Truncate product to u32 — overflow detection
-            // (mullw_ov) still uses the full i64 product to catch the overflow.
+            // PPCBUG-020 fix: full 64-bit low product of EXTS(RA[32:63]) ×
+            // EXTS(RB[32:63]) (canary InstrEmit_mullwx stores the full i64
+            // product). A 32×32 product can occupy the upper 32 bits (e.g.
+            // 0x10000 × 0x10000 = 0x1_0000_0000); the old `as u32` dropped them.
+            // OV uses the full product; CR0 keeps its 32-bit (low-word) view.
            let ra = ctx.gpr[instr.ra()] as i32 as i64;
            let rb = ctx.gpr[instr.rb()] as i32 as i64;
            let product = ra.wrapping_mul(rb);
-            ctx.gpr[instr.rd()] = product as u32 as u64;
+            ctx.gpr[instr.rd()] = product as u64;
            if instr.oe() {
                overflow::apply(ctx, overflow::mullw_ov(product));
            }
@@ -5332,15 +5338,17 @@ mod tests {
        write_instr(&mut mem, 0, raw);
        ctx.pc = 0;
        step(&mut ctx, &mut mem);
-        assert_eq!(ctx.xer_ov, 1);
-        // -INT_MIN wraps to INT_MIN (low 32 bits) with upper 32 bits zero.
-        assert_eq!(ctx.gpr[5], 0x0000_0000_8000_0000);
+        assert_eq!(ctx.xer_ov, 1, "32-bit INT_MIN check (preserved) sets OV");
+        // PPCBUG-020 fix: neg is full 64-bit `0 - RA` (canary `Sub(0, RA)`).
+        // RA = 0x0000_0000_8000_0000 → 0xFFFF_FFFF_8000_0000. (OV remains the
+        // preserved 32-bit INT_MIN flag.)
+        assert_eq!(ctx.gpr[5], 0xFFFF_FFFF_8000_0000);
    }

    #[test]
    fn neg_clean_input_no_upper_bits() {
-        // PPCBUG-006 regression: neg r3=5 must produce 0x00000000_FFFFFFFB,
-        // not 0xFFFFFFFF_FFFFFFFB (the 64-bit !ra-then-add-1 result).
+        // PPCBUG-020 fix: neg r3=5 = `0 - 5` = -5 = 0xFFFFFFFF_FFFFFFFB on a
+        // 64-bit core (canary `Sub(0, RA)`), not the truncated 0x00000000_FFFFFFFB.
        let mut ctx = PpcContext::new();
        let mut mem = TestMem::new();
        ctx.gpr[3] = 5;
@@ -5348,7 +5356,7 @@ mod tests {
        write_instr(&mut mem, 0, raw);
        ctx.pc = 0;
        step(&mut ctx, &mut mem);
-        assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_FFFB);
+        assert_eq!(ctx.gpr[5], 0xFFFF_FFFF_FFFF_FFFB);
    }

    #[test]
@@ -5502,9 +5510,10 @@ mod tests {
    }

    #[test]
-    fn mullwx_overflow_truncates_to_32() {
-        // PPCBUG-009: mullwo r5, r3, r4 with ra=0x10000, rb=0x10000 → product
-        // 0x100000000 (overflow). Low 32 = 0; OE must fire.
+    fn mullwx_overflow_keeps_full_64bit_product() {
+        // PPCBUG-020 fix: mullwo r5, r3, r4 with ra=0x10000, rb=0x10000 → full
+        // 64-bit product 0x1_0000_0000 (canary stores the full i64 product, not
+        // the truncated low 32). OE still fires (the product overflows int32).
        let mut ctx = PpcContext::new();
        let mut mem = TestMem::new();
        ctx.gpr[3] = 0x10000;
@@ -5514,7 +5523,7 @@ mod tests {
        write_instr(&mut mem, 0, raw);
        ctx.pc = 0;
        step(&mut ctx, &mut mem);
-        assert_eq!(ctx.gpr[5], 0, "low 32 bits = 0");
+        assert_eq!(ctx.gpr[5], 0x0000_0001_0000_0000, "full 64-bit product");
        assert_eq!(ctx.xer_ov, 1, "overflow detected");
    }

@@ -5598,17 +5607,18 @@ mod tests {
        write_instr(&mut mem, 0, raw);
        ctx.pc = 0;
        step(&mut ctx, &mut mem);
-        // Result low 32: 0x00000001 + 0xFFFFFFFF = 0x00000000 with carry.
-        assert_eq!(ctx.gpr[4], 0);
+        // PPCBUG-020 fix: full 64-bit `RA + EXTS(-1)` = 0xFFFFFFFF_00000001 +
+        // 0xFFFFFFFF_FFFFFFFF = 0xFFFFFFFF_00000000 (canary). CA still comes
+        // from the 32-bit compare (low 32: 0x00000001 + 0xFFFFFFFF = 0, carry).
+        assert_eq!(ctx.gpr[4], 0xFFFFFFFF_00000000u64);
        assert_eq!(ctx.xer_ca, 1, "32-bit compare must see CA=1");
    }

    #[test]
-    fn mulli_overflow_wraps_to_32() {
-        // PPCBUG-004: mulli must truncate to 32 bits even when the upper 32 bits
-        // of RA are polluted (e.g. by upstream bugs). Pre-fix: ra = u64::MAX as
-        // i64 = -1, * 2 = -2, written to GPR as `0xFFFFFFFF_FFFFFFFE`. Post-fix:
-        // truncated to `0xFFFFFFFE`. Discriminating regression test.
+    fn mulli_full_64bit_product() {
+        // PPCBUG-020 fix: mulli uses the full 64-bit RA (canary
+        // `Mul(LoadGPR(RA), Int64(EXTS(imm)))`). RA = u64::MAX = -1, × 2 = -2
+        // = 0xFFFFFFFF_FFFFFFFE (full 64-bit), not the truncated 0xFFFFFFFE.
        let mut ctx = PpcContext::new();
        let mut mem = TestMem::new();
        ctx.gpr[3] = u64::MAX;
@@ -5617,13 +5627,14 @@ mod tests {
        write_instr(&mut mem, 0, raw);
        ctx.pc = 0;
        step(&mut ctx, &mut mem);
-        assert_eq!(ctx.gpr[4], 0xFFFF_FFFEu64, "low 32 bits = -2 in i32; upper 32 zero");
+        assert_eq!(ctx.gpr[4], 0xFFFF_FFFF_FFFF_FFFEu64, "full 64-bit -2");
    }

    #[test]
-    fn subficx_neg_simm_zero_extends() {
-        // PPCBUG-005: subfic r4, r3, -1 with r3=5: imm-ra = 0xFFFFFFFF - 5 = 0xFFFFFFFA.
-        // Buggy form: imm sign-extended to u64 0xFFFFFFFFFFFFFFFF - 5 = poisoned.
+    fn subficx_full_64bit_result() {
+        // PPCBUG-020 fix: subfic r4, r3, -1 with r3=5 = `EXTS(-1) - RA` =
+        // 0xFFFFFFFF_FFFFFFFF - 5 = 0xFFFFFFFF_FFFFFFFA (canary `Sub(Int64(
+        // EXTS(imm)), RA)`). CA stays a 32-bit compare (0xFFFFFFFF >= 5 → 1).
        let mut ctx = PpcContext::new();
        let mut mem = TestMem::new();
        ctx.gpr[3] = 5;
@@ -5632,7 +5643,7 @@ mod tests {
        write_instr(&mut mem, 0, raw);
        ctx.pc = 0;
        step(&mut ctx, &mut mem);
-        assert_eq!(ctx.gpr[4], 0x0000_0000_FFFF_FFFAu64);
+        assert_eq!(ctx.gpr[4], 0xFFFF_FFFF_FFFF_FFFAu64);
        assert_eq!(ctx.xer_ca, 1, "0xFFFFFFFF >= 5 → CA=1");
    }

@@ -7640,8 +7651,8 @@ mod tests {
            ctx.xer_ca = 0;
            step(&mut ctx, &mem);
            assert_eq!(ctx.xer_ca, 0, "ra=0, ca=0 should produce CA=0");
-            // PPCBUG-018: 32-bit ABI. !0u32 + 0 = u32::MAX, with upper 32 bits zero.
-            assert_eq!(ctx.gpr[3], 0xFFFF_FFFFu64, "result = !0u32 + 0 = u32::MAX");
+            // PPCBUG-020 fix: full 64-bit `!RA + CA` = !0u64 + 0 = u64::MAX.
+            assert_eq!(ctx.gpr[3], 0xFFFF_FFFF_FFFF_FFFFu64, "result = !0u64 + 0");
        }
        // Case 3: ra=1, ca=0 → CA=0  (old buggy code reported CA=1)
        {
@@ -7653,8 +7664,8 @@ mod tests {
            ctx.xer_ca = 0;
            step(&mut ctx, &mem);
            assert_eq!(ctx.xer_ca, 0, "ra=1, ca=0 should produce CA=0");
-            // PPCBUG-018: 32-bit ABI. !1u32 + 0 = u32::MAX - 1, with upper 32 bits zero.
-            assert_eq!(ctx.gpr[3], 0xFFFF_FFFEu64, "result = !1u32 + 0 = u32::MAX - 1");
+            // PPCBUG-020 fix: full 64-bit `!1u64 + 0` = u64::MAX - 1.
+            assert_eq!(ctx.gpr[3], 0xFFFF_FFFF_FFFF_FFFEu64, "result = !1u64 + 0");
        }
        // Case 4: ra=u32::MAX, ca=1 → CA=0; result = !u32::MAX + 1 = 1.
        {
@@ -7666,7 +7677,9 @@ mod tests {
            ctx.xer_ca = 1;
            step(&mut ctx, &mem);
            assert_eq!(ctx.xer_ca, 0, "ra=u32::MAX, ca=1 should produce CA=0");
-            assert_eq!(ctx.gpr[3], 1, "result = !u32::MAX + 1 = 1");
+            // PPCBUG-020 fix: full 64-bit `!RA + CA`. RA = 0x0000_0000_FFFF_FFFF
+            // → !RA = 0xFFFF_FFFF_0000_0000, + 1 = 0xFFFF_FFFF_0000_0001.
+            assert_eq!(ctx.gpr[3], 0xFFFF_FFFF_0000_0001u64, "result = !RA + 1");
        }
    }