From e18a0a40b8a925967f42cbdead54e2a9f484ba95 Mon Sep 17 00:00:00 2001
From: MechaCat02 <fabian@diekaulbachs.de>
Date: Sat, 2 May 2026 11:35:05 +0200
Subject: [PATCH 1/7] fix(cpu): PPCBUG-006/008/018/019/028/029/030/031/033 4a
 active poisoning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 4 batch 1: 9 PPCBUGs in the active-poisoning sub-section. All
follow the pattern `!val` on u64, which unconditionally flips the upper
32 bits and poisons the GPR even with clean inputs — every execution
corrupts the high 32 bits regardless of upstream state.

Sub/neg family:
- PPCBUG-006 negx: `(!ra).wrapping_add(1)` on u64 + neg_ov_64 checks
  64-bit INT_MIN. Fix: do arithmetic in u32; OE checks ra32 == 0x80000000.
- PPCBUG-008 subfex: same shape as above plus 64-bit unsigned CA compare.
  Fix: cast all operands to u32, compute, write `as u64`.
- PPCBUG-018 subfzex: `!ra` on u64. Fix: u32 arithmetic.
- PPCBUG-019 subfmex: `!ra` on u64 + always-true CA edge (`!ra != 0`
  was always true for clean ra<=0xFFFFFFFF because high bits of !u64
  are non-zero). Fix: u32 arithmetic; CA predicate now correct.

Logical NOT family:
- PPCBUG-028 orcx: rs | !rb on u64 → high-bit poison.
- PPCBUG-029 norx: !(rs|rb) — the `not` simplified mnemonic. Hot path,
  every `not` corrupted GPR upper 32 bits.
- PPCBUG-030 nandx: !(rs&rb).
- PPCBUG-031 eqvx: !(rs^rb). The common `eqv rA,rA,rA` set-to-all-ones
  idiom now produces 0x00000000_FFFFFFFF instead of 0xFFFFFFFF_FFFFFFFF.
- PPCBUG-033 andcx: rs & !rb.

CR0 update at every Rc=1 path now uses `as u32 as i32 as i64` so a result
with bit 31 set gets classified as negative under the 32-bit ABI. The old
`as i64` view saw the poisoned upper bits; the new zero-extended value,
viewed directly as i64, would never be negative unless we cast through
i32. This pre-emptively addresses PPCBUG-020 for these specific opcodes;
the catch-all sweep in batch 6 covers the remaining sites.

Tests:
- nego_sets_ov_only_on_int_min: updated from i64::MIN → 0x80000000 (32-bit).
- test_subfze_carry_only_when_ra_zero_and_ca_one: result expectations
  updated from u64::MAX → 0xFFFFFFFF (low 32 bits, upper 32 zero).
- New: neg_clean_input_no_upper_bits (PPCBUG-006 regression).
- New: norx_not_simplified_keeps_upper_bits_clean (PPCBUG-029 regression).
- New: eqvx_self_self_self_sets_low32_to_all_ones (PPCBUG-031 regression).
- New: andcx_bit_clear_keeps_upper_clean (PPCBUG-033 regression).
- New: subfex_clean_inputs_no_upper_bits (PPCBUG-008 regression).
- New: subfmex_ra_max_ca_zero_clears_ca (PPCBUG-019 always-true CA fix).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 crates/xenia-cpu/src/interpreter.rs | 232 +++++++++++++++++++++-------
 1 file changed, 176 insertions(+), 56 deletions(-)

diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs
index 19fa865..2abc218 100644
--- a/crates/xenia-cpu/src/interpreter.rs
+++ b/crates/xenia-cpu/src/interpreter.rs
@@ -266,65 +266,71 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.pc += 4;
         }
         PpcOpcode::subfex => {
-            let ra = ctx.gpr[instr.ra()];
-            let rb = ctx.gpr[instr.rb()];
-            let ca = ctx.xer_ca as u64;
-            let result = (!ra).wrapping_add(rb).wrapping_add(ca);
-            ctx.xer_ca = if rb > ra || (rb == ra && ca != 0) { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result;
+            // PPCBUG-008: 32-bit ABI. Compute in u32 space — `!ra` on u64 always
+            // pollutes the upper 32 bits, making this an active poisoner.
+            let ra32 = ctx.gpr[instr.ra()] as u32;
+            let rb32 = ctx.gpr[instr.rb()] as u32;
+            let ca = ctx.xer_ca as u32;
+            let result32 = (!ra32).wrapping_add(rb32).wrapping_add(ca);
+            ctx.xer_ca = if rb32 > ra32 || (rb32 == ra32 && ca != 0) { 1 } else { 0 };
+            ctx.gpr[instr.rd()] = result32 as u64;
             if instr.oe() {
-                // RT <- !RA + RB + CA  ==  RB - RA - 1 + CA
-                let true_sum = (rb as i64 as i128) - (ra as i64 as i128) - 1 + (ca as i128);
-                overflow::apply(ctx, overflow::sum_overflow_64(true_sum, result));
+                // RT <- !RA + RB + CA  ==  RB - RA - 1 + CA  (32-bit semantics).
+                let true_sum = (rb32 as i32 as i128) - (ra32 as i32 as i128) - 1 + (ca as i128);
+                overflow::apply(ctx, overflow::sum_overflow_64(true_sum, result32 as u64));
             }
             if instr.rc_bit() {
-                ctx.update_cr_signed(0, result as i64);
+                ctx.update_cr_signed(0, result32 as i32 as i64);
             }
             ctx.pc += 4;
         }
         PpcOpcode::subfzex => {
-            let ra = ctx.gpr[instr.ra()];
-            let ca = ctx.xer_ca as u64;
-            let result = (!ra).wrapping_add(ca);
-            // RT <- !RA + CA (no -1 term). 64-bit carry-out only when
-            // !RA = u64::MAX (i.e. RA = 0) AND CA = 1.
-            ctx.xer_ca = if ra == 0 && ca != 0 { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result;
+            // PPCBUG-018: same active-poisoning shape as subfex; operate in u32.
+            let ra32 = ctx.gpr[instr.ra()] as u32;
+            let ca = ctx.xer_ca as u32;
+            let result32 = (!ra32).wrapping_add(ca);
+            // RT <- !RA + CA (no -1 term). 32-bit carry-out only when
+            // !ra32 = u32::MAX (i.e. ra32 = 0) AND ca = 1.
+            ctx.xer_ca = if ra32 == 0 && ca != 0 { 1 } else { 0 };
+            ctx.gpr[instr.rd()] = result32 as u64;
             if instr.oe() {
-                // RT <- !RA + CA  ==  -RA - 1 + CA
-                let true_sum = -(ra as i64 as i128) - 1 + (ca as i128);
-                overflow::apply(ctx, overflow::sum_overflow_64(true_sum, result));
+                let true_sum = -(ra32 as i32 as i128) - 1 + (ca as i128);
+                overflow::apply(ctx, overflow::sum_overflow_64(true_sum, result32 as u64));
             }
             if instr.rc_bit() {
-                ctx.update_cr_signed(0, result as i64);
+                ctx.update_cr_signed(0, result32 as i32 as i64);
             }
             ctx.pc += 4;
         }
         PpcOpcode::subfmex => {
-            let ra = ctx.gpr[instr.ra()];
-            let ca = ctx.xer_ca as u64;
-            let result = (!ra).wrapping_add(ca).wrapping_sub(1);
-            ctx.xer_ca = if (!ra) != 0 || ca != 0 { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result;
+            // PPCBUG-019: also fixes the always-true CA edge — `!ra` on u64
+            // is non-zero when ra32==0xFFFFFFFF and ca==0, so CA was stuck at 1.
+            let ra32 = ctx.gpr[instr.ra()] as u32;
+            let ca = ctx.xer_ca as u32;
+            let result32 = (!ra32).wrapping_add(ca).wrapping_sub(1);
+            ctx.xer_ca = if (!ra32) != 0 || ca != 0 { 1 } else { 0 };
+            ctx.gpr[instr.rd()] = result32 as u64;
             if instr.oe() {
-                // RT <- !RA + CA + (-1)  ==  -RA - 2 + CA
-                let true_sum = -(ra as i64 as i128) - 2 + (ca as i128);
-                overflow::apply(ctx, overflow::sum_overflow_64(true_sum, result));
+                let true_sum = -(ra32 as i32 as i128) - 2 + (ca as i128);
+                overflow::apply(ctx, overflow::sum_overflow_64(true_sum, result32 as u64));
             }
             if instr.rc_bit() {
-                ctx.update_cr_signed(0, result as i64);
+                ctx.update_cr_signed(0, result32 as i32 as i64);
             }
             ctx.pc += 4;
         }
         PpcOpcode::negx => {
-            let ra = ctx.gpr[instr.ra()];
-            let result = (!ra).wrapping_add(1);
-            ctx.gpr[instr.rd()] = result;
+            // PPCBUG-006: 32-bit ABI. `(!ra).wrapping_add(1)` on u64 always
+            // sets upper 32 bits — every neg poisoned the GPR. neg_ov also
+            // checks at 64-bit INT_MIN; should be 32-bit INT_MIN.
+            let ra32 = ctx.gpr[instr.ra()] as u32;
+            let result32 = (!ra32).wrapping_add(1);
+            ctx.gpr[instr.rd()] = result32 as u64;
             if instr.oe() {
-                overflow::apply(ctx, overflow::neg_ov_64(ra));
+                overflow::apply(ctx, ra32 == 0x8000_0000);
             }
             if instr.rc_bit() {
-                ctx.update_cr_signed(0, result as i64);
+                ctx.update_cr_signed(0, result32 as i32 as i64);
             }
             ctx.pc += 4;
         }
@@ -497,8 +503,11 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.pc += 4;
         }
         PpcOpcode::andcx => {
-            ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] & !ctx.gpr[instr.rb()];
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            // PPCBUG-033: !rb on u64 flips upper 32 bits — active poisoning.
+            let rs32 = ctx.gpr[instr.rs()] as u32;
+            let rb32 = ctx.gpr[instr.rb()] as u32;
+            ctx.gpr[instr.ra()] = (rs32 & !rb32) as u64;
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
         PpcOpcode::orx => {
@@ -507,8 +516,11 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.pc += 4;
         }
         PpcOpcode::orcx => {
-            ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] | !ctx.gpr[instr.rb()];
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            // PPCBUG-028: same shape as andcx — operate in u32.
+            let rs32 = ctx.gpr[instr.rs()] as u32;
+            let rb32 = ctx.gpr[instr.rb()] as u32;
+            ctx.gpr[instr.ra()] = (rs32 | !rb32) as u64;
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
         PpcOpcode::xorx => {
@@ -517,18 +529,28 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.pc += 4;
         }
         PpcOpcode::norx => {
-            ctx.gpr[instr.ra()] = !(ctx.gpr[instr.rs()] | ctx.gpr[instr.rb()]);
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            // PPCBUG-029: `not` simplified mnemonic — every `not` poisoned the GPR.
+            let rs32 = ctx.gpr[instr.rs()] as u32;
+            let rb32 = ctx.gpr[instr.rb()] as u32;
+            ctx.gpr[instr.ra()] = (!(rs32 | rb32)) as u64;
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
         PpcOpcode::nandx => {
-            ctx.gpr[instr.ra()] = !(ctx.gpr[instr.rs()] & ctx.gpr[instr.rb()]);
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            // PPCBUG-030: same shape — operate in u32.
+            let rs32 = ctx.gpr[instr.rs()] as u32;
+            let rb32 = ctx.gpr[instr.rb()] as u32;
+            ctx.gpr[instr.ra()] = (!(rs32 & rb32)) as u64;
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
         PpcOpcode::eqvx => {
-            ctx.gpr[instr.ra()] = !(ctx.gpr[instr.rs()] ^ ctx.gpr[instr.rb()]);
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            // PPCBUG-031: `eqv rA, rA, rA` is a common "set to all-ones" idiom;
+            // 64-bit form gave 0xFFFFFFFFFFFFFFFF but 32-bit ABI expects 0x00000000FFFFFFFF.
+            let rs32 = ctx.gpr[instr.rs()] as u32;
+            let rb32 = ctx.gpr[instr.rb()] as u32;
+            ctx.gpr[instr.ra()] = (!(rs32 ^ rb32)) as u64;
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
 
@@ -5067,17 +5089,115 @@ mod tests {
 
     #[test]
     fn nego_sets_ov_only_on_int_min() {
+        // PPCBUG-006: 32-bit ABI. INT_MIN is 0x80000000 (low 32), not 0x8000000000000000.
         let mut ctx = PpcContext::new();
         let mut mem = TestMem::new();
         // nego r5, r3  (XO=104, OE=1)
-        ctx.gpr[3] = i64::MIN as u64;
+        ctx.gpr[3] = 0x8000_0000;
         let raw = (31 << 26) | (5 << 21) | (3 << 16) | (1 << 10) | (104 << 1);
         write_instr(&mut mem, 0, raw);
         ctx.pc = 0;
         step(&mut ctx, &mut mem);
         assert_eq!(ctx.xer_ov, 1);
-        // -INT_MIN wraps to INT_MIN
-        assert_eq!(ctx.gpr[5], i64::MIN as u64);
+        // -INT_MIN wraps to INT_MIN (low 32 bits) with upper 32 bits zero.
+        assert_eq!(ctx.gpr[5], 0x0000_0000_8000_0000);
+    }
+
+    #[test]
+    fn neg_clean_input_no_upper_bits() {
+        // PPCBUG-006 regression: neg r3=5 must produce 0x00000000_FFFFFFFB,
+        // not 0xFFFFFFFF_FFFFFFFB (the 64-bit !ra-then-add-1 result).
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 5;
+        let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (104 << 1);
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_FFFB);
+    }
+
+    #[test]
+    fn norx_not_simplified_keeps_upper_bits_clean() {
+        // PPCBUG-029: `not rA, rB` (norx with rs==rb) is the canonical not
+        // simplified mnemonic. 64-bit !val poisons upper 32 bits of every
+        // execution; under the 32-bit ABI we must truncate.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 0x0000_0000_0000_00FF;
+        // norx r5, r3, r3  (XO=124)
+        let raw = (31u32 << 26) | (3 << 21) | (5 << 16) | (3 << 11) | (124 << 1);
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_FF00, "upper 32 bits must be zero");
+    }
+
+    #[test]
+    fn eqvx_self_self_self_sets_low32_to_all_ones() {
+        // PPCBUG-031: `eqv rA, rA, rA` is a common "set-to-all-ones" idiom.
+        // 64-bit !(0^0) gives u64::MAX (0xFFFFFFFF_FFFFFFFF); 32-bit ABI
+        // expects 0x00000000_FFFFFFFF.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 0;
+        // eqvx r3, r3, r3  (XO=284)
+        let raw = (31u32 << 26) | (3 << 21) | (3 << 16) | (3 << 11) | (284 << 1);
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[3], 0x0000_0000_FFFF_FFFF);
+    }
+
+    #[test]
+    fn andcx_bit_clear_keeps_upper_clean() {
+        // PPCBUG-033: `andc rA, rS, rB` = rS & !rB. 64-bit !rB poisons.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 0xFFFF_FFFF; // rS
+        ctx.gpr[4] = 0x000F;       // rB (low bits to clear)
+        // andcx r5, r3, r4  (XO=60)
+        let raw = (31u32 << 26) | (3 << 21) | (5 << 16) | (4 << 11) | (60 << 1);
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_FFF0);
+    }
+
+    #[test]
+    fn subfex_clean_inputs_no_upper_bits() {
+        // PPCBUG-008: 32-bit ABI. RT = !RA + RB + CA. RA=5, RB=10, CA=1
+        // → !5u32 = 0xFFFFFFFA, +10 = 0x100000004, +1 = 0x100000005, low32 = 5.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 5;
+        ctx.gpr[4] = 10;
+        ctx.xer_ca = 1;
+        // subfex r5, r3, r4  (XO=136)
+        let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (136 << 1);
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 5);
+        assert_eq!(ctx.xer_ca, 1, "rb>=ra → CA=1 (10 > 5)");
+    }
+
+    #[test]
+    fn subfmex_ra_max_ca_zero_clears_ca() {
+        // PPCBUG-019: `subfme` with RA=u32::MAX and CA=0 should set CA=0
+        // (because !u32::MAX = 0). The buggy code's `!ra != 0` predicate
+        // on u64 was always true (because !u64-cast-of-u32::MAX has high
+        // bits flipped non-zero), wrongly setting CA=1.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 0xFFFF_FFFFu64;
+        ctx.xer_ca = 0;
+        // subfmex r5, r3  (XO=232)
+        let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (232 << 1);
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.xer_ca, 0, "RA=u32::MAX, CA=0 → !RA32==0, CA=0");
     }
 
     // ---------- Phase 2 fixes: trap TO-field ----------
@@ -6086,7 +6206,8 @@ mod tests {
             ctx.xer_ca = 0;
             step(&mut ctx, &mem);
             assert_eq!(ctx.xer_ca, 0, "ra=0, ca=0 should produce CA=0");
-            assert_eq!(ctx.gpr[3], u64::MAX, "result = !0 + 0 = u64::MAX");
+            // PPCBUG-018: 32-bit ABI. !0u32 + 0 = u32::MAX, with upper 32 bits zero.
+            assert_eq!(ctx.gpr[3], 0xFFFF_FFFFu64, "result = !0u32 + 0 = u32::MAX");
         }
         // Case 3: ra=1, ca=0 → CA=0  (old buggy code reported CA=1)
         {
@@ -6098,21 +6219,20 @@ mod tests {
             ctx.xer_ca = 0;
             step(&mut ctx, &mem);
             assert_eq!(ctx.xer_ca, 0, "ra=1, ca=0 should produce CA=0");
-            assert_eq!(ctx.gpr[3], u64::MAX - 1, "result = !1 + 0 = u64::MAX - 1");
+            // PPCBUG-018: 32-bit ABI. !1u32 + 0 = u32::MAX - 1, with upper 32 bits zero.
+            assert_eq!(ctx.gpr[3], 0xFFFF_FFFEu64, "result = !1u32 + 0 = u32::MAX - 1");
         }
-        // Case 4: ra=u64::MAX, ca=0 → CA=0  (old buggy code reported CA=1
-        // because !ra == 0 only here, which the buggy `!ra != 0` predicate
-        // happened to handle right; flip ca=1 to exercise the other arm)
+        // Case 4: ra=u32::MAX, ca=1 → CA=0; result = !u32::MAX + 1 = 1.
         {
             let mut ctx = PpcContext::new();
             let mem = TestMem::new();
             write_instr(&mem, 0, raw);
             ctx.pc = 0;
-            ctx.gpr[4] = u64::MAX;
+            ctx.gpr[4] = 0xFFFF_FFFFu64;
             ctx.xer_ca = 1;
             step(&mut ctx, &mem);
-            assert_eq!(ctx.xer_ca, 0, "ra=u64::MAX, ca=1 should produce CA=0");
-            assert_eq!(ctx.gpr[3], 1, "result = !u64::MAX + 1 = 1");
+            assert_eq!(ctx.xer_ca, 0, "ra=u32::MAX, ca=1 should produce CA=0");
+            assert_eq!(ctx.gpr[3], 1, "result = !u32::MAX + 1 = 1");
         }
     }
 

From 145a7a401936324c8c3e0356f31afcd8c7a7d647 Mon Sep 17 00:00:00 2001
From: MechaCat02 <fabian@diekaulbachs.de>
Date: Sat, 2 May 2026 11:38:22 +0200
Subject: [PATCH 2/7] fix(cpu): PPCBUG-034+035+036+037 extsbx/extshx writeback
 + CR0 (coupled)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 4 batch 2: extsbx and extshx writeback truncation + CR0 view fix.
Coupled per audit — must land together because the writeback fix would
silently break CR0 sign classification if the CR0 fix didn't ship in
the same commit.

Before:
- extsbx: `as i8 as i64 as u64` — every negative byte poisoned upper
  32 bits (active poisoning, not latent). 0x80 → 0xFFFFFFFF_FFFFFF80.
- extshx: same shape for halfwords.
- CR0: `as i64` view — accidentally correct on the buggy 64-bit form
  because the high bits matched the byte's sign bit.

After:
- extsbx: `as i8 as i32 as u32 as u64` — sign-extend to i32 then
  zero-extend to u64. 0x80 → 0x00000000_FFFFFF80.
- extshx: same for halfwords.
- CR0: `as u32 as i32 as i64` — i32 view, so a result with bit 31 set
  is correctly classified as negative under the 32-bit ABI.

Tests:
- extsbx_negative_byte_zero_extends_upper: 0x80 input → 0x00000000_FFFFFF80
  with CR0.LT set.
- extshx_negative_halfword_zero_extends_upper: same shape for 0x8000.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 crates/xenia-cpu/src/interpreter.rs | 45 ++++++++++++++++++++++++++---
 1 file changed, 41 insertions(+), 4 deletions(-)

diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs
index 2abc218..497d22f 100644
--- a/crates/xenia-cpu/src/interpreter.rs
+++ b/crates/xenia-cpu/src/interpreter.rs
@@ -556,13 +556,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
 
         // ===== Extend/Count =====
         PpcOpcode::extsbx => {
-            ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] as i8 as i64 as u64;
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            // PPCBUG-034: 32-bit ABI — sign-extend byte to i32, write zero-extended.
+            // PPCBUG-036 (coupled): CR0 must view result as i32, not i64.
+            ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] as i8 as i32 as u32 as u64;
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
         PpcOpcode::extshx => {
-            ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] as i16 as i64 as u64;
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            // PPCBUG-035: same shape as extsbx for halfwords.
+            // PPCBUG-037 (coupled): CR0 i32 view.
+            ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] as i16 as i32 as u32 as u64;
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
         PpcOpcode::extswx => {
@@ -5182,6 +5186,39 @@ mod tests {
         assert_eq!(ctx.xer_ca, 1, "rb>=ra → CA=1 (10 > 5)");
     }
 
+    #[test]
+    fn extsbx_negative_byte_zero_extends_upper() {
+        // PPCBUG-034+036 coupled: extsb of 0x80 (negative byte) must produce
+        // 0x00000000_FFFFFF80, NOT 0xFFFFFFFF_FFFFFF80. CR0.LT must still fire
+        // (i32 view of 0xFFFFFF80 is negative).
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 0x80;
+        // extsbx. r5, r3  (XO=954, Rc=1)
+        let raw = (31u32 << 26) | (3 << 21) | (5 << 16) | (954 << 1) | 1;
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_FF80);
+        assert!(ctx.cr[0].lt, "CR0.LT must fire for negative i32");
+        assert!(!ctx.cr[0].gt);
+    }
+
+    #[test]
+    fn extshx_negative_halfword_zero_extends_upper() {
+        // PPCBUG-035+037 coupled: extsh of 0x8000 must produce 0x00000000_FFFF8000.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 0x8000;
+        // extshx. r5, r3  (XO=922, Rc=1)
+        let raw = (31u32 << 26) | (3 << 21) | (5 << 16) | (922 << 1) | 1;
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_8000);
+        assert!(ctx.cr[0].lt);
+    }
+
     #[test]
     fn subfmex_ra_max_ca_zero_clears_ca() {
         // PPCBUG-019: `subfme` with RA=u32::MAX and CA=0 should set CA=0

From bf8208e88c82be23d7c1819d32aa999d399b5d1a Mon Sep 17 00:00:00 2001
From: MechaCat02 <fabian@diekaulbachs.de>
Date: Sat, 2 May 2026 11:41:49 +0200
Subject: [PATCH 3/7] fix(cpu): PPCBUG-001/002/003/004/005/007 4b immediate ALU
 truncation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 4 batch 3: 6 PPCBUGs in the same-shape-as-addis (4b) sub-section.
All share the pattern of computing on 64-bit values when the 32-bit ABI
requires u32 arithmetic.

- PPCBUG-001 addi: `li rT, -1` produced 0xFFFFFFFF_FFFFFFFF; now 0x00000000_FFFFFFFF.
- PPCBUG-002 addic: writeback truncated + CA from u32 unsigned compare
  matching canary's `AddDidCarry`.
- PPCBUG-003 addicx: same plus CR0 i32 view (regression vs. the frozen
  ppc-manual snapshot which had the correct form).
- PPCBUG-004 mulli: 64-bit signed product now truncated to 32 bits.
- PPCBUG-005 subficx: writeback + CA in u32 space; removes the bits-32-63
  pollution from sign-extended negative SIMM.
- PPCBUG-007 subfcx: defensive 32-bit truncation of CA compare. Same shape
  as the compare that broke addis (0x828F3F98 / 0x828F3F68 case).

Tests:
- addi_li_neg_one_zero_extends_upper (PPCBUG-001).
- addic_carry_uses_32bit_compare (PPCBUG-002).
- mulli_overflow_wraps_to_32 (PPCBUG-004).
- subficx_neg_simm_zero_extends (PPCBUG-005).
- subfcx_addis_incident_case (PPCBUG-007 — exact addis-incident case).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 crates/xenia-cpu/src/interpreter.rs | 158 +++++++++++++++++++++++-----
 1 file changed, 131 insertions(+), 27 deletions(-)

diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs
index 497d22f..479c3d1 100644
--- a/crates/xenia-cpu/src/interpreter.rs
+++ b/crates/xenia-cpu/src/interpreter.rs
@@ -112,8 +112,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
     match instr.opcode {
         // ===== ALU: Immediate =====
         PpcOpcode::addi => {
+            // PPCBUG-001: 32-bit ABI. `li rT, -1` (= addi rT, r0, -1) must produce
+            // 0x00000000_FFFFFFFF, not 0xFFFFFFFF_FFFFFFFF (sign-extended simm16).
             let ra_val = if instr.ra() == 0 { 0 } else { ctx.gpr[instr.ra()] };
-            ctx.gpr[instr.rd()] = ra_val.wrapping_add(instr.simm16() as i64 as u64);
+            ctx.gpr[instr.rd()] = ra_val.wrapping_add(instr.simm16() as i64 as u64) as u32 as u64;
             ctx.pc += 4;
         }
         PpcOpcode::addis => {
@@ -131,35 +133,41 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.pc += 4;
         }
         PpcOpcode::addic => {
-            let ra = ctx.gpr[instr.ra()];
-            let imm = instr.simm16() as i64 as u64;
-            let result = ra.wrapping_add(imm);
-            ctx.xer_ca = if result < ra { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result;
+            // PPCBUG-002: 32-bit ABI. CA must be from a 32-bit unsigned compare;
+            // canary's `AddDidCarry` truncates both operands to int32 first.
+            let ra32 = ctx.gpr[instr.ra()] as u32;
+            let imm32 = instr.simm16() as i32 as u32;
+            let result32 = ra32.wrapping_add(imm32);
+            ctx.xer_ca = if result32 < ra32 { 1 } else { 0 };
+            ctx.gpr[instr.rd()] = result32 as u64;
             ctx.pc += 4;
         }
         PpcOpcode::addicx => {
-            let ra = ctx.gpr[instr.ra()];
-            let imm = instr.simm16() as i64 as u64;
-            let result = ra.wrapping_add(imm);
-            ctx.xer_ca = if result < ra { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result;
-            // Update CR0
-            ctx.update_cr_signed(0, result as i64);
+            // PPCBUG-003: same fix as addic plus CR0 i32 view.
+            let ra32 = ctx.gpr[instr.ra()] as u32;
+            let imm32 = instr.simm16() as i32 as u32;
+            let result32 = ra32.wrapping_add(imm32);
+            ctx.xer_ca = if result32 < ra32 { 1 } else { 0 };
+            ctx.gpr[instr.rd()] = result32 as u64;
+            ctx.update_cr_signed(0, result32 as i32 as i64);
             ctx.pc += 4;
         }
         PpcOpcode::subficx => {
-            let ra = ctx.gpr[instr.ra()];
-            let imm = instr.simm16() as i64 as u64;
-            let result = imm.wrapping_sub(ra);
-            ctx.xer_ca = if imm >= ra { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result;
+            // PPCBUG-005: 32-bit ABI. Sign-extended imm has bits 32-63 set for
+            // negative SIMM, poisoning the writeback. Canary uses 32-bit form.
+            let ra32 = ctx.gpr[instr.ra()] as u32;
+            let imm32 = instr.simm16() as i32 as u32;
+            let result32 = imm32.wrapping_sub(ra32);
+            ctx.xer_ca = if imm32 >= ra32 { 1 } else { 0 };
+            ctx.gpr[instr.rd()] = result32 as u64;
             ctx.pc += 4;
         }
         PpcOpcode::mulli => {
-            let ra = ctx.gpr[instr.ra()] as i64;
+            // PPCBUG-004: 32-bit ABI. Read RA as i32 (low 32 bits, sign-extended
+            // for the multiply); keep only the low 32 bits of the product (overflow wraps).
+            let ra = ctx.gpr[instr.ra()] as i32 as i64;
             let imm = instr.simm16() as i64;
-            ctx.gpr[instr.rd()] = ra.wrapping_mul(imm) as u64;
+            ctx.gpr[instr.rd()] = (ra.wrapping_mul(imm) as u32) as u64;
             ctx.pc += 4;
         }
 
@@ -252,16 +260,20 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.pc += 4;
         }
         PpcOpcode::subfcx => {
-            let ra = ctx.gpr[instr.ra()];
-            let rb = ctx.gpr[instr.rb()];
-            let result = rb.wrapping_sub(ra);
-            ctx.xer_ca = if rb >= ra { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result;
+            // PPCBUG-007: 32-bit ABI. The `rb >= ra` u64 unsigned compare is
+            // exactly the shape that broke addis. Defensive 32-bit truncation
+            // is required for correct CA even after upstream cleanup.
+            let ra32 = ctx.gpr[instr.ra()] as u32;
+            let rb32 = ctx.gpr[instr.rb()] as u32;
+            let result32 = rb32.wrapping_sub(ra32);
+            ctx.xer_ca = if rb32 >= ra32 { 1 } else { 0 };
+            ctx.gpr[instr.rd()] = result32 as u64;
             if instr.oe() {
-                overflow::apply(ctx, overflow::sub_ov_64(ra, rb, result));
+                let true_diff = (rb32 as i32 as i128) - (ra32 as i32 as i128);
+                overflow::apply(ctx, overflow::sum_overflow_64(true_diff, result32 as u64));
             }
             if instr.rc_bit() {
-                ctx.update_cr_signed(0, result as i64);
+                ctx.update_cr_signed(0, result32 as i32 as i64);
             }
             ctx.pc += 4;
         }
@@ -5186,6 +5198,98 @@ mod tests {
         assert_eq!(ctx.xer_ca, 1, "rb>=ra → CA=1 (10 > 5)");
     }
 
+    #[test]
+    fn addi_li_neg_one_zero_extends_upper() {
+        // PPCBUG-001: `li r3, -1` (= addi r3, r0, -1) must produce
+        // 0x00000000_FFFFFFFF, not 0xFFFFFFFF_FFFFFFFF.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // addi r3, r0, -1: opcode 14, simm16 = 0xFFFF
+        let raw = (14u32 << 26) | (3 << 21) | (0 << 16) | 0xFFFF;
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[3], 0x0000_0000_FFFF_FFFFu64);
+    }
+
+    #[test]
+    fn addic_carry_uses_32bit_compare() {
+        // PPCBUG-002: addic ra=0xFFFFFFFF_00000001, simm=-1 (0xFFFF).
+        // 32-bit: 0x00000001 + 0xFFFFFFFF = 0x00000000 with CA=1.
+        // 64-bit (buggy): result < ra → since 64-bit ra has high bits set,
+        // the buggy form would compare against the polluted u64 and could
+        // give wrong CA. Truncated form ignores upper 32 bits.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 0xFFFFFFFF_00000001u64;
+        // addic r4, r3, -1: opcode 12
+        let raw = (12u32 << 26) | (4 << 21) | (3 << 16) | 0xFFFF;
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        // Result low 32: 0x00000001 + 0xFFFFFFFF = 0x00000000 with carry.
+        assert_eq!(ctx.gpr[4], 0);
+        assert_eq!(ctx.xer_ca, 1, "32-bit compare must see CA=1");
+    }
+
+    #[test]
+    fn mulli_overflow_wraps_to_32() {
+        // PPCBUG-004: the mulli product must be truncated to 32 bits before it
+        // is zero-extended into the 64-bit GPR.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // RA = 0x80000000 is read as i32::MIN and multiplied by the immediate 2.
+        // The i64 product is -2^32 = 0xFFFFFFFF_00000000, which overflows 32
+        // bits: an untruncated writeback would leave the upper word all ones,
+        // while the truncated writeback leaves the whole register zero.
+        ctx.gpr[3] = 0x80000000u64;
+        // mulli r4, r3, 2: opcode 7
+        let raw = (7u32 << 26) | (4 << 21) | (3 << 16) | 2;
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        // Low 32 bits of 0xFFFFFFFF_00000000 are zero and the upper 32 bits
+        // must not leak into the register.
+        assert_eq!(ctx.gpr[4], 0);
+    }
+
+    #[test]
+    fn subficx_neg_simm_zero_extends() {
+        // PPCBUG-005: subfic r4, r3, -1 with r3=5: imm-ra = 0xFFFFFFFF - 5 = 0xFFFFFFFA.
+        // Buggy form: imm sign-extended to u64 0xFFFFFFFFFFFFFFFF - 5 = poisoned.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 5;
+        // subfic r4, r3, -1: opcode 8, simm = 0xFFFF
+        let raw = (8u32 << 26) | (4 << 21) | (3 << 16) | 0xFFFF;
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[4], 0x0000_0000_FFFF_FFFAu64);
+        assert_eq!(ctx.xer_ca, 1, "0xFFFFFFFF >= 5 → CA=1");
+    }
+
+    #[test]
+    fn subfcx_addis_incident_case() {
+        // PPCBUG-007: regression for the exact case that revealed the addis bug.
+        // After P1's addis fix this works coincidentally; P4 batch 3 makes
+        // subfcx itself robust to 64-bit GPR pollution.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // ra polluted in upper 32 bits, low 32 = 0x828F3F98
+        ctx.gpr[3] = 0xFFFF_FFFF_828F_3F98u64;
+        // rb clean low 32 = 0x828F3F68
+        ctx.gpr[4] = 0x0000_0000_828F_3F68u64;
+        // subfcx r5, r3, r4  (XO=8): result = rb - ra = 0xFFFFFFD0 (low 32)
+        let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (8 << 1);
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        // 32-bit unsigned: 0x828F3F68 < 0x828F3F98 → CA=0
+        assert_eq!(ctx.xer_ca, 0, "32-bit unsigned: rb < ra → CA=0");
+        // result = 0x828F3F68 - 0x828F3F98 = 0xFFFFFFD0 (low 32, upper 32 zero)
+        assert_eq!(ctx.gpr[5], 0xFFFF_FFD0u64);
+    }
+
     #[test]
     fn extsbx_negative_byte_zero_extends_upper() {
         // PPCBUG-034+036 coupled: extsb of 0x80 (negative byte) must produce

From 82a9bff93468eb2b5e81fc34fc0d898b203e8fef Mon Sep 17 00:00:00 2001
From: MechaCat02 <fabian@diekaulbachs.de>
Date: Sat, 2 May 2026 11:44:34 +0200
Subject: [PATCH 4/7] fix(cpu): PPCBUG-009/010+011/041+042+043 mul/div + srawx
 truncation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 4 batch 4: mullwx, divwx (coupled +CR0), srawx/srawix (coupled +CR0).

- PPCBUG-009 mullwx: 32-bit ABI. Product truncated to u32 before write.
  OE handler still uses full i64 product to detect overflow.
- PPCBUG-010+011 divwx (coupled): quotient zero-extended (canary uses
  ZeroExtend(v, INT64_TYPE)). CR0 view via i32 — without this, a negative
  i32 quotient (e.g. -3 from -10/3) would be classified as positive in
  i64 view of the now-zero-extended writeback.
- PPCBUG-041+042+043 srawx/srawix (coupled): writeback uses `as u32 as u64`
  (was `as i64 as u64`). All-ones case (sh>=32 with negative input) writes
  0x00000000_FFFFFFFF instead of u64::MAX. CR0 view via i32. CA logic
  preserved unchanged (audit-verified independently correct).

Tests:
- mullwx_overflow_truncates_to_32 (PPCBUG-009).
- divwx_negative_quotient_zero_extends (PPCBUG-010+011).
- srawx_negative_value_zero_extends_upper (PPCBUG-041+043).
- srawix_high_count_negative_input_yields_low32_all_ones (PPCBUG-042+043).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 crates/xenia-cpu/src/interpreter.rs | 97 +++++++++++++++++++++++++----
 1 file changed, 84 insertions(+), 13 deletions(-)

diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs
index 479c3d1..652d566 100644
--- a/crates/xenia-cpu/src/interpreter.rs
+++ b/crates/xenia-cpu/src/interpreter.rs
@@ -347,16 +347,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.pc += 4;
         }
         PpcOpcode::mullwx => {
+            // PPCBUG-009: 32-bit ABI. Truncate product to u32 — overflow detection
+            // (mullw_ov) still uses the full i64 product to catch the overflow.
             let ra = ctx.gpr[instr.ra()] as i32 as i64;
             let rb = ctx.gpr[instr.rb()] as i32 as i64;
             let product = ra.wrapping_mul(rb);
-            ctx.gpr[instr.rd()] = product as u64;
+            ctx.gpr[instr.rd()] = product as u32 as u64;
             if instr.oe() {
-                // OV iff the 64-bit product can't fit into 32-bit signed.
                 overflow::apply(ctx, overflow::mullw_ov(product));
             }
             if instr.rc_bit() {
-                ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64);
+                ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64);
             }
             ctx.pc += 4;
         }
@@ -381,20 +382,21 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.pc += 4;
         }
         PpcOpcode::divwx => {
+            // PPCBUG-010+011 coupled: 32-bit ABI. Quotient zero-extended to u64
+            // (canary explicitly uses ZeroExtend(v, INT64_TYPE)). CR0 view via i32.
             let ra = ctx.gpr[instr.ra()] as i32;
             let rb = ctx.gpr[instr.rb()] as i32;
             let ov = overflow::divw_ov_signed(ra, rb);
             if ov {
-                // PPC: RT undefined on div-by-zero / INT_MIN/-1. Canary uses 0.
                 ctx.gpr[instr.rd()] = 0;
             } else {
-                ctx.gpr[instr.rd()] = (ra / rb) as i64 as u64;
+                ctx.gpr[instr.rd()] = (ra / rb) as u32 as u64;
             }
             if instr.oe() {
                 overflow::apply(ctx, ov);
             }
             if instr.rc_bit() {
-                ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64);
+                ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64);
             }
             ctx.pc += 4;
         }
@@ -615,34 +617,37 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.pc += 4;
         }
         PpcOpcode::srawx => {
+            // PPCBUG-041+043 coupled: 32-bit ABI writeback truncation + CR0 i32.
+            // CA logic is independently correct (uses u32 shifted-out test).
             let rs = ctx.gpr[instr.rs()] as i32;
             let sh = ctx.gpr[instr.rb()] as u32 & 0x3F;
             if sh == 0 {
-                ctx.gpr[instr.ra()] = rs as i64 as u64;
+                ctx.gpr[instr.ra()] = rs as u32 as u64;
                 ctx.xer_ca = 0;
             } else if sh < 32 {
                 let result = rs >> sh;
                 ctx.xer_ca = if rs < 0 && (rs as u32) << (32 - sh) != 0 { 1 } else { 0 };
-                ctx.gpr[instr.ra()] = result as i64 as u64;
+                ctx.gpr[instr.ra()] = result as u32 as u64;
             } else {
-                ctx.gpr[instr.ra()] = if rs < 0 { u64::MAX } else { 0 };
+                ctx.gpr[instr.ra()] = if rs < 0 { 0xFFFF_FFFFu64 } else { 0 };
                 ctx.xer_ca = if rs < 0 { 1 } else { 0 };
             }
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
         PpcOpcode::srawix => {
+            // PPCBUG-042+043 coupled: same shape as srawx for the sh-immediate form.
             let rs = ctx.gpr[instr.rs()] as i32;
             let sh = instr.sh();
             if sh == 0 {
-                ctx.gpr[instr.ra()] = rs as i64 as u64;
+                ctx.gpr[instr.ra()] = rs as u32 as u64;
                 ctx.xer_ca = 0;
             } else {
                 let result = rs >> sh;
                 ctx.xer_ca = if rs < 0 && (rs as u32) << (32 - sh) != 0 { 1 } else { 0 };
-                ctx.gpr[instr.ra()] = result as i64 as u64;
+                ctx.gpr[instr.ra()] = result as u32 as u64;
             }
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
         PpcOpcode::sldx => {
@@ -5198,6 +5203,72 @@ mod tests {
         assert_eq!(ctx.xer_ca, 1, "rb>=ra → CA=1 (10 > 5)");
     }
 
+    #[test]
+    fn mullwx_overflow_truncates_to_32() {
+        // PPCBUG-009: mullwo r5, r3, r4 with ra=0x10000, rb=0x10000 → product
+        // 0x100000000 (overflow). Low 32 = 0; OE must fire.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 0x10000;
+        ctx.gpr[4] = 0x10000;
+        // mullwo r5, r3, r4  (XO=235, OE=1)
+        let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (235 << 1);
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0, "low 32 bits = 0");
+        assert_eq!(ctx.xer_ov, 1, "overflow detected");
+    }
+
+    #[test]
+    fn divwx_negative_quotient_zero_extends() {
+        // PPCBUG-010+011: -10 / 3 = -3 must produce 0x00000000_FFFFFFFD,
+        // not 0xFFFFFFFF_FFFFFFFD. CR0.LT must still fire (i32 view of FFFFFFFD is negative).
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = (-10i32) as u32 as u64;
+        ctx.gpr[4] = 3;
+        // divwx. r5, r3, r4  (XO=491, Rc=1)
+        let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (491 << 1) | 1;
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_FFFDu64);
+        assert!(ctx.cr[0].lt, "CR0.LT must fire for negative i32 quotient");
+    }
+
+    #[test]
+    fn srawx_negative_value_zero_extends_upper() {
+        // PPCBUG-041+043: srawx of negative i32 by 1 produces a negative i32;
+        // writeback must zero-extend to u64 (not sign-extend).
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 0x8000_0000u64; // i32::MIN
+        ctx.gpr[4] = 1;
+        // srawx. r5, r3, r4  (XO=792, Rc=1)
+        let raw = (31u32 << 26) | (3 << 21) | (5 << 16) | (4 << 11) | (792 << 1) | 1;
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0x0000_0000_C000_0000u64);
+        assert!(ctx.cr[0].lt);
+    }
+
+    #[test]
+    fn srawix_high_count_negative_input_yields_low32_all_ones() {
+        // PPCBUG-042+043: srawi with count=31 on negative input → low 32 bits
+        // all ones (0xFFFFFFFF), upper 32 zero (was u64::MAX before fix).
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 0x8000_0000u64;
+        // srawix r5, r3, 31  (XO=824)
+        let raw = (31u32 << 26) | (3 << 21) | (5 << 16) | (31 << 11) | (824 << 1);
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_FFFFu64);
+    }
+
     #[test]
     fn addi_li_neg_one_zero_extends_upper() {
         // PPCBUG-001: `li r3, -1` (= addi r3, r0, -1) must produce

From 20a730d69e29200a7cf0c1fea1e7711997d9f583 Mon Sep 17 00:00:00 2001
From: MechaCat02 <fabian@diekaulbachs.de>
Date: Sat, 2 May 2026 11:47:24 +0200
Subject: [PATCH 5/7] fix(cpu): PPCBUG-095/096/097/098/105 halfword + lwa load
 truncation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 4 batch 5: 5 PPCBUGs in the load family. lha/lhax/lhau/lhaux
sign-extended halfword results to u64 (active poisoning for negative
halfwords); lwa/lwax/lwaux sign-extended u32 results.

- PPCBUG-095/096/097/098 lha[ux]: `as i16 as i64 as u64` →
  `as i16 as i32 as u32 as u64`. Sign-extend to i32 then zero-extend.
  Common trigger: int16_t struct fields, PCM samples, packed vertex
  deltas. Memory 0x8000 was producing 0xFFFFFFFF_FFFF8000.
- PPCBUG-105 lwa/lwax/lwaux: `as i32 as i64 as u64` → `as u64`.
  Per-canary the 64-bit-mode form sign-extends, but in 32-bit ABI we
  must zero-extend (canary's behavior is rescued by x86 register
  zeroing in JIT; pure interpreter has no escape). Memory 0x80000000
  was producing 0xFFFFFFFF_80000000.

Tests:
- lha_negative_halfword_zero_extends_upper (PPCBUG-095).
- lhaux_negative_halfword_clean_writeback (PPCBUG-098 + EA update).
- lwa_high_bit_set_zero_extends_upper (PPCBUG-105).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 crates/xenia-cpu/src/interpreter.rs | 63 +++++++++++++++++++++++++----
 1 file changed, 56 insertions(+), 7 deletions(-)

diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs
index 652d566..c730e75 100644
--- a/crates/xenia-cpu/src/interpreter.rs
+++ b/crates/xenia-cpu/src/interpreter.rs
@@ -1030,13 +1030,13 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
         PpcOpcode::lha => {
             let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
             let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
-            ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i64 as u64;
+            ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i32 as u32 as u64;
             ctx.pc += 4;
         }
         PpcOpcode::lhax => {
             let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
             let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
-            ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i64 as u64;
+            ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i32 as u32 as u64;
             ctx.pc += 4;
         }
         PpcOpcode::lhzux => {
@@ -1047,13 +1047,13 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
         }
         PpcOpcode::lhau => {
             let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
-            ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i64 as u64;
+            ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i32 as u32 as u64;
             ctx.gpr[instr.ra()] = ea as u64;
             ctx.pc += 4;
         }
         PpcOpcode::lhaux => {
             let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
-            ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i64 as u64;
+            ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i32 as u32 as u64;
             ctx.gpr[instr.ra()] = ea as u64;
             ctx.pc += 4;
         }
@@ -1072,18 +1072,18 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
         PpcOpcode::lwa => {
             let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
             let ea = ea.wrapping_add(instr.ds() as i64 as u64) as u32;
-            ctx.gpr[instr.rd()] = mem.read_u32(ea) as i32 as i64 as u64;
+            ctx.gpr[instr.rd()] = mem.read_u32(ea) as u64;
             ctx.pc += 4;
         }
         PpcOpcode::lwax => {
             let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
             let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
-            ctx.gpr[instr.rd()] = mem.read_u32(ea) as i32 as i64 as u64;
+            ctx.gpr[instr.rd()] = mem.read_u32(ea) as u64;
             ctx.pc += 4;
         }
         PpcOpcode::lwaux => {
             let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
-            ctx.gpr[instr.rd()] = mem.read_u32(ea) as i32 as i64 as u64;
+            ctx.gpr[instr.rd()] = mem.read_u32(ea) as u64;
             ctx.gpr[instr.ra()] = ea as u64;
             ctx.pc += 4;
         }
@@ -5203,6 +5203,55 @@ mod tests {
         assert_eq!(ctx.xer_ca, 1, "rb>=ra → CA=1 (10 > 5)");
     }
 
+    #[test]
+    fn lha_negative_halfword_zero_extends_upper() {
+        // PPCBUG-095: memory 0x8000 must yield gpr[rD] = 0x00000000_FFFF8000.
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        mem.write_u16(0x100, 0x8000);
+        ctx.gpr[3] = 0x100;
+        // lha r5, 0(r3): opcode 42
+        let raw = (42u32 << 26) | (5 << 21) | (3 << 16) | 0;
+        write_instr(&mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mem);
+        assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_8000u64);
+    }
+
+    #[test]
+    fn lhaux_negative_halfword_clean_writeback() {
+        // PPCBUG-098: indexed update form. Memory 0xFFFF → rD = 0x00000000_FFFFFFFF;
+        // rA must update to the EA.
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        mem.write_u16(0x200, 0xFFFF);
+        ctx.gpr[3] = 0x100;  // ra
+        ctx.gpr[4] = 0x100;  // rb
+        // lhaux r5, r3, r4  (XO=375)
+        let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (375 << 1);
+        write_instr(&mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mem);
+        assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_FFFFu64);
+        assert_eq!(ctx.gpr[3], 0x200, "rA updated to EA");
+    }
+
+    #[test]
+    fn lwa_high_bit_set_zero_extends_upper() {
+        // PPCBUG-105: memory 0x80000000 must yield rD = 0x00000000_80000000
+        // under 32-bit ABI (no sign extension to bits 32-63).
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        mem.write_u32(0x100, 0x8000_0000);
+        ctx.gpr[3] = 0x100;
+        // lwa r5, 0(r3): opcode 58, XO=2 (DS-form, ds=0)
+        let raw = (58u32 << 26) | (5 << 21) | (3 << 16) | 2;
+        write_instr(&mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mem);
+        assert_eq!(ctx.gpr[5], 0x0000_0000_8000_0000u64);
+    }
+
     #[test]
     fn mullwx_overflow_truncates_to_32() {
         // PPCBUG-009: mullwo r5, r3, r4 with ra=0x10000, rb=0x10000 → product

From 16993bb8af745f21eaf8993a610d616480d67d1c Mon Sep 17 00:00:00 2001
From: MechaCat02 <fabian@diekaulbachs.de>
Date: Sat, 2 May 2026 11:55:50 +0200
Subject: [PATCH 6/7] fix(cpu): PPCBUG-012-017/020/023-026/032/044 4c+4d latent
 + CR0 catch-all
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 4 batch 6: latent writeback truncation (4c) and CR0 catch-all (4d).
~13 PPCBUGs across all remaining 32-bit ABI ALU sites.

Latent writeback (4c) — the 4a/4b fixes already eliminate the upstream
poisoning, but a defensive truncation here catches any future regression:
- PPCBUG-012 addx, PPCBUG-013 addcx, PPCBUG-014 addex, PPCBUG-015 addzex,
  PPCBUG-016 addmex, PPCBUG-017 subfx — all rewritten to compute on u32
  operands and write `as u64`. CA computed via 32-bit unsigned compare.
  Overflow now uses `true_sum != (result32 as i32) as i128` (32-bit
  predicate, since sum_overflow_64 is i64-bounded).
- PPCBUG-032 andx/orx/xorx — CR0 catch-all only (results inherit upper
  bits from operands; once those are clean, no truncation needed).

CR0 catch-all (4d) — fix the `update_cr_signed(0, X as i64)` pattern at
every 32-bit-ABI Rc=1 path:
- PPCBUG-020 catch-all: applied to mulhwx, mulhwux, divwux,
  addx/addcx/addex/addzex/addmex/subfx (now in 4c above), andx/orx/xorx,
  andix, andisx, slwx, srwx, cntlzwx, rlwinmx, rlwimix, rlwnmx. Already
  covered in batch 4: mullwx, divwx, srawx/srawix.
- PPCBUG-023 andisx: now correctly classifies bit-31 results as CR0.LT.
- PPCBUG-024 rlwinmx, PPCBUG-025 rlwimix, PPCBUG-026 rlwnmx.
- PPCBUG-044 slwx/srwx: bit-31 result like 0x80000000 now CR0.LT.

64-bit ABI ops (rldicl/rldicr/rldic/rldimi/rldcl/rldcr, sldx/srdx/sradx/
sradix, mulhdx/mulhdux/mulldx, divdx/divdux, cntlzdx) intentionally retain
the 64-bit `as i64` form per ISA — these are 64-bit-mode instructions.

Updated old tests:
- addo_sets_xer_ov_on_signed_overflow_and_stickies_so: i32::MAX + 1 → INT_MIN.
- addx_rc_uses_64bit_compare_not_32bit: renamed to ..._uses_32bit_compare_in_xbox_abi
  with assertions flipped to the correct 32-bit ABI behavior.

New tests:
- andisx_sign_bit_set_classifies_lt (PPCBUG-023).
- slwx_high_bit_result_classifies_lt (PPCBUG-044).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 crates/xenia-cpu/src/interpreter.rs | 201 ++++++++++++++++++----------
 1 file changed, 131 insertions(+), 70 deletions(-)

diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs
index c730e75..4a37ccb 100644
--- a/crates/xenia-cpu/src/interpreter.rs
+++ b/crates/xenia-cpu/src/interpreter.rs
@@ -173,89 +173,97 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
 
         // ===== ALU: Register =====
         PpcOpcode::addx => {
-            let ra = ctx.gpr[instr.ra()];
-            let rb = ctx.gpr[instr.rb()];
-            let result = ra.wrapping_add(rb);
-            ctx.gpr[instr.rd()] = result;
+            // PPCBUG-012+020: 32-bit ABI writeback truncation + CR0 i32 view.
+            let ra32 = ctx.gpr[instr.ra()] as u32;
+            let rb32 = ctx.gpr[instr.rb()] as u32;
+            let result32 = ra32.wrapping_add(rb32);
+            ctx.gpr[instr.rd()] = result32 as u64;
             if instr.oe() {
-                overflow::apply(ctx, overflow::add_ov_64(ra, rb, result));
+                let true_sum = (ra32 as i32 as i128) + (rb32 as i32 as i128);
+                overflow::apply(ctx, true_sum != (result32 as i32) as i128);
             }
             if instr.rc_bit() {
-                ctx.update_cr_signed(0, result as i64);
+                ctx.update_cr_signed(0, result32 as i32 as i64);
             }
             ctx.pc += 4;
         }
         PpcOpcode::addcx => {
-            let ra = ctx.gpr[instr.ra()];
-            let rb = ctx.gpr[instr.rb()];
-            let result = ra.wrapping_add(rb);
-            ctx.xer_ca = if result < ra { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result;
+            // PPCBUG-013+020: 32-bit truncation; CA from u32 unsigned compare.
+            let ra32 = ctx.gpr[instr.ra()] as u32;
+            let rb32 = ctx.gpr[instr.rb()] as u32;
+            let result32 = ra32.wrapping_add(rb32);
+            ctx.xer_ca = if result32 < ra32 { 1 } else { 0 };
+            ctx.gpr[instr.rd()] = result32 as u64;
             if instr.oe() {
-                overflow::apply(ctx, overflow::add_ov_64(ra, rb, result));
+                let true_sum = (ra32 as i32 as i128) + (rb32 as i32 as i128);
+                overflow::apply(ctx, true_sum != (result32 as i32) as i128);
             }
             if instr.rc_bit() {
-                ctx.update_cr_signed(0, result as i64);
+                ctx.update_cr_signed(0, result32 as i32 as i64);
             }
             ctx.pc += 4;
         }
         PpcOpcode::addex => {
-            let ra = ctx.gpr[instr.ra()];
-            let rb = ctx.gpr[instr.rb()];
-            let ca = ctx.xer_ca as u64;
-            let result = ra.wrapping_add(rb).wrapping_add(ca);
-            ctx.xer_ca = if result < ra || (ca != 0 && result == ra) { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result;
+            // PPCBUG-014+020: 32-bit truncation; CA from u32 unsigned compare.
+            let ra32 = ctx.gpr[instr.ra()] as u32;
+            let rb32 = ctx.gpr[instr.rb()] as u32;
+            let ca = ctx.xer_ca as u32;
+            let result32 = ra32.wrapping_add(rb32).wrapping_add(ca);
+            ctx.xer_ca = if result32 < ra32 || (ca != 0 && result32 == ra32) { 1 } else { 0 };
+            ctx.gpr[instr.rd()] = result32 as u64;
             if instr.oe() {
-                let true_sum = (ra as i64 as i128) + (rb as i64 as i128) + (ca as i128);
-                overflow::apply(ctx, overflow::sum_overflow_64(true_sum, result));
+                let true_sum = (ra32 as i32 as i128) + (rb32 as i32 as i128) + (ca as i128);
+                overflow::apply(ctx, true_sum != (result32 as i32) as i128);
             }
             if instr.rc_bit() {
-                ctx.update_cr_signed(0, result as i64);
+                ctx.update_cr_signed(0, result32 as i32 as i64);
             }
             ctx.pc += 4;
         }
         PpcOpcode::addzex => {
-            let ra = ctx.gpr[instr.ra()];
-            let ca = ctx.xer_ca as u64;
-            let result = ra.wrapping_add(ca);
-            ctx.xer_ca = if result < ra { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result;
+            // PPCBUG-015+020: 32-bit truncation.
+            let ra32 = ctx.gpr[instr.ra()] as u32;
+            let ca = ctx.xer_ca as u32;
+            let result32 = ra32.wrapping_add(ca);
+            ctx.xer_ca = if result32 < ra32 { 1 } else { 0 };
+            ctx.gpr[instr.rd()] = result32 as u64;
             if instr.oe() {
-                let true_sum = (ra as i64 as i128) + (ca as i128);
-                overflow::apply(ctx, overflow::sum_overflow_64(true_sum, result));
+                let true_sum = (ra32 as i32 as i128) + (ca as i128);
+                overflow::apply(ctx, true_sum != (result32 as i32) as i128);
             }
             if instr.rc_bit() {
-                ctx.update_cr_signed(0, result as i64);
+                ctx.update_cr_signed(0, result32 as i32 as i64);
             }
             ctx.pc += 4;
         }
         PpcOpcode::addmex => {
-            let ra = ctx.gpr[instr.ra()];
-            let ca = ctx.xer_ca as u64;
-            let result = ra.wrapping_add(ca).wrapping_sub(1);
-            ctx.xer_ca = if ra != 0 || ca != 0 { 1 } else { 0 };
-            ctx.gpr[instr.rd()] = result;
+            // PPCBUG-016+020: 32-bit truncation. RT = RA + CA - 1.
+            let ra32 = ctx.gpr[instr.ra()] as u32;
+            let ca = ctx.xer_ca as u32;
+            let result32 = ra32.wrapping_add(ca).wrapping_sub(1);
+            ctx.xer_ca = if ra32 != 0 || ca != 0 { 1 } else { 0 };
+            ctx.gpr[instr.rd()] = result32 as u64;
             if instr.oe() {
-                // RT <- RA + CA + (-1)
-                let true_sum = (ra as i64 as i128) + (ca as i128) - 1;
-                overflow::apply(ctx, overflow::sum_overflow_64(true_sum, result));
+                let true_sum = (ra32 as i32 as i128) + (ca as i128) - 1;
+                overflow::apply(ctx, true_sum != (result32 as i32) as i128);
             }
             if instr.rc_bit() {
-                ctx.update_cr_signed(0, result as i64);
+                ctx.update_cr_signed(0, result32 as i32 as i64);
             }
             ctx.pc += 4;
         }
         PpcOpcode::subfx => {
-            let ra = ctx.gpr[instr.ra()];
-            let rb = ctx.gpr[instr.rb()];
-            let result = rb.wrapping_sub(ra);
-            ctx.gpr[instr.rd()] = result;
+            // PPCBUG-017+020: 32-bit truncation; OV uses the 32-bit signed predicate.
+            let ra32 = ctx.gpr[instr.ra()] as u32;
+            let rb32 = ctx.gpr[instr.rb()] as u32;
+            let result32 = rb32.wrapping_sub(ra32);
+            ctx.gpr[instr.rd()] = result32 as u64;
             if instr.oe() {
-                overflow::apply(ctx, overflow::sub_ov_64(ra, rb, result));
+                let true_diff = (rb32 as i32 as i128) - (ra32 as i32 as i128);
+                overflow::apply(ctx, true_diff != (result32 as i32) as i128);
             }
             if instr.rc_bit() {
-                ctx.update_cr_signed(0, result as i64);
+                ctx.update_cr_signed(0, result32 as i32 as i64);
             }
             ctx.pc += 4;
         }
@@ -289,7 +297,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             if instr.oe() {
                 // RT <- !RA + RB + CA  ==  RB - RA - 1 + CA  (32-bit semantics).
                 let true_sum = (rb32 as i32 as i128) - (ra32 as i32 as i128) - 1 + (ca as i128);
-                overflow::apply(ctx, overflow::sum_overflow_64(true_sum, result32 as u64));
+                overflow::apply(ctx, true_sum != (result32 as i32) as i128);
             }
             if instr.rc_bit() {
                 ctx.update_cr_signed(0, result32 as i32 as i64);
@@ -307,7 +315,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.gpr[instr.rd()] = result32 as u64;
             if instr.oe() {
                 let true_sum = -(ra32 as i32 as i128) - 1 + (ca as i128);
-                overflow::apply(ctx, overflow::sum_overflow_64(true_sum, result32 as u64));
+                overflow::apply(ctx, true_sum != (result32 as i32) as i128);
             }
             if instr.rc_bit() {
                 ctx.update_cr_signed(0, result32 as i32 as i64);
@@ -324,7 +332,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.gpr[instr.rd()] = result32 as u64;
             if instr.oe() {
                 let true_sum = -(ra32 as i32 as i128) - 2 + (ca as i128);
-                overflow::apply(ctx, overflow::sum_overflow_64(true_sum, result32 as u64));
+                overflow::apply(ctx, true_sum != (result32 as i32) as i128);
             }
             if instr.rc_bit() {
                 ctx.update_cr_signed(0, result32 as i32 as i64);
@@ -362,22 +370,24 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.pc += 4;
         }
         PpcOpcode::mulhwx => {
+            // PPCBUG-020: 32-bit ABI CR0 view.
             let ra = ctx.gpr[instr.ra()] as i32 as i64;
             let rb = ctx.gpr[instr.rb()] as i32 as i64;
             let result = ra.wrapping_mul(rb);
-            ctx.gpr[instr.rd()] = ((result >> 32) as i32 as i64 as u64) & 0xFFFF_FFFF;
+            ctx.gpr[instr.rd()] = ((result >> 32) as u32) as u64;
             if instr.rc_bit() {
-                ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64);
+                ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64);
             }
             ctx.pc += 4;
         }
         PpcOpcode::mulhwux => {
+            // PPCBUG-020: 32-bit ABI CR0 view.
             let ra = ctx.gpr[instr.ra()] as u32 as u64;
             let rb = ctx.gpr[instr.rb()] as u32 as u64;
             let result = ra.wrapping_mul(rb);
             ctx.gpr[instr.rd()] = (result >> 32) & 0xFFFF_FFFF;
             if instr.rc_bit() {
-                ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64);
+                ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64);
             }
             ctx.pc += 4;
         }
@@ -401,6 +411,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.pc += 4;
         }
         PpcOpcode::divwux => {
+            // PPCBUG-020: 32-bit ABI CR0 view.
             let ra = ctx.gpr[instr.ra()] as u32;
             let rb = ctx.gpr[instr.rb()] as u32;
             let ov = overflow::divw_ov_unsigned(rb);
@@ -413,7 +424,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
                 overflow::apply(ctx, ov);
             }
             if instr.rc_bit() {
-                ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64);
+                ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64);
             }
             ctx.pc += 4;
         }
@@ -486,13 +497,16 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
 
         // ===== Logical =====
         PpcOpcode::andix => {
+            // PPCBUG-020: 32-bit ABI CR0 view.
             ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] & (instr.uimm16() as u64);
-            ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64);
+            ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64);
             ctx.pc += 4;
         }
         PpcOpcode::andisx => {
+            // PPCBUG-023: 32-bit ABI CR0 view. `andis. rA, rS, 0x8000` to test
+            // sign bit of a 32-bit word now correctly classifies bit 31 = 1 as LT.
             ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] & ((instr.uimm16() as u64) << 16);
-            ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64);
+            ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64);
             ctx.pc += 4;
         }
         PpcOpcode::ori => {
@@ -512,8 +526,9 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.pc += 4;
         }
         PpcOpcode::andx => {
+            // PPCBUG-032+020: 32-bit ABI CR0 view (latent under clean inputs).
             ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] & ctx.gpr[instr.rb()];
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
         PpcOpcode::andcx => {
@@ -525,8 +540,9 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.pc += 4;
         }
         PpcOpcode::orx => {
+            // PPCBUG-032+020: 32-bit ABI CR0 view.
             ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] | ctx.gpr[instr.rb()];
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
         PpcOpcode::orcx => {
@@ -538,8 +554,9 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.pc += 4;
         }
         PpcOpcode::xorx => {
+            // PPCBUG-032+020: 32-bit ABI CR0 view.
             ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] ^ ctx.gpr[instr.rb()];
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
         PpcOpcode::norx => {
@@ -589,8 +606,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.pc += 4;
         }
         PpcOpcode::cntlzwx => {
+            // Result is 0..=32, fits in u32 with bit 31 always zero, so the
+            // CR0 view is benign — use the catch-all 32-bit form for consistency.
             ctx.gpr[instr.ra()] = (ctx.gpr[instr.rs()] as u32).leading_zeros() as u64;
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
         PpcOpcode::cntlzdx => {
@@ -601,19 +620,23 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
 
         // ===== Shift =====
         PpcOpcode::slwx => {
+            // PPCBUG-044: 32-bit ABI CR0 view. A result with bit 31 set
+            // (e.g. 0x80000000) is negative in i32 view but positive in i64.
             let sh = ctx.gpr[instr.rb()] as u32;
             ctx.gpr[instr.ra()] = if sh < 32 {
                 ((ctx.gpr[instr.rs()] as u32) << sh) as u64
             } else { 0 };
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
         PpcOpcode::srwx => {
+            // PPCBUG-044: 32-bit ABI CR0 view. The zero-extended right shift clears
+            // bit 31 only for sh >= 1; with sh == 0 the result keeps RS bit 31.
             let sh = ctx.gpr[instr.rb()] as u32;
             ctx.gpr[instr.ra()] = if sh < 32 {
                 ((ctx.gpr[instr.rs()] as u32) >> sh) as u64
             } else { 0 };
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
         PpcOpcode::srawx => {
@@ -707,7 +730,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             let rotated = rs.rotate_left(sh);
             let mask = rlw_mask(mb, me);
             ctx.gpr[instr.ra()] = (rotated & mask) as u64;
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            // PPCBUG-024: 32-bit ABI CR0 view.
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
         PpcOpcode::rlwimix => {
@@ -719,7 +743,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             let mask = rlw_mask(mb, me);
             let ra = ctx.gpr[instr.ra()] as u32;
             ctx.gpr[instr.ra()] = ((rotated & mask) | (ra & !mask)) as u64;
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            // PPCBUG-025: 32-bit ABI CR0 view.
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
         PpcOpcode::rlwnmx => {
@@ -730,7 +755,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             let rotated = rs.rotate_left(sh);
             let mask = rlw_mask(mb, me);
             ctx.gpr[instr.ra()] = (rotated & mask) as u64;
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            // PPCBUG-026: 32-bit ABI CR0 view.
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
         PpcOpcode::rldiclx => {
@@ -5004,14 +5030,15 @@ mod tests {
 
     #[test]
     fn addo_sets_xer_ov_on_signed_overflow_and_stickies_so() {
+        // PPCBUG-012: 32-bit ABI. INT32_MAX + 1 overflows to INT32_MIN.
         let mut ctx = PpcContext::new();
         let mut mem = TestMem::new();
-        ctx.gpr[3] = i64::MAX as u64;
+        ctx.gpr[3] = i32::MAX as u32 as u64;
         ctx.gpr[4] = 1;
         write_instr(&mut mem, 0, addx_raw(5, 3, 4, true, false));
         ctx.pc = 0;
         step(&mut ctx, &mut mem);
-        assert_eq!(ctx.gpr[5], i64::MIN as u64);
+        assert_eq!(ctx.gpr[5], 0x8000_0000u64);
         assert_eq!(ctx.xer_ov, 1, "OV must be set on signed overflow");
         assert_eq!(ctx.xer_so, 1, "SO must be stickied from OV");
     }
@@ -5046,9 +5073,10 @@ mod tests {
     }
 
     #[test]
-    fn addx_rc_uses_64bit_compare_not_32bit() {
-        // r3 = 0x0000_0000_FFFF_FFFF, r4 = 0 → result = 0x0000_0000_FFFF_FFFF.
-        // As i32 this is -1 (lt). As i64 this is positive (gt). Spec says 64-bit.
+    fn addx_rc_uses_32bit_compare_in_xbox_abi() {
+        // PPCBUG-012+020: 32-bit ABI. r3 + r4 = 0xFFFFFFFF (low 32). As i32
+        // this is -1 (CR0.LT). The previous 64-bit compare wrongly classified
+        // this as positive (CR0.GT) for Xbox 360 binaries.
         let mut ctx = PpcContext::new();
         let mut mem = TestMem::new();
         ctx.gpr[3] = 0x0000_0000_FFFF_FFFF;
@@ -5057,8 +5085,8 @@ mod tests {
         ctx.pc = 0;
         step(&mut ctx, &mut mem);
         assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_FFFF);
-        assert!(!ctx.cr[0].lt, "64-bit compare: value is positive, not negative");
-        assert!(ctx.cr[0].gt);
+        assert!(ctx.cr[0].lt, "32-bit ABI: 0xFFFFFFFF as i32 is -1, CR0.LT");
+        assert!(!ctx.cr[0].gt);
         assert!(!ctx.cr[0].eq);
     }
 
@@ -5203,6 +5231,39 @@ mod tests {
         assert_eq!(ctx.xer_ca, 1, "rb>=ra → CA=1 (10 > 5)");
     }
 
+    #[test]
+    fn andisx_sign_bit_set_classifies_lt() {
+        // PPCBUG-023: andis. r4, r3, 0x8000 with r3=0xFFFFFFFF should produce
+        // result=0x80000000 with CR0.LT=1 (i32 view).
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 0xFFFF_FFFFu64;
+        // andis. r4, r3, 0x8000: opcode 29, uimm16 = 0x8000
+        let raw = (29u32 << 26) | (3 << 21) | (4 << 16) | 0x8000;
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[4], 0x8000_0000u64);
+        assert!(ctx.cr[0].lt, "result=0x80000000 → i32 view negative → CR0.LT");
+    }
+
+    #[test]
+    fn slwx_high_bit_result_classifies_lt() {
+        // PPCBUG-044: slwx producing 0x80000000 must classify as CR0.LT under
+        // the 32-bit ABI, not CR0.GT (which 64-bit view would give).
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 0x4000_0000u64;
+        ctx.gpr[4] = 1;
+        // slwx. r5, r3, r4  (XO=24, Rc=1)
+        let raw = (31u32 << 26) | (3 << 21) | (5 << 16) | (4 << 11) | (24 << 1) | 1;
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0x8000_0000u64);
+        assert!(ctx.cr[0].lt, "0x80000000 as i32 is negative");
+    }
+
     #[test]
     fn lha_negative_halfword_zero_extends_upper() {
         // PPCBUG-095: memory 0x8000 must yield gpr[rD] = 0x00000000_FFFF8000.

From 49103bb898c6ec45b6a58b0b01ac933b44dcb562 Mon Sep 17 00:00:00 2001
From: MechaCat02 <fabian@diekaulbachs.de>
Date: Sat, 2 May 2026 12:07:32 +0200
Subject: [PATCH 7/7] =?UTF-8?q?fix(cpu):=20P4=20review-fix=20=E2=80=94=20s?=
 =?UTF-8?q?ubfx/subfcx=20OE=20predicate=20+=20mulli=20test=20rigor?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Independent reviewer of the P4 branch found two issues:

(1) BLOCKING — subfx and subfcx OE handlers still called the legacy
`overflow::sum_overflow_64(true_diff, result32 as u64)` while batch 6
had migrated all add* sites to the inline `true_sum != (result32 as i32)
as i128` form. The legacy helper compares `true_diff` against
`(result32 as u64) as i64 as i128`, which views any bit-31-set result
as a positive i64 (e.g. result=0x80000000 → +2147483648 in i64). For any
legitimate negative result (such as i32::MIN) with no actual 32-bit
overflow, this caused spurious OV=1.

Concrete repro now caught by `subfo_no_spurious_ov_when_result_has_bit31_set`:
r3=1, r4=0x80000001 → result=0x80000000, true_diff=-2147483648, no OV.
Pre-fix: spurious OV=1.

(2) Minor — `mulli_overflow_wraps_to_32` rubber-stamped: with ra=0x80000000
and imm=2, both pre-fix (`as i64 as u64`) and post-fix (`as u32 as u64`)
write the same value. Replaced with ra=u64::MAX (polluted upper bits) where
pre-fix writes 0xFFFFFFFF_FFFFFFFE and post-fix writes 0x00000000_FFFFFFFE.

Fixes:
- interpreter.rs subfx/subfcx OE: switch to inline 32-bit predicate
  matching the rest of batch 6.
- subfo_sets_xer_ov_on_min_minus_one: renamed to
  subfo_sets_xer_ov_on_int32_min_minus_one and updated to test 32-bit
  overflow (r4=0x80000000, r3=1 → result 0x7FFFFFFF, OV=1).
- New: subfo_no_spurious_ov_when_result_has_bit31_set (PPCBUG-017
  review-fix regression).
- New: subfco_no_spurious_ov_when_result_has_bit31_set (same for PPCBUG-007).
- mulli_overflow_wraps_to_32: redesigned with polluted upper bits to
  actually discriminate pre/post fix.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 crates/xenia-cpu/src/interpreter.rs | 64 ++++++++++++++++++++++-------
 1 file changed, 49 insertions(+), 15 deletions(-)

diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs
index 4a37ccb..aad2485 100644
--- a/crates/xenia-cpu/src/interpreter.rs
+++ b/crates/xenia-cpu/src/interpreter.rs
@@ -260,7 +260,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.gpr[instr.rd()] = result32 as u64;
             if instr.oe() {
                 let true_diff = (rb32 as i32 as i128) - (ra32 as i32 as i128);
-                overflow::apply(ctx, overflow::sum_overflow_64(true_diff, result32 as u64));
+                overflow::apply(ctx, true_diff != (result32 as i32) as i128);
             }
             if instr.rc_bit() {
                 ctx.update_cr_signed(0, result32 as i32 as i64);
@@ -278,7 +278,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.gpr[instr.rd()] = result32 as u64;
             if instr.oe() {
                 let true_diff = (rb32 as i32 as i128) - (ra32 as i32 as i128);
-                overflow::apply(ctx, overflow::sum_overflow_64(true_diff, result32 as u64));
+                overflow::apply(ctx, true_diff != (result32 as i32) as i128);
             }
             if instr.rc_bit() {
                 ctx.update_cr_signed(0, result32 as i32 as i64);
@@ -5091,21 +5091,58 @@ mod tests {
     }
 
     #[test]
-    fn subfo_sets_xer_ov_on_min_minus_one() {
+    fn subfo_sets_xer_ov_on_int32_min_minus_one() {
+        // PPCBUG-017: 32-bit ABI subfo overflow detection. r4=INT32_MIN, r3=1
+        // → result = INT32_MIN - 1 → wraps to INT32_MAX with OV=1.
         let mut ctx = PpcContext::new();
         let mut mem = TestMem::new();
-        // subfo r5, r3, r4  ->  r5 = r4 - r3
-        // r4 = INT64_MIN, r3 = 1 -> result overflows
         ctx.gpr[3] = 1;
-        ctx.gpr[4] = i64::MIN as u64;
+        ctx.gpr[4] = 0x8000_0000u64;
         let raw = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (40 << 1);
         write_instr(&mut mem, 0, raw);
         ctx.pc = 0;
         step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0x7FFF_FFFFu64);
         assert_eq!(ctx.xer_ov, 1);
         assert_eq!(ctx.xer_so, 1);
     }
 
+    #[test]
+    fn subfo_no_spurious_ov_when_result_has_bit31_set() {
+        // PPCBUG-017 review-fix regression: subfo r5, r3, r4 with r3=1, r4=0x80000001
+        // → result = 0x80000000. This is i32::MIN — a legitimate negative value
+        // with no 32-bit overflow (true_diff = -2147483648, fits in i32).
+        // The legacy `sum_overflow_64` predicate compared against the u64 view
+        // of result (= +2147483648), spuriously flagging OV=1.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 1;
+        ctx.gpr[4] = 0x8000_0001u64;
+        // subfo r5, r3, r4
+        let raw = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (40 << 1);
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0x8000_0000u64);
+        assert_eq!(ctx.xer_ov, 0, "legitimate i32::MIN result must NOT trigger OV");
+    }
+
+    #[test]
+    fn subfco_no_spurious_ov_when_result_has_bit31_set() {
+        // PPCBUG-007 same review-fix: subfcx OE handler must use 32-bit predicate.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 1;
+        ctx.gpr[4] = 0x8000_0001u64;
+        // subfco r5, r3, r4  (XO=8, OE=1)
+        let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (8 << 1);
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0x8000_0000u64);
+        assert_eq!(ctx.xer_ov, 0, "legitimate i32::MIN result must NOT trigger OV");
+    }
+
     #[test]
     fn mullwo_sets_xer_ov_when_product_overflows_32_bits() {
         let mut ctx = PpcContext::new();
@@ -5415,22 +5452,19 @@ mod tests {
 
     #[test]
     fn mulli_overflow_wraps_to_32() {
-        // PPCBUG-004: mulli result must be truncated to 32 bits.
-        // 0x10000 * 0x10000 = 0x1_00000000 — low 32 bits are 0.
+        // PPCBUG-004: mulli must truncate to 32 bits even when the upper 32 bits
+        // of RA are polluted (e.g. by upstream bugs). Pre-fix: ra = u64::MAX as
+        // i64 = -1, * 2 = -2, written to GPR as `0xFFFFFFFF_FFFFFFFE`. Post-fix:
+        // truncated to `0xFFFFFFFE`. Discriminating regression test.
         let mut ctx = PpcContext::new();
         let mut mem = TestMem::new();
-        ctx.gpr[3] = 0x10000;
-        // mulli r4, r3, 0x4000 (4 * 0x10000 = 0x40000, no overflow case for sanity)
-        // Better case: 0x10000 * 0x4000 = 0x4000_0000 — fits in i32.
-        // For overflow: ra=0x80000000 (i32::MIN), imm=2 → 0xFFFFFFFF_00000000, low32=0
-        ctx.gpr[3] = 0x80000000u64;
+        ctx.gpr[3] = u64::MAX;
         // mulli r4, r3, 2: opcode 7
         let raw = (7u32 << 26) | (4 << 21) | (3 << 16) | 2;
         write_instr(&mut mem, 0, raw);
         ctx.pc = 0;
         step(&mut ctx, &mut mem);
-        // i32::MIN * 2 = 0xFFFFFFFF_00000000 in i64 view; low 32 = 0.
-        assert_eq!(ctx.gpr[4], 0);
+        assert_eq!(ctx.gpr[4], 0xFFFF_FFFEu64, "low 32 bits = -2 in i32; upper 32 zero");
     }
 
     #[test]