From 82a9bff93468eb2b5e81fc34fc0d898b203e8fef Mon Sep 17 00:00:00 2001
From: MechaCat02 <fabian@diekaulbachs.de>
Date: Sat, 2 May 2026 11:44:34 +0200
Subject: [PATCH] fix(cpu): PPCBUG-009/010+011/041+042+043 mul/div + srawx
 truncation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 4 batch 4: mullwx, divwx (coupled +CR0), srawx/srawix (coupled +CR0).

- PPCBUG-009 mullwx: 32-bit ABI. Product truncated to u32 before write;
  CR0 (Rc=1) is now derived from the i32 view of the truncated result.
  The OE path still uses the full i64 product to detect overflow.
- PPCBUG-010+011 divwx (coupled): quotient zero-extended (canary uses
  ZeroExtend(v, INT64_TYPE)). CR0 view via i32 — without this, a negative
  i32 quotient (e.g. -3 from -10/3) would be classified as positive in
  i64 view of the now-zero-extended writeback.
- PPCBUG-041+042+043 srawx/srawix (coupled): writeback uses `as u32 as u64`
  (was `as i64 as u64`). The srawx all-ones case (sh>=32 with negative
  input) writes 0x00000000_FFFFFFFF instead of u64::MAX. CR0 view via i32.
  CA logic preserved unchanged (audit-verified independently correct).

Tests:
- mullwx_overflow_truncates_to_32 (PPCBUG-009).
- divwx_negative_quotient_zero_extends (PPCBUG-010+011).
- srawx_negative_value_zero_extends_upper (PPCBUG-041+043).
- srawix_high_count_negative_input_yields_low32_all_ones (PPCBUG-042+043).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 crates/xenia-cpu/src/interpreter.rs | 97 +++++++++++++++++++++++++----
 1 file changed, 84 insertions(+), 13 deletions(-)

diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs
index 479c3d1..652d566 100644
--- a/crates/xenia-cpu/src/interpreter.rs
+++ b/crates/xenia-cpu/src/interpreter.rs
@@ -347,16 +347,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.pc += 4;
         }
         PpcOpcode::mullwx => {
+            // PPCBUG-009: 32-bit ABI. Truncate product to u32 — overflow detection
+            // (mullw_ov) still uses the full i64 product to catch the overflow.
             let ra = ctx.gpr[instr.ra()] as i32 as i64;
             let rb = ctx.gpr[instr.rb()] as i32 as i64;
             let product = ra.wrapping_mul(rb);
-            ctx.gpr[instr.rd()] = product as u64;
+            ctx.gpr[instr.rd()] = product as u32 as u64;
             if instr.oe() {
-                // OV iff the 64-bit product can't fit into 32-bit signed.
                 overflow::apply(ctx, overflow::mullw_ov(product));
             }
             if instr.rc_bit() {
-                ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64);
+                ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64);
             }
             ctx.pc += 4;
         }
@@ -381,20 +382,21 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.pc += 4;
         }
         PpcOpcode::divwx => {
+            // PPCBUG-010+011 coupled: 32-bit ABI. Quotient zero-extended to u64
+            // (canary explicitly uses ZeroExtend(v, INT64_TYPE)). CR0 view via i32.
             let ra = ctx.gpr[instr.ra()] as i32;
             let rb = ctx.gpr[instr.rb()] as i32;
             let ov = overflow::divw_ov_signed(ra, rb);
             if ov {
-                // PPC: RT undefined on div-by-zero / INT_MIN/-1. Canary uses 0.
                 ctx.gpr[instr.rd()] = 0;
             } else {
-                ctx.gpr[instr.rd()] = (ra / rb) as i64 as u64;
+                ctx.gpr[instr.rd()] = (ra / rb) as u32 as u64;
             }
             if instr.oe() {
                 overflow::apply(ctx, ov);
             }
             if instr.rc_bit() {
-                ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64);
+                ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64);
             }
             ctx.pc += 4;
         }
@@ -615,34 +617,37 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.pc += 4;
         }
         PpcOpcode::srawx => {
+            // PPCBUG-041+043 coupled: 32-bit ABI writeback truncation + CR0 i32.
+            // CA logic is independently correct (uses u32 shifted-out test).
             let rs = ctx.gpr[instr.rs()] as i32;
             let sh = ctx.gpr[instr.rb()] as u32 & 0x3F;
             if sh == 0 {
-                ctx.gpr[instr.ra()] = rs as i64 as u64;
+                ctx.gpr[instr.ra()] = rs as u32 as u64;
                 ctx.xer_ca = 0;
             } else if sh < 32 {
                 let result = rs >> sh;
                 ctx.xer_ca = if rs < 0 && (rs as u32) << (32 - sh) != 0 { 1 } else { 0 };
-                ctx.gpr[instr.ra()] = result as i64 as u64;
+                ctx.gpr[instr.ra()] = result as u32 as u64;
             } else {
-                ctx.gpr[instr.ra()] = if rs < 0 { u64::MAX } else { 0 };
+                ctx.gpr[instr.ra()] = if rs < 0 { 0xFFFF_FFFFu64 } else { 0 };
                 ctx.xer_ca = if rs < 0 { 1 } else { 0 };
             }
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
         PpcOpcode::srawix => {
+            // PPCBUG-042+043 coupled: same shape as srawx for the sh-immediate form.
             let rs = ctx.gpr[instr.rs()] as i32;
             let sh = instr.sh();
             if sh == 0 {
-                ctx.gpr[instr.ra()] = rs as i64 as u64;
+                ctx.gpr[instr.ra()] = rs as u32 as u64;
                 ctx.xer_ca = 0;
             } else {
                 let result = rs >> sh;
                 ctx.xer_ca = if rs < 0 && (rs as u32) << (32 - sh) != 0 { 1 } else { 0 };
-                ctx.gpr[instr.ra()] = result as i64 as u64;
+                ctx.gpr[instr.ra()] = result as u32 as u64;
             }
-            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
+            if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
             ctx.pc += 4;
         }
         PpcOpcode::sldx => {
@@ -5198,6 +5203,72 @@ mod tests {
         assert_eq!(ctx.xer_ca, 1, "rb>=ra → CA=1 (10 > 5)");
     }
 
+    #[test]
+    fn mullwx_overflow_truncates_to_32() {
+        // PPCBUG-009: mullwo r5, r3, r4 with ra=0x10000, rb=0x10000 → product
+        // 0x1_0000_0000 (does not fit in 32 bits). Low 32 = 0; XER[OV] must be set.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 0x10000;
+        ctx.gpr[4] = 0x10000;
+        // mullwo r5, r3, r4  (rd=5 << 21, ra=3 << 16, rb=4 << 11, OE=1 << 10, XO=235 << 1)
+        let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (235 << 1);
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0, "low 32 bits = 0");
+        assert_eq!(ctx.xer_ov, 1, "overflow detected");
+    }
+
+    #[test]
+    fn divwx_negative_quotient_zero_extends() {
+        // PPCBUG-010+011: -10 / 3 = -3 must produce 0x00000000_FFFFFFFD,
+        // not 0xFFFFFFFF_FFFFFFFD. CR0.LT must still fire (i32 view of FFFFFFFD is negative).
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = (-10i32) as u32 as u64;
+        ctx.gpr[4] = 3;
+        // divwx. r5, r3, r4  (rd=5 << 21, ra=3 << 16, rb=4 << 11, XO=491 << 1, Rc=1 in bit 0)
+        let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (491 << 1) | 1;
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_FFFDu64); // upper 32 bits must be zero
+        assert!(ctx.cr[0].lt, "CR0.LT must fire for negative i32 quotient");
+    }
+
+    #[test]
+    fn srawx_negative_value_zero_extends_upper() {
+        // PPCBUG-041+043: srawx of negative i32 by 1 produces a negative i32
+        // (0x8000_0000 >> 1 = 0xC000_0000); writeback must zero-extend to u64 (not sign-extend).
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 0x8000_0000u64; // i32::MIN
+        ctx.gpr[4] = 1;
+        // srawx. r5, r3, r4  (source rs=3 << 21, dest ra=5 << 16, rb=4 << 11, XO=792, Rc=1)
+        let raw = (31u32 << 26) | (3 << 21) | (5 << 16) | (4 << 11) | (792 << 1) | 1;
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0x0000_0000_C000_0000u64);
+        assert!(ctx.cr[0].lt); // CR0 is taken from the i32 view, where 0xC000_0000 is negative
+    }
+
+    #[test]
+    fn srawix_high_count_negative_input_yields_low32_all_ones() {
+        // PPCBUG-042+043: srawi with count=31 on negative input → low 32 bits
+        // all ones (0xFFFFFFFF), upper 32 zero (was u64::MAX before fix).
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 0x8000_0000u64; // i32::MIN in the low word
+        // srawix r5, r3, 31  (source rs=3 << 21, dest ra=5 << 16, sh=31 << 11, XO=824, Rc=0)
+        let raw = (31u32 << 26) | (3 << 21) | (5 << 16) | (31 << 11) | (824 << 1);
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_FFFFu64);
+    }
+
     #[test]
     fn addi_li_neg_one_zero_extends_upper() {
         // PPCBUG-001: `li r3, -1` (= addi r3, r0, -1) must produce