From 49103bb898c6ec45b6a58b0b01ac933b44dcb562 Mon Sep 17 00:00:00 2001
From: MechaCat02 <fabian@diekaulbachs.de>
Date: Sat, 2 May 2026 12:07:32 +0200
Subject: [PATCH] =?UTF-8?q?fix(cpu):=20P4=20review-fix=20=E2=80=94=20subfx?=
 =?UTF-8?q?/subfcx=20OE=20predicate=20+=20mulli=20test=20rigor?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Independent reviewer of the P4 branch found two issues:

(1) BLOCKING — subfx and subfcx OE handlers still called the legacy
`overflow::sum_overflow_64(true_diff, result32 as u64)` while batch 6
had migrated all add* sites to the inline `true_sum != (result32 as i32)
as i128` form. The legacy helper compares `true_diff` against
`(result32 as u64) as i64 as i128`, which views any bit-31-set result
as a positive i64 (e.g. result=0x80000000 → +2147483648 in i64). For a
legitimate i32::MIN result with no actual 32-bit overflow, this caused
spurious OV=1.

Concrete repro now caught by `subfo_no_spurious_ov_when_result_has_bit31_set`:
r3=1, r4=0x80000001 → result=0x80000000, true_diff=-2147483648, no OV.
Pre-fix: spurious OV=1.

(2) Minor — `mulli_overflow_wraps_to_32` rubber-stamped: with ra=0x80000000
and imm=2, both pre-fix (`as i64 as u64`) and post-fix (`as u32 as u64`)
write the same value. Replaced with ra=u64::MAX (polluted upper bits) where
pre-fix writes 0xFFFFFFFF_FFFFFFFE and post-fix writes 0x00000000_FFFFFFFE.

Fixes:
- interpreter.rs subfx/subfcx OE: switch to inline 32-bit predicate
  matching the rest of batch 6.
- subfo_sets_xer_ov_on_min_minus_one: renamed and updated to test 32-bit
  overflow (r4=0x80000000 - 1 = 0x7FFFFFFF, OV=1).
- New: subfo_no_spurious_ov_when_result_has_bit31_set (PPCBUG-017
  review-fix regression).
- New: subfco_no_spurious_ov_when_result_has_bit31_set (same for PPCBUG-007).
- mulli_overflow_wraps_to_32: redesigned with polluted upper bits to
  actually discriminate pre/post fix.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 crates/xenia-cpu/src/interpreter.rs | 64 ++++++++++++++++++++++-------
 1 file changed, 49 insertions(+), 15 deletions(-)

diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs
index 4a37ccb..aad2485 100644
--- a/crates/xenia-cpu/src/interpreter.rs
+++ b/crates/xenia-cpu/src/interpreter.rs
@@ -260,7 +260,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.gpr[instr.rd()] = result32 as u64;
             if instr.oe() {
                 let true_diff = (rb32 as i32 as i128) - (ra32 as i32 as i128);
-                overflow::apply(ctx, overflow::sum_overflow_64(true_diff, result32 as u64));
+                overflow::apply(ctx, true_diff != (result32 as i32) as i128);
             }
             if instr.rc_bit() {
                 ctx.update_cr_signed(0, result32 as i32 as i64);
@@ -278,7 +278,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.gpr[instr.rd()] = result32 as u64;
             if instr.oe() {
                 let true_diff = (rb32 as i32 as i128) - (ra32 as i32 as i128);
-                overflow::apply(ctx, overflow::sum_overflow_64(true_diff, result32 as u64));
+                overflow::apply(ctx, true_diff != (result32 as i32) as i128);
             }
             if instr.rc_bit() {
                 ctx.update_cr_signed(0, result32 as i32 as i64);
@@ -5091,21 +5091,58 @@ mod tests {
     }
 
     #[test]
-    fn subfo_sets_xer_ov_on_min_minus_one() {
+    fn subfo_sets_xer_ov_on_int32_min_minus_one() {
+        // PPCBUG-017: 32-bit ABI subfo overflow detection. r4=INT32_MIN, r3=1
+        // → result = INT32_MIN - 1 → wraps to INT32_MAX with OV=1.
         let mut ctx = PpcContext::new();
         let mut mem = TestMem::new();
-        // subfo r5, r3, r4  ->  r5 = r4 - r3
-        // r4 = INT64_MIN, r3 = 1 -> result overflows
         ctx.gpr[3] = 1;
-        ctx.gpr[4] = i64::MIN as u64;
+        ctx.gpr[4] = 0x8000_0000u64;
         let raw = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (40 << 1);
         write_instr(&mut mem, 0, raw);
         ctx.pc = 0;
         step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0x7FFF_FFFFu64);
         assert_eq!(ctx.xer_ov, 1);
         assert_eq!(ctx.xer_so, 1);
     }
 
+    #[test]
+    fn subfo_no_spurious_ov_when_result_has_bit31_set() {
+        // PPCBUG-017 review-fix regression: subfo r5, r3, r4 with r3=1, r4=0x80000001
+        // → result = 0x80000000. This is i32::MIN — a legitimate negative value
+        // with no 32-bit overflow (true_diff = -2147483648, fits in i32).
+        // The legacy `sum_overflow_64` predicate compared against the u64 view
+        // of result (= +2147483648), spuriously flagging OV=1.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 1;
+        ctx.gpr[4] = 0x8000_0001u64;
+        // subfo r5, r3, r4
+        let raw = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (40 << 1);
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0x8000_0000u64);
+        assert_eq!(ctx.xer_ov, 0, "legitimate i32::MIN result must NOT trigger OV");
+    }
+
+    #[test]
+    fn subfco_no_spurious_ov_when_result_has_bit31_set() {
+        // PPCBUG-007 same review-fix: subfcx OE handler must use 32-bit predicate.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.gpr[3] = 1;
+        ctx.gpr[4] = 0x8000_0001u64;
+        // subfco r5, r3, r4  (XO=8, OE=1)
+        let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (8 << 1);
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.gpr[5], 0x8000_0000u64);
+        assert_eq!(ctx.xer_ov, 0, "legitimate i32::MIN result must NOT trigger OV");
+    }
+
     #[test]
     fn mullwo_sets_xer_ov_when_product_overflows_32_bits() {
         let mut ctx = PpcContext::new();
@@ -5415,22 +5452,19 @@ mod tests {
 
     #[test]
     fn mulli_overflow_wraps_to_32() {
-        // PPCBUG-004: mulli result must be truncated to 32 bits.
-        // 0x10000 * 0x10000 = 0x1_00000000 — low 32 bits are 0.
+        // PPCBUG-004: mulli must truncate to 32 bits even when the upper 32 bits
+        // of RA are polluted (e.g. by upstream bugs). Pre-fix: ra = u64::MAX as
+        // i64 = -1, * 2 = -2, written to GPR as `0xFFFFFFFF_FFFFFFFE`. Post-fix:
+        // truncated to `0xFFFFFFFE`. Discriminating regression test.
         let mut ctx = PpcContext::new();
         let mut mem = TestMem::new();
-        ctx.gpr[3] = 0x10000;
-        // mulli r4, r3, 0x4000 (4 * 0x10000 = 0x40000, no overflow case for sanity)
-        // Better case: 0x10000 * 0x4000 = 0x4000_0000 — fits in i32.
-        // For overflow: ra=0x80000000 (i32::MIN), imm=2 → 0xFFFFFFFF_00000000, low32=0
-        ctx.gpr[3] = 0x80000000u64;
+        ctx.gpr[3] = u64::MAX;
         // mulli r4, r3, 2: opcode 7
         let raw = (7u32 << 26) | (4 << 21) | (3 << 16) | 2;
         write_instr(&mut mem, 0, raw);
         ctx.pc = 0;
         step(&mut ctx, &mut mem);
-        // i32::MIN * 2 = 0xFFFFFFFF_00000000 in i64 view; low 32 = 0.
-        assert_eq!(ctx.gpr[4], 0);
+        assert_eq!(ctx.gpr[4], 0xFFFF_FFFEu64, "low 32 bits = -2 in i32; upper 32 zero");
     }
 
     #[test]