From 49103bb898c6ec45b6a58b0b01ac933b44dcb562 Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Sat, 2 May 2026 12:07:32 +0200 Subject: [PATCH] =?UTF-8?q?fix(cpu):=20P4=20review-fix=20=E2=80=94=20subfx?= =?UTF-8?q?/subfcx=20OE=20predicate=20+=20mulli=20test=20rigor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Independent reviewer of the P4 branch found two issues: (1) BLOCKING — subfx and subfcx OE handlers still called the legacy `overflow::sum_overflow_64(true_diff, result32 as u64)` while batch 6 had migrated all add* sites to the inline `true_sum != (result32 as i32) as i128` form. The legacy helper compares `true_diff` against `(result32 as u64) as i64 as i128`, which views any bit-31-set result as a positive i64 (e.g. result=0x80000000 → +2147483648 in i64). For a legitimate i32::MIN result with no actual 32-bit overflow, this caused spurious OV=1. Concrete repro now caught by `subfo_no_spurious_ov_when_result_has_bit31_set`: r3=1, r4=0x80000001 → result=0x80000000, true_diff=-2147483648, no OV. Pre-fix: spurious OV=1. (2) Minor — `mulli_overflow_wraps_to_32` rubber-stamped: with ra=0x80000000 and imm=2, both pre-fix (`as i64 as u64`) and post-fix (`as u32 as u64`) write the same value. Replaced with ra=u64::MAX (polluted upper bits) where pre-fix writes 0xFFFFFFFF_FFFFFFFE and post-fix writes 0x00000000_FFFFFFFE. Fixes: - interpreter.rs subfx/subfcx OE: switch to inline 32-bit predicate matching the rest of batch 6. - subfo_sets_xer_ov_on_min_minus_one: renamed and updated to test 32-bit overflow (r4=0x80000000 - 1 = 0x7FFFFFFF, OV=1). - New: subfo_no_spurious_ov_when_result_has_bit31_set (PPCBUG-017 review-fix regression). - New: subfco_no_spurious_ov_when_result_has_bit31_set (same for PPCBUG-007). - mulli_overflow_wraps_to_32: redesigned with polluted upper bits to actually discriminate pre/post fix. Co-Authored-By: Claude Sonnet 4.6 --- crates/xenia-cpu/src/interpreter.rs | 64 ++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 15 deletions(-) diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs index 4a37ccb..aad2485 100644 --- a/crates/xenia-cpu/src/interpreter.rs +++ b/crates/xenia-cpu/src/interpreter.rs @@ -260,7 +260,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - ctx.gpr[instr.rd()] = result32 as u64; if instr.oe() { let true_diff = (rb32 as i32 as i128) - (ra32 as i32 as i128); - overflow::apply(ctx, overflow::sum_overflow_64(true_diff, result32 as u64)); + overflow::apply(ctx, true_diff != (result32 as i32) as i128); } if instr.rc_bit() { ctx.update_cr_signed(0, result32 as i32 as i64); @@ -278,7 +278,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - ctx.gpr[instr.rd()] = result32 as u64; if instr.oe() { let true_diff = (rb32 as i32 as i128) - (ra32 as i32 as i128); - overflow::apply(ctx, overflow::sum_overflow_64(true_diff, result32 as u64)); + overflow::apply(ctx, true_diff != (result32 as i32) as i128); } if instr.rc_bit() { ctx.update_cr_signed(0, result32 as i32 as i64); @@ -5091,21 +5091,58 @@ mod tests { } #[test] - fn subfo_sets_xer_ov_on_min_minus_one() { + fn subfo_sets_xer_ov_on_int32_min_minus_one() { + // PPCBUG-017: 32-bit ABI subfo overflow detection. r4=INT32_MIN, r3=1 + // → result = INT32_MIN - 1 → wraps to INT32_MAX with OV=1. let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); - // subfo r5, r3, r4 -> r5 = r4 - r3 - // r4 = INT64_MIN, r3 = 1 -> result overflows ctx.gpr[3] = 1; - ctx.gpr[4] = i64::MIN as u64; + ctx.gpr[4] = 0x8000_0000u64; let raw = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (40 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); + assert_eq!(ctx.gpr[5], 0x7FFF_FFFFu64); assert_eq!(ctx.xer_ov, 1); assert_eq!(ctx.xer_so, 1); } + #[test] + fn subfo_no_spurious_ov_when_result_has_bit31_set() { + // PPCBUG-017 review-fix regression: subfo r5, r3, r4 with r3=1, r4=0x80000001 + // → result = 0x80000000. This is i32::MIN — a legitimate negative value + // with no 32-bit overflow (true_diff = -2147483648, fits in i32). + // The legacy `sum_overflow_64` predicate compared against the u64 view + // of result (= +2147483648), spuriously flagging OV=1. + let mut ctx = PpcContext::new(); + let mut mem = TestMem::new(); + ctx.gpr[3] = 1; + ctx.gpr[4] = 0x8000_0001u64; + // subfo r5, r3, r4 + let raw = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (40 << 1); + write_instr(&mut mem, 0, raw); + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert_eq!(ctx.gpr[5], 0x8000_0000u64); + assert_eq!(ctx.xer_ov, 0, "legitimate i32::MIN result must NOT trigger OV"); + } + + #[test] + fn subfco_no_spurious_ov_when_result_has_bit31_set() { + // PPCBUG-007 same review-fix: subfcx OE handler must use 32-bit predicate. + let mut ctx = PpcContext::new(); + let mut mem = TestMem::new(); + ctx.gpr[3] = 1; + ctx.gpr[4] = 0x8000_0001u64; + // subfco r5, r3, r4 (XO=8, OE=1) + let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (8 << 1); + write_instr(&mut mem, 0, raw); + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert_eq!(ctx.gpr[5], 0x8000_0000u64); + assert_eq!(ctx.xer_ov, 0, "legitimate i32::MIN result must NOT trigger OV"); + } + #[test] fn mullwo_sets_xer_ov_when_product_overflows_32_bits() { let mut ctx = PpcContext::new(); @@ -5415,22 +5452,19 @@ mod tests { #[test] fn mulli_overflow_wraps_to_32() { - // PPCBUG-004: mulli result must be truncated to 32 bits. - // 0x10000 * 0x10000 = 0x1_00000000 — low 32 bits are 0. + // PPCBUG-004: mulli must truncate to 32 bits even when the upper 32 bits + // of RA are polluted (e.g. by upstream bugs). Pre-fix: ra = u64::MAX as + // i64 = -1, * 2 = -2, written to GPR as `0xFFFFFFFF_FFFFFFFE`. Post-fix: + // truncated to `0xFFFFFFFE`. Discriminating regression test. let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); - ctx.gpr[3] = 0x10000; - // mulli r4, r3, 0x4000 (4 * 0x10000 = 0x40000, no overflow case for sanity) - // Better case: 0x10000 * 0x4000 = 0x4000_0000 — fits in i32. - // For overflow: ra=0x80000000 (i32::MIN), imm=2 → 0xFFFFFFFF_00000000, low32=0 - ctx.gpr[3] = 0x80000000u64; + ctx.gpr[3] = u64::MAX; // mulli r4, r3, 2: opcode 7 let raw = (7u32 << 26) | (4 << 21) | (3 << 16) | 2; write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); - // i32::MIN * 2 = 0xFFFFFFFF_00000000 in i64 view; low 32 = 0. - assert_eq!(ctx.gpr[4], 0); + assert_eq!(ctx.gpr[4], 0xFFFF_FFFEu64, "low 32 bits = -2 in i32; upper 32 zero"); } #[test]