From 26b98975c3f5cfb2986aef0f6c372731199ec771 Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Sat, 2 May 2026 12:20:02 +0200 Subject: [PATCH] fix(cpu): PPCBUG-181/182/183/202/203/205 FMA VXISI + NaN sign preservation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 5 batch 2 (5b): VXISI / NaN handling for the FMA family. The 8 FMA opcodes (fmaddx/fmaddsx/fmsubx/fmsubsx/fnmaddx/fnmaddsx/fnmsubx/ fnmsubsx) all share two fix shapes: 1. VXISI on the add/sub step. The previous code passed `a*c` to check_invalid_add, which has separate rounding from the FMA. In extreme cases this gives the wrong sign (PPCBUG-202) or wrong infinity status. Worse, fmsub/fnmadd/fnmsub had NO add-step VXISI check at all (PPCBUG-181/182/203). The fnmsub pattern is the canonical Newton- Raphson step — the most common FPU path in Xbox 360 graphics code. 2. NaN sign preservation in fnmadd/fnmsub. ISA Book I §4.3.4 forbids negation of a NaN FMA result; Rust's unary `-` flips the IEEE-754 sign bit (PPCBUG-183/205). Fixes: - fpscr.rs: new helper `check_invalid_fma_add(ctx, a, c, b, sub)` that derives VXISI from input properties (mathematical-product sign + b sign) instead of from the lossy `a*c` value. Also covers SNaN. - interpreter.rs: all 8 FMA arms now use the new helper; fnmadd[s]/ fnmsub[s] gate the negation on `!fma.is_nan()`. Tests: - fmsub_inf_minus_inf_sets_vxisi: regression for PPCBUG-203. - fnmadd_nan_input_preserves_nan_sign: regression for PPCBUG-205. Co-Authored-By: Claude Sonnet 4.6 --- crates/xenia-cpu/src/fpscr.rs | 27 +++++++++++ crates/xenia-cpu/src/interpreter.rs | 71 +++++++++++++++++++++++++++-- 2 files changed, 93 insertions(+), 5 deletions(-) diff --git a/crates/xenia-cpu/src/fpscr.rs b/crates/xenia-cpu/src/fpscr.rs index 366e4f7..535a500 100644 --- a/crates/xenia-cpu/src/fpscr.rs +++ b/crates/xenia-cpu/src/fpscr.rs @@ -152,6 +152,33 @@ pub fn check_invalid_add(ctx: &mut PpcContext, a: f64, b: f64, sub: bool) -> boo false } +/// FMA-aware add/sub VXISI check. Per PPCBUG-202+203: the previous code +/// passed `a*c` as `lhs` to `check_invalid_add`, which suffers from two +/// rounding errors and can spuriously raise/miss VXISI in extreme cases. +/// This helper derives the mathematical product's sign and infinity status +/// from the inputs directly. +/// +/// `sub` follows the same semantics as `check_invalid_add`: +/// - false (add): VXISI when product and b have opposite signs at infinity +/// - true (sub): VXISI when product and b have same sign at infinity +pub fn check_invalid_fma_add(ctx: &mut PpcContext, a: f64, c: f64, b: f64, sub: bool) -> bool { + let mut bits = 0u32; + if is_snan(a) || is_snan(c) || is_snan(b) { bits |= VXSNAN; } + let product_is_inf = (a.is_infinite() || c.is_infinite()) + && a != 0.0 && c != 0.0 + && !a.is_nan() && !c.is_nan(); + if product_is_inf && b.is_infinite() { + let p_neg = a.is_sign_negative() != c.is_sign_negative(); + let b_neg = b.is_sign_negative(); + let same_sign = p_neg == b_neg; + if (sub && same_sign) || (!sub && !same_sign) { + bits |= VXISI; + } + } + if bits != 0 { set_exception(ctx, bits); return true; } + false +} + pub fn check_invalid_mul(ctx: &mut PpcContext, a: f64, b: f64) -> bool { let mut bits = 0u32; if is_snan(a) || is_snan(b) { bits |= VXSNAN; } diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs index 9ff2b6b..6395b25 100644 --- a/crates/xenia-cpu/src/interpreter.rs +++ b/crates/xenia-cpu/src/interpreter.rs @@ -2573,11 +2573,12 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - // ===== FPU: Multiply-Add ===== PpcOpcode::fmaddx => { + // PPCBUG-202: VXISI from input properties (not from `a*c` which has wrong sign on overflow). let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); - fpscr::check_invalid_add(ctx, a * c, b, false); + fpscr::check_invalid_fma_add(ctx, a, c, b, false); let result = a.mul_add(c, b); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); @@ -2585,10 +2586,12 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - ctx.pc += 4; } PpcOpcode::fmaddsx => { + // PPCBUG-181: missing VXISI on add step. let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); + fpscr::check_invalid_fma_add(ctx, a, c, b, false); let result = to_single(ctx, a.mul_add(c, b)); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); @@ -2596,10 +2599,12 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - ctx.pc += 4; } PpcOpcode::fmsubx => { + // PPCBUG-203: missing VXISI on sub step. let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); + fpscr::check_invalid_fma_add(ctx, a, c, b, true); let result = a.mul_add(c, -b); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); @@ -2607,10 +2612,12 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - ctx.pc += 4; } PpcOpcode::fmsubsx => { + // PPCBUG-182: missing VXISI on sub step. let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); + fpscr::check_invalid_fma_add(ctx, a, c, b, true); let result = to_single(ctx, a.mul_add(c, -b)); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); @@ -2618,44 +2625,58 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - ctx.pc += 4; } PpcOpcode::fnmaddx => { + // PPCBUG-203: missing VXISI. PPCBUG-205: NaN sign preserved (no negation on NaN). let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); - let result = -(a.mul_add(c, b)); + fpscr::check_invalid_fma_add(ctx, a, c, b, false); + let fma = a.mul_add(c, b); + let result = if fma.is_nan() { fma } else { -fma }; ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fnmaddsx => { + // PPCBUG-181 + PPCBUG-183: VXISI + NaN sign preservation. let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); - let result = to_single(ctx, -(a.mul_add(c, b))); + fpscr::check_invalid_fma_add(ctx, a, c, b, false); + let fma = a.mul_add(c, b); + let neg = if fma.is_nan() { fma } else { -fma }; + let result = to_single(ctx, neg); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fnmsubx => { + // PPCBUG-203: VXISI. PPCBUG-205: NaN sign preservation. let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); - let result = -(a.mul_add(c, -b)); + fpscr::check_invalid_fma_add(ctx, a, c, b, true); + let fma = a.mul_add(c, -b); + let result = if fma.is_nan() { fma } else { -fma }; ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fnmsubsx => { + // PPCBUG-182 + PPCBUG-183: VXISI + NaN sign preservation. let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); - let result = to_single(ctx, -(a.mul_add(c, -b))); + fpscr::check_invalid_fma_add(ctx, a, c, b, true); + let fma = a.mul_add(c, -b); + let neg = if fma.is_nan() { fma } else { -fma }; + let result = to_single(ctx, neg); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } @@ -5632,6 +5653,46 @@ mod tests { // ---------- Phase 2h: FPU / FPSCR ---------- + #[test] + fn fmsub_inf_minus_inf_sets_vxisi() { + // PPCBUG-203 regression: fmsub with a*c = +∞, -b = -∞ (b=+∞) → + // +∞ + (-∞) → VXISI. Pre-fix had no add-step VXISI check. + let mut ctx = PpcContext::new(); + let mut mem = TestMem::new(); + ctx.fpr[1] = f64::INFINITY; + ctx.fpr[2] = f64::INFINITY; // b + ctx.fpr[3] = 1.0; + // fmsub f4, f1, f3, f2 → 1*∞ - ∞ = VXISI + // A-form: opcode=63, XO=28 (fmsub double): (63<<26)|(rd<<21)|(ra<<16)|(rb<<11)|(rc<<6)|(28<<1) + let raw = (63u32 << 26) | (4 << 21) | (1 << 16) | (2 << 11) | (3 << 6) | (28 << 1); + write_instr(&mut mem, 0, raw); + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert_ne!(ctx.fpscr & fpscr::VXISI, 0, "fmsub ∞-∞ must set VXISI"); + } + + #[test] + fn fnmadd_nan_input_preserves_nan_sign() { + // PPCBUG-205 regression: ISA forbids negating a NaN result. + // a*c+b producing a NaN → result must be the NaN unchanged, not -NaN. + let mut ctx = PpcContext::new(); + let mut mem = TestMem::new(); + let qnan = f64::NAN; + ctx.fpr[1] = qnan; + ctx.fpr[2] = 1.0; + ctx.fpr[3] = 2.0; + // fnmadd f4, f1, f3, f2 (XO=31) + let raw = (63u32 << 26) | (4 << 21) | (1 << 16) | (2 << 11) | (3 << 6) | (31 << 1); + write_instr(&mut mem, 0, raw); + ctx.pc = 0; + step(&mut ctx, &mut mem); + // Result must be NaN with the same sign bit as the input NaN. + let r = ctx.fpr[4]; + assert!(r.is_nan(), "result must be NaN"); + assert_eq!(r.is_sign_negative(), qnan.is_sign_negative(), + "fnmadd must preserve NaN sign (no negation on NaN)"); + } + #[test] fn fadd_inf_minus_inf_sets_vxisi() { let mut ctx = PpcContext::new();