fix(cpu): PPCBUG-426/427/433 single-FMA vnmsubfp + vctsxs NaN saturation

Phase 5 batch 6 (5f): saturation and FMA-rounding fixes.

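- PPCBUG-426 vnmsubfp: was `bi - ai * ci`, which rounds twice (once after
  the multiply, once after the subtract); now `-ai.mul_add(ci, -bi)`, i.e.
  -((ai * ci) - bi). This has the same mathematical value as bi - ai*ci but
  rounds only once (fused multiply-add), as the ISA specifies. Note that it
  is not always bit-identical: an exact-zero result can now come out of the
  negation as -0.0 instead of +0.0 (before flush_denorm is applied).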
- PPCBUG-427 vnmsubfp128: same single-FMA fix.
- PPCBUG-433 vctsxs / vcfpsxws128 NaN saturation: per the AltiVec ISA a NaN
  input saturates to INT_MIN (0x80000000); xenia previously returned 0. The
  unsigned variant vctuxs keeps returning 0 for NaN, which is already
  correct per the ISA (see PPCBUG-434).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-02 12:31:10 +02:00
parent 6ba8f83c30
commit 6fe2cbf251
2 changed files with 7 additions and 3 deletions

View File

@@ -2039,7 +2039,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
let ai = vmx::flush_denorm(a[i]);
let bi = vmx::flush_denorm(b[i]);
let ci = vmx::flush_denorm(c[i]);
-r[i] = vmx::flush_denorm(bi - ai * ci);
+// PPCBUG-426: single FMA rounding instead of two-step (b - a*c).
+r[i] = vmx::flush_denorm(-ai.mul_add(ci, -bi));
}
ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r);
ctx.pc += 4;
@@ -2056,7 +2057,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
let ai = vmx::flush_denorm(a[i]);
let bi = vmx::flush_denorm(b[i]);
let di = vmx::flush_denorm(d[i]);
-r[i] = vmx::flush_denorm(di - ai * bi);
+// PPCBUG-427: single FMA rounding.
+r[i] = vmx::flush_denorm(-ai.mul_add(bi, -di));
}
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
ctx.pc += 4;

View File

@@ -214,7 +214,9 @@ pub fn flush_denorm(x: f32) -> f32 {
//
// vctsxs / vctuxs flush denormal inputs to 0 before scaling, per Altivec.
#[inline] pub fn cvt_f32_to_i32_sat(x: f32, scale_bits: u32) -> (i32, bool) {
-if x.is_nan() { return (0, true); }
+// PPCBUG-433: AltiVec ISA saturates NaN to INT_MIN (0x80000000), not 0.
+// (vctuxs's NaN→0 is correct per AltiVec ISA — see PPCBUG-434.)
+if x.is_nan() { return (i32::MIN, true); }
let x = flush_denorm(x);
let scaled = (x as f64) * ((1u64 << scale_bits) as f64);
if scaled >= i32::MAX as f64 { return (i32::MAX, true); }