diff --git a/crates/xenia-cpu/src/fpscr.rs b/crates/xenia-cpu/src/fpscr.rs index 1d05e67..535a500 100644 --- a/crates/xenia-cpu/src/fpscr.rs +++ b/crates/xenia-cpu/src/fpscr.rs @@ -152,6 +152,33 @@ pub fn check_invalid_add(ctx: &mut PpcContext, a: f64, b: f64, sub: bool) -> boo false } +/// FMA-aware add/sub VXISI check. Per PPCBUG-202+203: the previous code +/// passed `a*c` as `lhs` to `check_invalid_add`, which suffers from two +/// rounding errors and can spuriously raise/miss VXISI in extreme cases. +/// This helper derives the mathematical product's sign and infinity status +/// from the inputs directly. +/// +/// `sub` follows the same semantics as `check_invalid_add`: +/// - false (add): VXISI when product and b have opposite signs at infinity +/// - true (sub): VXISI when product and b have same sign at infinity +pub fn check_invalid_fma_add(ctx: &mut PpcContext, a: f64, c: f64, b: f64, sub: bool) -> bool { + let mut bits = 0u32; + if is_snan(a) || is_snan(c) || is_snan(b) { bits |= VXSNAN; } + let product_is_inf = (a.is_infinite() || c.is_infinite()) + && a != 0.0 && c != 0.0 + && !a.is_nan() && !c.is_nan(); + if product_is_inf && b.is_infinite() { + let p_neg = a.is_sign_negative() != c.is_sign_negative(); + let b_neg = b.is_sign_negative(); + let same_sign = p_neg == b_neg; + if (sub && same_sign) || (!sub && !same_sign) { + bits |= VXISI; + } + } + if bits != 0 { set_exception(ctx, bits); return true; } + false +} + pub fn check_invalid_mul(ctx: &mut PpcContext, a: f64, b: f64) -> bool { let mut bits = 0u32; if is_snan(a) || is_snan(b) { bits |= VXSNAN; } @@ -220,15 +247,22 @@ pub fn round_to_single(ctx: &PpcContext, v: f64) -> f64 { pub fn round_to_i64(ctx: &PpcContext, v: f64) -> i64 { match rounding_mode(ctx) { RoundingMode::NearestEven => { - // Round-half-to-even (banker's rounding). - let r = v.round(); - // Rust's f64::round is round-half-away-from-zero. 
Correct ties to even: - let diff = (v - v.trunc()).abs(); - if (diff - 0.5).abs() < f64::EPSILON { - let floor = v.floor(); - if (floor as i64) & 1 == 0 { floor as i64 } else { v.ceil() as i64 } + // PPCBUG-221: round-half-to-even (banker's rounding). The previous + // tie-detection used `(diff - 0.5).abs() < f64::EPSILON` which + // breaks for |v| > 2^52 (where v.trunc() == v exactly, giving diff + // == 0). Use a fractional-part-only check that's exact for + // |v| <= 2^52 and degenerates correctly above. + let t = v.trunc(); + let frac = v - t; + let fa = frac.abs(); + if fa > 0.5 { + t as i64 + if v >= 0.0 { 1 } else { -1 } + } else if fa < 0.5 { + t as i64 } else { - r as i64 + // Exact 0.5 tie — round to even. + let fi = t as i64; + if fi & 1 == 0 { fi } else { fi + if v >= 0.0 { 1 } else { -1 } } } } RoundingMode::TowardZero => v.trunc() as i64, @@ -355,11 +389,35 @@ mod tests { #[test] fn round_to_i64_nearest_even_on_tie() { let c = ctx(); + assert_eq!(round_to_i64(&c, 0.5_f64), 0); + assert_eq!(round_to_i64(&c, 1.5_f64), 2); assert_eq!(round_to_i64(&c, 2.5_f64), 2); assert_eq!(round_to_i64(&c, 3.5_f64), 4); + assert_eq!(round_to_i64(&c, -0.5_f64), 0); + assert_eq!(round_to_i64(&c, -1.5_f64), -2); assert_eq!(round_to_i64(&c, -2.5_f64), -2); } + #[test] + fn round_to_i64_non_tie_cases() { + // PPCBUG-221 regression: non-tie fractions must round to nearest. + let c = ctx(); + assert_eq!(round_to_i64(&c, 0.4_f64), 0); + assert_eq!(round_to_i64(&c, 0.6_f64), 1); + assert_eq!(round_to_i64(&c, -0.4_f64), 0); + assert_eq!(round_to_i64(&c, -0.6_f64), -1); + } + + #[test] + fn round_to_i32_nearest_even_on_tie() { + // PPCBUG-227: round_to_i32 inherits round_to_i64's tie semantics. 
+ let c = ctx(); + assert_eq!(round_to_i32(&c, 0.5_f64), 0); + assert_eq!(round_to_i32(&c, 1.5_f64), 2); + assert_eq!(round_to_i32(&c, 2.5_f64), 2); + assert_eq!(round_to_i32(&c, -1.5_f64), -2); + } + #[test] fn check_invalid_add_detects_inf_minus_inf() { let mut c = ctx(); diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs index aad2485..31d54f5 100644 --- a/crates/xenia-cpu/src/interpreter.rs +++ b/crates/xenia-cpu/src/interpreter.rs @@ -1940,34 +1940,56 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - // ===== VMX: Float Arithmetic ===== PpcOpcode::vaddfp => { + // PPCBUG-435: VSCR.NJ=1 (Xbox 360 always boots with this set) requires + // flush-to-zero on subnormal inputs and outputs. Canary VMX float + // arithmetic flushes denormals unconditionally. let a = ctx.vr[instr.ra()].as_f32x4(); let b = ctx.vr[instr.rb()].as_f32x4(); let mut r = [0f32; 4]; - for i in 0..4 { r[i] = a[i] + b[i]; } + for i in 0..4 { + let ai = vmx::flush_denorm(a[i]); + let bi = vmx::flush_denorm(b[i]); + r[i] = vmx::flush_denorm(ai + bi); + } ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vaddfp128 => { + // PPCBUG-435: same as vaddfp. let a = ctx.vr[instr.va128()].as_f32x4(); let b = ctx.vr[instr.vb128()].as_f32x4(); let mut r = [0f32; 4]; - for i in 0..4 { r[i] = a[i] + b[i]; } + for i in 0..4 { + let ai = vmx::flush_denorm(a[i]); + let bi = vmx::flush_denorm(b[i]); + r[i] = vmx::flush_denorm(ai + bi); + } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vsubfp => { + // PPCBUG-435. 
let a = ctx.vr[instr.ra()].as_f32x4(); let b = ctx.vr[instr.rb()].as_f32x4(); let mut r = [0f32; 4]; - for i in 0..4 { r[i] = a[i] - b[i]; } + for i in 0..4 { + let ai = vmx::flush_denorm(a[i]); + let bi = vmx::flush_denorm(b[i]); + r[i] = vmx::flush_denorm(ai - bi); + } ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vsubfp128 => { + // PPCBUG-435. let a = ctx.vr[instr.va128()].as_f32x4(); let b = ctx.vr[instr.vb128()].as_f32x4(); let mut r = [0f32; 4]; - for i in 0..4 { r[i] = a[i] - b[i]; } + for i in 0..4 { + let ai = vmx::flush_denorm(a[i]); + let bi = vmx::flush_denorm(b[i]); + r[i] = vmx::flush_denorm(ai - bi); + } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } @@ -1982,7 +2004,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let ai = vmx::flush_denorm(a[i]); let bi = vmx::flush_denorm(b[i]); let ci = vmx::flush_denorm(c[i]); - r[i] = ai.mul_add(ci, bi); + // PPCBUG-437: flush subnormal output too. + r[i] = vmx::flush_denorm(ai.mul_add(ci, bi)); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; @@ -2000,7 +2023,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let ai = vmx::flush_denorm(a[i]); let bi = vmx::flush_denorm(b[i]); let di = vmx::flush_denorm(d[i]); - r[i] = ai.mul_add(di, bi); + // PPCBUG-437. + r[i] = vmx::flush_denorm(ai.mul_add(di, bi)); } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; @@ -2015,7 +2039,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let ai = vmx::flush_denorm(a[i]); let bi = vmx::flush_denorm(b[i]); let ci = vmx::flush_denorm(c[i]); - r[i] = bi - ai * ci; + // PPCBUG-426: single FMA rounding instead of two-step (b - a*c). 
+ r[i] = vmx::flush_denorm(-ai.mul_add(ci, -bi)); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; @@ -2032,16 +2057,22 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let ai = vmx::flush_denorm(a[i]); let bi = vmx::flush_denorm(b[i]); let di = vmx::flush_denorm(d[i]); - r[i] = di - ai * bi; + // PPCBUG-427: single FMA rounding. + r[i] = vmx::flush_denorm(-ai.mul_add(bi, -di)); } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vmulfp128 => { + // PPCBUG-435 + PPCBUG-437. let a = ctx.vr[instr.va128()].as_f32x4(); let b = ctx.vr[instr.vb128()].as_f32x4(); let mut r = [0f32; 4]; - for i in 0..4 { r[i] = a[i] * b[i]; } + for i in 0..4 { + let ai = vmx::flush_denorm(a[i]); + let bi = vmx::flush_denorm(b[i]); + r[i] = vmx::flush_denorm(ai * bi); + } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } @@ -2398,11 +2429,13 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - ctx.pc += 4; } PpcOpcode::vrfin | PpcOpcode::vrfin128 => { + // PPCBUG-432: ISA round-to-nearest-even, NOT Rust's `round()` + // (which is round-half-away-from-zero). let vb = if matches!(instr.opcode, PpcOpcode::vrfin128) { instr.vb128() } else { instr.rb() }; let vd = if matches!(instr.opcode, PpcOpcode::vrfin128) { instr.vd128() } else { instr.rd() }; let b = ctx.vr[vb].as_f32x4(); let mut r = [0f32; 4]; - for i in 0..4 { r[i] = b[i].round(); } + for i in 0..4 { r[i] = b[i].round_ties_even(); } ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } @@ -2559,11 +2592,12 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - // ===== FPU: Multiply-Add ===== PpcOpcode::fmaddx => { + // PPCBUG-202: VXISI from input properties (not from `a*c` which has wrong sign on overflow). 
let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); - fpscr::check_invalid_add(ctx, a * c, b, false); + fpscr::check_invalid_fma_add(ctx, a, c, b, false); let result = a.mul_add(c, b); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); @@ -2571,10 +2605,12 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - ctx.pc += 4; } PpcOpcode::fmaddsx => { + // PPCBUG-181: missing VXISI on add step. let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); + fpscr::check_invalid_fma_add(ctx, a, c, b, false); let result = to_single(ctx, a.mul_add(c, b)); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); @@ -2582,10 +2618,12 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - ctx.pc += 4; } PpcOpcode::fmsubx => { + // PPCBUG-203: missing VXISI on sub step. let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); + fpscr::check_invalid_fma_add(ctx, a, c, b, true); let result = a.mul_add(c, -b); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); @@ -2593,10 +2631,12 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - ctx.pc += 4; } PpcOpcode::fmsubsx => { + // PPCBUG-182: missing VXISI on sub step. 
let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); + fpscr::check_invalid_fma_add(ctx, a, c, b, true); let result = to_single(ctx, a.mul_add(c, -b)); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); @@ -2604,44 +2644,58 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - ctx.pc += 4; } PpcOpcode::fnmaddx => { + // PPCBUG-203: missing VXISI. PPCBUG-205: NaN sign preserved (no negation on NaN). let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); - let result = -(a.mul_add(c, b)); + fpscr::check_invalid_fma_add(ctx, a, c, b, false); + let fma = a.mul_add(c, b); + let result = if fma.is_nan() { fma } else { -fma }; ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fnmaddsx => { + // PPCBUG-181 + PPCBUG-183: VXISI + NaN sign preservation. let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); - let result = to_single(ctx, -(a.mul_add(c, b))); + fpscr::check_invalid_fma_add(ctx, a, c, b, false); + let fma = a.mul_add(c, b); + let neg = if fma.is_nan() { fma } else { -fma }; + let result = to_single(ctx, neg); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fnmsubx => { + // PPCBUG-203: VXISI. PPCBUG-205: NaN sign preservation. 
let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); - let result = -(a.mul_add(c, -b)); + fpscr::check_invalid_fma_add(ctx, a, c, b, true); + let fma = a.mul_add(c, -b); + let result = if fma.is_nan() { fma } else { -fma }; ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fnmsubsx => { + // PPCBUG-182 + PPCBUG-183: VXISI + NaN sign preservation. let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); - let result = to_single(ctx, -(a.mul_add(c, -b))); + fpscr::check_invalid_fma_add(ctx, a, c, b, true); + let fma = a.mul_add(c, -b); + let neg = if fma.is_nan() { fma } else { -fma }; + let result = to_single(ctx, neg); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } @@ -2713,12 +2767,18 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - ctx.pc += 4; } PpcOpcode::fresx => { - // Single-precision reciprocal estimate: frD = 1.0 / frB - let b = ctx.fpr[instr.rb()]; + // Single-precision reciprocal estimate: frD = 1.0 / frB. + // PPCBUG-184: pre-quantize input to f32 to match canary's + // `f.Recip(f.Convert(frB, FLOAT32_TYPE))` behavior. Hardware + // produces a ~12-bit LUT estimate; both emulators produce a + // fully-IEEE single reciprocal, but the f32 quantization at + // least makes the input precision match. 
+ let b_full = ctx.fpr[instr.rb()]; + let b = b_full as f32 as f64; if b == 0.0 { fpscr::set_exception(ctx, fpscr::ZX); } - if fpscr::is_snan(b) { + if fpscr::is_snan(b_full) { fpscr::set_exception(ctx, fpscr::VXSNAN); } let result = to_single(ctx, 1.0 / b); @@ -2748,28 +2808,38 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - // ===== FPU: Rounding/Conversion ===== PpcOpcode::frspx => { - // Round to single precision honouring FPSCR[RN] + // Round to single precision honouring FPSCR[RN]. + // PPCBUG-225: set XX on inexact rounding (almost every frsp call). let b = ctx.fpr[instr.rb()]; if fpscr::is_snan(b) { fpscr::set_exception(ctx, fpscr::VXSNAN); } let result = to_single(ctx, b); + if b.is_finite() && result.is_finite() && result != b { + fpscr::set_exception(ctx, fpscr::XX); + } ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, b.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fcfidx => { - // Convert from integer doubleword: frD = (double)(int64_t)frB_as_bits + // Convert from integer doubleword: frD = (double)(int64_t)frB_as_bits. + // PPCBUG-224: set XX when the i64 is not exactly representable as f64 (possible once |i64| > 2^53); the round-trip is compared in i128 because `as i64` saturates, which would make i64::MAX (rounded up to 2^63) look exact. let bits = ctx.fpr[instr.rb()].to_bits(); - let result = (bits as i64) as f64; + let i = bits as i64; + let result = i as f64; + if (result as i128) != (i as i128) { + fpscr::set_exception(ctx, fpscr::XX); + } ctx.fpr[instr.rd()] = result; fpscr::set_fprf(ctx, fpscr::classify_fprf(result)); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fctidx => { - // Convert to integer doubleword (round per FPSCR[RN]) + // Convert to integer doubleword (round per FPSCR[RN]). + // PPCBUG-229: set XX on inexact (fractional input).
let val = ctx.fpr[instr.rb()]; let result = if val.is_nan() { fpscr::set_exception(ctx, fpscr::VXCVI | if fpscr::is_snan(val) { fpscr::VXSNAN } else { 0 }); @@ -2781,6 +2851,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - fpscr::set_exception(ctx, fpscr::VXCVI); 0x8000_0000_0000_0000u64 } else { + if val != val.trunc() { fpscr::set_exception(ctx, fpscr::XX); } fpscr::round_to_i64(ctx, val) as u64 }; ctx.fpr[instr.rd()] = f64::from_bits(result); @@ -2788,7 +2859,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - ctx.pc += 4; } PpcOpcode::fctidzx => { - // Convert to integer doubleword (round toward zero) + // Convert to integer doubleword (round toward zero). + // PPCBUG-229: set XX on inexact. let val = ctx.fpr[instr.rb()]; let result = if val.is_nan() { fpscr::set_exception(ctx, fpscr::VXCVI | if fpscr::is_snan(val) { fpscr::VXSNAN } else { 0 }); @@ -2800,6 +2872,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - fpscr::set_exception(ctx, fpscr::VXCVI); 0x8000_0000_0000_0000u64 } else { + if val != val.trunc() { fpscr::set_exception(ctx, fpscr::XX); } (val.trunc() as i64) as u64 }; ctx.fpr[instr.rd()] = f64::from_bits(result); @@ -2807,7 +2880,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - ctx.pc += 4; } PpcOpcode::fctiwx => { - // Convert to integer word (round per FPSCR[RN]) + // Convert to integer word (round per FPSCR[RN]). + // PPCBUG-230: set XX on inexact. 
let val = ctx.fpr[instr.rb()]; let result_u32: u32 = if val.is_nan() { fpscr::set_exception(ctx, fpscr::VXCVI | if fpscr::is_snan(val) { fpscr::VXSNAN } else { 0 }); @@ -2819,6 +2893,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - fpscr::set_exception(ctx, fpscr::VXCVI); 0x8000_0000 } else { + if val != val.trunc() { fpscr::set_exception(ctx, fpscr::XX); } fpscr::round_to_i32(ctx, val) as u32 }; ctx.fpr[instr.rd()] = f64::from_bits(result_u32 as u64); @@ -2826,7 +2901,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - ctx.pc += 4; } PpcOpcode::fctiwzx => { - // Convert to integer word (round toward zero) + // Convert to integer word (round toward zero). + // PPCBUG-230: set XX on inexact. let val = ctx.fpr[instr.rb()]; let result_u32: u32 = if val.is_nan() { fpscr::set_exception(ctx, fpscr::VXCVI | if fpscr::is_snan(val) { fpscr::VXSNAN } else { 0 }); @@ -2838,6 +2914,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - fpscr::set_exception(ctx, fpscr::VXCVI); 0x8000_0000 } else { + if val != val.trunc() { fpscr::set_exception(ctx, fpscr::XX); } val.trunc() as i32 as u32 }; ctx.fpr[instr.rd()] = f64::from_bits(result_u32 as u64); @@ -4378,7 +4455,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let ai = vmx::flush_denorm(a[i]); let bi = vmx::flush_denorm(b[i]); let di = vmx::flush_denorm(d[i]); - r[i] = ai.mul_add(di, bi); + // PPCBUG-437: flush subnormal output too. + r[i] = vmx::flush_denorm(ai.mul_add(di, bi)); } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; @@ -4387,16 +4465,25 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - // Canary `InstrEmit_vmsum3fp128` flushes the *output* denormal // unconditionally (not the inputs) — see ppc_emit_altivec.cc:1067-1075. 
PpcOpcode::vmsum3fp128 => { + // PPCBUG-436: flush per-product intermediates (not just the final sum). let a = ctx.vr[instr.va128()].as_f32x4(); let b = ctx.vr[instr.vb128()].as_f32x4(); - let s = vmx::flush_denorm(a[0] * b[0] + a[1] * b[1] + a[2] * b[2]); + let p0 = vmx::flush_denorm(a[0] * b[0]); + let p1 = vmx::flush_denorm(a[1] * b[1]); + let p2 = vmx::flush_denorm(a[2] * b[2]); + let s = vmx::flush_denorm(p0 + p1 + p2); ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4(s, s, s, s); ctx.pc += 4; } PpcOpcode::vmsum4fp128 => { + // PPCBUG-436. let a = ctx.vr[instr.va128()].as_f32x4(); let b = ctx.vr[instr.vb128()].as_f32x4(); - let s = vmx::flush_denorm(a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3]); + let p0 = vmx::flush_denorm(a[0] * b[0]); + let p1 = vmx::flush_denorm(a[1] * b[1]); + let p2 = vmx::flush_denorm(a[2] * b[2]); + let p3 = vmx::flush_denorm(a[3] * b[3]); + let s = vmx::flush_denorm(p0 + p1 + p2 + p3); ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4(s, s, s, s); ctx.pc += 4; } @@ -5618,6 +5705,46 @@ mod tests { // ---------- Phase 2h: FPU / FPSCR ---------- + #[test] + fn fmsub_inf_minus_inf_sets_vxisi() { + // PPCBUG-203 regression: fmsub with a*c = +∞, -b = -∞ (b=+∞) → + // +∞ + (-∞) → VXISI. Pre-fix had no add-step VXISI check. + let mut ctx = PpcContext::new(); + let mut mem = TestMem::new(); + ctx.fpr[1] = f64::INFINITY; + ctx.fpr[2] = f64::INFINITY; // b + ctx.fpr[3] = 1.0; + // fmsub f4, f1, f3, f2 → 1*∞ - ∞ = VXISI + // A-form: opcode=63, XO=28 (fmsub double): (63<<26)|(rd<<21)|(ra<<16)|(rb<<11)|(rc<<6)|(28<<1) + let raw = (63u32 << 26) | (4 << 21) | (1 << 16) | (2 << 11) | (3 << 6) | (28 << 1); + write_instr(&mut mem, 0, raw); + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert_ne!(ctx.fpscr & fpscr::VXISI, 0, "fmsub ∞-∞ must set VXISI"); + } + + #[test] + fn fnmadd_nan_input_preserves_nan_sign() { + // PPCBUG-205 regression: ISA forbids negating a NaN result. 
+ // a*c+b producing a NaN → result must be the NaN unchanged, not -NaN. + let mut ctx = PpcContext::new(); + let mut mem = TestMem::new(); + let qnan = f64::NAN; + ctx.fpr[1] = qnan; + ctx.fpr[2] = 1.0; + ctx.fpr[3] = 2.0; + // fnmadd f4, f1, f3, f2 (XO=31) + let raw = (63u32 << 26) | (4 << 21) | (1 << 16) | (2 << 11) | (3 << 6) | (31 << 1); + write_instr(&mut mem, 0, raw); + ctx.pc = 0; + step(&mut ctx, &mut mem); + // Result must be NaN with the same sign bit as the input NaN. + let r = ctx.fpr[4]; + assert!(r.is_nan(), "result must be NaN"); + assert_eq!(r.is_sign_negative(), qnan.is_sign_negative(), + "fnmadd must preserve NaN sign (no negation on NaN)"); + } + #[test] fn fadd_inf_minus_inf_sets_vxisi() { let mut ctx = PpcContext::new(); diff --git a/crates/xenia-cpu/src/vmx.rs b/crates/xenia-cpu/src/vmx.rs index 5650a34..4be636f 100644 --- a/crates/xenia-cpu/src/vmx.rs +++ b/crates/xenia-cpu/src/vmx.rs @@ -214,7 +214,9 @@ pub fn flush_denorm(x: f32) -> f32 { // // vctsxs / vctuxs flush denormal inputs to 0 before scaling, per Altivec. #[inline] pub fn cvt_f32_to_i32_sat(x: f32, scale_bits: u32) -> (i32, bool) { - if x.is_nan() { return (0, true); } + // PPCBUG-433: AltiVec ISA saturates NaN to INT_MIN (0x80000000), not 0. + // (vctuxs's NaN→0 is correct per AltiVec ISA — see PPCBUG-434.) + if x.is_nan() { return (i32::MIN, true); } let x = flush_denorm(x); let scaled = (x as f64) * ((1u64 << scale_bits) as f64); if scaled >= i32::MAX as f64 { return (i32::MAX, true); }