Merge branch 'ppc-audit-fix/p5-fpu' — Phase 5 FPU correctness
Phase 5 of the PPC instruction audit fix application: FPU correctness across the scalar FPU and VMX float arithmetic. ~22 PPCBUGs across 6 sub-sections (5a-5f).

- f6a444b: 5a — round_to_i64 + vrfin round-to-even (PPCBUG-221+227, 432)
- 26b9897: 5b — FMA VXISI + NaN sign preservation (PPCBUG-181/182/183/202/203/205)
- 49bf74f: 5c — FPU XX-on-inexact for conversions (PPCBUG-223/224/225/229/230)
- 538fa5a: 5d — VSCR.NJ subnormal flush for VMX float (PPCBUG-435/436/437)
- 6ba8f83: 5e — fresx canary parity (PPCBUG-184)
- 6fe2cbf: 5f — single-FMA vnmsubfp + vctsxs NaN saturation (PPCBUG-426/427/433)
- 05f2f72: review-fix nit — vrfin uses stdlib round_ties_even

Independent reviewer found no blocking issues; two minor follow-up items remain open for tracking. The vrfin nit was applied immediately in 05f2f72.

Three substantive PPCBUGs were explicitly deferred — each requires substantial helper rework that's better landed as focused sub-batches:

- PPCBUG-201: FPSCR.RN for double arithmetic (MXCSR set/restore wrappers)
- PPCBUG-185: FPSCR.NI flush for scalar FPU (NI bit constant + post-op flush)
- PPCBUG-180/200: XX/FR/FI in update_after_op (pre-vs-post-round comparison)

These remain "Status: open" in audit-findings.md and will be picked up in a P5b sub-batch or P9 (test gaps) per planning.

Verification at merge: `cargo test --workspace --release` reports 498 passed, 0 failed. Acid test deferred to end of all phases per user direction.
This commit is contained in:
@@ -152,6 +152,33 @@ pub fn check_invalid_add(ctx: &mut PpcContext, a: f64, b: f64, sub: bool) -> boo
|
|||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// FMA-aware add/sub VXISI check. Per PPCBUG-202+203: the previous code
|
||||||
|
/// passed `a*c` as `lhs` to `check_invalid_add`, which suffers from two
|
||||||
|
/// rounding errors and can spuriously raise/miss VXISI in extreme cases.
|
||||||
|
/// This helper derives the mathematical product's sign and infinity status
|
||||||
|
/// from the inputs directly.
|
||||||
|
///
|
||||||
|
/// `sub` follows the same semantics as `check_invalid_add`:
|
||||||
|
/// - false (add): VXISI when product and b have opposite signs at infinity
|
||||||
|
/// - true (sub): VXISI when product and b have same sign at infinity
|
||||||
|
pub fn check_invalid_fma_add(ctx: &mut PpcContext, a: f64, c: f64, b: f64, sub: bool) -> bool {
|
||||||
|
let mut bits = 0u32;
|
||||||
|
if is_snan(a) || is_snan(c) || is_snan(b) { bits |= VXSNAN; }
|
||||||
|
let product_is_inf = (a.is_infinite() || c.is_infinite())
|
||||||
|
&& a != 0.0 && c != 0.0
|
||||||
|
&& !a.is_nan() && !c.is_nan();
|
||||||
|
if product_is_inf && b.is_infinite() {
|
||||||
|
let p_neg = a.is_sign_negative() != c.is_sign_negative();
|
||||||
|
let b_neg = b.is_sign_negative();
|
||||||
|
let same_sign = p_neg == b_neg;
|
||||||
|
if (sub && same_sign) || (!sub && !same_sign) {
|
||||||
|
bits |= VXISI;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if bits != 0 { set_exception(ctx, bits); return true; }
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
pub fn check_invalid_mul(ctx: &mut PpcContext, a: f64, b: f64) -> bool {
|
pub fn check_invalid_mul(ctx: &mut PpcContext, a: f64, b: f64) -> bool {
|
||||||
let mut bits = 0u32;
|
let mut bits = 0u32;
|
||||||
if is_snan(a) || is_snan(b) { bits |= VXSNAN; }
|
if is_snan(a) || is_snan(b) { bits |= VXSNAN; }
|
||||||
@@ -220,15 +247,22 @@ pub fn round_to_single(ctx: &PpcContext, v: f64) -> f64 {
|
|||||||
pub fn round_to_i64(ctx: &PpcContext, v: f64) -> i64 {
|
pub fn round_to_i64(ctx: &PpcContext, v: f64) -> i64 {
|
||||||
match rounding_mode(ctx) {
|
match rounding_mode(ctx) {
|
||||||
RoundingMode::NearestEven => {
|
RoundingMode::NearestEven => {
|
||||||
// Round-half-to-even (banker's rounding).
|
// PPCBUG-221: round-half-to-even (banker's rounding). The previous
|
||||||
let r = v.round();
|
// tie-detection used `(diff - 0.5).abs() < f64::EPSILON` which
|
||||||
// Rust's f64::round is round-half-away-from-zero. Correct ties to even:
|
// breaks for |v| > 2^52 (where v.trunc() == v exactly, giving diff
|
||||||
let diff = (v - v.trunc()).abs();
|
// == 0). Use a fractional-part-only check that's exact for
|
||||||
if (diff - 0.5).abs() < f64::EPSILON {
|
// |v| <= 2^52 and degenerates correctly above.
|
||||||
let floor = v.floor();
|
let t = v.trunc();
|
||||||
if (floor as i64) & 1 == 0 { floor as i64 } else { v.ceil() as i64 }
|
let frac = v - t;
|
||||||
|
let fa = frac.abs();
|
||||||
|
if fa > 0.5 {
|
||||||
|
t as i64 + if v >= 0.0 { 1 } else { -1 }
|
||||||
|
} else if fa < 0.5 {
|
||||||
|
t as i64
|
||||||
} else {
|
} else {
|
||||||
r as i64
|
// Exact 0.5 tie — round to even.
|
||||||
|
let fi = t as i64;
|
||||||
|
if fi & 1 == 0 { fi } else { fi + if v >= 0.0 { 1 } else { -1 } }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
RoundingMode::TowardZero => v.trunc() as i64,
|
RoundingMode::TowardZero => v.trunc() as i64,
|
||||||
@@ -355,11 +389,35 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn round_to_i64_nearest_even_on_tie() {
|
fn round_to_i64_nearest_even_on_tie() {
|
||||||
let c = ctx();
|
let c = ctx();
|
||||||
|
assert_eq!(round_to_i64(&c, 0.5_f64), 0);
|
||||||
|
assert_eq!(round_to_i64(&c, 1.5_f64), 2);
|
||||||
assert_eq!(round_to_i64(&c, 2.5_f64), 2);
|
assert_eq!(round_to_i64(&c, 2.5_f64), 2);
|
||||||
assert_eq!(round_to_i64(&c, 3.5_f64), 4);
|
assert_eq!(round_to_i64(&c, 3.5_f64), 4);
|
||||||
|
assert_eq!(round_to_i64(&c, -0.5_f64), 0);
|
||||||
|
assert_eq!(round_to_i64(&c, -1.5_f64), -2);
|
||||||
assert_eq!(round_to_i64(&c, -2.5_f64), -2);
|
assert_eq!(round_to_i64(&c, -2.5_f64), -2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn round_to_i64_non_tie_cases() {
|
||||||
|
// PPCBUG-221 regression: non-tie fractions must round to nearest.
|
||||||
|
let c = ctx();
|
||||||
|
assert_eq!(round_to_i64(&c, 0.4_f64), 0);
|
||||||
|
assert_eq!(round_to_i64(&c, 0.6_f64), 1);
|
||||||
|
assert_eq!(round_to_i64(&c, -0.4_f64), 0);
|
||||||
|
assert_eq!(round_to_i64(&c, -0.6_f64), -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn round_to_i32_nearest_even_on_tie() {
|
||||||
|
// PPCBUG-227: round_to_i32 inherits round_to_i64's tie semantics.
|
||||||
|
let c = ctx();
|
||||||
|
assert_eq!(round_to_i32(&c, 0.5_f64), 0);
|
||||||
|
assert_eq!(round_to_i32(&c, 1.5_f64), 2);
|
||||||
|
assert_eq!(round_to_i32(&c, 2.5_f64), 2);
|
||||||
|
assert_eq!(round_to_i32(&c, -1.5_f64), -2);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn check_invalid_add_detects_inf_minus_inf() {
|
fn check_invalid_add_detects_inf_minus_inf() {
|
||||||
let mut c = ctx();
|
let mut c = ctx();
|
||||||
|
|||||||
@@ -1940,34 +1940,56 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
|
|
||||||
// ===== VMX: Float Arithmetic =====
|
// ===== VMX: Float Arithmetic =====
|
||||||
PpcOpcode::vaddfp => {
|
PpcOpcode::vaddfp => {
|
||||||
|
// PPCBUG-435: VSCR.NJ=1 (Xbox 360 always boots with this set) requires
|
||||||
|
// flush-to-zero on subnormal inputs and outputs. Canary VMX float
|
||||||
|
// arithmetic flushes denormals unconditionally.
|
||||||
let a = ctx.vr[instr.ra()].as_f32x4();
|
let a = ctx.vr[instr.ra()].as_f32x4();
|
||||||
let b = ctx.vr[instr.rb()].as_f32x4();
|
let b = ctx.vr[instr.rb()].as_f32x4();
|
||||||
let mut r = [0f32; 4];
|
let mut r = [0f32; 4];
|
||||||
for i in 0..4 { r[i] = a[i] + b[i]; }
|
for i in 0..4 {
|
||||||
|
let ai = vmx::flush_denorm(a[i]);
|
||||||
|
let bi = vmx::flush_denorm(b[i]);
|
||||||
|
r[i] = vmx::flush_denorm(ai + bi);
|
||||||
|
}
|
||||||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r);
|
ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r);
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::vaddfp128 => {
|
PpcOpcode::vaddfp128 => {
|
||||||
|
// PPCBUG-435: same as vaddfp.
|
||||||
let a = ctx.vr[instr.va128()].as_f32x4();
|
let a = ctx.vr[instr.va128()].as_f32x4();
|
||||||
let b = ctx.vr[instr.vb128()].as_f32x4();
|
let b = ctx.vr[instr.vb128()].as_f32x4();
|
||||||
let mut r = [0f32; 4];
|
let mut r = [0f32; 4];
|
||||||
for i in 0..4 { r[i] = a[i] + b[i]; }
|
for i in 0..4 {
|
||||||
|
let ai = vmx::flush_denorm(a[i]);
|
||||||
|
let bi = vmx::flush_denorm(b[i]);
|
||||||
|
r[i] = vmx::flush_denorm(ai + bi);
|
||||||
|
}
|
||||||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::vsubfp => {
|
PpcOpcode::vsubfp => {
|
||||||
|
// PPCBUG-435.
|
||||||
let a = ctx.vr[instr.ra()].as_f32x4();
|
let a = ctx.vr[instr.ra()].as_f32x4();
|
||||||
let b = ctx.vr[instr.rb()].as_f32x4();
|
let b = ctx.vr[instr.rb()].as_f32x4();
|
||||||
let mut r = [0f32; 4];
|
let mut r = [0f32; 4];
|
||||||
for i in 0..4 { r[i] = a[i] - b[i]; }
|
for i in 0..4 {
|
||||||
|
let ai = vmx::flush_denorm(a[i]);
|
||||||
|
let bi = vmx::flush_denorm(b[i]);
|
||||||
|
r[i] = vmx::flush_denorm(ai - bi);
|
||||||
|
}
|
||||||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r);
|
ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r);
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::vsubfp128 => {
|
PpcOpcode::vsubfp128 => {
|
||||||
|
// PPCBUG-435.
|
||||||
let a = ctx.vr[instr.va128()].as_f32x4();
|
let a = ctx.vr[instr.va128()].as_f32x4();
|
||||||
let b = ctx.vr[instr.vb128()].as_f32x4();
|
let b = ctx.vr[instr.vb128()].as_f32x4();
|
||||||
let mut r = [0f32; 4];
|
let mut r = [0f32; 4];
|
||||||
for i in 0..4 { r[i] = a[i] - b[i]; }
|
for i in 0..4 {
|
||||||
|
let ai = vmx::flush_denorm(a[i]);
|
||||||
|
let bi = vmx::flush_denorm(b[i]);
|
||||||
|
r[i] = vmx::flush_denorm(ai - bi);
|
||||||
|
}
|
||||||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
@@ -1982,7 +2004,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
let ai = vmx::flush_denorm(a[i]);
|
let ai = vmx::flush_denorm(a[i]);
|
||||||
let bi = vmx::flush_denorm(b[i]);
|
let bi = vmx::flush_denorm(b[i]);
|
||||||
let ci = vmx::flush_denorm(c[i]);
|
let ci = vmx::flush_denorm(c[i]);
|
||||||
r[i] = ai.mul_add(ci, bi);
|
// PPCBUG-437: flush subnormal output too.
|
||||||
|
r[i] = vmx::flush_denorm(ai.mul_add(ci, bi));
|
||||||
}
|
}
|
||||||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r);
|
ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r);
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
@@ -2000,7 +2023,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
let ai = vmx::flush_denorm(a[i]);
|
let ai = vmx::flush_denorm(a[i]);
|
||||||
let bi = vmx::flush_denorm(b[i]);
|
let bi = vmx::flush_denorm(b[i]);
|
||||||
let di = vmx::flush_denorm(d[i]);
|
let di = vmx::flush_denorm(d[i]);
|
||||||
r[i] = ai.mul_add(di, bi);
|
// PPCBUG-437.
|
||||||
|
r[i] = vmx::flush_denorm(ai.mul_add(di, bi));
|
||||||
}
|
}
|
||||||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
@@ -2015,7 +2039,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
let ai = vmx::flush_denorm(a[i]);
|
let ai = vmx::flush_denorm(a[i]);
|
||||||
let bi = vmx::flush_denorm(b[i]);
|
let bi = vmx::flush_denorm(b[i]);
|
||||||
let ci = vmx::flush_denorm(c[i]);
|
let ci = vmx::flush_denorm(c[i]);
|
||||||
r[i] = bi - ai * ci;
|
// PPCBUG-426: single FMA rounding instead of two-step (b - a*c).
|
||||||
|
r[i] = vmx::flush_denorm(-ai.mul_add(ci, -bi));
|
||||||
}
|
}
|
||||||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r);
|
ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r);
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
@@ -2032,16 +2057,22 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
let ai = vmx::flush_denorm(a[i]);
|
let ai = vmx::flush_denorm(a[i]);
|
||||||
let bi = vmx::flush_denorm(b[i]);
|
let bi = vmx::flush_denorm(b[i]);
|
||||||
let di = vmx::flush_denorm(d[i]);
|
let di = vmx::flush_denorm(d[i]);
|
||||||
r[i] = di - ai * bi;
|
// PPCBUG-427: single FMA rounding.
|
||||||
|
r[i] = vmx::flush_denorm(-ai.mul_add(bi, -di));
|
||||||
}
|
}
|
||||||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::vmulfp128 => {
|
PpcOpcode::vmulfp128 => {
|
||||||
|
// PPCBUG-435 + PPCBUG-437.
|
||||||
let a = ctx.vr[instr.va128()].as_f32x4();
|
let a = ctx.vr[instr.va128()].as_f32x4();
|
||||||
let b = ctx.vr[instr.vb128()].as_f32x4();
|
let b = ctx.vr[instr.vb128()].as_f32x4();
|
||||||
let mut r = [0f32; 4];
|
let mut r = [0f32; 4];
|
||||||
for i in 0..4 { r[i] = a[i] * b[i]; }
|
for i in 0..4 {
|
||||||
|
let ai = vmx::flush_denorm(a[i]);
|
||||||
|
let bi = vmx::flush_denorm(b[i]);
|
||||||
|
r[i] = vmx::flush_denorm(ai * bi);
|
||||||
|
}
|
||||||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
@@ -2398,11 +2429,13 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::vrfin | PpcOpcode::vrfin128 => {
|
PpcOpcode::vrfin | PpcOpcode::vrfin128 => {
|
||||||
|
// PPCBUG-432: ISA round-to-nearest-even, NOT Rust's `round()`
|
||||||
|
// (which is round-half-away-from-zero).
|
||||||
let vb = if matches!(instr.opcode, PpcOpcode::vrfin128) { instr.vb128() } else { instr.rb() };
|
let vb = if matches!(instr.opcode, PpcOpcode::vrfin128) { instr.vb128() } else { instr.rb() };
|
||||||
let vd = if matches!(instr.opcode, PpcOpcode::vrfin128) { instr.vd128() } else { instr.rd() };
|
let vd = if matches!(instr.opcode, PpcOpcode::vrfin128) { instr.vd128() } else { instr.rd() };
|
||||||
let b = ctx.vr[vb].as_f32x4();
|
let b = ctx.vr[vb].as_f32x4();
|
||||||
let mut r = [0f32; 4];
|
let mut r = [0f32; 4];
|
||||||
for i in 0..4 { r[i] = b[i].round(); }
|
for i in 0..4 { r[i] = b[i].round_ties_even(); }
|
||||||
ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(r);
|
ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(r);
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
@@ -2559,11 +2592,12 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
|
|
||||||
// ===== FPU: Multiply-Add =====
|
// ===== FPU: Multiply-Add =====
|
||||||
PpcOpcode::fmaddx => {
|
PpcOpcode::fmaddx => {
|
||||||
|
// PPCBUG-202: derive VXISI from the input operands, not from the rounded `a*c` (which can overflow to infinity and raise a spurious VXISI).
|
||||||
let a = ctx.fpr[instr.ra()];
|
let a = ctx.fpr[instr.ra()];
|
||||||
let c = ctx.fpr[instr.rc()];
|
let c = ctx.fpr[instr.rc()];
|
||||||
let b = ctx.fpr[instr.rb()];
|
let b = ctx.fpr[instr.rb()];
|
||||||
fpscr::check_invalid_mul(ctx, a, c);
|
fpscr::check_invalid_mul(ctx, a, c);
|
||||||
fpscr::check_invalid_add(ctx, a * c, b, false);
|
fpscr::check_invalid_fma_add(ctx, a, c, b, false);
|
||||||
let result = a.mul_add(c, b);
|
let result = a.mul_add(c, b);
|
||||||
ctx.fpr[instr.rd()] = result;
|
ctx.fpr[instr.rd()] = result;
|
||||||
fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite());
|
fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite());
|
||||||
@@ -2571,10 +2605,12 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::fmaddsx => {
|
PpcOpcode::fmaddsx => {
|
||||||
|
// PPCBUG-181: missing VXISI on add step.
|
||||||
let a = ctx.fpr[instr.ra()];
|
let a = ctx.fpr[instr.ra()];
|
||||||
let c = ctx.fpr[instr.rc()];
|
let c = ctx.fpr[instr.rc()];
|
||||||
let b = ctx.fpr[instr.rb()];
|
let b = ctx.fpr[instr.rb()];
|
||||||
fpscr::check_invalid_mul(ctx, a, c);
|
fpscr::check_invalid_mul(ctx, a, c);
|
||||||
|
fpscr::check_invalid_fma_add(ctx, a, c, b, false);
|
||||||
let result = to_single(ctx, a.mul_add(c, b));
|
let result = to_single(ctx, a.mul_add(c, b));
|
||||||
ctx.fpr[instr.rd()] = result;
|
ctx.fpr[instr.rd()] = result;
|
||||||
fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite());
|
fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite());
|
||||||
@@ -2582,10 +2618,12 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::fmsubx => {
|
PpcOpcode::fmsubx => {
|
||||||
|
// PPCBUG-203: missing VXISI on sub step.
|
||||||
let a = ctx.fpr[instr.ra()];
|
let a = ctx.fpr[instr.ra()];
|
||||||
let c = ctx.fpr[instr.rc()];
|
let c = ctx.fpr[instr.rc()];
|
||||||
let b = ctx.fpr[instr.rb()];
|
let b = ctx.fpr[instr.rb()];
|
||||||
fpscr::check_invalid_mul(ctx, a, c);
|
fpscr::check_invalid_mul(ctx, a, c);
|
||||||
|
fpscr::check_invalid_fma_add(ctx, a, c, b, true);
|
||||||
let result = a.mul_add(c, -b);
|
let result = a.mul_add(c, -b);
|
||||||
ctx.fpr[instr.rd()] = result;
|
ctx.fpr[instr.rd()] = result;
|
||||||
fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite());
|
fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite());
|
||||||
@@ -2593,10 +2631,12 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::fmsubsx => {
|
PpcOpcode::fmsubsx => {
|
||||||
|
// PPCBUG-182: missing VXISI on sub step.
|
||||||
let a = ctx.fpr[instr.ra()];
|
let a = ctx.fpr[instr.ra()];
|
||||||
let c = ctx.fpr[instr.rc()];
|
let c = ctx.fpr[instr.rc()];
|
||||||
let b = ctx.fpr[instr.rb()];
|
let b = ctx.fpr[instr.rb()];
|
||||||
fpscr::check_invalid_mul(ctx, a, c);
|
fpscr::check_invalid_mul(ctx, a, c);
|
||||||
|
fpscr::check_invalid_fma_add(ctx, a, c, b, true);
|
||||||
let result = to_single(ctx, a.mul_add(c, -b));
|
let result = to_single(ctx, a.mul_add(c, -b));
|
||||||
ctx.fpr[instr.rd()] = result;
|
ctx.fpr[instr.rd()] = result;
|
||||||
fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite());
|
fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite());
|
||||||
@@ -2604,44 +2644,58 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::fnmaddx => {
|
PpcOpcode::fnmaddx => {
|
||||||
|
// PPCBUG-203: missing VXISI. PPCBUG-205: NaN sign preserved (no negation on NaN).
|
||||||
let a = ctx.fpr[instr.ra()];
|
let a = ctx.fpr[instr.ra()];
|
||||||
let c = ctx.fpr[instr.rc()];
|
let c = ctx.fpr[instr.rc()];
|
||||||
let b = ctx.fpr[instr.rb()];
|
let b = ctx.fpr[instr.rb()];
|
||||||
fpscr::check_invalid_mul(ctx, a, c);
|
fpscr::check_invalid_mul(ctx, a, c);
|
||||||
let result = -(a.mul_add(c, b));
|
fpscr::check_invalid_fma_add(ctx, a, c, b, false);
|
||||||
|
let fma = a.mul_add(c, b);
|
||||||
|
let result = if fma.is_nan() { fma } else { -fma };
|
||||||
ctx.fpr[instr.rd()] = result;
|
ctx.fpr[instr.rd()] = result;
|
||||||
fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite());
|
fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite());
|
||||||
if instr.rc_bit() { update_cr1_from_fpscr(ctx); }
|
if instr.rc_bit() { update_cr1_from_fpscr(ctx); }
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::fnmaddsx => {
|
PpcOpcode::fnmaddsx => {
|
||||||
|
// PPCBUG-181 + PPCBUG-183: VXISI + NaN sign preservation.
|
||||||
let a = ctx.fpr[instr.ra()];
|
let a = ctx.fpr[instr.ra()];
|
||||||
let c = ctx.fpr[instr.rc()];
|
let c = ctx.fpr[instr.rc()];
|
||||||
let b = ctx.fpr[instr.rb()];
|
let b = ctx.fpr[instr.rb()];
|
||||||
fpscr::check_invalid_mul(ctx, a, c);
|
fpscr::check_invalid_mul(ctx, a, c);
|
||||||
let result = to_single(ctx, -(a.mul_add(c, b)));
|
fpscr::check_invalid_fma_add(ctx, a, c, b, false);
|
||||||
|
let fma = a.mul_add(c, b);
|
||||||
|
let neg = if fma.is_nan() { fma } else { -fma };
|
||||||
|
let result = to_single(ctx, neg);
|
||||||
ctx.fpr[instr.rd()] = result;
|
ctx.fpr[instr.rd()] = result;
|
||||||
fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite());
|
fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite());
|
||||||
if instr.rc_bit() { update_cr1_from_fpscr(ctx); }
|
if instr.rc_bit() { update_cr1_from_fpscr(ctx); }
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::fnmsubx => {
|
PpcOpcode::fnmsubx => {
|
||||||
|
// PPCBUG-203: VXISI. PPCBUG-205: NaN sign preservation.
|
||||||
let a = ctx.fpr[instr.ra()];
|
let a = ctx.fpr[instr.ra()];
|
||||||
let c = ctx.fpr[instr.rc()];
|
let c = ctx.fpr[instr.rc()];
|
||||||
let b = ctx.fpr[instr.rb()];
|
let b = ctx.fpr[instr.rb()];
|
||||||
fpscr::check_invalid_mul(ctx, a, c);
|
fpscr::check_invalid_mul(ctx, a, c);
|
||||||
let result = -(a.mul_add(c, -b));
|
fpscr::check_invalid_fma_add(ctx, a, c, b, true);
|
||||||
|
let fma = a.mul_add(c, -b);
|
||||||
|
let result = if fma.is_nan() { fma } else { -fma };
|
||||||
ctx.fpr[instr.rd()] = result;
|
ctx.fpr[instr.rd()] = result;
|
||||||
fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite());
|
fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite());
|
||||||
if instr.rc_bit() { update_cr1_from_fpscr(ctx); }
|
if instr.rc_bit() { update_cr1_from_fpscr(ctx); }
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::fnmsubsx => {
|
PpcOpcode::fnmsubsx => {
|
||||||
|
// PPCBUG-182 + PPCBUG-183: VXISI + NaN sign preservation.
|
||||||
let a = ctx.fpr[instr.ra()];
|
let a = ctx.fpr[instr.ra()];
|
||||||
let c = ctx.fpr[instr.rc()];
|
let c = ctx.fpr[instr.rc()];
|
||||||
let b = ctx.fpr[instr.rb()];
|
let b = ctx.fpr[instr.rb()];
|
||||||
fpscr::check_invalid_mul(ctx, a, c);
|
fpscr::check_invalid_mul(ctx, a, c);
|
||||||
let result = to_single(ctx, -(a.mul_add(c, -b)));
|
fpscr::check_invalid_fma_add(ctx, a, c, b, true);
|
||||||
|
let fma = a.mul_add(c, -b);
|
||||||
|
let neg = if fma.is_nan() { fma } else { -fma };
|
||||||
|
let result = to_single(ctx, neg);
|
||||||
ctx.fpr[instr.rd()] = result;
|
ctx.fpr[instr.rd()] = result;
|
||||||
fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite());
|
fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite());
|
||||||
if instr.rc_bit() { update_cr1_from_fpscr(ctx); }
|
if instr.rc_bit() { update_cr1_from_fpscr(ctx); }
|
||||||
@@ -2713,12 +2767,18 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::fresx => {
|
PpcOpcode::fresx => {
|
||||||
// Single-precision reciprocal estimate: frD = 1.0 / frB
|
// Single-precision reciprocal estimate: frD = 1.0 / frB.
|
||||||
let b = ctx.fpr[instr.rb()];
|
// PPCBUG-184: pre-quantize input to f32 to match canary's
|
||||||
|
// `f.Recip(f.Convert(frB, FLOAT32_TYPE))` behavior. Hardware
|
||||||
|
// produces a ~12-bit LUT estimate; both emulators produce a
|
||||||
|
// fully-IEEE single reciprocal, but the f32 quantization at
|
||||||
|
// least makes the input precision match.
|
||||||
|
let b_full = ctx.fpr[instr.rb()];
|
||||||
|
let b = b_full as f32 as f64;
|
||||||
if b == 0.0 {
|
if b == 0.0 {
|
||||||
fpscr::set_exception(ctx, fpscr::ZX);
|
fpscr::set_exception(ctx, fpscr::ZX);
|
||||||
}
|
}
|
||||||
if fpscr::is_snan(b) {
|
if fpscr::is_snan(b_full) {
|
||||||
fpscr::set_exception(ctx, fpscr::VXSNAN);
|
fpscr::set_exception(ctx, fpscr::VXSNAN);
|
||||||
}
|
}
|
||||||
let result = to_single(ctx, 1.0 / b);
|
let result = to_single(ctx, 1.0 / b);
|
||||||
@@ -2748,28 +2808,38 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
|
|
||||||
// ===== FPU: Rounding/Conversion =====
|
// ===== FPU: Rounding/Conversion =====
|
||||||
PpcOpcode::frspx => {
|
PpcOpcode::frspx => {
|
||||||
// Round to single precision honouring FPSCR[RN]
|
// Round to single precision honouring FPSCR[RN].
|
||||||
|
// PPCBUG-225: set XX on inexact rounding (almost every frsp call).
|
||||||
let b = ctx.fpr[instr.rb()];
|
let b = ctx.fpr[instr.rb()];
|
||||||
if fpscr::is_snan(b) {
|
if fpscr::is_snan(b) {
|
||||||
fpscr::set_exception(ctx, fpscr::VXSNAN);
|
fpscr::set_exception(ctx, fpscr::VXSNAN);
|
||||||
}
|
}
|
||||||
let result = to_single(ctx, b);
|
let result = to_single(ctx, b);
|
||||||
|
if b.is_finite() && result.is_finite() && result != b {
|
||||||
|
fpscr::set_exception(ctx, fpscr::XX);
|
||||||
|
}
|
||||||
ctx.fpr[instr.rd()] = result;
|
ctx.fpr[instr.rd()] = result;
|
||||||
fpscr::update_after_op(ctx, result, b.is_finite());
|
fpscr::update_after_op(ctx, result, b.is_finite());
|
||||||
if instr.rc_bit() { update_cr1_from_fpscr(ctx); }
|
if instr.rc_bit() { update_cr1_from_fpscr(ctx); }
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::fcfidx => {
|
PpcOpcode::fcfidx => {
|
||||||
// Convert from integer doubleword: frD = (double)(int64_t)frB_as_bits
|
// Convert from integer doubleword: frD = (double)(int64_t)frB_as_bits.
|
||||||
|
// PPCBUG-224: set XX when the i64 is not exactly representable as f64 (only possible for |i64| > 2^53), detected via a round-trip compare.
|
||||||
let bits = ctx.fpr[instr.rb()].to_bits();
|
let bits = ctx.fpr[instr.rb()].to_bits();
|
||||||
let result = (bits as i64) as f64;
|
let i = bits as i64;
|
||||||
|
let result = i as f64;
|
||||||
|
if (result as i64) != i {
|
||||||
|
fpscr::set_exception(ctx, fpscr::XX);
|
||||||
|
}
|
||||||
ctx.fpr[instr.rd()] = result;
|
ctx.fpr[instr.rd()] = result;
|
||||||
fpscr::set_fprf(ctx, fpscr::classify_fprf(result));
|
fpscr::set_fprf(ctx, fpscr::classify_fprf(result));
|
||||||
if instr.rc_bit() { update_cr1_from_fpscr(ctx); }
|
if instr.rc_bit() { update_cr1_from_fpscr(ctx); }
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::fctidx => {
|
PpcOpcode::fctidx => {
|
||||||
// Convert to integer doubleword (round per FPSCR[RN])
|
// Convert to integer doubleword (round per FPSCR[RN]).
|
||||||
|
// PPCBUG-229: set XX on inexact (fractional input).
|
||||||
let val = ctx.fpr[instr.rb()];
|
let val = ctx.fpr[instr.rb()];
|
||||||
let result = if val.is_nan() {
|
let result = if val.is_nan() {
|
||||||
fpscr::set_exception(ctx, fpscr::VXCVI | if fpscr::is_snan(val) { fpscr::VXSNAN } else { 0 });
|
fpscr::set_exception(ctx, fpscr::VXCVI | if fpscr::is_snan(val) { fpscr::VXSNAN } else { 0 });
|
||||||
@@ -2781,6 +2851,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
fpscr::set_exception(ctx, fpscr::VXCVI);
|
fpscr::set_exception(ctx, fpscr::VXCVI);
|
||||||
0x8000_0000_0000_0000u64
|
0x8000_0000_0000_0000u64
|
||||||
} else {
|
} else {
|
||||||
|
if val != val.trunc() { fpscr::set_exception(ctx, fpscr::XX); }
|
||||||
fpscr::round_to_i64(ctx, val) as u64
|
fpscr::round_to_i64(ctx, val) as u64
|
||||||
};
|
};
|
||||||
ctx.fpr[instr.rd()] = f64::from_bits(result);
|
ctx.fpr[instr.rd()] = f64::from_bits(result);
|
||||||
@@ -2788,7 +2859,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::fctidzx => {
|
PpcOpcode::fctidzx => {
|
||||||
// Convert to integer doubleword (round toward zero)
|
// Convert to integer doubleword (round toward zero).
|
||||||
|
// PPCBUG-229: set XX on inexact.
|
||||||
let val = ctx.fpr[instr.rb()];
|
let val = ctx.fpr[instr.rb()];
|
||||||
let result = if val.is_nan() {
|
let result = if val.is_nan() {
|
||||||
fpscr::set_exception(ctx, fpscr::VXCVI | if fpscr::is_snan(val) { fpscr::VXSNAN } else { 0 });
|
fpscr::set_exception(ctx, fpscr::VXCVI | if fpscr::is_snan(val) { fpscr::VXSNAN } else { 0 });
|
||||||
@@ -2800,6 +2872,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
fpscr::set_exception(ctx, fpscr::VXCVI);
|
fpscr::set_exception(ctx, fpscr::VXCVI);
|
||||||
0x8000_0000_0000_0000u64
|
0x8000_0000_0000_0000u64
|
||||||
} else {
|
} else {
|
||||||
|
if val != val.trunc() { fpscr::set_exception(ctx, fpscr::XX); }
|
||||||
(val.trunc() as i64) as u64
|
(val.trunc() as i64) as u64
|
||||||
};
|
};
|
||||||
ctx.fpr[instr.rd()] = f64::from_bits(result);
|
ctx.fpr[instr.rd()] = f64::from_bits(result);
|
||||||
@@ -2807,7 +2880,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::fctiwx => {
|
PpcOpcode::fctiwx => {
|
||||||
// Convert to integer word (round per FPSCR[RN])
|
// Convert to integer word (round per FPSCR[RN]).
|
||||||
|
// PPCBUG-230: set XX on inexact.
|
||||||
let val = ctx.fpr[instr.rb()];
|
let val = ctx.fpr[instr.rb()];
|
||||||
let result_u32: u32 = if val.is_nan() {
|
let result_u32: u32 = if val.is_nan() {
|
||||||
fpscr::set_exception(ctx, fpscr::VXCVI | if fpscr::is_snan(val) { fpscr::VXSNAN } else { 0 });
|
fpscr::set_exception(ctx, fpscr::VXCVI | if fpscr::is_snan(val) { fpscr::VXSNAN } else { 0 });
|
||||||
@@ -2819,6 +2893,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
fpscr::set_exception(ctx, fpscr::VXCVI);
|
fpscr::set_exception(ctx, fpscr::VXCVI);
|
||||||
0x8000_0000
|
0x8000_0000
|
||||||
} else {
|
} else {
|
||||||
|
if val != val.trunc() { fpscr::set_exception(ctx, fpscr::XX); }
|
||||||
fpscr::round_to_i32(ctx, val) as u32
|
fpscr::round_to_i32(ctx, val) as u32
|
||||||
};
|
};
|
||||||
ctx.fpr[instr.rd()] = f64::from_bits(result_u32 as u64);
|
ctx.fpr[instr.rd()] = f64::from_bits(result_u32 as u64);
|
||||||
@@ -2826,7 +2901,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::fctiwzx => {
|
PpcOpcode::fctiwzx => {
|
||||||
// Convert to integer word (round toward zero)
|
// Convert to integer word (round toward zero).
|
||||||
|
// PPCBUG-230: set XX on inexact.
|
||||||
let val = ctx.fpr[instr.rb()];
|
let val = ctx.fpr[instr.rb()];
|
||||||
let result_u32: u32 = if val.is_nan() {
|
let result_u32: u32 = if val.is_nan() {
|
||||||
fpscr::set_exception(ctx, fpscr::VXCVI | if fpscr::is_snan(val) { fpscr::VXSNAN } else { 0 });
|
fpscr::set_exception(ctx, fpscr::VXCVI | if fpscr::is_snan(val) { fpscr::VXSNAN } else { 0 });
|
||||||
@@ -2838,6 +2914,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
fpscr::set_exception(ctx, fpscr::VXCVI);
|
fpscr::set_exception(ctx, fpscr::VXCVI);
|
||||||
0x8000_0000
|
0x8000_0000
|
||||||
} else {
|
} else {
|
||||||
|
if val != val.trunc() { fpscr::set_exception(ctx, fpscr::XX); }
|
||||||
val.trunc() as i32 as u32
|
val.trunc() as i32 as u32
|
||||||
};
|
};
|
||||||
ctx.fpr[instr.rd()] = f64::from_bits(result_u32 as u64);
|
ctx.fpr[instr.rd()] = f64::from_bits(result_u32 as u64);
|
||||||
@@ -4378,7 +4455,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
let ai = vmx::flush_denorm(a[i]);
|
let ai = vmx::flush_denorm(a[i]);
|
||||||
let bi = vmx::flush_denorm(b[i]);
|
let bi = vmx::flush_denorm(b[i]);
|
||||||
let di = vmx::flush_denorm(d[i]);
|
let di = vmx::flush_denorm(d[i]);
|
||||||
r[i] = ai.mul_add(di, bi);
|
// PPCBUG-437: flush subnormal output too.
|
||||||
|
r[i] = vmx::flush_denorm(ai.mul_add(di, bi));
|
||||||
}
|
}
|
||||||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
@@ -4387,16 +4465,25 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
// Canary `InstrEmit_vmsum3fp128` flushes the *output* denormal
|
// Canary `InstrEmit_vmsum3fp128` flushes the *output* denormal
|
||||||
// unconditionally (not the inputs) — see ppc_emit_altivec.cc:1067-1075.
|
// unconditionally (not the inputs) — see ppc_emit_altivec.cc:1067-1075.
|
||||||
PpcOpcode::vmsum3fp128 => {
|
PpcOpcode::vmsum3fp128 => {
|
||||||
|
// PPCBUG-436: flush per-product intermediates (not just the final sum).
|
||||||
let a = ctx.vr[instr.va128()].as_f32x4();
|
let a = ctx.vr[instr.va128()].as_f32x4();
|
||||||
let b = ctx.vr[instr.vb128()].as_f32x4();
|
let b = ctx.vr[instr.vb128()].as_f32x4();
|
||||||
let s = vmx::flush_denorm(a[0] * b[0] + a[1] * b[1] + a[2] * b[2]);
|
let p0 = vmx::flush_denorm(a[0] * b[0]);
|
||||||
|
let p1 = vmx::flush_denorm(a[1] * b[1]);
|
||||||
|
let p2 = vmx::flush_denorm(a[2] * b[2]);
|
||||||
|
let s = vmx::flush_denorm(p0 + p1 + p2);
|
||||||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4(s, s, s, s);
|
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4(s, s, s, s);
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::vmsum4fp128 => {
|
PpcOpcode::vmsum4fp128 => {
|
||||||
|
// PPCBUG-436.
|
||||||
let a = ctx.vr[instr.va128()].as_f32x4();
|
let a = ctx.vr[instr.va128()].as_f32x4();
|
||||||
let b = ctx.vr[instr.vb128()].as_f32x4();
|
let b = ctx.vr[instr.vb128()].as_f32x4();
|
||||||
let s = vmx::flush_denorm(a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3]);
|
let p0 = vmx::flush_denorm(a[0] * b[0]);
|
||||||
|
let p1 = vmx::flush_denorm(a[1] * b[1]);
|
||||||
|
let p2 = vmx::flush_denorm(a[2] * b[2]);
|
||||||
|
let p3 = vmx::flush_denorm(a[3] * b[3]);
|
||||||
|
let s = vmx::flush_denorm(p0 + p1 + p2 + p3);
|
||||||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4(s, s, s, s);
|
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4(s, s, s, s);
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
@@ -5618,6 +5705,46 @@ mod tests {
|
|||||||
|
|
||||||
// ---------- Phase 2h: FPU / FPSCR ----------
|
// ---------- Phase 2h: FPU / FPSCR ----------
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn fmsub_inf_minus_inf_sets_vxisi() {
|
||||||
|
// PPCBUG-203 regression: fmsub with a*c = +∞, -b = -∞ (b=+∞) →
|
||||||
|
// +∞ + (-∞) → VXISI. Pre-fix had no add-step VXISI check.
|
||||||
|
let mut ctx = PpcContext::new();
|
||||||
|
let mut mem = TestMem::new();
|
||||||
|
ctx.fpr[1] = f64::INFINITY;
|
||||||
|
ctx.fpr[2] = f64::INFINITY; // b
|
||||||
|
ctx.fpr[3] = 1.0;
|
||||||
|
// fmsub f4, f1, f3, f2 → 1*∞ - ∞ = VXISI
|
||||||
|
// A-form: opcode=63, XO=28 (fmsub double): (63<<26)|(rd<<21)|(ra<<16)|(rb<<11)|(rc<<6)|(28<<1)
|
||||||
|
let raw = (63u32 << 26) | (4 << 21) | (1 << 16) | (2 << 11) | (3 << 6) | (28 << 1);
|
||||||
|
write_instr(&mut mem, 0, raw);
|
||||||
|
ctx.pc = 0;
|
||||||
|
step(&mut ctx, &mut mem);
|
||||||
|
assert_ne!(ctx.fpscr & fpscr::VXISI, 0, "fmsub ∞-∞ must set VXISI");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn fnmadd_nan_input_preserves_nan_sign() {
|
||||||
|
// PPCBUG-205 regression: ISA forbids negating a NaN result.
|
||||||
|
// a*c+b producing a NaN → result must be the NaN unchanged, not -NaN.
|
||||||
|
let mut ctx = PpcContext::new();
|
||||||
|
let mut mem = TestMem::new();
|
||||||
|
let qnan = f64::NAN;
|
||||||
|
ctx.fpr[1] = qnan;
|
||||||
|
ctx.fpr[2] = 1.0;
|
||||||
|
ctx.fpr[3] = 2.0;
|
||||||
|
// fnmadd f4, f1, f3, f2 (XO=31)
|
||||||
|
let raw = (63u32 << 26) | (4 << 21) | (1 << 16) | (2 << 11) | (3 << 6) | (31 << 1);
|
||||||
|
write_instr(&mut mem, 0, raw);
|
||||||
|
ctx.pc = 0;
|
||||||
|
step(&mut ctx, &mut mem);
|
||||||
|
// Result must be NaN with the same sign bit as the input NaN.
|
||||||
|
let r = ctx.fpr[4];
|
||||||
|
assert!(r.is_nan(), "result must be NaN");
|
||||||
|
assert_eq!(r.is_sign_negative(), qnan.is_sign_negative(),
|
||||||
|
"fnmadd must preserve NaN sign (no negation on NaN)");
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn fadd_inf_minus_inf_sets_vxisi() {
|
fn fadd_inf_minus_inf_sets_vxisi() {
|
||||||
let mut ctx = PpcContext::new();
|
let mut ctx = PpcContext::new();
|
||||||
|
|||||||
@@ -214,7 +214,9 @@ pub fn flush_denorm(x: f32) -> f32 {
|
|||||||
//
|
//
|
||||||
// vctsxs / vctuxs flush denormal inputs to 0 before scaling, per Altivec.
|
// vctsxs / vctuxs flush denormal inputs to 0 before scaling, per Altivec.
|
||||||
#[inline] pub fn cvt_f32_to_i32_sat(x: f32, scale_bits: u32) -> (i32, bool) {
|
#[inline] pub fn cvt_f32_to_i32_sat(x: f32, scale_bits: u32) -> (i32, bool) {
|
||||||
if x.is_nan() { return (0, true); }
|
// PPCBUG-433: AltiVec ISA saturates NaN to INT_MIN (0x80000000), not 0.
|
||||||
|
// (vctuxs's NaN→0 is correct per AltiVec ISA — see PPCBUG-434.)
|
||||||
|
if x.is_nan() { return (i32::MIN, true); }
|
||||||
let x = flush_denorm(x);
|
let x = flush_denorm(x);
|
||||||
let scaled = (x as f64) * ((1u64 << scale_bits) as f64);
|
let scaled = (x as f64) * ((1u64 << scale_bits) as f64);
|
||||||
if scaled >= i32::MAX as f64 { return (i32::MAX, true); }
|
if scaled >= i32::MAX as f64 { return (i32::MAX, true); }
|
||||||
|
|||||||
Reference in New Issue
Block a user