Post-P8 review nit: the if/else branches were identical (`adj_bits - 1` either way). Both positive and negative finite f32 values use the IEEE-754 sign bit as the MSB, and subtracting 1 from `to_bits()` always reduces magnitude by one ULP. Replace the mock-conditional with the unconditional form + a comment explaining why one operation works for both signs. No behavior change. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
448 lines
17 KiB
Rust
448 lines
17 KiB
Rust
//! FPSCR (Floating-Point Status and Control Register) maintenance.
|
|
//!
|
|
//! Scope per project plan: rounding modes honoured, plus the exception bits
|
|
//! games actually read (FX, FEX, VX, OX, UX, ZX, XX, FI, FPRF). Enabled-
|
|
//! exception dispatch (FE[0,1], VE/OE/UE/ZE/XE) is *not* modelled — games
|
|
//! running on Xenon almost never take FP traps.
|
|
//!
|
|
//! Bit layout (PowerISA, MSB-0 numbering; stored in a u32 with bit 31 = MSB):
|
|
//!
|
|
//! | PPC bit | u32 mask | Name |
|
|
//! |---------|-------------------------|-------------|
|
|
//! | 0 | `1<<31` | FX |
|
|
//! | 1 | `1<<30` | FEX |
|
|
//! | 2 | `1<<29` | VX (summary)|
|
|
//! | 3 | `1<<28` | OX |
|
|
//! | 4 | `1<<27` | UX |
|
|
//! | 5 | `1<<26` | ZX |
|
|
//! | 6 | `1<<25` | XX |
|
|
//! | 7 | `1<<24` | VXSNAN |
|
|
//! | 8 | `1<<23` | VXISI |
|
|
//! | 9 | `1<<22` | VXIDI |
|
|
//! | 10 | `1<<21` | VXZDZ |
|
|
//! | 11 | `1<<20` | VXIMZ |
|
|
//! | 12 | `1<<19` | VXVC |
|
|
//! | 13 | `1<<18` | FR |
|
|
//! | 14 | `1<<17` | FI |
|
|
//! | 15..19 | `0x1F << 12` (`0x1F000`) | FPRF (5 bits)|
|
|
//! | 21 | `1<<10` | VXSOFT |
|
|
//! | 22 | `1<<9` | VXSQRT |
|
|
//! | 23 | `1<<8` | VXCVI |
|
|
//! | 30..31 | `0x3` | RN (2 bits) |
|
|
|
|
use crate::context::PpcContext;
|
|
|
|
/// Mask for the FPSCR bit at PowerISA (MSB-0) position `ppc_bit`: PPC bit 0
/// is the most significant bit of the `u32`, PPC bit 31 the least.
const fn msb0_bit(ppc_bit: u32) -> u32 {
    1 << (31 - ppc_bit)
}

/// PPC bit 0 — FX, latched whenever an exception bit is newly set.
pub const FX: u32 = msb0_bit(0);
/// PPC bit 1 — FEX.
pub const FEX: u32 = msb0_bit(1);
/// PPC bit 2 — VX, summary of the `VX*` bits.
pub const VX: u32 = msb0_bit(2);
/// PPC bit 3 — OX.
pub const OX: u32 = msb0_bit(3);
/// PPC bit 4 — UX.
pub const UX: u32 = msb0_bit(4);
/// PPC bit 5 — ZX.
pub const ZX: u32 = msb0_bit(5);
/// PPC bit 6 — XX.
pub const XX: u32 = msb0_bit(6);
/// PPC bit 7 — VXSNAN.
pub const VXSNAN: u32 = msb0_bit(7);
/// PPC bit 8 — VXISI.
pub const VXISI: u32 = msb0_bit(8);
/// PPC bit 9 — VXIDI.
pub const VXIDI: u32 = msb0_bit(9);
/// PPC bit 10 — VXZDZ.
pub const VXZDZ: u32 = msb0_bit(10);
/// PPC bit 11 — VXIMZ.
pub const VXIMZ: u32 = msb0_bit(11);
/// PPC bit 12 — VXVC.
pub const VXVC: u32 = msb0_bit(12);
/// PPC bit 13 — FR.
pub const FR: u32 = msb0_bit(13);
/// PPC bit 14 — FI.
pub const FI: u32 = msb0_bit(14);
/// PPC bits 15..19 — the 5-bit FPRF field.
pub const FPRF_MASK: u32 = 0x1F << 12;
/// PPC bit 21 — VXSOFT.
pub const VXSOFT: u32 = msb0_bit(21);
/// PPC bit 22 — VXSQRT.
pub const VXSQRT: u32 = msb0_bit(22);
/// PPC bit 23 — VXCVI.
pub const VXCVI: u32 = msb0_bit(23);
/// PPC bits 30..31 — the 2-bit RN (rounding mode) field.
pub const RN_MASK: u32 = 0x3;

/// Union of all VX* bits (used for the VX summary recomputation).
pub const VX_ALL: u32 =
    VXSNAN | VXISI | VXIDI | VXZDZ | VXIMZ | VXVC | VXSOFT | VXSQRT | VXCVI;
|
|
|
|
/// FPRF classification codes (5-bit, placed in FPSCR bits 15..19).
///
/// Each code is the high "C" bit followed by (FL, FG, FE, FU) =
/// (less, greater, equal, unordered). In the table below C is set for QNaN,
/// ±denormal and −zero, and clear for ±normal, ±infinity and +zero.
pub mod fprf {
    const C: u8 = 0b1_0000;
    const FL: u8 = 0b0_1000;
    const FG: u8 = 0b0_0100;
    const FE: u8 = 0b0_0010;
    const FU: u8 = 0b0_0001;

    pub const QNAN: u8 = C | FU;
    pub const NEG_INF: u8 = FL | FU;
    pub const NEG_NORMAL: u8 = FL;
    pub const NEG_DENORMAL: u8 = C | FL;
    pub const NEG_ZERO: u8 = C | FE;
    pub const POS_ZERO: u8 = FE;
    pub const POS_DENORMAL: u8 = C | FG;
    pub const POS_NORMAL: u8 = FG;
    pub const POS_INF: u8 = FG | FU;
}
|
|
|
|
/// Rounding mode selected by the two-bit FPSCR[RN] field. Each variant's
/// discriminant is the RN encoding it corresponds to.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RoundingMode {
    /// RN=00
    NearestEven = 0b00,
    /// RN=01
    TowardZero = 0b01,
    /// RN=10
    TowardPosInf = 0b10,
    /// RN=11
    TowardNegInf = 0b11,
}
|
|
|
|
pub fn rounding_mode(ctx: &PpcContext) -> RoundingMode {
|
|
match ctx.fpscr & RN_MASK {
|
|
0 => RoundingMode::NearestEven,
|
|
1 => RoundingMode::TowardZero,
|
|
2 => RoundingMode::TowardPosInf,
|
|
_ => RoundingMode::TowardNegInf,
|
|
}
|
|
}
|
|
|
|
/// Classify a finite f64 into its FPRF 5-bit code.
|
|
pub fn classify_fprf(v: f64) -> u8 {
|
|
if v.is_nan() {
|
|
fprf::QNAN
|
|
} else if v.is_infinite() {
|
|
if v.is_sign_negative() { fprf::NEG_INF } else { fprf::POS_INF }
|
|
} else if v == 0.0 {
|
|
if v.is_sign_negative() { fprf::NEG_ZERO } else { fprf::POS_ZERO }
|
|
} else if v.is_subnormal() {
|
|
if v.is_sign_negative() { fprf::NEG_DENORMAL } else { fprf::POS_DENORMAL }
|
|
} else if v.is_sign_negative() { fprf::NEG_NORMAL } else { fprf::POS_NORMAL }
|
|
}
|
|
|
|
/// Write FPRF into FPSCR, preserving other bits.
|
|
pub fn set_fprf(ctx: &mut PpcContext, code: u8) {
|
|
ctx.fpscr = (ctx.fpscr & !FPRF_MASK) | ((code as u32 & 0x1F) << 12);
|
|
}
|
|
|
|
/// Set one or more exception bits on FPSCR, maintaining FX (sticky set on any
|
|
/// new exception) and VX (summary of VX* bits).
|
|
pub fn set_exception(ctx: &mut PpcContext, bits: u32) {
|
|
let prev = ctx.fpscr;
|
|
let new = prev | bits;
|
|
// FX is sticky-set if any new non-sticky bit transitions to 1. PPC defines
|
|
// FX as "any of OX, UX, ZX, XX, VX* newly set". Compute the transition set.
|
|
let transition = (new & !prev) & (OX | UX | ZX | XX | VX_ALL);
|
|
let mut updated = new;
|
|
if transition != 0 {
|
|
updated |= FX;
|
|
}
|
|
// Recompute VX summary from any VX* bits currently set.
|
|
if (updated & VX_ALL) != 0 { updated |= VX; }
|
|
ctx.fpscr = updated;
|
|
}
|
|
|
|
/// Classify the inputs of a floating-point arithmetic op and set appropriate
|
|
/// VX* bits. Returns true if any invalid-operation was detected (caller may
|
|
/// want to write a default QNaN result).
|
|
///
|
|
/// Detected cases:
|
|
/// * any SNaN input → VXSNAN
|
|
/// * infinity - infinity (same sign) → VXISI
|
|
/// * 0 / 0 → VXZDZ
|
|
/// * infinity / infinity → VXIDI
|
|
/// * 0 * infinity → VXIMZ
|
|
pub fn check_invalid_add(ctx: &mut PpcContext, a: f64, b: f64, sub: bool) -> bool {
|
|
let mut bits = 0u32;
|
|
if is_snan(a) || is_snan(b) { bits |= VXSNAN; }
|
|
if a.is_infinite() && b.is_infinite() {
|
|
// For add: VXISI iff same-sign(a,b) negated — inf - inf
|
|
// For sub: VXISI iff same-sign(a,b) — (+inf) - (+inf) or (-inf) - (-inf)
|
|
let both_pos = a.is_sign_positive() && b.is_sign_positive();
|
|
let both_neg = a.is_sign_negative() && b.is_sign_negative();
|
|
if sub {
|
|
if both_pos || both_neg { bits |= VXISI; }
|
|
} else {
|
|
// add: opposite signs cancel to inf-inf
|
|
if a.is_sign_positive() != b.is_sign_positive() { bits |= VXISI; }
|
|
}
|
|
}
|
|
if bits != 0 { set_exception(ctx, bits); return true; }
|
|
false
|
|
}
|
|
|
|
/// FMA-aware add/sub VXISI check. Per PPCBUG-202+203: the previous code
|
|
/// passed `a*c` as `lhs` to `check_invalid_add`, which suffers from two
|
|
/// rounding errors and can spuriously raise/miss VXISI in extreme cases.
|
|
/// This helper derives the mathematical product's sign and infinity status
|
|
/// from the inputs directly.
|
|
///
|
|
/// `sub` follows the same semantics as `check_invalid_add`:
|
|
/// - false (add): VXISI when product and b have opposite signs at infinity
|
|
/// - true (sub): VXISI when product and b have same sign at infinity
|
|
pub fn check_invalid_fma_add(ctx: &mut PpcContext, a: f64, c: f64, b: f64, sub: bool) -> bool {
|
|
let mut bits = 0u32;
|
|
if is_snan(a) || is_snan(c) || is_snan(b) { bits |= VXSNAN; }
|
|
let product_is_inf = (a.is_infinite() || c.is_infinite())
|
|
&& a != 0.0 && c != 0.0
|
|
&& !a.is_nan() && !c.is_nan();
|
|
if product_is_inf && b.is_infinite() {
|
|
let p_neg = a.is_sign_negative() != c.is_sign_negative();
|
|
let b_neg = b.is_sign_negative();
|
|
let same_sign = p_neg == b_neg;
|
|
if (sub && same_sign) || (!sub && !same_sign) {
|
|
bits |= VXISI;
|
|
}
|
|
}
|
|
if bits != 0 { set_exception(ctx, bits); return true; }
|
|
false
|
|
}
|
|
|
|
pub fn check_invalid_mul(ctx: &mut PpcContext, a: f64, b: f64) -> bool {
|
|
let mut bits = 0u32;
|
|
if is_snan(a) || is_snan(b) { bits |= VXSNAN; }
|
|
let zero_times_inf =
|
|
(a == 0.0 && b.is_infinite()) || (b == 0.0 && a.is_infinite());
|
|
if zero_times_inf { bits |= VXIMZ; }
|
|
if bits != 0 { set_exception(ctx, bits); return true; }
|
|
false
|
|
}
|
|
|
|
pub fn check_invalid_div(ctx: &mut PpcContext, a: f64, b: f64) -> bool {
|
|
let mut bits = 0u32;
|
|
if is_snan(a) || is_snan(b) { bits |= VXSNAN; }
|
|
if a == 0.0 && b == 0.0 { bits |= VXZDZ; }
|
|
if a.is_infinite() && b.is_infinite() { bits |= VXIDI; }
|
|
if bits != 0 { set_exception(ctx, bits); return true; }
|
|
false
|
|
}
|
|
|
|
/// Divide-by-zero (finite nonzero / 0) — sets ZX but not VX.
|
|
pub fn check_zero_divide(ctx: &mut PpcContext, a: f64, b: f64) {
|
|
if b == 0.0 && a != 0.0 && !a.is_nan() && !a.is_infinite() {
|
|
set_exception(ctx, ZX);
|
|
}
|
|
}
|
|
|
|
/// Post-op: classify the result and update FPRF + detect overflow/underflow/inexact.
|
|
/// `inputs_finite` lets us suppress OX for ops whose output is infinite because
|
|
/// an input already was.
|
|
pub fn update_after_op(ctx: &mut PpcContext, result: f64, inputs_were_finite: bool) {
|
|
let mut bits = 0u32;
|
|
if result.is_infinite() && inputs_were_finite {
|
|
bits |= OX;
|
|
}
|
|
if result.is_subnormal() {
|
|
bits |= UX;
|
|
}
|
|
if bits != 0 { set_exception(ctx, bits); }
|
|
set_fprf(ctx, classify_fprf(result));
|
|
}
|
|
|
|
/// Test whether an f64 is a signalling NaN.
///
/// In IEEE 754-2008 binary64 the quiet/signalling distinction is carried by
/// the most significant mantissa bit (bit 51): set for a QNaN, clear for an
/// SNaN. A NaN always has a nonzero mantissa, so no further check is needed.
pub fn is_snan(x: f64) -> bool {
    const QUIET_BIT: u64 = 1 << 51;
    x.is_nan() && x.to_bits() & QUIET_BIT == 0
}
|
|
|
|
/// Round an f64 to f32 honouring FPSCR[RN]. Uses the current hardware
|
|
/// rounding mode when RN=0 (nearest-even, the PPC default), otherwise
|
|
/// emulates the directed rounding via bit-manipulation.
|
|
pub fn round_to_single(ctx: &PpcContext, v: f64) -> f64 {
|
|
match rounding_mode(ctx) {
|
|
RoundingMode::NearestEven => (v as f32) as f64,
|
|
RoundingMode::TowardZero => round_single_toward_zero(v) as f64,
|
|
RoundingMode::TowardPosInf => round_single_toward_pos_inf(v) as f64,
|
|
RoundingMode::TowardNegInf => round_single_toward_neg_inf(v) as f64,
|
|
}
|
|
}
|
|
|
|
/// Round an f64 to an i64 integer honouring FPSCR[RN]. Used by fctidx.
|
|
pub fn round_to_i64(ctx: &PpcContext, v: f64) -> i64 {
|
|
match rounding_mode(ctx) {
|
|
RoundingMode::NearestEven => {
|
|
// PPCBUG-221: round-half-to-even (banker's rounding). The previous
|
|
// tie-detection used `(diff - 0.5).abs() < f64::EPSILON` which
|
|
// breaks for |v| > 2^52 (where v.trunc() == v exactly, giving diff
|
|
// == 0). Use a fractional-part-only check that's exact for
|
|
// |v| <= 2^52 and degenerates correctly above.
|
|
let t = v.trunc();
|
|
let frac = v - t;
|
|
let fa = frac.abs();
|
|
if fa > 0.5 {
|
|
t as i64 + if v >= 0.0 { 1 } else { -1 }
|
|
} else if fa < 0.5 {
|
|
t as i64
|
|
} else {
|
|
// Exact 0.5 tie — round to even.
|
|
let fi = t as i64;
|
|
if fi & 1 == 0 { fi } else { fi + if v >= 0.0 { 1 } else { -1 } }
|
|
}
|
|
}
|
|
RoundingMode::TowardZero => v.trunc() as i64,
|
|
RoundingMode::TowardPosInf => v.ceil() as i64,
|
|
RoundingMode::TowardNegInf => v.floor() as i64,
|
|
}
|
|
}
|
|
|
|
/// Round an f64 to an i32 integer honouring FPSCR[RN]. Used by fctiwx.
|
|
pub fn round_to_i32(ctx: &PpcContext, v: f64) -> i32 {
|
|
round_to_i64(ctx, v).clamp(i32::MIN as i64, i32::MAX as i64) as i32
|
|
}
|
|
|
|
// ------ directed rounding helpers (f64 → f32) ------
|
|
|
|
/// Round `v` to f32 toward zero (truncation).
///
/// The default f64→f32 cast rounds to nearest-even, so the result is either
/// already correct or one ULP too large in magnitude; in the latter case it
/// is stepped back by one ULP. A finite `v` beyond the f32 range therefore
/// yields ±`f32::MAX` rather than ±infinity, as truncation never overflows to
/// infinity.
fn round_single_toward_zero(v: f64) -> f32 {
    let nearest = v as f32;

    // NaN propagates; a (signed) zero cannot move any closer to zero.
    if nearest.is_nan() || nearest == 0.0 {
        return nearest;
    }
    // Already at or below |v| — this also covers v = ±inf exactly.
    if f64::from(nearest.abs()) <= v.abs() {
        return nearest;
    }

    // The sign is the MSB and the magnitude occupies the low 31 bits, so for
    // either sign subtracting 1 from the bit pattern reduces the magnitude by
    // one ULP. The magnitude is nonzero here, so the borrow never reaches the
    // sign bit. For an overflowed ±inf this produces ±f32::MAX.
    f32::from_bits(nearest.to_bits() - 1)
}
|
|
|
|
/// Round `v` to f32 toward +infinity.
///
/// Starts from the nearest-even cast and, if that landed below `v`, steps one
/// ULP upward. A finite `v` below `-f32::MAX` therefore yields `-f32::MAX`
/// (not −infinity), while a finite `v` above `f32::MAX` yields +infinity.
fn round_single_toward_pos_inf(v: f64) -> f32 {
    let nearest = v as f32;

    // NaN propagates. Otherwise keep the nearest value if it is not below v;
    // this covers +inf and an exact -inf input.
    if nearest.is_nan() || f64::from(nearest) >= v {
        return nearest;
    }

    // nearest < v — move one ULP in the + direction. For negative values that
    // means shrinking the magnitude (bits - 1; turns an overflowed -inf into
    // -f32::MAX), for non-negative values growing it (bits + 1).
    let bits = nearest.to_bits();
    let bumped = if nearest.is_sign_negative() { bits - 1 } else { bits + 1 };
    f32::from_bits(bumped)
}
|
|
|
|
/// Round `v` to f32 toward −infinity.
///
/// Starts from the nearest-even cast and, if that landed above `v`, steps one
/// ULP downward. A finite `v` above `f32::MAX` therefore yields `f32::MAX`
/// (not +infinity), while a finite `v` below `-f32::MAX` yields −infinity.
fn round_single_toward_neg_inf(v: f64) -> f32 {
    let nearest = v as f32;

    // NaN propagates. Otherwise keep the nearest value if it is not above v;
    // this covers -inf and an exact +inf input.
    if nearest.is_nan() || f64::from(nearest) <= v {
        return nearest;
    }

    // nearest > v — move one ULP in the − direction. For negative values that
    // means growing the magnitude (bits + 1), for non-negative values
    // shrinking it (bits - 1; turns an overflowed +inf into f32::MAX).
    let bits = nearest.to_bits();
    let bumped = if nearest.is_sign_negative() { bits + 1 } else { bits - 1 };
    f32::from_bits(bumped)
}
|
|
|
|
/// Drop-in replacement for the old `update_cr1_from_fpscr`. Reads the
|
|
/// currently-maintained FPSCR bits (FX, FEX, VX, OX) into CR1.
|
|
pub fn update_cr1(ctx: &mut PpcContext) {
|
|
ctx.cr[1].lt = (ctx.fpscr & FX) != 0;
|
|
ctx.cr[1].gt = (ctx.fpscr & FEX) != 0;
|
|
ctx.cr[1].eq = (ctx.fpscr & VX) != 0;
|
|
ctx.cr[1].so = (ctx.fpscr & OX) != 0;
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Fresh context as produced by `PpcContext::new()`.
    fn fresh_ctx() -> PpcContext {
        PpcContext::new()
    }

    #[test]
    fn rn_default_is_nearest() {
        let c = fresh_ctx();
        assert_eq!(rounding_mode(&c), RoundingMode::NearestEven);
    }

    #[test]
    fn rn_bits_decode() {
        let mut c = fresh_ctx();
        let cases = [
            (0x1, RoundingMode::TowardZero),
            (0x2, RoundingMode::TowardPosInf),
            (0x3, RoundingMode::TowardNegInf),
        ];
        for (rn, expected) in cases {
            c.fpscr = rn;
            assert_eq!(rounding_mode(&c), expected);
        }
    }

    #[test]
    fn fprf_classifies_correctly() {
        let cases = [
            (1.0, fprf::POS_NORMAL),
            (-1.0, fprf::NEG_NORMAL),
            (0.0, fprf::POS_ZERO),
            (-0.0, fprf::NEG_ZERO),
            (f64::INFINITY, fprf::POS_INF),
            (f64::NEG_INFINITY, fprf::NEG_INF),
            (f64::NAN, fprf::QNAN),
            (f64::from_bits(1), fprf::POS_DENORMAL),
        ];
        for (value, expected) in cases {
            assert_eq!(classify_fprf(value), expected);
        }
    }

    #[test]
    fn fx_is_sticky_on_new_exception() {
        let mut c = fresh_ctx();
        set_exception(&mut c, OX);
        assert_ne!(c.fpscr & FX, 0);

        // Clear FX/OX manually.
        c.fpscr &= !(FX | OX);

        // Re-set OX; FX should re-latch.
        set_exception(&mut c, OX);
        assert_ne!(c.fpscr & FX, 0);
    }

    #[test]
    fn vx_summary_set_on_any_vx_bit() {
        let mut c = fresh_ctx();
        set_exception(&mut c, VXSNAN);
        assert_ne!(c.fpscr & VX, 0);
        assert_ne!(c.fpscr & VXSNAN, 0);
    }

    #[test]
    fn round_to_single_nearest_is_identity_on_representable() {
        let c = fresh_ctx();
        assert_eq!(round_to_single(&c, 1.0_f64), 1.0_f64);
    }

    #[test]
    fn round_to_i32_clamps_out_of_range() {
        let c = fresh_ctx();
        assert_eq!(round_to_i32(&c, 1e20_f64), i32::MAX);
        assert_eq!(round_to_i32(&c, -1e20_f64), i32::MIN);
    }

    #[test]
    fn round_to_i64_nearest_even_on_tie() {
        let c = fresh_ctx();
        let cases: [(f64, i64); 7] = [
            (0.5, 0),
            (1.5, 2),
            (2.5, 2),
            (3.5, 4),
            (-0.5, 0),
            (-1.5, -2),
            (-2.5, -2),
        ];
        for (input, expected) in cases {
            assert_eq!(round_to_i64(&c, input), expected);
        }
    }

    #[test]
    fn round_to_i64_non_tie_cases() {
        // PPCBUG-221 regression: non-tie fractions must round to nearest.
        let c = fresh_ctx();
        let cases: [(f64, i64); 4] = [(0.4, 0), (0.6, 1), (-0.4, 0), (-0.6, -1)];
        for (input, expected) in cases {
            assert_eq!(round_to_i64(&c, input), expected);
        }
    }

    #[test]
    fn round_to_i32_nearest_even_on_tie() {
        // PPCBUG-227: round_to_i32 inherits round_to_i64's tie semantics.
        let c = fresh_ctx();
        let cases: [(f64, i32); 4] = [(0.5, 0), (1.5, 2), (2.5, 2), (-1.5, -2)];
        for (input, expected) in cases {
            assert_eq!(round_to_i32(&c, input), expected);
        }
    }

    #[test]
    fn check_invalid_add_detects_inf_minus_inf() {
        let mut c = fresh_ctx();
        let invalid = check_invalid_add(&mut c, f64::INFINITY, f64::INFINITY, true);
        assert!(invalid);
        assert_ne!(c.fpscr & VXISI, 0);
    }

    #[test]
    fn check_invalid_div_detects_zero_over_zero() {
        let mut c = fresh_ctx();
        let invalid = check_invalid_div(&mut c, 0.0, 0.0);
        assert!(invalid);
        assert_ne!(c.fpscr & VXZDZ, 0);
    }

    #[test]
    fn snan_detection() {
        // SNaN in binary64: sign=0, exp=all-ones, mantissa nonzero with bit 51 clear.
        let snan = f64::from_bits(0x7FF0_0000_0000_0001);
        assert!(is_snan(snan));
        assert!(!is_snan(f64::NAN));
    }
}
|