xenia-rs/crates/xenia-cpu/src/fpscr.rs

//! FPSCR (Floating-Point Status and Control Register) maintenance.
//!
//! Scope per project plan: rounding modes honoured, plus the exception bits
//! games actually read (FX, FEX, VX, OX, UX, ZX, XX, FI, FPRF). Enabled-
//! exception dispatch (FE[0,1], VE/OE/UE/ZE/XE) is *not* modelled — games
//! running on Xenon almost never take FP traps.
//!
//! Bit layout (PowerISA, MSB-0 numbering; stored in a u32 with bit 31 = MSB):
//!
//! | PPC bit | u32 mask                | Name        |
//! |---------|-------------------------|-------------|
//! | 0       | `1<<31`                 | FX          |
//! | 1       | `1<<30`                 | FEX         |
//! | 2       | `1<<29`                 | VX (summary)|
//! | 3       | `1<<28`                 | OX          |
//! | 4       | `1<<27`                 | UX          |
//! | 5       | `1<<26`                 | ZX          |
//! | 6       | `1<<25`                 | XX          |
//! | 7       | `1<<24`                 | VXSNAN      |
//! | 8       | `1<<23`                 | VXISI       |
//! | 9       | `1<<22`                 | VXIDI       |
//! | 10      | `1<<21`                 | VXZDZ       |
//! | 11      | `1<<20`                 | VXIMZ       |
//! | 12      | `1<<19`                 | VXVC        |
//! | 13      | `1<<18`                 | FR          |
//! | 14      | `1<<17`                 | FI          |
//! | 15..19  | `0x1F<<12`              | FPRF (5 bits)|
//! | 21      | `1<<10`                 | VXSOFT      |
//! | 22      | `1<<9`                  | VXSQRT      |
//! | 23      | `1<<8`                  | VXCVI       |
//! | 30..31  | `0x3`                   | RN (2 bits) |

use crate::context::PpcContext;

/// Exception summary (PPC bit 0); latched by `set_exception` whenever a
/// tracked exception bit goes from 0 to 1.
pub const FX: u32      = 1 << 31;
/// Enabled-exception summary (PPC bit 1).
pub const FEX: u32     = 1 << 30;
/// Invalid-operation summary (PPC bit 2); mirrors "any VX* bit is set".
pub const VX: u32      = 1 << 29;
/// Overflow exception (PPC bit 3).
pub const OX: u32      = 1 << 28;
/// Underflow exception (PPC bit 4).
pub const UX: u32      = 1 << 27;
/// Zero-divide exception (PPC bit 5).
pub const ZX: u32      = 1 << 26;
/// Inexact exception (PPC bit 6).
pub const XX: u32      = 1 << 25;
/// Invalid operation: signalling NaN operand (PPC bit 7).
pub const VXSNAN: u32  = 1 << 24;
/// Invalid operation: infinity - infinity (PPC bit 8).
pub const VXISI: u32   = 1 << 23;
/// Invalid operation: infinity / infinity (PPC bit 9).
pub const VXIDI: u32   = 1 << 22;
/// Invalid operation: 0 / 0 (PPC bit 10).
pub const VXZDZ: u32   = 1 << 21;
/// Invalid operation: infinity * 0 (PPC bit 11).
pub const VXIMZ: u32   = 1 << 20;
/// Invalid operation: invalid compare (PPC bit 12).
pub const VXVC: u32    = 1 << 19;
/// Fraction rounded (PPC bit 13).
pub const FR: u32      = 1 << 18;
/// Fraction inexact (PPC bit 14).
pub const FI: u32      = 1 << 17;
/// Mask of the 5-bit FPRF result-class field.
pub const FPRF_MASK: u32 = 0x1F << 12;  // PPC bits 15..19 = u32 bits 16..12
/// Invalid operation: software request (PPC bit 21).
pub const VXSOFT: u32  = 1 << 10;
/// Invalid operation: invalid square root (PPC bit 22).
pub const VXSQRT: u32  = 1 << 9;
/// Invalid operation: invalid integer conversion (PPC bit 23).
pub const VXCVI: u32   = 1 << 8;
/// Mask of the 2-bit RN rounding-control field (PPC bits 30..31).
pub const RN_MASK: u32 = 0x3;

/// Union of all VX* cause bits. `set_exception` sets the `VX` summary bit
/// whenever any bit of this mask is set.
pub const VX_ALL: u32 = VXSNAN | VXISI | VXIDI | VXZDZ | VXIMZ | VXVC | VXSOFT | VXSQRT | VXCVI;

/// FPRF classification codes (5-bit, placed in FPSCR bits 15..19).
///
/// Each code is written as `C_FPCC`: the high bit is the class descriptor
/// "C" and the low four bits are (FL, FG, FE, FU) = (less, greater, equal,
/// unordered). In the table below C is set for QNaN, both denormals and
/// negative zero; it is clear for positive zero, both normals and both
/// infinities.
pub mod fprf {
    /// Quiet NaN.
    pub const QNAN: u8         = 0b1_0001;
    /// Negative infinity.
    pub const NEG_INF: u8      = 0b0_1001;
    /// Negative normalized number.
    pub const NEG_NORMAL: u8   = 0b0_1000;
    /// Negative denormalized number.
    pub const NEG_DENORMAL: u8 = 0b1_1000;
    /// Negative zero.
    pub const NEG_ZERO: u8     = 0b1_0010;
    /// Positive zero.
    pub const POS_ZERO: u8     = 0b0_0010;
    /// Positive denormalized number.
    pub const POS_DENORMAL: u8 = 0b1_0100;
    /// Positive normalized number.
    pub const POS_NORMAL: u8   = 0b0_0100;
    /// Positive infinity.
    pub const POS_INF: u8      = 0b0_0101;
}

/// Rounding mode selected by the two-bit FPSCR[RN] field.
///
/// Variants are listed in RN encoding order; `rounding_mode` performs the
/// decode from the raw register value.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RoundingMode {
    /// Round to nearest, ties to even.
    NearestEven,    // RN=00
    /// Round toward zero (truncate).
    TowardZero,     // RN=01
    /// Round toward +infinity.
    TowardPosInf,   // RN=10
    /// Round toward -infinity.
    TowardNegInf,   // RN=11
}

/// Decode FPSCR[RN] (the low two bits of `ctx.fpscr`) into a [`RoundingMode`].
pub fn rounding_mode(ctx: &PpcContext) -> RoundingMode {
    // Indexed by the raw RN value: 0b00, 0b01, 0b10, 0b11.
    const BY_RN: [RoundingMode; 4] = [
        RoundingMode::NearestEven,
        RoundingMode::TowardZero,
        RoundingMode::TowardPosInf,
        RoundingMode::TowardNegInf,
    ];
    let rn = (ctx.fpscr & RN_MASK) as usize;
    BY_RN[rn]
}

/// Classify any f64 (NaN, infinity, zero, subnormal or normal) into its
/// 5-bit FPRF code from the [`fprf`] table.
///
/// Every NaN is reported as `fprf::QNAN`; for all other classes the sign bit
/// selects between the negative and positive variant of the code.
pub fn classify_fprf(v: f64) -> u8 {
    use std::num::FpCategory;

    let (when_negative, when_positive) = match v.classify() {
        // NaN has a single code regardless of sign.
        FpCategory::Nan => return fprf::QNAN,
        FpCategory::Infinite => (fprf::NEG_INF, fprf::POS_INF),
        FpCategory::Zero => (fprf::NEG_ZERO, fprf::POS_ZERO),
        FpCategory::Subnormal => (fprf::NEG_DENORMAL, fprf::POS_DENORMAL),
        FpCategory::Normal => (fprf::NEG_NORMAL, fprf::POS_NORMAL),
    };
    if v.is_sign_negative() {
        when_negative
    } else {
        when_positive
    }
}

/// Store `code` in the FPRF field of FPSCR, leaving every other bit as is.
///
/// Only the low five bits of `code` are used.
pub fn set_fprf(ctx: &mut PpcContext, code: u8) {
    // Shift into position first, then mask: equivalent to masking to five
    // bits before the shift because FPRF_MASK is exactly `0x1F << 12`.
    let field = (u32::from(code) << 12) & FPRF_MASK;
    let others = ctx.fpscr & !FPRF_MASK;
    ctx.fpscr = others | field;
}

/// OR `bits` into FPSCR and keep the two summary bits consistent:
///
/// * `FX` is latched when at least one of OX, UX, ZX, XX or a VX* cause bit
///   goes from 0 to 1 as a result of this call. Bits that were already set
///   do not re-trigger it.
/// * `VX` is set whenever any VX* cause bit is set afterwards. This function
///   never clears `VX`.
pub fn set_exception(ctx: &mut PpcContext, bits: u32) {
    // Exception bits whose 0 -> 1 transition raises FX.
    const FX_SOURCES: u32 = OX | UX | ZX | XX | VX_ALL;

    let before = ctx.fpscr;
    let mut after = before | bits;

    let newly_raised = bits & !before & FX_SOURCES;
    if newly_raised != 0 {
        after |= FX;
    }
    if after & VX_ALL != 0 {
        after |= VX;
    }

    ctx.fpscr = after;
}

/// Inspect the operands of a floating-point add/subtract and raise the
/// matching invalid-operation bits. Returns `true` if any were raised (the
/// caller may want to write a default QNaN result).
///
/// Detected cases:
///   * either operand is an SNaN → VXSNAN
///   * both operands infinite and the operation amounts to inf - inf → VXISI
///     - `sub == false` (a + b): the infinities have opposite signs
///     - `sub == true`  (a - b): the infinities have the same sign
pub fn check_invalid_add(ctx: &mut PpcContext, a: f64, b: f64, sub: bool) -> bool {
    let snan_bits = if is_snan(a) || is_snan(b) { VXSNAN } else { 0 };

    // Magnitudes cancel exactly when "signs match" agrees with "is a subtract".
    let signs_match = a.is_sign_negative() == b.is_sign_negative();
    let inf_minus_inf = a.is_infinite() && b.is_infinite() && signs_match == sub;
    let isi_bits = if inf_minus_inf { VXISI } else { 0 };

    let raised = snan_bits | isi_bits;
    if raised == 0 {
        return false;
    }
    set_exception(ctx, raised);
    true
}

/// Invalid-operation check for the add/sub step of a fused multiply-add
/// `(a * c) ± b`.
///
/// Per PPCBUG-202+203: callers used to pass the already-rounded `a*c` to
/// `check_invalid_add`, which can spuriously raise or miss VXISI in extreme
/// cases. Here the sign and infinity status of the exact product are derived
/// from `a` and `c` themselves.
///
/// Raises VXSNAN if any of the three operands is an SNaN, and VXISI when the
/// product and `b` are both infinite and
///   - `sub == false` (add): they have opposite signs
///   - `sub == true`  (sub): they have the same sign
///
/// Returns `true` if any bit was raised.
pub fn check_invalid_fma_add(ctx: &mut PpcContext, a: f64, c: f64, b: f64, sub: bool) -> bool {
    let mut raised = 0u32;

    if [a, c, b].iter().any(|&x| is_snan(x)) {
        raised |= VXSNAN;
    }

    // The exact product is infinite when at least one factor is infinite and
    // neither factor is NaN or zero (0 * inf is not an infinity).
    let neither_nan = !a.is_nan() && !c.is_nan();
    let neither_zero = a != 0.0 && c != 0.0;
    let some_infinite = a.is_infinite() || c.is_infinite();
    let product_is_inf = some_infinite && neither_zero && neither_nan;

    if product_is_inf && b.is_infinite() {
        let product_negative = a.is_sign_negative() ^ c.is_sign_negative();
        let signs_match = product_negative == b.is_sign_negative();
        if signs_match == sub {
            raised |= VXISI;
        }
    }

    if raised == 0 {
        return false;
    }
    set_exception(ctx, raised);
    true
}

/// Inspect the operands of a floating-point multiply and raise the matching
/// invalid-operation bits. Returns `true` if any were raised.
///
/// Detected cases:
///   * either operand is an SNaN → VXSNAN
///   * 0 * infinity (in either order) → VXIMZ
pub fn check_invalid_mul(ctx: &mut PpcContext, a: f64, b: f64) -> bool {
    let is_zero_times_inf = |zero: f64, inf: f64| zero == 0.0 && inf.is_infinite();

    let snan_bits = if is_snan(a) || is_snan(b) { VXSNAN } else { 0 };
    let imz_bits = if is_zero_times_inf(a, b) || is_zero_times_inf(b, a) {
        VXIMZ
    } else {
        0
    };

    let raised = snan_bits | imz_bits;
    if raised == 0 {
        return false;
    }
    set_exception(ctx, raised);
    true
}

/// Inspect the operands of a floating-point divide `a / b` and raise the
/// matching invalid-operation bits. Returns `true` if any were raised.
///
/// Detected cases:
///   * either operand is an SNaN → VXSNAN
///   * 0 / 0 → VXZDZ
///   * infinity / infinity → VXIDI
pub fn check_invalid_div(ctx: &mut PpcContext, a: f64, b: f64) -> bool {
    let checks = [
        (is_snan(a) || is_snan(b), VXSNAN),
        (a == 0.0 && b == 0.0, VXZDZ),
        (a.is_infinite() && b.is_infinite(), VXIDI),
    ];
    let raised = checks
        .iter()
        .filter(|(hit, _)| *hit)
        .fold(0u32, |acc, (_, bit)| acc | bit);

    if raised == 0 {
        return false;
    }
    set_exception(ctx, raised);
    true
}

/// Raise ZX for a true divide-by-zero: a finite, nonzero dividend `a` over a
/// zero divisor `b`. No VX* bit is touched here, and 0/0, inf/0 and NaN/0
/// leave ZX alone.
pub fn check_zero_divide(ctx: &mut PpcContext, a: f64, b: f64) {
    let dividend_finite_nonzero = a.is_finite() && a != 0.0;
    if dividend_finite_nonzero && b == 0.0 {
        set_exception(ctx, ZX);
    }
}

/// Post-op bookkeeping: raise OX/UX based on the result and then refresh FPRF.
///
/// * OX is raised when `result` is infinite and `inputs_were_finite` is true;
///   the flag lets callers suppress OX when the infinity merely propagated
///   from an infinite input.
/// * UX is raised when `result` is subnormal.
/// * FPRF is always rewritten with the class of `result`.
///
/// XX and FI are not touched by this function.
pub fn update_after_op(ctx: &mut PpcContext, result: f64, inputs_were_finite: bool) {
    let overflow = if inputs_were_finite && result.is_infinite() { OX } else { 0 };
    let underflow = if result.is_subnormal() { UX } else { 0 };

    let raised = overflow | underflow;
    if raised != 0 {
        set_exception(ctx, raised);
    }

    let class = classify_fprf(result);
    set_fprf(ctx, class);
}

/// Report whether `x` is a signalling NaN.
///
/// In IEEE 754-2008 binary64 the most significant mantissa bit (bit 51) is
/// the "quiet" bit: a NaN with that bit set is a QNaN, a NaN with it clear
/// is an SNaN. Non-NaN values always yield `false`.
pub fn is_snan(x: f64) -> bool {
    const QUIET_BIT: u64 = 1 << 51;
    // A NaN always has a nonzero mantissa, so only the quiet bit needs checking.
    x.is_nan() && (x.to_bits() & QUIET_BIT) == 0
}

/// Round an f64 to single precision honouring FPSCR[RN], returning the
/// rounded value widened back to f64.
///
/// For RN=0 (nearest-even, the PPC default) a plain `as f32` cast is used,
/// since that cast rounds to nearest with ties to even. The three directed
/// modes go through the `round_single_toward_*` helpers, which adjust the
/// nearest-even result by one ULP where needed.
pub fn round_to_single(ctx: &PpcContext, v: f64) -> f64 {
    let single: f32 = match rounding_mode(ctx) {
        RoundingMode::NearestEven => v as f32,
        RoundingMode::TowardZero => round_single_toward_zero(v),
        RoundingMode::TowardPosInf => round_single_toward_pos_inf(v),
        RoundingMode::TowardNegInf => round_single_toward_neg_inf(v),
    };
    f64::from(single)
}

/// Round an f64 to an i64 integer honouring FPSCR[RN]. Used by fctidx.
///
/// Out-of-range and non-finite inputs follow Rust's saturating float-to-int
/// cast in every rounding mode: values beyond the i64 range (including
/// ±infinity) clamp to `i64::MAX` / `i64::MIN`, and NaN yields 0. Raising
/// VXCVI for such inputs is left to the caller.
pub fn round_to_i64(ctx: &PpcContext, v: f64) -> i64 {
    // ±inf and NaN have no meaningful fractional part. Without this guard the
    // nearest-even path computes `frac = inf - inf = NaN`, falls into the tie
    // branch and evaluates `i64::MAX + 1` for +inf, which panics in debug
    // builds and wraps to `i64::MIN` in release builds.
    if !v.is_finite() {
        return v as i64;
    }

    match rounding_mode(ctx) {
        RoundingMode::NearestEven => {
            // PPCBUG-221: round-half-to-even (banker's rounding). The previous
            // tie-detection used `(diff - 0.5).abs() < f64::EPSILON` which
            // breaks for |v| > 2^52 (where v.trunc() == v exactly, giving diff
            // == 0). Use a fractional-part-only check that's exact for
            // |v| <= 2^52 and degenerates correctly above.
            let t = v.trunc();
            let whole = t as i64;
            // Step that moves `whole` one unit away from zero. A nonzero
            // fraction implies |v| < 2^52, so `whole + away` cannot overflow.
            let away: i64 = if v >= 0.0 { 1 } else { -1 };
            let fa = (v - t).abs();
            if fa > 0.5 {
                whole + away
            } else if fa < 0.5 {
                whole
            } else if whole & 1 == 0 {
                // Exact 0.5 tie and the truncated value is already even.
                whole
            } else {
                // Exact 0.5 tie — step away from zero to reach the even neighbour.
                whole + away
            }
        }
        RoundingMode::TowardZero => v.trunc() as i64,
        RoundingMode::TowardPosInf => v.ceil() as i64,
        RoundingMode::TowardNegInf => v.floor() as i64,
    }
}

/// Round an f64 to an i32 integer honouring FPSCR[RN]. Used by fctiwx.
///
/// The value is first rounded with `round_to_i64`; results outside the i32
/// range saturate to `i32::MIN` / `i32::MAX`.
pub fn round_to_i32(ctx: &PpcContext, v: f64) -> i32 {
    let wide = round_to_i64(ctx, v);
    let lo = i64::from(i32::MIN);
    let hi = i64::from(i32::MAX);
    // After clamping the value fits in i32, so the narrowing cast is lossless.
    wide.clamp(lo, hi) as i32
}

// ------ directed rounding helpers (f64 → f32) ------

/// Convert `v` to f32, rounding toward zero (truncation).
///
/// Starts from the default nearest-even conversion and corrects it:
/// * NaN and (signed) zero results are returned unchanged;
/// * a finite `v` whose nearest-even conversion overflowed to ±infinity is
///   pulled back to ±`f32::MAX`, because truncation never increases the
///   magnitude of a finite value; only an infinite input stays infinite;
/// * if the nearest-even result has a larger magnitude than `v`, it is
///   stepped one ULP toward zero.
fn round_single_toward_zero(v: f64) -> f32 {
    let nearest = v as f32;
    if nearest.is_nan() || nearest == 0.0 {
        return nearest;
    }
    if nearest.is_infinite() {
        return if v.is_infinite() {
            nearest
        } else {
            f32::MAX.copysign(nearest)
        };
    }
    if f64::from(nearest.abs()) <= v.abs() {
        return nearest;
    }
    // The sign bit is the MSB and `nearest` is finite and nonzero, so
    // subtracting 1 from the raw bits shrinks the magnitude by exactly one
    // ULP without borrowing into the sign bit.
    f32::from_bits(nearest.to_bits() - 1)
}

/// Convert `v` to f32, rounding toward +infinity.
///
/// Starts from the default nearest-even conversion and corrects it:
/// * NaN is returned unchanged;
/// * +infinity is kept (it is the correct upward result for any input above
///   the f32 range), but a finite `v` that overflowed to -infinity is pulled
///   back to `f32::MIN`, the most negative finite f32, since rounding upward
///   can never produce a value below the input;
/// * if the nearest-even result is below `v`, it is stepped one ULP upward.
fn round_single_toward_pos_inf(v: f64) -> f32 {
    let nearest = v as f32;
    if nearest.is_nan() {
        return nearest;
    }
    if nearest.is_infinite() {
        return if nearest.is_sign_negative() && v.is_finite() {
            f32::MIN
        } else {
            nearest
        };
    }
    if f64::from(nearest) >= v {
        return nearest;
    }
    // nearest < v — move one ULP in the + direction. For a negative value
    // that means decreasing the raw bit pattern (smaller magnitude).
    let bits = nearest.to_bits();
    let bumped = if nearest.is_sign_negative() { bits - 1 } else { bits + 1 };
    f32::from_bits(bumped)
}

/// Convert `v` to f32, rounding toward -infinity.
///
/// Starts from the default nearest-even conversion and corrects it:
/// * NaN is returned unchanged;
/// * -infinity is kept (it is the correct downward result for any input
///   below the f32 range), but a finite `v` that overflowed to +infinity is
///   pulled back to `f32::MAX`, since rounding downward can never produce a
///   value above the input;
/// * if the nearest-even result is above `v`, it is stepped one ULP downward.
fn round_single_toward_neg_inf(v: f64) -> f32 {
    let nearest = v as f32;
    if nearest.is_nan() {
        return nearest;
    }
    if nearest.is_infinite() {
        return if nearest.is_sign_positive() && v.is_finite() {
            f32::MAX
        } else {
            nearest
        };
    }
    if f64::from(nearest) <= v {
        return nearest;
    }
    // nearest > v — move one ULP in the - direction. For a negative value
    // that means increasing the raw bit pattern (larger magnitude).
    let bits = nearest.to_bits();
    let bumped = if nearest.is_sign_negative() { bits + 1 } else { bits - 1 };
    f32::from_bits(bumped)
}

/// Drop-in replacement for the old `update_cr1_from_fpscr`.
///
/// Mirrors the four FPSCR summary bits into the CR1 field:
/// FX → `lt`, FEX → `gt`, VX → `eq`, OX → `so`.
pub fn update_cr1(ctx: &mut PpcContext) {
    let fpscr = ctx.fpscr;
    let is_set = |mask: u32| fpscr & mask != 0;

    let cr1 = &mut ctx.cr[1];
    cr1.lt = is_set(FX);
    cr1.gt = is_set(FEX);
    cr1.eq = is_set(VX);
    cr1.so = is_set(OX);
}

/// Unit tests for the FPSCR helpers. Each test builds a fresh context so
/// FPSCR state never leaks between cases.
#[cfg(test)]
mod tests {
    use super::*;

    // Fresh context for each test; the default-RN test below relies on it
    // decoding as nearest-even.
    fn ctx() -> PpcContext { PpcContext::new() }

    // A freshly created context must decode as round-to-nearest-even.
    #[test]
    fn rn_default_is_nearest() {
        assert_eq!(rounding_mode(&ctx()), RoundingMode::NearestEven);
    }

    // Each non-zero RN encoding maps to its directed rounding mode.
    #[test]
    fn rn_bits_decode() {
        let mut c = ctx();
        c.fpscr = 0x1;
        assert_eq!(rounding_mode(&c), RoundingMode::TowardZero);
        c.fpscr = 0x2;
        assert_eq!(rounding_mode(&c), RoundingMode::TowardPosInf);
        c.fpscr = 0x3;
        assert_eq!(rounding_mode(&c), RoundingMode::TowardNegInf);
    }

    // One representative value per FPRF class (negative denormal excepted).
    #[test]
    fn fprf_classifies_correctly() {
        assert_eq!(classify_fprf(1.0), fprf::POS_NORMAL);
        assert_eq!(classify_fprf(-1.0), fprf::NEG_NORMAL);
        assert_eq!(classify_fprf(0.0), fprf::POS_ZERO);
        assert_eq!(classify_fprf(-0.0), fprf::NEG_ZERO);
        assert_eq!(classify_fprf(f64::INFINITY), fprf::POS_INF);
        assert_eq!(classify_fprf(f64::NEG_INFINITY), fprf::NEG_INF);
        assert_eq!(classify_fprf(f64::NAN), fprf::QNAN);
        // Bit pattern 1 is the smallest positive subnormal.
        assert_eq!(classify_fprf(f64::from_bits(1)), fprf::POS_DENORMAL);
    }

    // FX must latch again when an exception bit is raised after being cleared.
    #[test]
    fn fx_is_sticky_on_new_exception() {
        let mut c = ctx();
        set_exception(&mut c, OX);
        assert_ne!(c.fpscr & FX, 0);
        // Clear FX/OX manually.
        c.fpscr &= !(FX | OX);
        // Re-set OX; FX should re-latch.
        set_exception(&mut c, OX);
        assert_ne!(c.fpscr & FX, 0);
    }

    // Raising any VX* cause bit must also set the VX summary bit.
    #[test]
    fn vx_summary_set_on_any_vx_bit() {
        let mut c = ctx();
        set_exception(&mut c, VXSNAN);
        assert_ne!(c.fpscr & VX, 0);
        assert_ne!(c.fpscr & VXSNAN, 0);
    }

    // A value exactly representable in f32 survives the round trip unchanged.
    #[test]
    fn round_to_single_nearest_is_identity_on_representable() {
        let c = ctx();
        assert_eq!(round_to_single(&c, 1.0_f64), 1.0_f64);
    }

    // Values far outside the i32 range saturate instead of wrapping.
    #[test]
    fn round_to_i32_clamps_out_of_range() {
        let c = ctx();
        assert_eq!(round_to_i32(&c, 1e20_f64), i32::MAX);
        assert_eq!(round_to_i32(&c, -1e20_f64), i32::MIN);
    }

    // Exact .5 ties go to the even neighbour, for both signs.
    #[test]
    fn round_to_i64_nearest_even_on_tie() {
        let c = ctx();
        assert_eq!(round_to_i64(&c, 0.5_f64), 0);
        assert_eq!(round_to_i64(&c, 1.5_f64), 2);
        assert_eq!(round_to_i64(&c, 2.5_f64), 2);
        assert_eq!(round_to_i64(&c, 3.5_f64), 4);
        assert_eq!(round_to_i64(&c, -0.5_f64), 0);
        assert_eq!(round_to_i64(&c, -1.5_f64), -2);
        assert_eq!(round_to_i64(&c, -2.5_f64), -2);
    }

    #[test]
    fn round_to_i64_non_tie_cases() {
        // PPCBUG-221 regression: non-tie fractions must round to nearest.
        let c = ctx();
        assert_eq!(round_to_i64(&c, 0.4_f64), 0);
        assert_eq!(round_to_i64(&c, 0.6_f64), 1);
        assert_eq!(round_to_i64(&c, -0.4_f64), 0);
        assert_eq!(round_to_i64(&c, -0.6_f64), -1);
    }

    #[test]
    fn round_to_i32_nearest_even_on_tie() {
        // PPCBUG-227: round_to_i32 inherits round_to_i64's tie semantics.
        let c = ctx();
        assert_eq!(round_to_i32(&c, 0.5_f64), 0);
        assert_eq!(round_to_i32(&c, 1.5_f64), 2);
        assert_eq!(round_to_i32(&c, 2.5_f64), 2);
        assert_eq!(round_to_i32(&c, -1.5_f64), -2);
    }

    // (+inf) - (+inf) is an invalid operation and must raise VXISI.
    #[test]
    fn check_invalid_add_detects_inf_minus_inf() {
        let mut c = ctx();
        assert!(check_invalid_add(&mut c, f64::INFINITY, f64::INFINITY, true));
        assert_ne!(c.fpscr & VXISI, 0);
    }

    // 0 / 0 is an invalid operation and must raise VXZDZ.
    #[test]
    fn check_invalid_div_detects_zero_over_zero() {
        let mut c = ctx();
        assert!(check_invalid_div(&mut c, 0.0, 0.0));
        assert_ne!(c.fpscr & VXZDZ, 0);
    }

    #[test]
    fn snan_detection() {
        // SNaN in binary64: sign=0, exp=all-ones, mantissa nonzero with bit 51 clear.
        let snan = f64::from_bits(0x7FF0_0000_0000_0001);
        assert!(is_snan(snan));
        // The standard NaN constant has bit 51 set, so it must be reported as quiet.
        assert!(!is_snan(f64::NAN));
    }
}