xenia-cpu: VMX128, FPSCR, decoder split, scheduler, decode/block caches

Split the monolithic interpreter into cohesive modules: dedicated decoder (decoder.rs) producing 8-byte DecodedInstr; opcode tables (opcode.rs); explicit traps (trap.rs); FPSCR helpers (fpscr.rs); overflow/carry helpers (overflow.rs); a 4 KiB-page-versioned decode cache and basic-block cache (block_cache.rs); and a full VMX/VMX128 implementation (vmx.rs) covering AltiVec + Xenon's 128-bit extensions. Add the parallel-execution substrate behind --parallel: a 7-party phaser (phaser.rs) for round-based barrier sync, ReservationTable (reservation.rs) for guest LL/SC, and the per-HW-thread scheduler core (scheduler.rs) that owns ThreadRefs, runqueues, and pending IRQs. Disassembler is now the single source of truth: disasm.rs gains the full base + extended + VMX128 mnemonic set, with golden JSON fixtures and a disasm_goldens test suite. Add a criterion-style interpreter bench. context.rs grows the per-thread state the new modules need (reservation slot, FPSCR, vector regs). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 16:27:43 +02:00
parent e9b2b57a44
commit c36cca14f9
20 changed files with 12284 additions and 458 deletions
--- a/crates/xenia-cpu/src/fpscr.rs
+++ b/crates/xenia-cpu/src/fpscr.rs
@@ -0,0 +1,384 @@
+//! FPSCR (Floating-Point Status and Control Register) maintenance.
+//!
+//! Scope per project plan: rounding modes honoured, plus the exception bits
+//! games actually read (FX, FEX, VX, OX, UX, ZX, XX, FI, FPRF). Enabled-
+//! exception dispatch (FE[0,1], VE/OE/UE/ZE/XE) is *not* modelled — games
+//! running on Xenon almost never take FP traps.
+//!
+//! Bit layout (PowerISA, MSB-0 numbering; stored in a u32 with bit 31 = MSB):
+//!
+//! | PPC bit | u32 mask                | Name        |
+//! |---------|-------------------------|-------------|
+//! | 0       | `1<<31`                 | FX          |
+//! | 1       | `1<<30`                 | FEX         |
+//! | 2       | `1<<29`                 | VX (summary)|
+//! | 3       | `1<<28`                 | OX          |
+//! | 4       | `1<<27`                 | UX          |
+//! | 5       | `1<<26`                 | ZX          |
+//! | 6       | `1<<25`                 | XX          |
+//! | 7       | `1<<24`                 | VXSNAN      |
+//! | 8       | `1<<23`                 | VXISI       |
+//! | 9       | `1<<22`                 | VXIDI       |
+//! | 10      | `1<<21`                 | VXZDZ       |
+//! | 11      | `1<<20`                 | VXIMZ       |
+//! | 12      | `1<<19`                 | VXVC        |
+//! | 13      | `1<<18`                 | FR          |
+//! | 14      | `1<<17`                 | FI          |
+//! | 15..19  | `0x1F<<12`              | FPRF (5 bits)|
+//! | 21      | `1<<10`                 | VXSOFT      |
+//! | 22      | `1<<9`                  | VXSQRT      |
+//! | 23      | `1<<8`                  | VXCVI       |
+//! | 30..31  | `0x3`                   | RN (2 bits) |
+
+use crate::context::PpcContext;
+
+/// Mask for the FPSCR bit at PowerISA (MSB-0) position `n`, where position 0
+/// is the most significant bit of the 32-bit register.
+const fn ppc_bit(n: u32) -> u32 {
+    1 << (31 - n)
+}
+
+/// Exception summary (PPC bit 0).
+pub const FX: u32 = ppc_bit(0);
+/// Enabled-exception summary (PPC bit 1).
+pub const FEX: u32 = ppc_bit(1);
+/// Invalid-operation summary (PPC bit 2).
+pub const VX: u32 = ppc_bit(2);
+/// Overflow exception (PPC bit 3).
+pub const OX: u32 = ppc_bit(3);
+/// Underflow exception (PPC bit 4).
+pub const UX: u32 = ppc_bit(4);
+/// Zero-divide exception (PPC bit 5).
+pub const ZX: u32 = ppc_bit(5);
+/// Inexact exception (PPC bit 6).
+pub const XX: u32 = ppc_bit(6);
+/// Invalid operation: signalling NaN operand (PPC bit 7).
+pub const VXSNAN: u32 = ppc_bit(7);
+/// Invalid operation: infinity - infinity (PPC bit 8).
+pub const VXISI: u32 = ppc_bit(8);
+/// Invalid operation: infinity / infinity (PPC bit 9).
+pub const VXIDI: u32 = ppc_bit(9);
+/// Invalid operation: 0 / 0 (PPC bit 10).
+pub const VXZDZ: u32 = ppc_bit(10);
+/// Invalid operation: infinity * 0 (PPC bit 11).
+pub const VXIMZ: u32 = ppc_bit(11);
+/// Invalid operation: invalid compare (PPC bit 12).
+pub const VXVC: u32 = ppc_bit(12);
+/// Fraction rounded (PPC bit 13).
+pub const FR: u32 = ppc_bit(13);
+/// Fraction inexact (PPC bit 14).
+pub const FI: u32 = ppc_bit(14);
+/// Result-class field FPRF, five bits wide (PPC bits 15..19).
+pub const FPRF_MASK: u32 = 0x1F << 12;
+/// Invalid operation: software request (PPC bit 21).
+pub const VXSOFT: u32 = ppc_bit(21);
+/// Invalid operation: invalid square root (PPC bit 22).
+pub const VXSQRT: u32 = ppc_bit(22);
+/// Invalid operation: invalid integer conversion (PPC bit 23).
+pub const VXCVI: u32 = ppc_bit(23);
+/// Rounding-control field RN, two bits wide (PPC bits 30..31).
+pub const RN_MASK: u32 = 0x3;
+
+/// Every individual VX* cause bit OR-ed together; the VX summary bit is
+/// recomputed from this set.
+pub const VX_ALL: u32 =
+    VXSNAN | VXISI | VXIDI | VXZDZ | VXIMZ | VXVC | VXSOFT | VXSQRT | VXCVI;
+
+/// FPRF classification codes (5-bit, placed in FPSCR bits 15..19).
+///
+/// The high bit is the class descriptor ("C" in PowerISA); the low four bits
+/// are (FL, FG, FE, FU) = (less, greater, equal, unordered). As the values
+/// below show, C is set for QNaN, ±denormal and −zero, and clear for
+/// ±normal, ±infinity and +zero.
+pub mod fprf {
+    /// Quiet NaN: C set, FU set.
+    pub const QNAN: u8         = 0b1_0001;
+    /// −infinity: FL and FU set.
+    pub const NEG_INF: u8      = 0b0_1001;
+    /// −normalized: FL set.
+    pub const NEG_NORMAL: u8   = 0b0_1000;
+    /// −denormalized: C and FL set.
+    pub const NEG_DENORMAL: u8 = 0b1_1000;
+    /// −zero: C and FE set.
+    pub const NEG_ZERO: u8     = 0b1_0010;
+    /// +zero: FE set.
+    pub const POS_ZERO: u8     = 0b0_0010;
+    /// +denormalized: C and FG set.
+    pub const POS_DENORMAL: u8 = 0b1_0100;
+    /// +normalized: FG set.
+    pub const POS_NORMAL: u8   = 0b0_0100;
+    /// +infinity: FG and FU set.
+    pub const POS_INF: u8      = 0b0_0101;
+}
+
+/// Rounding direction selected by the two-bit FPSCR[RN] field; see
+/// `rounding_mode` for the decoding from the raw register value.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum RoundingMode {
+    /// Round to the nearest value, ties to even.
+    NearestEven,    // RN=00
+    /// Round toward zero (truncate).
+    TowardZero,     // RN=01
+    /// Round toward +infinity.
+    TowardPosInf,   // RN=10
+    /// Round toward −infinity.
+    TowardNegInf,   // RN=11
+}
+
+/// Decode the rounding mode currently selected by FPSCR[RN].
+///
+/// Only the bits covered by `RN_MASK` are inspected, so every FPSCR value
+/// maps to exactly one [`RoundingMode`].
+pub fn rounding_mode(ctx: &PpcContext) -> RoundingMode {
+    // Indexed by the raw RN field value (0..=3).
+    const BY_RN: [RoundingMode; 4] = [
+        RoundingMode::NearestEven,
+        RoundingMode::TowardZero,
+        RoundingMode::TowardPosInf,
+        RoundingMode::TowardNegInf,
+    ];
+    BY_RN[(ctx.fpscr & RN_MASK) as usize]
+}
+
+/// Map an f64 onto its 5-bit FPRF class code (see the `fprf` constants).
+///
+/// Every input is covered: any NaN reports `fprf::QNAN`, and infinities,
+/// zeros, subnormals and normals report the signed code for their class.
+pub fn classify_fprf(v: f64) -> u8 {
+    use std::num::FpCategory;
+
+    let negative = v.is_sign_negative();
+    match (v.classify(), negative) {
+        (FpCategory::Nan, _) => fprf::QNAN,
+        (FpCategory::Infinite, true) => fprf::NEG_INF,
+        (FpCategory::Infinite, false) => fprf::POS_INF,
+        (FpCategory::Zero, true) => fprf::NEG_ZERO,
+        (FpCategory::Zero, false) => fprf::POS_ZERO,
+        (FpCategory::Subnormal, true) => fprf::NEG_DENORMAL,
+        (FpCategory::Subnormal, false) => fprf::POS_DENORMAL,
+        (FpCategory::Normal, true) => fprf::NEG_NORMAL,
+        (FpCategory::Normal, false) => fprf::POS_NORMAL,
+    }
+}
+
+/// Store `code` in the FPRF field of FPSCR, leaving every other bit untouched.
+///
+/// Only the low five bits of `code` are used.
+pub fn set_fprf(ctx: &mut PpcContext, code: u8) {
+    let field = (u32::from(code) & 0x1F) << 12;
+    ctx.fpscr &= !FPRF_MASK;
+    ctx.fpscr |= field;
+}
+
+/// OR `bits` into FPSCR and keep the two summary bits consistent.
+///
+/// * FX is latched when at least one of OX, UX, ZX, XX or a VX* cause bit
+///   goes from 0 to 1 as a result of this call; re-raising a bit that was
+///   already set does not touch FX.
+/// * VX is set whenever any VX* cause bit is set after the update.
+pub fn set_exception(ctx: &mut PpcContext, bits: u32) {
+    // Exception bits whose 0 -> 1 transition latches FX.
+    const LATCHING: u32 = OX | UX | ZX | XX | VX_ALL;
+
+    let before = ctx.fpscr;
+    let mut after = before | bits;
+
+    let newly_raised = after & !before & LATCHING;
+    if newly_raised != 0 {
+        after |= FX;
+    }
+    if after & VX_ALL != 0 {
+        after |= VX;
+    }
+
+    ctx.fpscr = after;
+}
+
+/// Inspect the operands of a floating-point add (`sub == false`) or subtract
+/// (`sub == true`) and record any invalid-operation cause in FPSCR.
+///
+/// Detected cases:
+///   * either operand is a signalling NaN → VXSNAN
+///   * the operation amounts to infinity − infinity → VXISI
+///     (add: infinities of opposite sign; sub: infinities of the same sign)
+///
+/// Returns true if any cause was recorded (the caller may then want to write
+/// a default QNaN result).
+pub fn check_invalid_add(ctx: &mut PpcContext, a: f64, b: f64, sub: bool) -> bool {
+    let snan_operand = is_snan(a) || is_snan(b);
+
+    // With two infinities the magnitudes cancel exactly when the signs match
+    // for a subtract, or differ for an add.
+    let same_sign = a.is_sign_positive() == b.is_sign_positive();
+    let inf_minus_inf = a.is_infinite() && b.is_infinite() && same_sign == sub;
+
+    let mut raised = 0u32;
+    if snan_operand {
+        raised |= VXSNAN;
+    }
+    if inf_minus_inf {
+        raised |= VXISI;
+    }
+
+    let invalid = raised != 0;
+    if invalid {
+        set_exception(ctx, raised);
+    }
+    invalid
+}
+
+/// Inspect the operands of a floating-point multiply and record any
+/// invalid-operation cause in FPSCR.
+///
+/// Detected cases:
+///   * either operand is a signalling NaN → VXSNAN
+///   * zero × infinity (in either operand order) → VXIMZ
+///
+/// Returns true if any cause was recorded.
+pub fn check_invalid_mul(ctx: &mut PpcContext, a: f64, b: f64) -> bool {
+    let snan_operand = is_snan(a) || is_snan(b);
+    let zero_by_inf = (a == 0.0 && b.is_infinite()) || (b == 0.0 && a.is_infinite());
+
+    let mut raised = 0u32;
+    if snan_operand {
+        raised |= VXSNAN;
+    }
+    if zero_by_inf {
+        raised |= VXIMZ;
+    }
+
+    let invalid = raised != 0;
+    if invalid {
+        set_exception(ctx, raised);
+    }
+    invalid
+}
+
+/// Inspect the operands of a floating-point divide (`a / b`) and record any
+/// invalid-operation cause in FPSCR.
+///
+/// Detected cases:
+///   * either operand is a signalling NaN → VXSNAN
+///   * 0 / 0 → VXZDZ
+///   * infinity / infinity → VXIDI
+///
+/// Returns true if any cause was recorded.
+pub fn check_invalid_div(ctx: &mut PpcContext, a: f64, b: f64) -> bool {
+    let snan_operand = is_snan(a) || is_snan(b);
+    let zero_by_zero = a == 0.0 && b == 0.0;
+    let inf_by_inf = a.is_infinite() && b.is_infinite();
+
+    let mut raised = 0u32;
+    if snan_operand {
+        raised |= VXSNAN;
+    }
+    if zero_by_zero {
+        raised |= VXZDZ;
+    }
+    if inf_by_inf {
+        raised |= VXIDI;
+    }
+
+    let invalid = raised != 0;
+    if invalid {
+        set_exception(ctx, raised);
+    }
+    invalid
+}
+
+/// Raise ZX when `a / b` divides a finite, nonzero `a` by zero.
+///
+/// NaN, infinite and zero dividends are deliberately excluded; no VX* bit is
+/// touched here.
+pub fn check_zero_divide(ctx: &mut PpcContext, a: f64, b: f64) {
+    let divisor_is_zero = b == 0.0;
+    // `is_finite` rules out both NaN and ±infinity.
+    let dividend_is_finite_nonzero = a != 0.0 && a.is_finite();
+    if divisor_is_zero && dividend_is_finite_nonzero {
+        set_exception(ctx, ZX);
+    }
+}
+
+/// Post-op bookkeeping: flag overflow/underflow from the result and refresh FPRF.
+///
+/// * OX is raised when `result` is infinite and `inputs_were_finite` is true;
+///   the flag lets callers suppress OX when the infinity merely propagated
+///   from an infinite input.
+/// * UX is raised when `result` is subnormal.
+/// * FPRF is always rewritten with the class of `result`.
+pub fn update_after_op(ctx: &mut PpcContext, result: f64, inputs_were_finite: bool) {
+    let overflowed = result.is_infinite() && inputs_were_finite;
+    let underflowed = result.is_subnormal();
+
+    let raised = match (overflowed, underflowed) {
+        (true, true) => OX | UX,
+        (true, false) => OX,
+        (false, true) => UX,
+        (false, false) => 0,
+    };
+    if raised != 0 {
+        set_exception(ctx, raised);
+    }
+
+    set_fprf(ctx, classify_fprf(result));
+}
+
+/// Report whether `x` is a signalling NaN.
+///
+/// In IEEE 754-2008 binary64 the most significant mantissa bit (bit 51) is
+/// the "quiet" bit: a NaN with that bit clear is signalling, one with it set
+/// is quiet. Non-NaN values always yield false.
+pub fn is_snan(x: f64) -> bool {
+    const QUIET_BIT: u64 = 1 << 51;
+    // A NaN always has a nonzero mantissa, so only the quiet bit needs checking.
+    x.is_nan() && (x.to_bits() & QUIET_BIT) == 0
+}
+
+/// Round `v` to single precision according to FPSCR[RN] and widen the result
+/// back to f64.
+///
+/// Nearest-even uses the plain `f64 → f32` cast; the three directed modes go
+/// through the `round_single_toward_*` helpers.
+pub fn round_to_single(ctx: &PpcContext, v: f64) -> f64 {
+    let single: f32 = match rounding_mode(ctx) {
+        RoundingMode::NearestEven => v as f32,
+        RoundingMode::TowardZero => round_single_toward_zero(v),
+        RoundingMode::TowardPosInf => round_single_toward_pos_inf(v),
+        RoundingMode::TowardNegInf => round_single_toward_neg_inf(v),
+    };
+    f64::from(single)
+}
+
+/// Round an f64 to an i64 integer honouring FPSCR[RN]. Used by fctidx.
+///
+/// * Values outside the i64 range saturate to `i64::MIN` / `i64::MAX`.
+/// * NaN yields `i64::MIN` (0x8000_0000_0000_0000), the value the PowerPC
+///   conversion instructions write for a NaN operand; a bare `as i64` cast
+///   would produce 0 instead.
+/// * In nearest-even mode only *exact* halves are treated as ties. Values
+///   that merely lie within a few ULPs of a half (e.g. 0.5000000000000001)
+///   round to their true nearest integer.
+///
+/// No FPSCR exception bits are updated here; `ctx` is only read for RN.
+pub fn round_to_i64(ctx: &PpcContext, v: f64) -> i64 {
+    if v.is_nan() {
+        return i64::MIN;
+    }
+
+    let rounded = match rounding_mode(ctx) {
+        RoundingMode::NearestEven => {
+            // `v - trunc(v)` is exact for every finite f64, so comparing it
+            // against 0.5 with `==` identifies ties without any tolerance.
+            if (v - v.trunc()).abs() == 0.5 {
+                // f64::round sends ties away from zero. Halving, rounding and
+                // doubling lands on the even neighbour instead:
+                // 2.5 -> 2, 3.5 -> 4, -2.5 -> -2.
+                2.0 * (v / 2.0).round()
+            } else {
+                v.round()
+            }
+        }
+        RoundingMode::TowardZero => v.trunc(),
+        RoundingMode::TowardPosInf => v.ceil(),
+        RoundingMode::TowardNegInf => v.floor(),
+    };
+
+    // Float-to-int `as` casts saturate at the integer type's bounds.
+    rounded as i64
+}
+
+/// Round an f64 to an i32 integer honouring FPSCR[RN]. Used by fctiwx.
+///
+/// Delegates to `round_to_i64` and saturates the result to the i32 range.
+pub fn round_to_i32(ctx: &PpcContext, v: f64) -> i32 {
+    let wide = round_to_i64(ctx, v);
+    // Out-of-range values pin to whichever i32 bound they overshot.
+    i32::try_from(wide).unwrap_or(if wide < 0 { i32::MIN } else { i32::MAX })
+}
+
+// ------ directed rounding helpers (f64 → f32) ------
+
+/// Convert `v` to f32 rounding toward zero (truncation).
+///
+/// Starts from the default round-to-nearest-even cast and steps one ULP back
+/// toward zero when that cast rounded the magnitude up. A finite `v` whose
+/// magnitude exceeds the f32 range yields ±`f32::MAX` rather than ±infinity,
+/// because truncation never increases magnitude. NaN, ±infinity and values
+/// that round to ±0 are returned as the cast produced them.
+fn round_single_toward_zero(v: f64) -> f32 {
+    let nearest = v as f32;
+    if nearest.is_nan() || nearest == 0.0 {
+        return nearest;
+    }
+    if nearest.is_infinite() {
+        // Only a genuinely infinite input may stay infinite; a finite input
+        // that overflowed the cast is clamped to the largest finite value.
+        return if v.is_infinite() { nearest } else { f32::MAX.copysign(nearest) };
+    }
+    if f64::from(nearest.abs()) <= v.abs() {
+        return nearest;
+    }
+    // IEEE floats are sign-magnitude, so decrementing the bit pattern shrinks
+    // the magnitude by one ULP for either sign. `nearest` is nonzero here, so
+    // the subtraction cannot borrow into the sign bit or wrap.
+    f32::from_bits(nearest.to_bits() - 1)
+}
+
+/// Convert `v` to f32 rounding toward +infinity.
+///
+/// Starts from the default round-to-nearest-even cast and steps one ULP
+/// upward when that cast landed below `v`. A finite negative `v` below the
+/// f32 range yields `f32::MIN` (the most negative finite value) rather than
+/// −infinity, because rounding upward can never move below `v`. NaN and
+/// genuine infinities pass through unchanged.
+fn round_single_toward_pos_inf(v: f64) -> f32 {
+    let nearest = v as f32;
+    if nearest.is_nan() {
+        return nearest;
+    }
+    if nearest.is_infinite() {
+        // +infinity is a valid upward result; −infinity is only valid when
+        // the input itself was −infinity.
+        return if nearest.is_sign_negative() && v.is_finite() { f32::MIN } else { nearest };
+    }
+    if f64::from(nearest) >= v {
+        return nearest;
+    }
+    // nearest < v: move one ULP upward. For negative values that means a
+    // smaller magnitude (bits − 1); otherwise a larger one (bits + 1).
+    // A −0.0 result never reaches this point because −0.0 >= v for every
+    // input that rounds to it.
+    let bits = nearest.to_bits();
+    f32::from_bits(if nearest.is_sign_negative() { bits - 1 } else { bits + 1 })
+}
+
+/// Convert `v` to f32 rounding toward −infinity.
+///
+/// Starts from the default round-to-nearest-even cast and steps one ULP
+/// downward when that cast landed above `v`. A finite positive `v` above the
+/// f32 range yields `f32::MAX` rather than +infinity, because rounding
+/// downward can never move above `v`. NaN and genuine infinities pass
+/// through unchanged.
+fn round_single_toward_neg_inf(v: f64) -> f32 {
+    let nearest = v as f32;
+    if nearest.is_nan() {
+        return nearest;
+    }
+    if nearest.is_infinite() {
+        // −infinity is a valid downward result; +infinity is only valid when
+        // the input itself was +infinity.
+        return if nearest.is_sign_positive() && v.is_finite() { f32::MAX } else { nearest };
+    }
+    if f64::from(nearest) <= v {
+        return nearest;
+    }
+    // nearest > v: move one ULP downward. For negative values that means a
+    // larger magnitude (bits + 1); otherwise a smaller one (bits − 1).
+    // A +0.0 result never reaches this point because +0.0 <= v for every
+    // input that rounds to it.
+    let bits = nearest.to_bits();
+    f32::from_bits(if nearest.is_sign_negative() { bits + 1 } else { bits - 1 })
+}
+
+/// Mirror the four FPSCR summary/overflow bits into CR field 1. Drop-in
+/// replacement for the old `update_cr1_from_fpscr`.
+///
+/// Mapping: `lt` ← FX, `gt` ← FEX, `eq` ← VX, `so` ← OX.
+pub fn update_cr1(ctx: &mut PpcContext) {
+    let fpscr = ctx.fpscr;
+    let is_set = |mask: u32| fpscr & mask != 0;
+
+    let cr1 = &mut ctx.cr[1];
+    cr1.lt = is_set(FX);
+    cr1.gt = is_set(FEX);
+    cr1.eq = is_set(VX);
+    cr1.so = is_set(OX);
+}
+
+// Unit tests for the FPSCR helpers. Each test starts from a fresh context
+// built by `ctx()`.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Fresh context for each test, as produced by `PpcContext::new()`.
+    fn ctx() -> PpcContext { PpcContext::new() }
+
+    // A freshly constructed context decodes to round-to-nearest-even.
+    #[test]
+    fn rn_default_is_nearest() {
+        assert_eq!(rounding_mode(&ctx()), RoundingMode::NearestEven);
+    }
+
+    // RN values 1, 2 and 3 decode to the three directed rounding modes.
+    #[test]
+    fn rn_bits_decode() {
+        let mut c = ctx();
+        c.fpscr = 0x1;
+        assert_eq!(rounding_mode(&c), RoundingMode::TowardZero);
+        c.fpscr = 0x2;
+        assert_eq!(rounding_mode(&c), RoundingMode::TowardPosInf);
+        c.fpscr = 0x3;
+        assert_eq!(rounding_mode(&c), RoundingMode::TowardNegInf);
+    }
+
+    // One representative input per FPRF class (negative denormal is not covered).
+    #[test]
+    fn fprf_classifies_correctly() {
+        assert_eq!(classify_fprf(1.0), fprf::POS_NORMAL);
+        assert_eq!(classify_fprf(-1.0), fprf::NEG_NORMAL);
+        assert_eq!(classify_fprf(0.0), fprf::POS_ZERO);
+        assert_eq!(classify_fprf(-0.0), fprf::NEG_ZERO);
+        assert_eq!(classify_fprf(f64::INFINITY), fprf::POS_INF);
+        assert_eq!(classify_fprf(f64::NEG_INFINITY), fprf::NEG_INF);
+        assert_eq!(classify_fprf(f64::NAN), fprf::QNAN);
+        // Bit pattern 1 is the smallest positive subnormal.
+        assert_eq!(classify_fprf(f64::from_bits(1)), fprf::POS_DENORMAL);
+    }
+
+    // FX latches on a 0 -> 1 exception transition, and latches again after
+    // both FX and the exception bit have been cleared.
+    #[test]
+    fn fx_is_sticky_on_new_exception() {
+        let mut c = ctx();
+        set_exception(&mut c, OX);
+        assert_ne!(c.fpscr & FX, 0);
+        // Clear FX/OX manually.
+        c.fpscr &= !(FX | OX);
+        // Re-set OX; FX should re-latch.
+        set_exception(&mut c, OX);
+        assert_ne!(c.fpscr & FX, 0);
+    }
+
+    // Raising any VX* cause bit also raises the VX summary bit.
+    #[test]
+    fn vx_summary_set_on_any_vx_bit() {
+        let mut c = ctx();
+        set_exception(&mut c, VXSNAN);
+        assert_ne!(c.fpscr & VX, 0);
+        assert_ne!(c.fpscr & VXSNAN, 0);
+    }
+
+    // A value already representable in f32 survives the round trip unchanged.
+    #[test]
+    fn round_to_single_nearest_is_identity_on_representable() {
+        let c = ctx();
+        assert_eq!(round_to_single(&c, 1.0_f64), 1.0_f64);
+    }
+
+    // Magnitudes beyond the i32 range saturate to the nearest bound.
+    #[test]
+    fn round_to_i32_clamps_out_of_range() {
+        let c = ctx();
+        assert_eq!(round_to_i32(&c, 1e20_f64), i32::MAX);
+        assert_eq!(round_to_i32(&c, -1e20_f64), i32::MIN);
+    }
+
+    // Exact halves round to the even neighbour in the default mode.
+    #[test]
+    fn round_to_i64_nearest_even_on_tie() {
+        let c = ctx();
+        assert_eq!(round_to_i64(&c, 2.5_f64), 2);
+        assert_eq!(round_to_i64(&c, 3.5_f64), 4);
+        assert_eq!(round_to_i64(&c, -2.5_f64), -2);
+    }
+
+    // (+inf) - (+inf) is flagged as VXISI when `sub` is true.
+    #[test]
+    fn check_invalid_add_detects_inf_minus_inf() {
+        let mut c = ctx();
+        assert!(check_invalid_add(&mut c, f64::INFINITY, f64::INFINITY, true));
+        assert_ne!(c.fpscr & VXISI, 0);
+    }
+
+    // 0 / 0 is flagged as VXZDZ.
+    #[test]
+    fn check_invalid_div_detects_zero_over_zero() {
+        let mut c = ctx();
+        assert!(check_invalid_div(&mut c, 0.0, 0.0));
+        assert_ne!(c.fpscr & VXZDZ, 0);
+    }
+
+    // A NaN with mantissa bit 51 clear is signalling; `f64::NAN` is quiet.
+    #[test]
+    fn snan_detection() {
+        // SNaN in binary64: sign=0, exp=all-ones, mantissa nonzero with bit 51 clear.
+        let snan = f64::from_bits(0x7FF0_0000_0000_0001);
+        assert!(is_snan(snan));
+        assert!(!is_snan(f64::NAN));
+    }
+}