xenia-cpu: VMX128, FPSCR, decoder split, scheduler, decode/block caches

Split the monolithic interpreter into cohesive modules: dedicated
decoder (decoder.rs) producing 8-byte DecodedInstr; opcode tables
(opcode.rs); explicit traps (trap.rs); FPSCR helpers (fpscr.rs);
overflow/carry helpers (overflow.rs); a 4 KiB-page-versioned decode
cache and basic-block cache (block_cache.rs); and a full VMX/VMX128
implementation (vmx.rs) covering AltiVec + Xenon's 128-bit extensions.

Add the parallel-execution substrate behind --parallel: a 7-party
phaser (phaser.rs) for round-based barrier sync, ReservationTable
(reservation.rs) for guest LL/SC, and the per-HW-thread scheduler
core (scheduler.rs) that owns ThreadRefs, runqueues, and pending IRQs.

Disassembler is now the single source of truth: disasm.rs gains the
full base + extended + VMX128 mnemonic set, with golden JSON fixtures
and a disasm_goldens test suite. Add a criterion-style interpreter
bench. context.rs grows the per-thread state the new modules need
(reservation slot, FPSCR, vector regs).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-01 16:27:43 +02:00
parent e9b2b57a44
commit c36cca14f9
20 changed files with 12284 additions and 458 deletions

View File

@@ -0,0 +1,384 @@
//! FPSCR (Floating-Point Status and Control Register) maintenance.
//!
//! Scope per project plan: rounding modes honoured, plus the exception bits
//! games actually read (FX, FEX, VX, OX, UX, ZX, XX, FI, FPRF). Enabled-
//! exception dispatch (FE[0,1], VE/OE/UE/ZE/XE) is *not* modelled — games
//! running on Xenon almost never take FP traps.
//!
//! Bit layout (PowerISA, MSB-0 numbering; stored in a u32 with bit 31 = MSB):
//!
//! | PPC bit | u32 mask | Name |
//! |---------|-------------------------|-------------|
//! | 0 | `1<<31` | FX |
//! | 1 | `1<<30` | FEX |
//! | 2 | `1<<29` | VX (summary)|
//! | 3 | `1<<28` | OX |
//! | 4 | `1<<27` | UX |
//! | 5 | `1<<26` | ZX |
//! | 6 | `1<<25` | XX |
//! | 7 | `1<<24` | VXSNAN |
//! | 8 | `1<<23` | VXISI |
//! | 9 | `1<<22` | VXIDI |
//! | 10 | `1<<21` | VXZDZ |
//! | 11 | `1<<20` | VXIMZ |
//! | 12 | `1<<19` | VXVC |
//! | 13 | `1<<18` | FR |
//! | 14 | `1<<17` | FI |
//! | 15..19 | `0x1F<<12` | FPRF (5 bits)|
//! | 21 | `1<<10` | VXSOFT |
//! | 22 | `1<<9` | VXSQRT |
//! | 23 | `1<<8` | VXCVI |
//! | 30..31 | `0x3` | RN (2 bits) |
use crate::context::PpcContext;
/// Converts a PowerISA MSB-0 bit index (0 = most significant bit) into the
/// matching single-bit mask of the `u32` that holds FPSCR.
const fn msb0(bit: u32) -> u32 {
    1 << (31 - bit)
}

// Single-bit fields, named after their PowerISA mnemonics and listed in
// PPC bit order.
pub const FX: u32 = msb0(0);
pub const FEX: u32 = msb0(1);
pub const VX: u32 = msb0(2);
pub const OX: u32 = msb0(3);
pub const UX: u32 = msb0(4);
pub const ZX: u32 = msb0(5);
pub const XX: u32 = msb0(6);
pub const VXSNAN: u32 = msb0(7);
pub const VXISI: u32 = msb0(8);
pub const VXIDI: u32 = msb0(9);
pub const VXZDZ: u32 = msb0(10);
pub const VXIMZ: u32 = msb0(11);
pub const VXVC: u32 = msb0(12);
pub const FR: u32 = msb0(13);
pub const FI: u32 = msb0(14);
/// Five-bit FPRF field, PPC bits 15..19 (u32 bits 16..12).
pub const FPRF_MASK: u32 = 0x0001_F000;
pub const VXSOFT: u32 = msb0(21);
pub const VXSQRT: u32 = msb0(22);
pub const VXCVI: u32 = msb0(23);
/// Two-bit rounding-control field RN, PPC bits 30..31.
pub const RN_MASK: u32 = 0b11;

/// Every individual VX* cause bit OR-ed together; any of them being set
/// implies the VX summary bit.
pub const VX_ALL: u32 =
    VXSNAN | VXISI | VXIDI | VXZDZ | VXIMZ | VXVC | VXSOFT | VXSQRT | VXCVI;
/// Five-bit FPRF result-class codes (stored in FPSCR bits 15..19).
///
/// Each code is the class bit C followed by the four condition flags
/// (FL, FG, FE, FU) = (less, greater, equal, unordered). In the table below
/// C is set for QNaN, both denormals and negative zero; it is clear for the
/// normals, the infinities and positive zero.
pub mod fprf {
    const C: u8 = 0b1_0000;
    const FL: u8 = 0b0_1000;
    const FG: u8 = 0b0_0100;
    const FE: u8 = 0b0_0010;
    const FU: u8 = 0b0_0001;

    pub const QNAN: u8 = C | FU;
    pub const NEG_INF: u8 = FL | FU;
    pub const NEG_NORMAL: u8 = FL;
    pub const NEG_DENORMAL: u8 = C | FL;
    pub const NEG_ZERO: u8 = C | FE;
    pub const POS_ZERO: u8 = FE;
    pub const POS_DENORMAL: u8 = C | FG;
    pub const POS_NORMAL: u8 = FG;
    pub const POS_INF: u8 = FG | FU;
}
/// Rounding mode selected by the two-bit FPSCR[RN] field (see [`RN_MASK`]).
///
/// The trailing comment on each variant is the RN encoding that
/// [`rounding_mode`] maps to it.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RoundingMode {
NearestEven, // RN=00
TowardZero, // RN=01
TowardPosInf, // RN=10
TowardNegInf, // RN=11
}
/// Decode the FPSCR[RN] field of `ctx` into a [`RoundingMode`].
pub fn rounding_mode(ctx: &PpcContext) -> RoundingMode {
    // Indexed by the RN encoding; RN_MASK keeps only two bits, so the index
    // is always in 0..=3.
    const BY_RN: [RoundingMode; 4] = [
        RoundingMode::NearestEven,
        RoundingMode::TowardZero,
        RoundingMode::TowardPosInf,
        RoundingMode::TowardNegInf,
    ];
    BY_RN[(ctx.fpscr & RN_MASK) as usize]
}
/// Map any f64 (including NaN, infinities, zeros and subnormals) to its
/// 5-bit FPRF class code from [`fprf`].
///
/// Every NaN is reported as `fprf::QNAN` regardless of sign or payload.
pub fn classify_fprf(v: f64) -> u8 {
    use std::num::FpCategory as Class;

    let negative = v.is_sign_negative();
    match (v.classify(), negative) {
        (Class::Nan, _) => fprf::QNAN,
        (Class::Infinite, true) => fprf::NEG_INF,
        (Class::Infinite, false) => fprf::POS_INF,
        (Class::Zero, true) => fprf::NEG_ZERO,
        (Class::Zero, false) => fprf::POS_ZERO,
        (Class::Subnormal, true) => fprf::NEG_DENORMAL,
        (Class::Subnormal, false) => fprf::POS_DENORMAL,
        (Class::Normal, true) => fprf::NEG_NORMAL,
        (Class::Normal, false) => fprf::POS_NORMAL,
    }
}
/// Store the low five bits of `code` in the FPRF field of `ctx.fpscr`,
/// leaving every other FPSCR bit untouched.
pub fn set_fprf(ctx: &mut PpcContext, code: u8) {
    // FPRF occupies u32 bits 12..=16 (see FPRF_MASK).
    let field = (u32::from(code) & 0x1F) << 12;
    ctx.fpscr &= !FPRF_MASK;
    ctx.fpscr |= field;
}
/// OR `bits` into FPSCR and keep the two summary bits consistent.
///
/// * FX is latched whenever at least one of OX, UX, ZX, XX or a VX* cause
///   bit goes from 0 to 1 in this call.
/// * VX is set whenever any VX* cause bit is set after the update.
///
/// This function only ever sets bits; it never clears FX or VX.
pub fn set_exception(ctx: &mut PpcContext, bits: u32) {
    // Bits whose 0 -> 1 transition latches FX.
    const FX_SOURCES: u32 = OX | UX | ZX | XX | VX_ALL;

    let before = ctx.fpscr;
    let mut after = before | bits;

    let newly_raised = after & !before & FX_SOURCES;
    if newly_raised != 0 {
        after |= FX;
    }
    if after & VX_ALL != 0 {
        after |= VX;
    }

    ctx.fpscr = after;
}
/// Inspect the operands of a floating-point add (`sub == false`) or subtract
/// (`sub == true`) and raise the matching invalid-operation bits.
///
/// Detected cases:
///   * either operand is a signalling NaN            → VXSNAN
///   * both operands infinite and the operation is an
///     effective inf − inf (opposite signs for add,
///     equal signs for subtract)                      → VXISI
///
/// Returns `true` if any bit was raised (the caller may then want to write a
/// default QNaN result), `false` otherwise. FPSCR is only touched when
/// something was raised.
pub fn check_invalid_add(ctx: &mut PpcContext, a: f64, b: f64, sub: bool) -> bool {
    let mut raised = 0u32;

    if is_snan(a) || is_snan(b) {
        raised |= VXSNAN;
    }

    if a.is_infinite() && b.is_infinite() {
        let same_sign = a.is_sign_negative() == b.is_sign_negative();
        // Subtracting equal-signed infinities, or adding opposite-signed
        // ones, is the magnitude cancellation inf − inf.
        if same_sign == sub {
            raised |= VXISI;
        }
    }

    if raised == 0 {
        return false;
    }
    set_exception(ctx, raised);
    true
}
/// Inspect the operands of a floating-point multiply and raise the matching
/// invalid-operation bits: VXSNAN for a signalling-NaN operand, VXIMZ for
/// zero × infinity (in either operand order).
///
/// Returns `true` if any bit was raised, `false` otherwise.
pub fn check_invalid_mul(ctx: &mut PpcContext, a: f64, b: f64) -> bool {
    let mut raised = 0u32;

    if is_snan(a) || is_snan(b) {
        raised |= VXSNAN;
    }

    let a_zero_b_inf = a == 0.0 && b.is_infinite();
    let b_zero_a_inf = b == 0.0 && a.is_infinite();
    if a_zero_b_inf || b_zero_a_inf {
        raised |= VXIMZ;
    }

    if raised == 0 {
        return false;
    }
    set_exception(ctx, raised);
    true
}
/// Inspect the operands of a floating-point divide `a / b` and raise the
/// matching invalid-operation bits: VXSNAN for a signalling-NaN operand,
/// VXZDZ for 0 / 0, VXIDI for infinity / infinity.
///
/// Returns `true` if any bit was raised, `false` otherwise.
pub fn check_invalid_div(ctx: &mut PpcContext, a: f64, b: f64) -> bool {
    let mut raised = 0u32;

    if is_snan(a) || is_snan(b) {
        raised |= VXSNAN;
    }
    if a == 0.0 && b == 0.0 {
        raised |= VXZDZ;
    }
    if a.is_infinite() && b.is_infinite() {
        raised |= VXIDI;
    }

    if raised == 0 {
        return false;
    }
    set_exception(ctx, raised);
    true
}
/// Raise ZX for a true divide-by-zero: a finite, non-zero dividend `a` over
/// a zero divisor `b`. NaN, infinite and zero dividends are left alone (no
/// VX* bit is raised here).
pub fn check_zero_divide(ctx: &mut PpcContext, a: f64, b: f64) {
    let dividend_finite_nonzero = a.is_finite() && a != 0.0;
    if b == 0.0 && dividend_finite_nonzero {
        set_exception(ctx, ZX);
    }
}
/// Post-operation bookkeeping: raise OX/UX as appropriate for `result`, then
/// store its class in FPRF.
///
/// * OX is raised when `result` is infinite and `inputs_were_finite` is true;
///   the flag lets callers suppress OX when the infinity merely propagated
///   from an infinite input.
/// * UX is raised whenever `result` is subnormal.
///
/// Inexact (XX / FI) is not tracked by this function.
pub fn update_after_op(ctx: &mut PpcContext, result: f64, inputs_were_finite: bool) {
    let overflow = if inputs_were_finite && result.is_infinite() { OX } else { 0 };
    let underflow = if result.is_subnormal() { UX } else { 0 };

    let raised = overflow | underflow;
    if raised != 0 {
        set_exception(ctx, raised);
    }

    set_fprf(ctx, classify_fprf(result));
}
/// Returns `true` when `x` is a signalling NaN.
///
/// For binary64 the quiet/signalling distinction lives in the most
/// significant mantissa bit (bit 51): set means quiet, clear means
/// signalling. A NaN always has a non-zero mantissa, so checking that single
/// bit after `is_nan()` is sufficient.
pub fn is_snan(x: f64) -> bool {
    const QUIET_BIT: u64 = 1 << 51;
    x.is_nan() && (x.to_bits() & QUIET_BIT) == 0
}
/// Round `v` to single precision according to FPSCR[RN] and return the
/// result widened back to f64.
///
/// For RN = nearest-even the plain `as f32` cast is used (Rust's float
/// narrowing cast rounds to nearest, ties to even). The three directed modes
/// go through the `round_single_toward_*` helpers, which adjust the
/// nearest-rounded value by one ULP where needed.
pub fn round_to_single(ctx: &PpcContext, v: f64) -> f64 {
    let narrowed: f32 = match rounding_mode(ctx) {
        RoundingMode::NearestEven => v as f32,
        RoundingMode::TowardZero => round_single_toward_zero(v),
        RoundingMode::TowardPosInf => round_single_toward_pos_inf(v),
        RoundingMode::TowardNegInf => round_single_toward_neg_inf(v),
    };
    f64::from(narrowed)
}
/// Round an f64 to an i64 integer honouring FPSCR[RN]. Used by fctidx.
///
/// The final `as i64` conversion saturates for out-of-range values and maps
/// NaN to 0; any architectural handling of those cases is left to the caller.
pub fn round_to_i64(ctx: &PpcContext, v: f64) -> i64 {
    match rounding_mode(ctx) {
        RoundingMode::NearestEven => round_half_even_to_i64(v),
        RoundingMode::TowardZero => v.trunc() as i64,
        RoundingMode::TowardPosInf => v.ceil() as i64,
        RoundingMode::TowardNegInf => v.floor() as i64,
    }
}

/// Round to the nearest integer, resolving exact halfway cases to the even
/// neighbour.
fn round_half_even_to_i64(v: f64) -> i64 {
    let toward_zero = v.trunc();
    // `v - v.trunc()` is computed without rounding error, so a tie can (and
    // must) be detected with an exact comparison. A tolerance such as
    // f64::EPSILON misclassifies near-ties like 1.4999999999999998.
    let is_tie = (v - toward_zero).abs() == 0.5;
    if is_tie && (toward_zero as i64) & 1 == 0 {
        // The neighbour nearer zero is even: keep it.
        toward_zero as i64
    } else {
        // Either not a tie, or the even neighbour is the one away from zero;
        // f64::round (half away from zero) yields the right value in both
        // cases.
        v.round() as i64
    }
}
/// Round an f64 to an i32 honouring FPSCR[RN], saturating at the i32 range.
/// Used by fctiwx.
pub fn round_to_i32(ctx: &PpcContext, v: f64) -> i32 {
    let wide = round_to_i64(ctx, v);
    if wide > i64::from(i32::MAX) {
        i32::MAX
    } else if wide < i64::from(i32::MIN) {
        i32::MIN
    } else {
        // In range by the two checks above, so the narrowing is lossless.
        wide as i32
    }
}
// ------ directed rounding helpers (f64 → f32) ------

/// Narrow `v` to f32 rounding toward zero (truncation).
///
/// Starts from the nearest-even result of the `as f32` cast and, when that
/// result is larger in magnitude than `v`, steps one ULP back toward zero.
/// This also covers finite inputs beyond the f32 range: the cast yields
/// ±inf, and the step back turns it into ±`f32::MAX`. Only a genuinely
/// infinite input returns infinity.
fn round_single_toward_zero(v: f64) -> f32 {
    let nearest = v as f32;
    // NaN propagates; a zero result is already the truncated value.
    if nearest.is_nan() || nearest == 0.0 {
        return nearest;
    }
    if f64::from(nearest.abs()) <= v.abs() {
        return nearest;
    }
    // f32 is sign-magnitude, so decrementing the raw bits shrinks the
    // magnitude for either sign. `nearest` is non-zero, so the magnitude
    // field is at least 1 and the subtraction cannot borrow into the sign.
    f32::from_bits(nearest.to_bits() - 1)
}
/// Narrow `v` to f32 rounding toward +infinity.
///
/// Starts from the nearest-even result of the `as f32` cast and, when that
/// result is below `v`, steps one ULP upward. A finite negative input beyond
/// the f32 range casts to -inf, which is below `v`, so it is stepped up to
/// `-f32::MAX`; only a genuinely infinite input returns -inf.
fn round_single_toward_pos_inf(v: f64) -> f32 {
    let nearest = v as f32;
    if nearest.is_nan() {
        return nearest;
    }
    // Covers exact results, results already rounded up, and +inf.
    if f64::from(nearest) >= v {
        return nearest;
    }
    // nearest < v: move one ULP in the + direction. For negative values that
    // means a smaller magnitude (bits - 1); for positive values a larger one
    // (bits + 1, which turns f32::MAX into +inf).
    let bits = nearest.to_bits();
    let stepped = if nearest.is_sign_negative() { bits - 1 } else { bits + 1 };
    f32::from_bits(stepped)
}
/// Narrow `v` to f32 rounding toward -infinity.
///
/// Starts from the nearest-even result of the `as f32` cast and, when that
/// result is above `v`, steps one ULP downward. A finite positive input
/// beyond the f32 range casts to +inf, which is above `v`, so it is stepped
/// down to `f32::MAX`; only a genuinely infinite input returns +inf.
fn round_single_toward_neg_inf(v: f64) -> f32 {
    let nearest = v as f32;
    if nearest.is_nan() {
        return nearest;
    }
    // Covers exact results, results already rounded down, and -inf.
    if f64::from(nearest) <= v {
        return nearest;
    }
    // nearest > v: move one ULP in the - direction. For negative values that
    // means a larger magnitude (bits + 1, which turns -f32::MAX into -inf);
    // for positive values a smaller one (bits - 1).
    let bits = nearest.to_bits();
    let stepped = if nearest.is_sign_negative() { bits + 1 } else { bits - 1 };
    f32::from_bits(stepped)
}
/// Mirror the top four FPSCR bits into CR field 1 (drop-in replacement for
/// the old `update_cr1_from_fpscr`):
/// `lt` ← FX, `gt` ← FEX, `eq` ← VX, `so` ← OX.
pub fn update_cr1(ctx: &mut PpcContext) {
    let fpscr = ctx.fpscr;
    let is_set = |mask: u32| (fpscr & mask) != 0;

    let cr1 = &mut ctx.cr[1];
    cr1.lt = is_set(FX);
    cr1.gt = is_set(FEX);
    cr1.eq = is_set(VX);
    cr1.so = is_set(OX);
}
// Unit tests for the FPSCR helpers. Each test starts from a fresh
// `PpcContext::new()`.
#[cfg(test)]
mod tests {
use super::*;
// Fresh context for each test.
fn ctx() -> PpcContext { PpcContext::new() }
// A new context is expected to decode as RN = 0 (nearest-even).
#[test]
fn rn_default_is_nearest() {
assert_eq!(rounding_mode(&ctx()), RoundingMode::NearestEven);
}
// The three non-zero RN encodings decode to the directed modes.
#[test]
fn rn_bits_decode() {
let mut c = ctx();
c.fpscr = 0x1;
assert_eq!(rounding_mode(&c), RoundingMode::TowardZero);
c.fpscr = 0x2;
assert_eq!(rounding_mode(&c), RoundingMode::TowardPosInf);
c.fpscr = 0x3;
assert_eq!(rounding_mode(&c), RoundingMode::TowardNegInf);
}
// One representative input per FPRF class (negative denormal not covered).
#[test]
fn fprf_classifies_correctly() {
assert_eq!(classify_fprf(1.0), fprf::POS_NORMAL);
assert_eq!(classify_fprf(-1.0), fprf::NEG_NORMAL);
assert_eq!(classify_fprf(0.0), fprf::POS_ZERO);
assert_eq!(classify_fprf(-0.0), fprf::NEG_ZERO);
assert_eq!(classify_fprf(f64::INFINITY), fprf::POS_INF);
assert_eq!(classify_fprf(f64::NEG_INFINITY), fprf::NEG_INF);
assert_eq!(classify_fprf(f64::NAN), fprf::QNAN);
assert_eq!(classify_fprf(f64::from_bits(1)), fprf::POS_DENORMAL);
}
// FX latches on a 0 -> 1 transition of OX, and latches again after both
// bits have been cleared by hand.
#[test]
fn fx_is_sticky_on_new_exception() {
let mut c = ctx();
set_exception(&mut c, OX);
assert_ne!(c.fpscr & FX, 0);
// Clear FX/OX manually.
c.fpscr &= !(FX | OX);
// Re-set OX; FX should re-latch.
set_exception(&mut c, OX);
assert_ne!(c.fpscr & FX, 0);
}
// Raising a single VX* cause bit also raises the VX summary bit.
#[test]
fn vx_summary_set_on_any_vx_bit() {
let mut c = ctx();
set_exception(&mut c, VXSNAN);
assert_ne!(c.fpscr & VX, 0);
assert_ne!(c.fpscr & VXSNAN, 0);
}
// A value exactly representable in f32 survives the round trip unchanged.
#[test]
fn round_to_single_nearest_is_identity_on_representable() {
let c = ctx();
assert_eq!(round_to_single(&c, 1.0_f64), 1.0_f64);
}
// Values far outside the i32 range saturate instead of wrapping.
#[test]
fn round_to_i32_clamps_out_of_range() {
let c = ctx();
assert_eq!(round_to_i32(&c, 1e20_f64), i32::MAX);
assert_eq!(round_to_i32(&c, -1e20_f64), i32::MIN);
}
// Exact halfway cases go to the even neighbour, for both signs.
#[test]
fn round_to_i64_nearest_even_on_tie() {
let c = ctx();
assert_eq!(round_to_i64(&c, 2.5_f64), 2);
assert_eq!(round_to_i64(&c, 3.5_f64), 4);
assert_eq!(round_to_i64(&c, -2.5_f64), -2);
}
// (+inf) - (+inf) with `sub = true` is an invalid operation (VXISI).
#[test]
fn check_invalid_add_detects_inf_minus_inf() {
let mut c = ctx();
assert!(check_invalid_add(&mut c, f64::INFINITY, f64::INFINITY, true));
assert_ne!(c.fpscr & VXISI, 0);
}
// 0 / 0 is an invalid operation (VXZDZ).
#[test]
fn check_invalid_div_detects_zero_over_zero() {
let mut c = ctx();
assert!(check_invalid_div(&mut c, 0.0, 0.0));
assert_ne!(c.fpscr & VXZDZ, 0);
}
// A NaN with mantissa bit 51 clear is signalling; `f64::NAN` is quiet.
#[test]
fn snan_detection() {
// SNaN in binary64: sign=0, exp=all-ones, mantissa nonzero with bit 51 clear.
let snan = f64::from_bits(0x7FF0_0000_0000_0001);
assert!(is_snan(snan));
assert!(!is_snan(f64::NAN));
}
}