xenia-cpu: VMX128, FPSCR, decoder split, scheduler, decode/block caches

Split the monolithic interpreter into cohesive modules: dedicated
decoder (decoder.rs) producing 8-byte DecodedInstr; opcode tables
(opcode.rs); explicit traps (trap.rs); FPSCR helpers (fpscr.rs);
overflow/carry helpers (overflow.rs); a 4 KiB-page-versioned decode
cache and basic-block cache (block_cache.rs); and a full VMX/VMX128
implementation (vmx.rs) covering AltiVec + Xenon's 128-bit extensions.

Add the parallel-execution substrate behind --parallel: a 7-party
phaser (phaser.rs) for round-based barrier sync, ReservationTable
(reservation.rs) for guest LL/SC, and the per-HW-thread scheduler
core (scheduler.rs) that owns ThreadRefs, runqueues, and pending IRQs.

Disassembler is now the single source of truth: disasm.rs gains the
full base + extended + VMX128 mnemonic set, with golden JSON fixtures
and a disasm_goldens test suite. Add a criterion-style interpreter
bench. context.rs grows the per-thread state the new modules need
(reservation slot, FPSCR, vector regs).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-01 16:27:43 +02:00
parent e9b2b57a44
commit c36cca14f9
20 changed files with 12284 additions and 458 deletions

View File

@@ -0,0 +1,173 @@
//! OE / XER[OV] / XER[SO] handling for integer arithmetic.
//!
//! PPC integer ops with the OE bit set update XER[OV] (overflow) and sticky-set
//! XER[SO]. When OE is clear the instruction leaves XER untouched. Signed
//! overflow is predicated on the operation width and operand signs per the
//! PowerISA pseudocode. For 32-bit word operations (`mullw`, `divw`, `divwu` —
//! the forms whose mnemonic carries a `w`) the predicate uses the low 32 bits.
//! For 64-bit operations (`add`, `subf`, `neg`, `mulld`, `divd`, `divdu`) the
//! predicate uses the full 64 bits; the `*_64` helpers in this module implement
//! that case.
use crate::context::PpcContext;
/// Records the outcome of an OE-enabled integer operation in the XER fields.
///
/// `xer_ov` always mirrors `overflowed`. `xer_so` is sticky: it is set when
/// the operation overflowed and is never cleared by this function.
#[inline]
pub fn apply(ctx: &mut PpcContext, overflowed: bool) {
    if !overflowed {
        // No overflow: clear OV only, leaving any earlier SO in place.
        ctx.xer_ov = 0;
        return;
    }
    ctx.xer_ov = 1;
    ctx.xer_so = 1;
}
/// Signed overflow predicate for 64-bit additions (`add`, `addc`, and the
/// `subf`/`subfc` forms once they have been rewritten as an addition).
///
/// Returns `true` when `a` and `b` carry the same sign bit but `result` does
/// not — the only way a two's-complement sum can leave the `i64` range.
/// Subtraction callers either express the operation as an addition first or
/// use `sub_ov_64` directly.
#[inline]
pub fn add_ov_64(a: u64, b: u64, result: u64) -> bool {
    const SIGN_BIT: u64 = 1 << 63;
    let operands_share_sign = ((a ^ b) & SIGN_BIT) == 0;
    let result_changed_sign = ((a ^ result) & SIGN_BIT) != 0;
    operands_share_sign && result_changed_sign
}
/// General-purpose signed-overflow check for 64-bit arithmetic.
///
/// `true_sum` is the exact (unwrapped) signed value the caller computed in
/// `i128`; `result` is the 64-bit value actually stored. Overflow is reported
/// when reinterpreting `result` as `i64` does not give back `true_sum`, i.e.
/// the exact value does not fit in `i64`.
///
/// Intended for multi-term chains (`adde`, `addme`, `addze`, `subfe`,
/// `subfme`, `subfze`) where a carry-in makes a sign-bit predicate awkward.
#[inline]
pub fn sum_overflow_64(true_sum: i128, result: u64) -> bool {
    // The u64 -> i64 cast is a deliberate bit reinterpretation.
    let stored = i128::from(result as i64);
    stored != true_sum
}
/// Signed overflow predicate for the 64-bit subtraction `RT = b - a`.
///
/// Returns `true` when `a` and `b` have different sign bits and the sign of
/// `result` differs from the sign of `b` (the minuend).
#[inline]
pub fn sub_ov_64(a: u64, b: u64, result: u64) -> bool {
    // Reinterpreting as i64 turns "bit 63 is set" into "value is negative".
    let operands_differ_in_sign = ((a ^ b) as i64) < 0;
    let result_left_minuend_sign = ((b ^ result) as i64) < 0;
    operands_differ_in_sign && result_left_minuend_sign
}
/// Signed overflow predicate for carry-chained 64-bit additions (`adde` and
/// friends).
///
/// Delegates to `add_ov_64`: the carry-in needs no separate handling here
/// because the caller has already folded it into `result`, and the sign-bit
/// rule is evaluated against that stored result.
#[inline]
pub fn adde_ov_64(a: u64, b: u64, result: u64) -> bool {
    add_ov_64(a, b, result)
}
/// Signed 32-bit multiply overflow (`mullwo`).
///
/// `product` is the 64-bit signed product computed by the caller. Overflow is
/// reported when it lies outside the `i32` range, i.e. when the upper 32 bits
/// are not a pure sign extension of the lower 32 bits.
#[inline]
pub fn mullw_ov(product: i64) -> bool {
    let representable = i64::from(i32::MIN)..=i64::from(i32::MAX);
    !representable.contains(&product)
}
/// Signed 64-bit multiply overflow (`mulldo`).
///
/// Returns `true` when `a * b` is not representable as an `i64`, using the
/// overflow flag reported by the standard library's wrapping multiply.
#[inline]
pub fn mulld_ov(a: i64, b: i64) -> bool {
    let (_wrapped, overflowed) = a.overflowing_mul(b);
    overflowed
}
/// Overflow predicate for signed 32-bit division (`divwo`).
///
/// The OE forms of the divide instructions (`divwo`, `divwuo`, `divdo`,
/// `divduo`) raise OV in two cases:
///   * the divisor is zero, or
///   * a signed division computes `INT_MIN / -1`, whose quotient doesn't fit.
///
/// `checked_div` yields `None` for exactly those two cases.
#[inline]
pub fn divw_ov_signed(ra: i32, rb: i32) -> bool {
    ra.checked_div(rb).is_none()
}
/// Overflow predicate for unsigned 32-bit division (`divwuo`).
///
/// An unsigned quotient always fits, so the only overflow case is a zero
/// divisor; the dividend is irrelevant and therefore not a parameter.
#[inline]
pub fn divw_ov_unsigned(rb: u32) -> bool {
    matches!(rb, 0)
}
/// Overflow predicate for signed 64-bit division (`divdo`).
///
/// Returns `true` when the divisor is zero or when the division is
/// `i64::MIN / -1`, whose quotient is not representable. `checked_div`
/// yields `None` for exactly those two cases.
#[inline]
pub fn divd_ov_signed(ra: i64, rb: i64) -> bool {
    ra.checked_div(rb).is_none()
}
/// Overflow predicate for unsigned 64-bit division (`divduo`).
///
/// An unsigned quotient always fits, so the only overflow case is a zero
/// divisor; the dividend is irrelevant and therefore not a parameter.
#[inline]
pub fn divd_ov_unsigned(rb: u64) -> bool {
    matches!(rb, 0)
}
/// Overflow predicate for 64-bit negate (`negx`, RT = -(RA)).
///
/// Negation only fails to fit when RA holds the most negative 64-bit value,
/// which is the single input for which `checked_neg` returns `None`.
#[inline]
pub fn neg_ov_64(ra: u64) -> bool {
    // The u64 -> i64 cast is a deliberate bit reinterpretation.
    (ra as i64).checked_neg().is_none()
}
/// Unit tests for the overflow predicates.
///
/// Each predicate is exercised at the boundary of its operand width, since
/// that is where an incorrect predicate would silently mis-set XER[OV].
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn add_no_overflow() {
        assert!(!add_ov_64(1, 2, 3));
        assert!(!add_ov_64(u64::MAX, 0, u64::MAX));
    }

    #[test]
    fn add_positive_overflow() {
        // INT64_MAX + 1 = INT64_MIN — signed overflow
        let a = i64::MAX as u64;
        let b = 1u64;
        let r = a.wrapping_add(b);
        assert!(add_ov_64(a, b, r));
    }

    #[test]
    fn add_negative_overflow() {
        // INT64_MIN + -1 = INT64_MAX — signed overflow
        let a = i64::MIN as u64;
        let b = (-1i64) as u64;
        let r = a.wrapping_add(b);
        assert!(add_ov_64(a, b, r));
    }

    #[test]
    fn adde_carry_in_is_folded_into_result() {
        // INT64_MAX + 0 + carry(1) wraps to INT64_MIN — signed overflow
        let a = i64::MAX as u64;
        let r = a.wrapping_add(0).wrapping_add(1);
        assert!(adde_ov_64(a, 0, r));

        // INT64_MIN + -1 + carry(1) = INT64_MIN — fits, no overflow
        let a = i64::MIN as u64;
        let b = (-1i64) as u64;
        let r = a.wrapping_add(b).wrapping_add(1);
        assert!(!adde_ov_64(a, b, r));
    }

    #[test]
    fn sum_overflow_tracks_i64_range() {
        assert!(!sum_overflow_64(3, 3));
        assert!(!sum_overflow_64(-1, u64::MAX));
        assert!(!sum_overflow_64(i128::from(i64::MIN), i64::MIN as u64));

        let too_big = i128::from(i64::MAX) + 1;
        assert!(sum_overflow_64(too_big, too_big as u64));

        let too_small = i128::from(i64::MIN) - 1;
        assert!(sum_overflow_64(too_small, too_small as u64));
    }

    #[test]
    fn sub_overflow_min_minus_pos() {
        // INT64_MIN - 1 overflows
        let b = i64::MIN as u64;
        let a = 1u64;
        let r = b.wrapping_sub(a);
        assert!(sub_ov_64(a, b, r));
    }

    #[test]
    fn sub_no_overflow() {
        let b = 5u64;
        let a = 2u64;
        let r = b.wrapping_sub(a);
        assert!(!sub_ov_64(a, b, r));
    }

    #[test]
    fn mullw_fits_32_bits() {
        assert!(!mullw_ov(i64::from(i32::MAX)));
        assert!(!mullw_ov(i64::from(i32::MIN)));
        assert!(!mullw_ov(-1i64));
    }

    #[test]
    fn mullw_overflows_32_bits() {
        let p = i64::from(i32::MAX) * 2;
        assert!(mullw_ov(p));
        assert!(mullw_ov(i64::from(i32::MAX) + 1));
        assert!(mullw_ov(i64::from(i32::MIN) - 1));
        // INT32_MIN * INT32_MIN = 2^62: low word is zero but the value doesn't fit.
        assert!(mullw_ov(i64::from(i32::MIN) * i64::from(i32::MIN)));
    }

    #[test]
    fn mulld_overflows() {
        assert!(mulld_ov(i64::MAX, 2));
        assert!(mulld_ov(i64::MIN, -1));
        assert!(!mulld_ov(i64::MAX, 1));
    }

    #[test]
    fn div_signed_overflow_cases() {
        assert!(divw_ov_signed(1, 0));
        assert!(divw_ov_signed(i32::MIN, -1));
        assert!(!divw_ov_signed(i32::MIN, 1));
        assert!(!divw_ov_signed(i32::MAX, -1));

        assert!(divd_ov_signed(1, 0));
        assert!(divd_ov_signed(i64::MIN, -1));
        assert!(!divd_ov_signed(i64::MIN, 1));
        assert!(!divd_ov_signed(i64::MAX, -1));
    }

    #[test]
    fn div_unsigned_overflow_only_on_zero_divisor() {
        assert!(divw_ov_unsigned(0));
        assert!(!divw_ov_unsigned(1));
        assert!(!divw_ov_unsigned(u32::MAX));

        assert!(divd_ov_unsigned(0));
        assert!(!divd_ov_unsigned(1));
        assert!(!divd_ov_unsigned(u64::MAX));
    }

    #[test]
    fn neg_ov_only_at_min() {
        assert!(neg_ov_64(i64::MIN as u64));
        assert!(!neg_ov_64(0));
        assert!(!neg_ov_64(1));
        assert!(!neg_ov_64(u64::MAX));
    }
}