xenia-cpu: VMX128, FPSCR, decoder split, scheduler, decode/block caches
Split the monolithic interpreter into cohesive modules: dedicated decoder (decoder.rs) producing 8-byte DecodedInstr; opcode tables (opcode.rs); explicit traps (trap.rs); FPSCR helpers (fpscr.rs); overflow/carry helpers (overflow.rs); a 4 KiB-page-versioned decode cache and basic-block cache (block_cache.rs); and a full VMX/VMX128 implementation (vmx.rs) covering AltiVec + Xenon's 128-bit extensions. Add the parallel-execution substrate behind --parallel: a 7-party phaser (phaser.rs) for round-based barrier sync, ReservationTable (reservation.rs) for guest LL/SC, and the per-HW-thread scheduler core (scheduler.rs) that owns ThreadRefs, runqueues, and pending IRQs. Disassembler is now the single source of truth: disasm.rs gains the full base + extended + VMX128 mnemonic set, with golden JSON fixtures and a disasm_goldens test suite. Add a criterion-style interpreter bench. context.rs grows the per-thread state the new modules need (reservation slot, FPSCR, vector regs). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -29,16 +29,37 @@ pub mod spr {
|
||||
pub const XER: u32 = 1;
|
||||
pub const LR: u32 = 8;
|
||||
pub const CTR: u32 = 9;
|
||||
pub const TBL: u32 = 268;
|
||||
pub const TBU: u32 = 269;
|
||||
pub const DSISR: u32 = 18;
|
||||
pub const DAR: u32 = 19;
|
||||
/// Decrementer (hypervisor-visible, 32-bit down-counter).
|
||||
pub const DEC: u32 = 22;
|
||||
pub const TBL: u32 = 268; // Read (user)
|
||||
pub const TBU: u32 = 269; // Read (user)
|
||||
/// Time-base write (supervisor). Separate SPR number from TBL (268) for
|
||||
/// access-control reasons.
|
||||
pub const TBL_WRITE: u32 = 284;
|
||||
pub const TBU_WRITE: u32 = 285;
|
||||
pub const SPRG0: u32 = 272;
|
||||
pub const SPRG1: u32 = 273;
|
||||
pub const SPRG2: u32 = 274;
|
||||
pub const SPRG3: u32 = 275;
|
||||
pub const VRSAVE: u32 = 256;
|
||||
pub const PVR: u32 = 287;
|
||||
pub const HID0: u32 = 1008;
|
||||
pub const HID1: u32 = 1009;
|
||||
pub const PIR: u32 = 1023;
|
||||
}
|
||||
|
||||
/// LR halt sentinel. When `bclr` returns to this address, the interpreter
|
||||
/// loop halts cleanly (matches the "entry returned" convention).
|
||||
pub const LR_HALT_SENTINEL: u64 = 0xBCBC_BCBC;
|
||||
|
||||
/// VSCR NJ (Non-Java mode) bit. Stored in word 3 at bit 16 counting from the LSB (bit 15 in PowerPC MSB-0 numbering; mask 0x0001_0000).
|
||||
/// Set at startup; when set, denormals are flushed to zero, and when clear, they are handled per IEEE-754.
|
||||
pub const VSCR_NJ_MASK: u32 = 0x0001_0000;
|
||||
/// VSCR SAT (saturation sticky) bit. Stored in word 3 at bit 31 in PowerPC MSB-0 numbering, i.e. the least-significant bit (mask 0x0000_0001).
|
||||
pub const VSCR_SAT_MASK: u32 = 0x0000_0001;
|
||||
|
||||
/// PowerPC processor context. Holds all register state for one guest thread.
|
||||
/// Mirrors PPCContext from ppc_context.h, minus JIT-specific fields.
|
||||
#[repr(C, align(64))]
|
||||
@@ -64,15 +85,39 @@ pub struct PpcContext {
|
||||
pub xer_ca: u8,
|
||||
pub xer_ov: u8,
|
||||
pub xer_so: u8,
|
||||
// Altivec VSCR saturation bit
|
||||
pub vscr_sat: u8,
|
||||
// Altivec VSCR. Only the NJ (mask 0x0001_0000) and SAT (mask 0x0000_0001) bits of word 3 are meaningful.
|
||||
pub vscr: Vec128,
|
||||
// VRSAVE (SPR 256). Bitmask of which VRs need saving across context switches.
|
||||
pub vrsave: u32,
|
||||
|
||||
// Program counter
|
||||
pub pc: u32,
|
||||
// Reservation address/value for lwarx/stwcx
|
||||
pub reserved_addr: u32,
|
||||
// Reservation for lwarx/ldarx/stwcx/stdcx. Xenon's reservation granule is
|
||||
// one L2 cache line (128 bytes) — `reserved_line` is stored as the base
|
||||
// address of that line (`ea & !0x7F`). `has_reservation` gates the
|
||||
// validity; stwcx./stdcx. check that both match before committing.
|
||||
// `reserved_val` is retained for possible future use by a coherency
|
||||
// observer; the store-conditional logic itself does not compare it.
|
||||
pub reserved_line: u32,
|
||||
pub reserved_val: u64,
|
||||
pub has_reservation: bool,
|
||||
/// M3.7 — generation stamp returned by [`crate::ReservationTable::reserve`]
|
||||
/// at the most recent `lwarx`/`ldarx`. Paired with `reserved_line`;
|
||||
/// `stwcx.`/`stdcx.` pass this back to `try_commit`. Meaningful only
|
||||
/// when `reservation_table` is `Some` and the table is enabled.
|
||||
pub reserved_generation: u32,
|
||||
/// M3.7 — optional handle to the inter-thread reservation table.
|
||||
/// When `Some(table)` *and* `table.is_enabled()`, the interpreter's
|
||||
/// `lwarx`/`stwcx.`/`ldarx`/`stdcx.` arms route through the table;
|
||||
/// otherwise they use the legacy per-`PpcContext` fields above. The
|
||||
/// scheduler populates this when it spawns a thread under a kernel
|
||||
/// that has `reservations` set.
|
||||
pub reservation_table: Option<std::sync::Arc<crate::ReservationTable>>,
|
||||
/// M3.7 — emulated HW slot ID this thread is bound to. Used as the
|
||||
/// reservation table's `hw_id` discriminator so two threads on
|
||||
/// different slots can't accidentally commit each other's
|
||||
/// reservations. Populated by the scheduler at spawn / migration.
|
||||
pub hw_id: u8,
|
||||
|
||||
// Thread ID (for kernel use)
|
||||
pub thread_id: u32,
|
||||
@@ -82,6 +127,12 @@ pub struct PpcContext {
|
||||
|
||||
// Time base (incremented each instruction for debugging)
|
||||
pub timebase: u64,
|
||||
|
||||
// Decrementer (SPR 22): 32-bit down-counter that fires an external
|
||||
// interrupt at underflow on real hw. Xenia-rs doesn't dispatch DEC
|
||||
// interrupts to the guest; this value is maintained so that mfspr DEC
|
||||
// returns something coherent.
|
||||
pub dec: u32,
|
||||
}
|
||||
|
||||
impl PpcContext {
|
||||
@@ -89,7 +140,9 @@ impl PpcContext {
|
||||
Self {
|
||||
gpr: [0; 32],
|
||||
ctr: 0,
|
||||
lr: 0,
|
||||
// Canary sets LR to the halt sentinel at thread start so `blr`
|
||||
// from the top-level entry falls out of the interpreter loop.
|
||||
lr: LR_HALT_SENTINEL,
|
||||
msr: 0,
|
||||
fpr: [0.0; 32],
|
||||
vr: [Vec128::ZERO; 128],
|
||||
@@ -98,14 +151,21 @@ impl PpcContext {
|
||||
xer_ca: 0,
|
||||
xer_ov: 0,
|
||||
xer_so: 0,
|
||||
vscr_sat: 0,
|
||||
// VSCR starts with NJ bit set (denormals flushed) — matches canary
|
||||
// thread_state.cc initialization.
|
||||
vscr: Vec128::from_u32x4(0, 0, 0, VSCR_NJ_MASK),
|
||||
vrsave: 0xFFFF_FFFF,
|
||||
pc: 0,
|
||||
reserved_addr: 0,
|
||||
reserved_line: 0,
|
||||
reserved_val: 0,
|
||||
has_reservation: false,
|
||||
reserved_generation: 0,
|
||||
reservation_table: None,
|
||||
hw_id: 0,
|
||||
thread_id: 0,
|
||||
cycle_count: 0,
|
||||
timebase: 0,
|
||||
dec: 0,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -182,6 +242,27 @@ impl PpcContext {
|
||||
self.xer_ov = ((val >> 30) & 1) as u8;
|
||||
self.xer_ca = ((val >> 29) & 1) as u8;
|
||||
}
|
||||
|
||||
/// Read the VSCR SAT (sticky saturation) bit.
|
||||
pub fn vscr_sat(&self) -> bool {
|
||||
(self.vscr.u32x4(3) & VSCR_SAT_MASK) != 0
|
||||
}
|
||||
|
||||
/// Set or clear VSCR SAT. Preserves the NJ bit (and any other word-3 bits).
|
||||
pub fn set_vscr_sat(&mut self, v: bool) {
|
||||
let mut w = self.vscr.u32x4(3);
|
||||
if v {
|
||||
w |= VSCR_SAT_MASK;
|
||||
} else {
|
||||
w &= !VSCR_SAT_MASK;
|
||||
}
|
||||
self.vscr.set_u32x4(3, w);
|
||||
}
|
||||
|
||||
/// Read the VSCR NJ (non-Java mode / flush-denormals) bit.
|
||||
pub fn vscr_nj(&self) -> bool {
|
||||
(self.vscr.u32x4(3) & VSCR_NJ_MASK) != 0
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for PpcContext {
|
||||
|
||||
Reference in New Issue
Block a user