Track lwarx vs ldarx reservation width in PpcContext as a u8 (4 = word,
8 = doubleword, 0 = none). stwcx. requires width==4; stdcx. requires
width==8. Cross-width pairs (lwarx + stdcx., ldarx + stwcx.) now fail
deterministically with CR0.EQ=0 instead of spuriously succeeding.
The width is held per-thread; the cross-thread reservation table keeps
its existing slot encoding because each host thread consults its own
ctx.reservation_width before committing.
Affected:
PPCBUG-151: stwcx./stdcx. shared the same reservation slot without a width
discriminator, so cross-width commits silently succeeded.
Tests: lwarx_then_stdcx_cross_width_fails,
ldarx_then_stwcx_cross_width_fails
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
280 lines
9.4 KiB
Rust
280 lines
9.4 KiB
Rust
use xenia_types::Vec128;
|
|
|
|
/// Condition register field (one of CR0-CR7).
///
/// Holds the four flags of one field as separate booleans. [`CrField::as_u8`]
/// and [`CrField::from_u8`] convert to and from the packed nibble form, laid
/// out as `LT = 0b1000`, `GT = 0b0100`, `EQ = 0b0010`, `SO = 0b0001`.
#[derive(Debug, Clone, Copy, Default)]
pub struct CrField {
    /// "Less than" flag (nibble bit `0b1000`).
    pub lt: bool,
    /// "Greater than" flag (nibble bit `0b0100`).
    pub gt: bool,
    /// "Equal" flag (nibble bit `0b0010`).
    pub eq: bool,
    /// "Summary overflow" flag (nibble bit `0b0001`).
    pub so: bool,
}

impl CrField {
    /// Packs the four flags into the low nibble of a byte.
    ///
    /// The upper nibble of the result is always zero.
    pub const fn as_u8(&self) -> u8 {
        ((self.lt as u8) << 3) | ((self.gt as u8) << 2) | ((self.eq as u8) << 1) | (self.so as u8)
    }

    /// Builds a field from the low nibble of `val`.
    ///
    /// Bits above the low nibble are ignored, so
    /// `CrField::from_u8(v).as_u8() == v & 0xF`.
    pub const fn from_u8(val: u8) -> Self {
        Self {
            lt: val & 0b1000 != 0,
            gt: val & 0b0100 != 0,
            eq: val & 0b0010 != 0,
            so: val & 0b0001 != 0,
        }
    }
}

/// SPR (Special Purpose Register) numbers used by mfspr/mtspr.
///
/// The constants are grouped by SPR number; the values are the SPR numbers
/// these instructions encode.
pub mod spr {
    /// XER register.
    pub const XER: u32 = 1;
    /// Link register.
    pub const LR: u32 = 8;
    /// Count register.
    pub const CTR: u32 = 9;

    /// DSISR (data storage interrupt status register).
    pub const DSISR: u32 = 18;
    /// DAR (data address register).
    pub const DAR: u32 = 19;
    /// Decrementer (hypervisor-visible, 32-bit down-counter).
    pub const DEC: u32 = 22;

    /// VRSAVE.
    pub const VRSAVE: u32 = 256;

    /// Time base, lower half. Read (user).
    pub const TBL: u32 = 268;
    /// Time base, upper half. Read (user).
    pub const TBU: u32 = 269;

    /// SPRG0 (general-purpose SPR 0).
    pub const SPRG0: u32 = 272;
    /// SPRG1 (general-purpose SPR 1).
    pub const SPRG1: u32 = 273;
    /// SPRG2 (general-purpose SPR 2).
    pub const SPRG2: u32 = 274;
    /// SPRG3 (general-purpose SPR 3).
    pub const SPRG3: u32 = 275;

    /// Time-base write (supervisor). Separate SPR number from TBL (268) for
    /// access-control reasons.
    pub const TBL_WRITE: u32 = 284;
    /// Time-base write (supervisor), upper half. Separate SPR number from
    /// TBU (269).
    pub const TBU_WRITE: u32 = 285;

    /// PVR (processor version register).
    pub const PVR: u32 = 287;

    /// HID0 (hardware implementation-dependent register 0).
    pub const HID0: u32 = 1008;
    /// HID1 (hardware implementation-dependent register 1).
    pub const HID1: u32 = 1009;
    /// PIR (processor identification register).
    pub const PIR: u32 = 1023;
}

/// LR halt sentinel. When `bclr` returns to this address, the interpreter
/// loop halts cleanly (matches the "entry returned" convention).
pub const LR_HALT_SENTINEL: u64 = 0xBCBC_BCBC;

/// VSCR NJ (Non-Java mode) bit, stored in word 3 of the VSCR vector.
///
/// The mask `0x0001_0000` is `1 << 16`: bit 16 counting from the
/// least-significant bit, which is bit 15 in PowerPC MSB-0 numbering.
/// The bit is set at startup. While it is set, denormals are flushed to
/// zero; while it is clear, the AltiVec "Java mode" applies and denormals
/// are handled per IEEE-754.
pub const VSCR_NJ_MASK: u32 = 0x0001_0000;

/// VSCR SAT (saturation sticky) bit, stored in word 3 of the VSCR vector.
///
/// The mask `0x0000_0001` is the least-significant bit of the word, which
/// is bit 31 in PowerPC MSB-0 numbering.
pub const VSCR_SAT_MASK: u32 = 0x0000_0001;

/// PowerPC processor context. Holds all register state for one guest thread.
|
|
/// Mirrors PPCContext from ppc_context.h, minus JIT-specific fields.
|
|
#[repr(C, align(64))]
|
|
pub struct PpcContext {
|
|
// General purpose registers (R0-R31)
|
|
pub gpr: [u64; 32],
|
|
// Count register
|
|
pub ctr: u64,
|
|
// Link register
|
|
pub lr: u64,
|
|
// Machine state register
|
|
pub msr: u64,
|
|
// Floating-point registers (F0-F31)
|
|
pub fpr: [f64; 32],
|
|
// VMX128 vector registers (V0-V127, Xbox 360 extended set)
|
|
pub vr: [Vec128; 128],
|
|
|
|
// Condition register fields (CR0-CR7)
|
|
pub cr: [CrField; 8],
|
|
// Floating-point status and control register
|
|
pub fpscr: u32,
|
|
// XER register (split for easy individual updates)
|
|
pub xer_ca: u8,
|
|
pub xer_ov: u8,
|
|
pub xer_so: u8,
|
|
// Altivec VSCR. Only bits 16 (NJ) and 31 (SAT) of word 3 are meaningful.
|
|
pub vscr: Vec128,
|
|
// VRSAVE (SPR 256). Bitmask of which VRs need saving across context switches.
|
|
pub vrsave: u32,
|
|
|
|
// Program counter
|
|
pub pc: u32,
|
|
// Reservation for lwarx/ldarx/stwcx/stdcx. Xenon's reservation granule is
|
|
// one L2 cache line (128 bytes) — `reserved_line` is stored as the base
|
|
// address of that line (`ea & !0x7F`). `has_reservation` gates the
|
|
// validity; stwcx./stdcx. check that both match before committing.
|
|
// `reserved_val` is retained for possible future use by a coherency
|
|
// observer; the store-conditional logic itself does not compare it.
|
|
pub reserved_line: u32,
|
|
pub reserved_val: u64,
|
|
pub has_reservation: bool,
|
|
/// PPCBUG-151 — width of the active reservation: 4 = `lwarx` (word),
|
|
/// 8 = `ldarx` (doubleword), 0 = no reservation. `stwcx.` requires
|
|
/// width==4; `stdcx.` requires width==8. Cross-width pairs fail
|
|
/// deterministically with CR0.EQ=0. Cleared alongside `has_reservation`
|
|
/// on every `stwcx.`/`stdcx.` exit (success or failure).
|
|
pub reservation_width: u8,
|
|
/// M3.7 — generation stamp returned by [`crate::ReservationTable::reserve`]
|
|
/// at the most recent `lwarx`/`ldarx`. Paired with `reserved_line`;
|
|
/// `stwcx.`/`stdcx.` pass this back to `try_commit`. Meaningful only
|
|
/// when `reservation_table` is `Some` and the table is enabled.
|
|
pub reserved_generation: u32,
|
|
/// M3.7 — optional handle to the inter-thread reservation table.
|
|
/// When `Some(table)` *and* `table.is_enabled()`, the interpreter's
|
|
/// `lwarx`/`stwcx.`/`ldarx`/`stdcx.` arms route through the table;
|
|
/// otherwise they use the legacy per-`PpcContext` fields above. The
|
|
/// scheduler populates this when it spawns a thread under a kernel
|
|
/// that has `reservations` set.
|
|
pub reservation_table: Option<std::sync::Arc<crate::ReservationTable>>,
|
|
/// M3.7 — emulated HW slot ID this thread is bound to. Used as the
|
|
/// reservation table's `hw_id` discriminator so two threads on
|
|
/// different slots can't accidentally commit each other's
|
|
/// reservations. Populated by the scheduler at spawn / migration.
|
|
pub hw_id: u8,
|
|
|
|
// Thread ID (for kernel use)
|
|
pub thread_id: u32,
|
|
|
|
// Cycle counter for timing
|
|
pub cycle_count: u64,
|
|
|
|
// Time base (incremented each instruction for debugging)
|
|
pub timebase: u64,
|
|
|
|
// Decrementer (SPR 22): 32-bit down-counter that fires an external
|
|
// interrupt at underflow on real hw. Xenia-rs doesn't dispatch DEC
|
|
// interrupts to the guest; this value is maintained so that mfspr DEC
|
|
// returns something coherent.
|
|
pub dec: u32,
|
|
}
|
|
|
|
impl PpcContext {
|
|
pub fn new() -> Self {
|
|
Self {
|
|
gpr: [0; 32],
|
|
ctr: 0,
|
|
// Canary sets LR to the halt sentinel at thread start so `blr`
|
|
// from the top-level entry falls out of the interpreter loop.
|
|
lr: LR_HALT_SENTINEL,
|
|
msr: 0,
|
|
fpr: [0.0; 32],
|
|
vr: [Vec128::ZERO; 128],
|
|
cr: [CrField::default(); 8],
|
|
fpscr: 0,
|
|
xer_ca: 0,
|
|
xer_ov: 0,
|
|
xer_so: 0,
|
|
// VSCR starts with NJ bit set (denormals flushed) — matches canary
|
|
// thread_state.cc initialization.
|
|
vscr: Vec128::from_u32x4(0, 0, 0, VSCR_NJ_MASK),
|
|
vrsave: 0xFFFF_FFFF,
|
|
pc: 0,
|
|
reserved_line: 0,
|
|
reserved_val: 0,
|
|
has_reservation: false,
|
|
reservation_width: 0,
|
|
reserved_generation: 0,
|
|
reservation_table: None,
|
|
hw_id: 0,
|
|
thread_id: 0,
|
|
cycle_count: 0,
|
|
timebase: 0,
|
|
dec: 0,
|
|
}
|
|
}
|
|
|
|
/// Get the full 32-bit condition register.
|
|
pub fn cr(&self) -> u32 {
|
|
let mut val = 0u32;
|
|
for (i, field) in self.cr.iter().enumerate() {
|
|
val |= (field.as_u8() as u32) << (28 - i * 4);
|
|
}
|
|
val
|
|
}
|
|
|
|
/// Set the full 32-bit condition register.
|
|
pub fn set_cr(&mut self, val: u32) {
|
|
for i in 0..8 {
|
|
self.cr[i] = CrField::from_u8(((val >> (28 - i * 4)) & 0xF) as u8);
|
|
}
|
|
}
|
|
|
|
/// Get a single CR bit by absolute bit number (0-31).
|
|
pub fn get_cr_bit(&self, bit: u32) -> bool {
|
|
let field = (bit / 4) as usize;
|
|
let sub = bit % 4;
|
|
match sub {
|
|
0 => self.cr[field].lt,
|
|
1 => self.cr[field].gt,
|
|
2 => self.cr[field].eq,
|
|
3 => self.cr[field].so,
|
|
_ => unreachable!(),
|
|
}
|
|
}
|
|
|
|
/// Set a single CR bit by absolute bit number (0-31).
|
|
pub fn set_cr_bit(&mut self, bit: u32, val: bool) {
|
|
let field = (bit / 4) as usize;
|
|
let sub = bit % 4;
|
|
match sub {
|
|
0 => self.cr[field].lt = val,
|
|
1 => self.cr[field].gt = val,
|
|
2 => self.cr[field].eq = val,
|
|
3 => self.cr[field].so = val,
|
|
_ => unreachable!(),
|
|
}
|
|
}
|
|
|
|
/// Update a condition register field based on a comparison result (signed).
|
|
pub fn update_cr_signed(&mut self, field: usize, val: i64) {
|
|
self.cr[field] = CrField {
|
|
lt: val < 0,
|
|
gt: val > 0,
|
|
eq: val == 0,
|
|
so: self.xer_so != 0,
|
|
};
|
|
}
|
|
|
|
/// Update a condition register field based on a comparison result (unsigned).
|
|
pub fn update_cr_unsigned(&mut self, field: usize, a: u64, b: u64) {
|
|
self.cr[field] = CrField {
|
|
lt: a < b,
|
|
gt: a > b,
|
|
eq: a == b,
|
|
so: self.xer_so != 0,
|
|
};
|
|
}
|
|
|
|
/// Get the full XER register value.
|
|
pub fn xer(&self) -> u32 {
|
|
((self.xer_so as u32) << 31) | ((self.xer_ov as u32) << 30) | ((self.xer_ca as u32) << 29)
|
|
}
|
|
|
|
/// Set XER from a full 32-bit value.
|
|
pub fn set_xer(&mut self, val: u32) {
|
|
self.xer_so = ((val >> 31) & 1) as u8;
|
|
self.xer_ov = ((val >> 30) & 1) as u8;
|
|
self.xer_ca = ((val >> 29) & 1) as u8;
|
|
}
|
|
|
|
/// Read the VSCR SAT (sticky saturation) bit.
|
|
pub fn vscr_sat(&self) -> bool {
|
|
(self.vscr.u32x4(3) & VSCR_SAT_MASK) != 0
|
|
}
|
|
|
|
/// Set or clear VSCR SAT. Preserves the NJ bit (and any other word-3 bits).
|
|
pub fn set_vscr_sat(&mut self, v: bool) {
|
|
let mut w = self.vscr.u32x4(3);
|
|
if v {
|
|
w |= VSCR_SAT_MASK;
|
|
} else {
|
|
w &= !VSCR_SAT_MASK;
|
|
}
|
|
self.vscr.set_u32x4(3, w);
|
|
}
|
|
|
|
/// Read the VSCR NJ (non-Java mode / flush-denormals) bit.
|
|
pub fn vscr_nj(&self) -> bool {
|
|
(self.vscr.u32x4(3) & VSCR_NJ_MASK) != 0
|
|
}
|
|
}
|
|
|
|
impl Default for PpcContext {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|