fix(cpu): PPCBUG-511 PPCBUG-512 PPCBUG-513 PPCBUG-514 add invalidate_for_write to VMX stores

Continuation of the PPCBUG-107 cascade sweep. All 16 VMX store opcodes
(stvx/stvxl, stvebx/stvehx/stvewx, stvlx/stvrx and 128 variants of each)
now invalidate the reservation table before writing.

stvlx/stvrx partial-vector stores can write at non-16-byte-aligned EAs;
they invalidate both potentially-touched cache lines.

stvewx128 currently writes 16 bytes at the wrong EA scope (PPCBUG-510);
the invalidate guard fires at that over-wide EA today and will narrow
automatically when PPCBUG-510 is fixed in P3.

Affected:
  PPCBUG-511  stvx, stvx128, stvxl, stvxl128
  PPCBUG-512  stvebx, stvehx, stvewx, stvewx128
  PPCBUG-513  stvlx, stvlx128, stvlxl, stvlxl128
  PPCBUG-514  stvrx, stvrx128, stvrxl, stvrxl128

Tests: lwarx_then_plain_stvx_invalidates_reservation,
       lwarx_then_plain_stvlx_invalidates_reservation

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-01 17:36:17 +02:00
parent af54eb28bd
commit d4e227eeab

View File

@@ -1698,6 +1698,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
PpcOpcode::stvx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32;
// PPCBUG-511: stvx was missing invalidate_for_write.
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
let bytes = ctx.vr[instr.rs()].as_bytes();
for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); }
ctx.pc += 4;
@@ -1705,6 +1709,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
PpcOpcode::stvx128 => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32;
// PPCBUG-511: stvx128 was missing invalidate_for_write.
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
let bytes = ctx.vr[instr.vs128()].as_bytes();
for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); }
ctx.pc += 4;
@@ -1756,6 +1764,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
// Store vS[EA & 0xF] (1 byte) to memory at EA.
let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = base.wrapping_add(ctx.gpr[instr.rb()]) as u32;
// PPCBUG-512: stvebx was missing invalidate_for_write.
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
let slot = (ea & 0xF) as usize;
let bytes = ctx.vr[instr.rs()].as_bytes();
mem.write_u8(ea, bytes[slot]);
@@ -1766,6 +1778,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea_unaligned = base.wrapping_add(ctx.gpr[instr.rb()]) as u32;
let ea = ea_unaligned & !0x1u32;
// PPCBUG-512: stvehx was missing invalidate_for_write.
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
let slot = ((ea_unaligned & 0xF) >> 1) as usize;
let bytes = ctx.vr[instr.rs()].as_bytes();
let h = ((bytes[slot * 2] as u16) << 8) | (bytes[slot * 2 + 1] as u16);
@@ -1777,6 +1793,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea_unaligned = base.wrapping_add(ctx.gpr[instr.rb()]) as u32;
let ea = ea_unaligned & !0x3u32;
// PPCBUG-512: stvewx was missing invalidate_for_write.
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
let slot = ((ea_unaligned & 0xF) >> 2) as usize;
let bytes = ctx.vr[instr.rs()].as_bytes();
let w = ((bytes[slot * 4] as u32) << 24)
@@ -1799,6 +1819,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
PpcOpcode::stvxl | PpcOpcode::stvxl128 => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32;
// PPCBUG-511: stvxl/stvxl128 were missing invalidate_for_write.
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
let vs = if matches!(instr.opcode, PpcOpcode::stvxl128) { instr.vs128() } else { instr.rs() };
let bytes = ctx.vr[vs].as_bytes();
for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); }
@@ -2845,21 +2869,63 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
}
PpcOpcode::stvlx | PpcOpcode::stvlxl => {
let ea = ea_indexed(ctx, instr);
// PPCBUG-513: stvlx/stvlxl were missing invalidate_for_write.
// store_vector_left writes [ea, (ea & !0xF)+15]; in the worst case (ea & 0xF == 0)
// that is exactly 16 bytes all within the same 16-byte block, so ea+15 lands in the
// same 128-byte cache line. Two-call form is kept for defensive correctness.
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() {
let first_line = ea & !RESERVATION_MASK;
let last_line = ea.wrapping_add(15) & !RESERVATION_MASK;
t.invalidate_for_write(first_line);
if last_line != first_line { t.invalidate_for_write(last_line); }
}
}
crate::vmx::store_vector_left(mem, ea, ctx.vr[instr.rs()]);
ctx.pc += 4;
}
PpcOpcode::stvlx128 | PpcOpcode::stvlxl128 => {
let ea = ea_indexed(ctx, instr);
// PPCBUG-513: stvlx128/stvlxl128 were missing invalidate_for_write.
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() {
let first_line = ea & !RESERVATION_MASK;
let last_line = ea.wrapping_add(15) & !RESERVATION_MASK;
t.invalidate_for_write(first_line);
if last_line != first_line { t.invalidate_for_write(last_line); }
}
}
crate::vmx::store_vector_left(mem, ea, ctx.vr[instr.vs128()]);
ctx.pc += 4;
}
PpcOpcode::stvrx | PpcOpcode::stvrxl => {
let ea = ea_indexed(ctx, instr);
// PPCBUG-514: stvrx/stvrxl were missing invalidate_for_write.
// store_vector_right writes [ea & !0xF, ea-1] (up to 15 bytes, all within a single
// 16-byte-aligned block). Two-call form is kept for defensive correctness.
// stvrx at shift==0 is a no-op; the guard fires unconditionally (cheap).
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() {
let first_line = ea & !RESERVATION_MASK;
let last_line = ea.wrapping_add(15) & !RESERVATION_MASK;
t.invalidate_for_write(first_line);
if last_line != first_line { t.invalidate_for_write(last_line); }
}
}
crate::vmx::store_vector_right(mem, ea, ctx.vr[instr.rs()]);
ctx.pc += 4;
}
PpcOpcode::stvrx128 | PpcOpcode::stvrxl128 => {
let ea = ea_indexed(ctx, instr);
// PPCBUG-514: stvrx128/stvrxl128 were missing invalidate_for_write.
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() {
let first_line = ea & !RESERVATION_MASK;
let last_line = ea.wrapping_add(15) & !RESERVATION_MASK;
t.invalidate_for_write(first_line);
if last_line != first_line { t.invalidate_for_write(last_line); }
}
}
crate::vmx::store_vector_right(mem, ea, ctx.vr[instr.vs128()]);
ctx.pc += 4;
}
@@ -2875,6 +2941,13 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
}
PpcOpcode::stvewx128 => {
let ea = ea_indexed(ctx, instr) & !0xF;
// TODO PPCBUG-510: stvewx128 currently writes 16 bytes at ea & !0xF; the EA scope is
// wrong (should be word-aligned, 4 bytes only). When P3 fixes EA, this invalidate's
// range narrows automatically.
// PPCBUG-512: stvewx128 was missing invalidate_for_write.
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
let bytes = ctx.vr[instr.vs128()].as_bytes();
for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); }
ctx.pc += 4;
@@ -5906,6 +5979,102 @@ mod tests {
}
}
// ---------- PPCBUG-511/513: invalidate_for_write via VMX stores ----------
/// PPCBUG-511: A plain `stvx` to a reserved line must invalidate the
/// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0).
#[test]
fn lwarx_then_plain_stvx_invalidates_reservation() {
let table = std::sync::Arc::new(crate::ReservationTable::new());
table.enable();
let mut ctx = PpcContext::new();
ctx.reservation_table = Some(table.clone());
ctx.hw_id = 0;
let mut mem = TestMem::new();
// r4=0x1000 (reservation + store address), r5=0 (index for lwarx/stwcx.), r7=stwcx val.
ctx.gpr[4] = 0x1000;
ctx.gpr[5] = 0;
ctx.gpr[7] = 0xCCCC_CCCC;
// VR 0: recognizable pattern to confirm the store lands.
ctx.vr[0] = xenia_types::Vec128::from_bytes([0xAA; 16]);
// Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20)
let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
write_instr(&mut mem, 0, lwarx);
// Instr 1: stvx v0, r0, r4 (opcode 31, XO 231; rA=0 → base=0, EA = 0 + r4 = 0x1000, aligned)
// (31<<26) | (vs=0<<21) | (ra=0<<16) | (rb=4<<11) | (231<<1)
let stvx = (31u32 << 26) | (0 << 21) | (0 << 16) | (4 << 11) | (231 << 1);
write_instr(&mut mem, 4, stvx);
// Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1)
let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
write_instr(&mut mem, 8, stwcx);
// Execute lwarx — reserves 0x1000's cache line.
ctx.pc = 0;
step(&mut ctx, &mut mem);
assert!(ctx.has_reservation, "lwarx must set has_reservation");
// Execute stvx — must call invalidate_for_write and clear the reservation.
step(&mut ctx, &mut mem);
assert_eq!(mem.read_u8(0x1000), 0xAA, "stvx must write the VR bytes");
// Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0).
step(&mut ctx, &mut mem);
assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by stvx");
assert_eq!(mem.read_u8(0x1000), 0xAA, "stwcx. must not overwrite on failure");
}
/// PPCBUG-513: A plain `stvlx` to a reserved line must invalidate the
/// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0).
/// stvlx with EA=0x1003 writes bytes 0x1003-0x100F (13 bytes from VR0's high lanes).
#[test]
fn lwarx_then_plain_stvlx_invalidates_reservation() {
let table = std::sync::Arc::new(crate::ReservationTable::new());
table.enable();
let mut ctx = PpcContext::new();
ctx.reservation_table = Some(table.clone());
ctx.hw_id = 0;
let mut mem = TestMem::new();
// Reserve at 0x1000 (same cache line as the stvlx target 0x1003).
ctx.gpr[4] = 0x1000; // lwarx/stwcx. reservation address
ctx.gpr[5] = 0; // index register (0 for lwarx/stwcx.)
ctx.gpr[6] = 0x1003; // stvlx EA: rb=6, ra=0 → ea = 0 + 0x1003 = 0x1003
ctx.gpr[7] = 0xCCCC_CCCC; // stwcx. value
// VR 0: recognizable pattern.
ctx.vr[0] = xenia_types::Vec128::from_bytes([0xBB; 16]);
// Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20)
let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
write_instr(&mut mem, 0, lwarx);
// Instr 1: stvlx v0, r0, r6 (opcode 31, XO 647; rA=0 → base=0, EA = r6 = 0x1003)
// store_vector_left writes shift=3 skipped bytes, then bytes 3..15 of VR0 → 0x1003..0x100F
// (31<<26) | (vs=0<<21) | (ra=0<<16) | (rb=6<<11) | (647<<1)
let stvlx = (31u32 << 26) | (0 << 21) | (0 << 16) | (6 << 11) | (647 << 1);
write_instr(&mut mem, 4, stvlx);
// Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1)
let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
write_instr(&mut mem, 8, stwcx);
// Execute lwarx — reserves 0x1000's cache line.
ctx.pc = 0;
step(&mut ctx, &mut mem);
assert!(ctx.has_reservation, "lwarx must set has_reservation");
// Execute stvlx — must call invalidate_for_write and clear the reservation.
step(&mut ctx, &mut mem);
// store_vector_left(ea=0x1003): shift=3, n=13 → writes bytes 0x1003-0x100F = 0xBB.
assert_eq!(mem.read_u8(0x1003), 0xBB, "stvlx must write VR bytes starting at EA");
assert_eq!(mem.read_u8(0x100F), 0xBB, "stvlx must write up to (ea & !0xF)+15");
// Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0).
step(&mut ctx, &mut mem);
assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by stvlx");
}
/// Regression: `lvebx` must preserve the prior contents of the
/// destination VR for lanes other than the loaded byte. Previously
/// the handler started from a zeroed buffer.