fix(cpu): PPCBUG-511 PPCBUG-512 PPCBUG-513 PPCBUG-514 add invalidate_for_write to VMX stores
Continuation of the PPCBUG-107 cascade sweep. All 16 VMX store opcodes
(stvx/stvxl, stvebx/stvehx/stvewx, stvlx/stvrx and 128 variants of each)
now invalidate the reservation table before writing.
stvlx/stvrx partial-vector stores can write at non-16-byte-aligned EAs;
they invalidate both potentially-touched cache lines.
stvewx128 currently writes 16 bytes at the wrong EA scope (PPCBUG-510);
the invalidate guard fires at that over-wide EA today and will narrow
automatically when PPCBUG-510 is fixed in P3.
Affected:
PPCBUG-511 stvx, stvx128, stvxl, stvxl128
PPCBUG-512 stvebx, stvehx, stvewx, stvewx128
PPCBUG-513 stvlx, stvlx128, stvlxl, stvlxl128
PPCBUG-514 stvrx, stvrx128, stvrxl, stvrxl128
Tests: lwarx_then_plain_stvx_invalidates_reservation,
lwarx_then_plain_stvlx_invalidates_reservation
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1698,6 +1698,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
PpcOpcode::stvx => {
|
||||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||||
let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32;
|
||||
// PPCBUG-511: stvx was missing invalidate_for_write.
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
let bytes = ctx.vr[instr.rs()].as_bytes();
|
||||
for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); }
|
||||
ctx.pc += 4;
|
||||
@@ -1705,6 +1709,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
PpcOpcode::stvx128 => {
|
||||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||||
let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32;
|
||||
// PPCBUG-511: stvx128 was missing invalidate_for_write.
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
let bytes = ctx.vr[instr.vs128()].as_bytes();
|
||||
for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); }
|
||||
ctx.pc += 4;
|
||||
@@ -1756,6 +1764,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
// Store vS[EA & 0xF] (1 byte) to memory at EA.
|
||||
let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||||
let ea = base.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||||
// PPCBUG-512: stvebx was missing invalidate_for_write.
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
let slot = (ea & 0xF) as usize;
|
||||
let bytes = ctx.vr[instr.rs()].as_bytes();
|
||||
mem.write_u8(ea, bytes[slot]);
|
||||
@@ -1766,6 +1778,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||||
let ea_unaligned = base.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||||
let ea = ea_unaligned & !0x1u32;
|
||||
// PPCBUG-512: stvehx was missing invalidate_for_write.
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
let slot = ((ea_unaligned & 0xF) >> 1) as usize;
|
||||
let bytes = ctx.vr[instr.rs()].as_bytes();
|
||||
let h = ((bytes[slot * 2] as u16) << 8) | (bytes[slot * 2 + 1] as u16);
|
||||
@@ -1777,6 +1793,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||||
let ea_unaligned = base.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||||
let ea = ea_unaligned & !0x3u32;
|
||||
// PPCBUG-512: stvewx was missing invalidate_for_write.
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
let slot = ((ea_unaligned & 0xF) >> 2) as usize;
|
||||
let bytes = ctx.vr[instr.rs()].as_bytes();
|
||||
let w = ((bytes[slot * 4] as u32) << 24)
|
||||
@@ -1799,6 +1819,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
PpcOpcode::stvxl | PpcOpcode::stvxl128 => {
|
||||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||||
let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32;
|
||||
// PPCBUG-511: stvxl/stvxl128 were missing invalidate_for_write.
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
let vs = if matches!(instr.opcode, PpcOpcode::stvxl128) { instr.vs128() } else { instr.rs() };
|
||||
let bytes = ctx.vr[vs].as_bytes();
|
||||
for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); }
|
||||
@@ -2845,21 +2869,63 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
}
|
||||
PpcOpcode::stvlx | PpcOpcode::stvlxl => {
|
||||
let ea = ea_indexed(ctx, instr);
|
||||
// PPCBUG-513: stvlx/stvlxl were missing invalidate_for_write.
|
||||
// store_vector_left writes [ea, (ea & !0xF)+15]; in the worst case (ea & 0xF == 0)
|
||||
// that is exactly 16 bytes all within the same 16-byte block, so ea+15 lands in the
|
||||
// same 128-byte cache line. Two-call form is kept for defensive correctness.
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() {
|
||||
let first_line = ea & !RESERVATION_MASK;
|
||||
let last_line = ea.wrapping_add(15) & !RESERVATION_MASK;
|
||||
t.invalidate_for_write(first_line);
|
||||
if last_line != first_line { t.invalidate_for_write(last_line); }
|
||||
}
|
||||
}
|
||||
crate::vmx::store_vector_left(mem, ea, ctx.vr[instr.rs()]);
|
||||
ctx.pc += 4;
|
||||
}
|
||||
PpcOpcode::stvlx128 | PpcOpcode::stvlxl128 => {
|
||||
let ea = ea_indexed(ctx, instr);
|
||||
// PPCBUG-513: stvlx128/stvlxl128 were missing invalidate_for_write.
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() {
|
||||
let first_line = ea & !RESERVATION_MASK;
|
||||
let last_line = ea.wrapping_add(15) & !RESERVATION_MASK;
|
||||
t.invalidate_for_write(first_line);
|
||||
if last_line != first_line { t.invalidate_for_write(last_line); }
|
||||
}
|
||||
}
|
||||
crate::vmx::store_vector_left(mem, ea, ctx.vr[instr.vs128()]);
|
||||
ctx.pc += 4;
|
||||
}
|
||||
PpcOpcode::stvrx | PpcOpcode::stvrxl => {
|
||||
let ea = ea_indexed(ctx, instr);
|
||||
// PPCBUG-514: stvrx/stvrxl were missing invalidate_for_write.
|
||||
// store_vector_right writes [ea & !0xF, ea-1] (up to 15 bytes, all within a single
|
||||
// 16-byte-aligned block). Two-call form is kept for defensive correctness.
|
||||
// stvrx at shift==0 is a no-op; the guard fires unconditionally (cheap).
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() {
|
||||
let first_line = ea & !RESERVATION_MASK;
|
||||
let last_line = ea.wrapping_add(15) & !RESERVATION_MASK;
|
||||
t.invalidate_for_write(first_line);
|
||||
if last_line != first_line { t.invalidate_for_write(last_line); }
|
||||
}
|
||||
}
|
||||
crate::vmx::store_vector_right(mem, ea, ctx.vr[instr.rs()]);
|
||||
ctx.pc += 4;
|
||||
}
|
||||
PpcOpcode::stvrx128 | PpcOpcode::stvrxl128 => {
|
||||
let ea = ea_indexed(ctx, instr);
|
||||
// PPCBUG-514: stvrx128/stvrxl128 were missing invalidate_for_write.
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() {
|
||||
let first_line = ea & !RESERVATION_MASK;
|
||||
let last_line = ea.wrapping_add(15) & !RESERVATION_MASK;
|
||||
t.invalidate_for_write(first_line);
|
||||
if last_line != first_line { t.invalidate_for_write(last_line); }
|
||||
}
|
||||
}
|
||||
crate::vmx::store_vector_right(mem, ea, ctx.vr[instr.vs128()]);
|
||||
ctx.pc += 4;
|
||||
}
|
||||
@@ -2875,6 +2941,13 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
}
|
||||
PpcOpcode::stvewx128 => {
|
||||
let ea = ea_indexed(ctx, instr) & !0xF;
|
||||
// TODO PPCBUG-510: stvewx128 currently writes 16 bytes at ea & !0xF; the EA scope is
|
||||
// wrong (should be word-aligned, 4 bytes only). When P3 fixes EA, this invalidate's
|
||||
// range narrows automatically.
|
||||
// PPCBUG-512: stvewx128 was missing invalidate_for_write.
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
let bytes = ctx.vr[instr.vs128()].as_bytes();
|
||||
for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); }
|
||||
ctx.pc += 4;
|
||||
@@ -5906,6 +5979,102 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- PPCBUG-511/513: invalidate_for_write via VMX stores ----------
|
||||
|
||||
/// PPCBUG-511: A plain `stvx` to a reserved line must invalidate the
|
||||
/// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0).
|
||||
#[test]
|
||||
fn lwarx_then_plain_stvx_invalidates_reservation() {
|
||||
let table = std::sync::Arc::new(crate::ReservationTable::new());
|
||||
table.enable();
|
||||
|
||||
let mut ctx = PpcContext::new();
|
||||
ctx.reservation_table = Some(table.clone());
|
||||
ctx.hw_id = 0;
|
||||
let mut mem = TestMem::new();
|
||||
|
||||
// r4=0x1000 (reservation + store address), r5=0 (index for lwarx/stwcx.), r7=stwcx val.
|
||||
ctx.gpr[4] = 0x1000;
|
||||
ctx.gpr[5] = 0;
|
||||
ctx.gpr[7] = 0xCCCC_CCCC;
|
||||
// VR 0: recognizable pattern to confirm the store lands.
|
||||
ctx.vr[0] = xenia_types::Vec128::from_bytes([0xAA; 16]);
|
||||
|
||||
// Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20)
|
||||
let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
|
||||
write_instr(&mut mem, 0, lwarx);
|
||||
// Instr 1: stvx v0, r0, r4 (opcode 31, XO 231; rA=0 → base=0, EA = 0 + r4 = 0x1000, aligned)
|
||||
// (31<<26) | (vs=0<<21) | (ra=0<<16) | (rb=4<<11) | (231<<1)
|
||||
let stvx = (31u32 << 26) | (0 << 21) | (0 << 16) | (4 << 11) | (231 << 1);
|
||||
write_instr(&mut mem, 4, stvx);
|
||||
// Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1)
|
||||
let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
|
||||
write_instr(&mut mem, 8, stwcx);
|
||||
|
||||
// Execute lwarx — reserves 0x1000's cache line.
|
||||
ctx.pc = 0;
|
||||
step(&mut ctx, &mut mem);
|
||||
assert!(ctx.has_reservation, "lwarx must set has_reservation");
|
||||
|
||||
// Execute stvx — must call invalidate_for_write and clear the reservation.
|
||||
step(&mut ctx, &mut mem);
|
||||
assert_eq!(mem.read_u8(0x1000), 0xAA, "stvx must write the VR bytes");
|
||||
|
||||
// Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0).
|
||||
step(&mut ctx, &mut mem);
|
||||
assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by stvx");
|
||||
assert_eq!(mem.read_u8(0x1000), 0xAA, "stwcx. must not overwrite on failure");
|
||||
}
|
||||
|
||||
/// PPCBUG-513: A plain `stvlx` to a reserved line must invalidate the
|
||||
/// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0).
|
||||
/// stvlx with EA=0x1003 writes bytes 0x1003-0x100F (13 bytes from VR0's high lanes).
|
||||
#[test]
|
||||
fn lwarx_then_plain_stvlx_invalidates_reservation() {
|
||||
let table = std::sync::Arc::new(crate::ReservationTable::new());
|
||||
table.enable();
|
||||
|
||||
let mut ctx = PpcContext::new();
|
||||
ctx.reservation_table = Some(table.clone());
|
||||
ctx.hw_id = 0;
|
||||
let mut mem = TestMem::new();
|
||||
|
||||
// Reserve at 0x1000 (same cache line as the stvlx target 0x1003).
|
||||
ctx.gpr[4] = 0x1000; // lwarx/stwcx. reservation address
|
||||
ctx.gpr[5] = 0; // index register (0 for lwarx/stwcx.)
|
||||
ctx.gpr[6] = 0x1003; // stvlx EA: rb=6, ra=0 → ea = 0 + 0x1003 = 0x1003
|
||||
ctx.gpr[7] = 0xCCCC_CCCC; // stwcx. value
|
||||
// VR 0: recognizable pattern.
|
||||
ctx.vr[0] = xenia_types::Vec128::from_bytes([0xBB; 16]);
|
||||
|
||||
// Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20)
|
||||
let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
|
||||
write_instr(&mut mem, 0, lwarx);
|
||||
// Instr 1: stvlx v0, r0, r6 (opcode 31, XO 647; rA=0 → base=0, EA = r6 = 0x1003)
|
||||
// store_vector_left writes shift=3 skipped bytes, then bytes 3..15 of VR0 → 0x1003..0x100F
|
||||
// (31<<26) | (vs=0<<21) | (ra=0<<16) | (rb=6<<11) | (647<<1)
|
||||
let stvlx = (31u32 << 26) | (0 << 21) | (0 << 16) | (6 << 11) | (647 << 1);
|
||||
write_instr(&mut mem, 4, stvlx);
|
||||
// Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1)
|
||||
let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
|
||||
write_instr(&mut mem, 8, stwcx);
|
||||
|
||||
// Execute lwarx — reserves 0x1000's cache line.
|
||||
ctx.pc = 0;
|
||||
step(&mut ctx, &mut mem);
|
||||
assert!(ctx.has_reservation, "lwarx must set has_reservation");
|
||||
|
||||
// Execute stvlx — must call invalidate_for_write and clear the reservation.
|
||||
step(&mut ctx, &mut mem);
|
||||
// store_vector_left(ea=0x1003): shift=3, n=13 → writes bytes 0x1003-0x100F = 0xBB.
|
||||
assert_eq!(mem.read_u8(0x1003), 0xBB, "stvlx must write VR bytes starting at EA");
|
||||
assert_eq!(mem.read_u8(0x100F), 0xBB, "stvlx must write up to (ea & !0xF)+15");
|
||||
|
||||
// Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0).
|
||||
step(&mut ctx, &mut mem);
|
||||
assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by stvlx");
|
||||
}
|
||||
|
||||
/// Regression: `lvebx` must preserve the prior contents of the
|
||||
/// destination VR for lanes other than the loaded byte. Previously
|
||||
/// the handler started from a zeroed buffer.
|
||||
|
||||
Reference in New Issue
Block a user