fix(cpu): PPCBUG-160 PPCBUG-167 add invalidate_for_write to multiple/string + FP stores

Continuation of the PPCBUG-107 cascade sweep. stmw/stswi/stswx (multiple
and string stores) and the 9 floating-point stores now invalidate the
reservation table before writing.

stmw can span two cache lines when the written range crosses a line
boundary; the guard iterates over all touched lines so that a multi-line
stmw gets the same invalidation guarantee as a single-line store.

Affected:
  PPCBUG-160  3 multiple/string stores: stmw, stswi, stswx
  PPCBUG-167  9 FP stores: stfs, stfsu, stfsx, stfsux,
                            stfd, stfdu, stfdx, stfdux, stfiwx

Tests: lwarx_then_plain_stmw_spans_two_lines_and_invalidates,
       lwarx_then_plain_stfd_invalidates_reservation

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-01 17:24:46 +02:00
parent 24d347436a
commit af54eb28bd

View File

@@ -1329,11 +1329,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
PpcOpcode::stfs => {
// EA = (rA, or 0 when the rA field is 0) + d, wrapping, truncated to 32 bits.
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
// PPCBUG-167: if a reservation table is attached, enabled, and reports
// active reservers, invalidate for `ea` before the plain store happens.
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
// The FPR value is narrowed with `as f32` and written as a single.
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
ctx.pc += 4;
}
PpcOpcode::stfsu => {
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
@@ -1341,11 +1347,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
PpcOpcode::stfsx => {
// Indexed form: EA = (rA, or 0 when the rA field is 0) + rB, wrapping,
// truncated to 32 bits.
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
// PPCBUG-167: if a reservation table is attached, enabled, and reports
// active reservers, invalidate for `ea` before the plain store happens.
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
// The FPR value is narrowed with `as f32` and written as a single.
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
ctx.pc += 4;
}
PpcOpcode::stfsux => {
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
@@ -1353,11 +1365,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
PpcOpcode::stfd => {
// EA = (rA, or 0 when the rA field is 0) + d, wrapping, truncated to 32 bits.
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
// PPCBUG-167: if a reservation table is attached, enabled, and reports
// active reservers, invalidate for `ea` before the plain store happens.
// NOTE(review): only `ea` itself is passed; an unaligned 8-byte store that
// straddles a reservation-line boundary would not invalidate the second
// line — confirm whether unaligned FP stores can reach this arm.
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
// The full 64-bit FPR value is written unchanged.
mem.write_f64(ea, ctx.fpr[instr.rs()]);
ctx.pc += 4;
}
PpcOpcode::stfdu => {
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_f64(ea, ctx.fpr[instr.rs()]);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
@@ -1365,11 +1383,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
PpcOpcode::stfdx => {
// Indexed form: EA = (rA, or 0 when the rA field is 0) + rB, wrapping,
// truncated to 32 bits.
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
// PPCBUG-167: if a reservation table is attached, enabled, and reports
// active reservers, invalidate for `ea` before the plain store happens.
// NOTE(review): only `ea` itself is passed; an unaligned 8-byte store that
// straddles a reservation-line boundary would not invalidate the second
// line — confirm whether unaligned FP stores can reach this arm.
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
// The full 64-bit FPR value is written unchanged.
mem.write_f64(ea, ctx.fpr[instr.rs()]);
ctx.pc += 4;
}
PpcOpcode::stfdux => {
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_f64(ea, ctx.fpr[instr.rs()]);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
@@ -1378,6 +1402,9 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
// Store FP as integer word: stores low 32 bits of FPR as-is
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u32(ea, ctx.fpr[instr.rs()].to_bits() as u32);
ctx.pc += 4;
}
@@ -1407,6 +1434,9 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
let nb = if instr.rb() == 0 { 32 } else { instr.rb() as u32 };
let mut rs = instr.rs();
let mut bytes_left = nb;
// PPCBUG-160: stswi writes `nb` bytes (1..=32) starting at `ea`, so the run
// can straddle a reservation-line boundary. Invalidate for the first byte
// and, when the last byte falls in a different line, for that line as well.
// 32 bytes is shorter than a line, so at most two lines are ever touched.
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
    if t.has_active_reservers() {
        t.invalidate_for_write(ea);
        // `nb` is never 0 here (an NB field of 0 was mapped to 32 above).
        let last_ea = ea.wrapping_add(nb - 1);
        if (ea ^ last_ea) & !RESERVATION_MASK != 0 {
            t.invalidate_for_write(last_ea);
        }
    }
}
while bytes_left > 0 {
let val = ctx.gpr[rs] as u32;
for byte_idx in 0..4 {
@@ -1569,6 +1599,22 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
PpcOpcode::stmw => {
let mut ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
ea = ea.wrapping_add(instr.d() as i64 as u64);
// PPCBUG-160: stmw writes (32 - rs) consecutive words starting at `ea`, so
// the written range touches a second reservation line whenever it crosses
// a line boundary. Invalidate every touched line so that a reservation on
// a later line is cleared too (same guarantee as single-word stores).
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
    if t.has_active_reservers() {
        let start_ea = ea as u32;
        // Address of the last byte written; wraps together with the stores.
        let last_ea = start_ea.wrapping_add((32 - instr.rs() as u32) * 4).wrapping_sub(1);
        let line_size = RESERVATION_MASK + 1;
        let last_line = last_ea & !RESERVATION_MASK;
        let mut line = start_ea & !RESERVATION_MASK;
        loop {
            t.invalidate_for_write(line);
            // Terminate on equality rather than `>=`: if the range wraps past
            // 0xFFFF_FFFF the last line is numerically *below* the first, and
            // a `>=` test would exit before invalidating it. Both values are
            // line-aligned and `last_line` lies ahead of `line` (mod 2^32),
            // so stepping by `line_size` always reaches it.
            if line == last_line { break; }
            line = line.wrapping_add(line_size);
        }
    }
}
for r in instr.rs()..32 {
mem.write_u32(ea as u32, ctx.gpr[r] as u32);
ea = ea.wrapping_add(4);
@@ -4248,6 +4294,9 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
let nb = ctx.xer() & 0x7F;
let mut rs = instr.rs();
let mut bytes_left = nb;
// PPCBUG-160: stswx writes `nb` bytes (XER & 0x7F, i.e. 0..=127) starting at
// `ea`, so the run can straddle a reservation-line boundary. Invalidate for
// the first byte and, when the last byte falls in a different line, for
// that line as well. 127 bytes is shorter than a line, so at most two lines
// are ever touched.
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
    if t.has_active_reservers() {
        // Kept unconditional (as before) even when nb == 0.
        t.invalidate_for_write(ea);
        if nb > 1 {
            // nb <= 127, so the cast is lossless (and a no-op if `nb` is
            // already u32).
            let last_ea = ea.wrapping_add((nb - 1) as u32);
            if (ea ^ last_ea) & !RESERVATION_MASK != 0 {
                t.invalidate_for_write(last_ea);
            }
        }
    }
}
while bytes_left > 0 {
let val = ctx.gpr[rs] as u32;
for byte_idx in 0..4 {
@@ -5432,6 +5481,117 @@ mod tests {
assert_eq!(mem.read_u64(0x1000), 0xDEADBEEF_CAFEBABEu64, "stwcx. must not overwrite on failure");
}
// ---------- PPCBUG-160: stmw multi-line invalidation ----------
/// PPCBUG-160: `stmw r28, 0x78(r8)` with r8=0x1000 writes four words at
/// 0x1078, 0x107C, 0x1080, 0x1084 — crossing the cache-line boundary at
/// 0x1080 (RESERVATION_MASK=0x7F, line size=128 bytes).
///
/// A reservation on the *second* line (taken via `lwarx` at r4=0x1080) must
/// be invalidated even though the store starts in the first line
/// (0x1000-0x107F). This verifies the multi-line loop added to the stmw arm.
#[test]
fn lwarx_then_plain_stmw_spans_two_lines_and_invalidates() {
let table = std::sync::Arc::new(crate::ReservationTable::new());
table.enable();
let mut ctx = PpcContext::new();
ctx.reservation_table = Some(table.clone());
ctx.hw_id = 0;
let mut mem = TestMem::new();
// r4=0x1080 (reservation address — second line), r5=0 (index for lwarx/stwcx.)
// r8=0x1000 (stmw base), r28-r31 = store values
ctx.gpr[4] = 0x1080;
ctx.gpr[5] = 0;
ctx.gpr[8] = 0x1000;
ctx.gpr[7] = 0xCCCC_CCCC; // stwcx. value
ctx.gpr[28] = 0xAAAA_0001;
ctx.gpr[29] = 0xBBBB_0002;
ctx.gpr[30] = 0xCCCC_0003;
ctx.gpr[31] = 0xDDDD_0004;
// Instr 0: lwarx r3, r4, r5 — reserves line containing 0x1080
let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
write_instr(&mut mem, 0, lwarx);
// Instr 1: stmw r28, 0x78(r8) — writes 4 words at 0x1078,0x107C,0x1080,0x1084
// opcode=47, rs=28, ra=8, d=0x0078
let stmw = (47u32 << 26) | (28 << 21) | (8 << 16) | 0x0078;
write_instr(&mut mem, 4, stmw);
// Instr 2: stwcx. r7, r4, r5 — should fail (line 2 reservation invalidated)
let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
write_instr(&mut mem, 8, stwcx);
// Execute lwarx — reserves 0x1080's cache line.
ctx.pc = 0;
step(&mut ctx, &mut mem);
assert!(ctx.has_reservation, "lwarx must set has_reservation");
// Execute stmw — must invalidate both lines including the one reserved at 0x1080.
step(&mut ctx, &mut mem);
assert_eq!(mem.read_u32(0x1078), 0xAAAA_0001, "stmw word 0 must land");
assert_eq!(mem.read_u32(0x107C), 0xBBBB_0002, "stmw word 1 must land");
assert_eq!(mem.read_u32(0x1080), 0xCCCC_0003, "stmw word 2 (line 2) must land");
assert_eq!(mem.read_u32(0x1084), 0xDDDD_0004, "stmw word 3 must land");
// Execute stwcx. — reservation on line 2 (0x1080) was invalidated; must fail.
step(&mut ctx, &mut mem);
assert!(!ctx.cr[0].eq, "stwcx. must fail: stmw invalidated the reservation on line 2");
// The word written by stmw must survive: a failed stwcx. stores nothing.
assert_eq!(mem.read_u32(0x1080), 0xCCCC_0003, "stwcx. must not overwrite on failure");
}
// ---------- PPCBUG-167: invalidate_for_write via plain stfd ----------
/// PPCBUG-167: A plain `stfd` to a reserved line must invalidate the
/// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0).
/// Also checks that the stored double reads back through `read_u64` as the
/// original FPR bit pattern.
#[test]
fn lwarx_then_plain_stfd_invalidates_reservation() {
let table = std::sync::Arc::new(crate::ReservationTable::new());
table.enable();
let mut ctx = PpcContext::new();
ctx.reservation_table = Some(table.clone());
ctx.hw_id = 0;
let mut mem = TestMem::new();
// r4=0x1000 (target addr), r5=0 (index), r7=stwcx val.
ctx.gpr[4] = 0x1000;
ctx.gpr[5] = 0;
ctx.gpr[7] = 0xCCCC_CCCC;
// FPR 5 holds a specific bit pattern.
ctx.fpr[5] = f64::from_bits(0xCAFEBABE_DEADBEEFu64);
// Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20)
let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
write_instr(&mut mem, 0, lwarx);
// Instr 1: stfd f5, 0(r4) (opcode 54, D-form)
let stfd_plain = (54u32 << 26) | (5 << 21) | (4 << 16) | 0;
write_instr(&mut mem, 4, stfd_plain);
// Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1)
let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
write_instr(&mut mem, 8, stwcx);
// Execute lwarx — reserves 0x1000's cache line.
ctx.pc = 0;
step(&mut ctx, &mut mem);
assert!(ctx.has_reservation, "lwarx must set has_reservation");
// Execute plain stfd — must call invalidate_for_write and clear the reservation.
step(&mut ctx, &mut mem);
// Read the stored double back as a u64 and compare with the FPR bit pattern.
// NOTE(review): this only proves that the store path and read_u64 agree on
// byte order; a byte-level read would be needed to pin big-endian layout.
assert_eq!(
mem.read_u64(0x1000),
0xCAFEBABE_DEADBEEFu64,
"stfd must store FPR bit pattern in big-endian order"
);
// Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0).
step(&mut ctx, &mut mem);
assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by plain stfd");
// The doubleword written by stfd must survive: a failed stwcx. stores nothing.
assert_eq!(mem.read_u64(0x1000), 0xCAFEBABE_DEADBEEFu64, "stwcx. must not overwrite on failure");
}
// ---------- Phase 2m: SPR DEC + TBL/TBU write ----------
#[test]