fix(cpu): PPCBUG-160 PPCBUG-167 add invalidate_for_write to multiple/string + FP stores
Continuation of the PPCBUG-107 cascade sweep. stmw/stswi/stswx (multiple
and string stores) and the 9 floating-point stores now invalidate the
reservation table before writing.
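stmw can span two cache lines when the stored range crosses a line
boundary; its guard iterates over every touched line so that a multi-line
store gives the same reservation-invalidation guarantee as a single-line
store. Only stmw gets this loop: stswi/stswx and the FP stores invalidate
just the line containing the starting effective address.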
Affected:
PPCBUG-160 3 multiple/string stores: stmw, stswi, stswx
PPCBUG-167 9 FP stores: stfs, stfsu, stfsx, stfsux,
stfd, stfdu, stfdx, stfdux, stfiwx
Tests: lwarx_then_plain_stmw_spans_two_lines_and_invalidates,
lwarx_then_plain_stfd_invalidates_reservation
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1329,11 +1329,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
PpcOpcode::stfs => {
|
||||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||||
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
|
||||
ctx.pc += 4;
|
||||
}
|
||||
PpcOpcode::stfsu => {
|
||||
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
|
||||
ctx.gpr[instr.ra()] = ea as u64;
|
||||
ctx.pc += 4;
|
||||
@@ -1341,11 +1347,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
PpcOpcode::stfsx => {
|
||||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
|
||||
ctx.pc += 4;
|
||||
}
|
||||
PpcOpcode::stfsux => {
|
||||
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
|
||||
ctx.gpr[instr.ra()] = ea as u64;
|
||||
ctx.pc += 4;
|
||||
@@ -1353,11 +1365,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
PpcOpcode::stfd => {
|
||||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||||
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
mem.write_f64(ea, ctx.fpr[instr.rs()]);
|
||||
ctx.pc += 4;
|
||||
}
|
||||
PpcOpcode::stfdu => {
|
||||
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
mem.write_f64(ea, ctx.fpr[instr.rs()]);
|
||||
ctx.gpr[instr.ra()] = ea as u64;
|
||||
ctx.pc += 4;
|
||||
@@ -1365,11 +1383,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
PpcOpcode::stfdx => {
|
||||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
mem.write_f64(ea, ctx.fpr[instr.rs()]);
|
||||
ctx.pc += 4;
|
||||
}
|
||||
PpcOpcode::stfdux => {
|
||||
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
mem.write_f64(ea, ctx.fpr[instr.rs()]);
|
||||
ctx.gpr[instr.ra()] = ea as u64;
|
||||
ctx.pc += 4;
|
||||
@@ -1378,6 +1402,9 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
// Store FP as integer word: stores low 32 bits of FPR as-is
|
||||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
mem.write_u32(ea, ctx.fpr[instr.rs()].to_bits() as u32);
|
||||
ctx.pc += 4;
|
||||
}
|
||||
@@ -1407,6 +1434,9 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
let nb = if instr.rb() == 0 { 32 } else { instr.rb() as u32 };
|
||||
let mut rs = instr.rs();
|
||||
let mut bytes_left = nb;
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
while bytes_left > 0 {
|
||||
let val = ctx.gpr[rs] as u32;
|
||||
for byte_idx in 0..4 {
|
||||
@@ -1569,6 +1599,22 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
PpcOpcode::stmw => {
|
||||
let mut ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||||
ea = ea.wrapping_add(instr.d() as i64 as u64);
|
||||
// PPCBUG-160: stmw can span two cache lines when its (32-rs)*4-byte range crosses a line boundary.
|
||||
// Iterate over every touched line so any reservation on a later line
|
||||
// is also invalidated (same guarantee as single-word stores).
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() {
|
||||
let start_ea = ea as u32;
|
||||
let last_ea = start_ea.wrapping_add((32 - instr.rs() as u32) * 4).wrapping_sub(1);
|
||||
let line_size = RESERVATION_MASK + 1;
|
||||
let mut line = start_ea & !RESERVATION_MASK;
|
||||
loop {
|
||||
t.invalidate_for_write(line);
|
||||
if line >= (last_ea & !RESERVATION_MASK) { break; }
|
||||
line = line.wrapping_add(line_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
for r in instr.rs()..32 {
|
||||
mem.write_u32(ea as u32, ctx.gpr[r] as u32);
|
||||
ea = ea.wrapping_add(4);
|
||||
@@ -4248,6 +4294,9 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
let nb = ctx.xer() & 0x7F;
|
||||
let mut rs = instr.rs();
|
||||
let mut bytes_left = nb;
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
while bytes_left > 0 {
|
||||
let val = ctx.gpr[rs] as u32;
|
||||
for byte_idx in 0..4 {
|
||||
@@ -5432,6 +5481,117 @@ mod tests {
|
||||
assert_eq!(mem.read_u64(0x1000), 0xDEADBEEF_CAFEBABEu64, "stwcx. must not overwrite on failure");
|
||||
}
|
||||
|
||||
// ---------- PPCBUG-160: stmw multi-line invalidation ----------
|
||||
|
||||
/// PPCBUG-160: `stmw r28, 0x78(r8)` with r8=0x1000 writes four words at
|
||||
/// 0x1078, 0x107C, 0x1080, 0x1084 — crossing the cache-line boundary at
|
||||
/// 0x1080 (RESERVATION_MASK=0x7F, line size=128 bytes).
|
||||
///
|
||||
/// A reservation on the *second* line (0x1080) must be invalidated even
|
||||
/// though the store starts in the first line (0x1000-0x107F). This
|
||||
/// verifies the multi-line loop added to the stmw arm.
|
||||
#[test]
|
||||
fn lwarx_then_plain_stmw_spans_two_lines_and_invalidates() {
|
||||
let table = std::sync::Arc::new(crate::ReservationTable::new());
|
||||
table.enable();
|
||||
|
||||
let mut ctx = PpcContext::new();
|
||||
ctx.reservation_table = Some(table.clone());
|
||||
ctx.hw_id = 0;
|
||||
let mut mem = TestMem::new();
|
||||
|
||||
// r4=0x1080 (reservation address — second line), r5=0 (index for lwarx/stwcx.)
|
||||
// r8=0x1000 (stmw base), r28-r31 = store values
|
||||
ctx.gpr[4] = 0x1080;
|
||||
ctx.gpr[5] = 0;
|
||||
ctx.gpr[8] = 0x1000;
|
||||
ctx.gpr[7] = 0xCCCC_CCCC; // stwcx. value
|
||||
ctx.gpr[28] = 0xAAAA_0001;
|
||||
ctx.gpr[29] = 0xBBBB_0002;
|
||||
ctx.gpr[30] = 0xCCCC_0003;
|
||||
ctx.gpr[31] = 0xDDDD_0004;
|
||||
|
||||
// Instr 0: lwarx r3, r4, r5 — reserves line containing 0x1080
|
||||
let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
|
||||
write_instr(&mut mem, 0, lwarx);
|
||||
// Instr 1: stmw r28, 0x78(r8) — writes 4 words at 0x1078,0x107C,0x1080,0x1084
|
||||
// opcode=47, rs=28, ra=8, d=0x0078
|
||||
let stmw = (47u32 << 26) | (28 << 21) | (8 << 16) | 0x0078;
|
||||
write_instr(&mut mem, 4, stmw);
|
||||
// Instr 2: stwcx. r7, r4, r5 — should fail (line 2 reservation invalidated)
|
||||
let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
|
||||
write_instr(&mut mem, 8, stwcx);
|
||||
|
||||
// Execute lwarx — reserves 0x1080's cache line.
|
||||
ctx.pc = 0;
|
||||
step(&mut ctx, &mut mem);
|
||||
assert!(ctx.has_reservation, "lwarx must set has_reservation");
|
||||
|
||||
// Execute stmw — must invalidate both lines including the one reserved at 0x1080.
|
||||
step(&mut ctx, &mut mem);
|
||||
assert_eq!(mem.read_u32(0x1078), 0xAAAA_0001, "stmw word 0 must land");
|
||||
assert_eq!(mem.read_u32(0x107C), 0xBBBB_0002, "stmw word 1 must land");
|
||||
assert_eq!(mem.read_u32(0x1080), 0xCCCC_0003, "stmw word 2 (line 2) must land");
|
||||
assert_eq!(mem.read_u32(0x1084), 0xDDDD_0004, "stmw word 3 must land");
|
||||
|
||||
// Execute stwcx. — reservation on line 2 (0x1080) was invalidated; must fail.
|
||||
step(&mut ctx, &mut mem);
|
||||
assert!(!ctx.cr[0].eq, "stwcx. must fail: stmw invalidated the reservation on line 2");
|
||||
assert_eq!(mem.read_u32(0x1080), 0xCCCC_0003, "stwcx. must not overwrite on failure");
|
||||
}
|
||||
|
||||
// ---------- PPCBUG-167: invalidate_for_write via plain stfd ----------
|
||||
|
||||
/// PPCBUG-167: A plain `stfd` to a reserved line must invalidate the
|
||||
/// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0).
|
||||
/// Also verifies that the stored double reads back via `read_u64` with the FPR's exact bit pattern.
|
||||
#[test]
|
||||
fn lwarx_then_plain_stfd_invalidates_reservation() {
|
||||
let table = std::sync::Arc::new(crate::ReservationTable::new());
|
||||
table.enable();
|
||||
|
||||
let mut ctx = PpcContext::new();
|
||||
ctx.reservation_table = Some(table.clone());
|
||||
ctx.hw_id = 0;
|
||||
let mut mem = TestMem::new();
|
||||
|
||||
// r4=0x1000 (target addr), r5=0 (index), r7=stwcx val.
|
||||
ctx.gpr[4] = 0x1000;
|
||||
ctx.gpr[5] = 0;
|
||||
ctx.gpr[7] = 0xCCCC_CCCC;
|
||||
// FPR 5 holds a specific bit pattern.
|
||||
ctx.fpr[5] = f64::from_bits(0xCAFEBABE_DEADBEEFu64);
|
||||
|
||||
// Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20)
|
||||
let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
|
||||
write_instr(&mut mem, 0, lwarx);
|
||||
// Instr 1: stfd f5, 0(r4) (opcode 54, D-form)
|
||||
let stfd_plain = (54u32 << 26) | (5 << 21) | (4 << 16) | 0;
|
||||
write_instr(&mut mem, 4, stfd_plain);
|
||||
// Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1)
|
||||
let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
|
||||
write_instr(&mut mem, 8, stwcx);
|
||||
|
||||
// Execute lwarx — reserves 0x1000's cache line.
|
||||
ctx.pc = 0;
|
||||
step(&mut ctx, &mut mem);
|
||||
assert!(ctx.has_reservation, "lwarx must set has_reservation");
|
||||
|
||||
// Execute plain stfd — must call invalidate_for_write and clear the reservation.
|
||||
step(&mut ctx, &mut mem);
|
||||
// write_f64 delegates to write_u64, which writes big-endian; verify layout.
|
||||
assert_eq!(
|
||||
mem.read_u64(0x1000),
|
||||
0xCAFEBABE_DEADBEEFu64,
|
||||
"stfd must store FPR bit pattern in big-endian order"
|
||||
);
|
||||
|
||||
// Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0).
|
||||
step(&mut ctx, &mut mem);
|
||||
assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by plain stfd");
|
||||
assert_eq!(mem.read_u64(0x1000), 0xCAFEBABE_DEADBEEFu64, "stwcx. must not overwrite on failure");
|
||||
}
|
||||
|
||||
// ---------- Phase 2m: SPR DEC + TBL/TBU write ----------
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user