fix(cpu): PPCBUG-160 PPCBUG-167 add invalidate_for_write to multiple/string + FP stores
Continuation of the PPCBUG-107 cascade sweep. stmw/stswi/stswx (multiple
and string stores) and the 9 floating-point stores now invalidate the
reservation table before writing.
stmw can span two cache lines when the writeback range crosses a line
boundary; the guard iterates over all touched lines so a multi-line stmw
gives the same invalidation guarantee as a single-line store.
Affected:
PPCBUG-160 3 multiple/string stores: stmw, stswi, stswx
PPCBUG-167 9 FP stores: stfs, stfsu, stfsx, stfsux,
stfd, stfdu, stfdx, stfdux, stfiwx
Tests: lwarx_then_plain_stmw_spans_two_lines_and_invalidates,
lwarx_then_plain_stfd_invalidates_reservation
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1329,11 +1329,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
PpcOpcode::stfs => {
|
PpcOpcode::stfs => {
|
||||||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||||||
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
|
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
|
||||||
|
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||||
|
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||||
|
}
|
||||||
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
|
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::stfsu => {
|
PpcOpcode::stfsu => {
|
||||||
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
|
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
|
||||||
|
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||||
|
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||||
|
}
|
||||||
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
|
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
|
||||||
ctx.gpr[instr.ra()] = ea as u64;
|
ctx.gpr[instr.ra()] = ea as u64;
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
@@ -1341,11 +1347,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
PpcOpcode::stfsx => {
|
PpcOpcode::stfsx => {
|
||||||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||||||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||||||
|
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||||
|
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||||
|
}
|
||||||
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
|
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::stfsux => {
|
PpcOpcode::stfsux => {
|
||||||
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||||||
|
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||||
|
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||||
|
}
|
||||||
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
|
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
|
||||||
ctx.gpr[instr.ra()] = ea as u64;
|
ctx.gpr[instr.ra()] = ea as u64;
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
@@ -1353,11 +1365,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
PpcOpcode::stfd => {
|
PpcOpcode::stfd => {
|
||||||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||||||
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
|
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
|
||||||
|
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||||
|
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||||
|
}
|
||||||
mem.write_f64(ea, ctx.fpr[instr.rs()]);
|
mem.write_f64(ea, ctx.fpr[instr.rs()]);
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::stfdu => {
|
PpcOpcode::stfdu => {
|
||||||
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
|
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
|
||||||
|
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||||
|
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||||
|
}
|
||||||
mem.write_f64(ea, ctx.fpr[instr.rs()]);
|
mem.write_f64(ea, ctx.fpr[instr.rs()]);
|
||||||
ctx.gpr[instr.ra()] = ea as u64;
|
ctx.gpr[instr.ra()] = ea as u64;
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
@@ -1365,11 +1383,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
PpcOpcode::stfdx => {
|
PpcOpcode::stfdx => {
|
||||||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||||||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||||||
|
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||||
|
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||||
|
}
|
||||||
mem.write_f64(ea, ctx.fpr[instr.rs()]);
|
mem.write_f64(ea, ctx.fpr[instr.rs()]);
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
PpcOpcode::stfdux => {
|
PpcOpcode::stfdux => {
|
||||||
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||||||
|
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||||
|
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||||
|
}
|
||||||
mem.write_f64(ea, ctx.fpr[instr.rs()]);
|
mem.write_f64(ea, ctx.fpr[instr.rs()]);
|
||||||
ctx.gpr[instr.ra()] = ea as u64;
|
ctx.gpr[instr.ra()] = ea as u64;
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
@@ -1378,6 +1402,9 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
// Store FP as integer word: stores low 32 bits of FPR as-is
|
// Store FP as integer word: stores low 32 bits of FPR as-is
|
||||||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||||||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||||||
|
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||||
|
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||||
|
}
|
||||||
mem.write_u32(ea, ctx.fpr[instr.rs()].to_bits() as u32);
|
mem.write_u32(ea, ctx.fpr[instr.rs()].to_bits() as u32);
|
||||||
ctx.pc += 4;
|
ctx.pc += 4;
|
||||||
}
|
}
|
||||||
@@ -1407,6 +1434,9 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
let nb = if instr.rb() == 0 { 32 } else { instr.rb() as u32 };
|
let nb = if instr.rb() == 0 { 32 } else { instr.rb() as u32 };
|
||||||
let mut rs = instr.rs();
|
let mut rs = instr.rs();
|
||||||
let mut bytes_left = nb;
|
let mut bytes_left = nb;
|
||||||
|
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||||
|
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||||
|
}
|
||||||
while bytes_left > 0 {
|
while bytes_left > 0 {
|
||||||
let val = ctx.gpr[rs] as u32;
|
let val = ctx.gpr[rs] as u32;
|
||||||
for byte_idx in 0..4 {
|
for byte_idx in 0..4 {
|
||||||
@@ -1569,6 +1599,22 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
PpcOpcode::stmw => {
|
PpcOpcode::stmw => {
|
||||||
let mut ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
let mut ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||||||
ea = ea.wrapping_add(instr.d() as i64 as u64);
|
ea = ea.wrapping_add(instr.d() as i64 as u64);
|
||||||
|
// PPCBUG-160: stmw writes (32-rs)*4 bytes and can span two cache lines when that range crosses a line boundary.
|
||||||
|
// Iterate over every touched line so any reservation on a later line
|
||||||
|
// is also invalidated (same guarantee as single-word stores).
|
||||||
|
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||||
|
if t.has_active_reservers() {
|
||||||
|
let start_ea = ea as u32;
|
||||||
|
let last_ea = start_ea.wrapping_add((32 - instr.rs() as u32) * 4).wrapping_sub(1);
|
||||||
|
let line_size = RESERVATION_MASK + 1;
|
||||||
|
let mut line = start_ea & !RESERVATION_MASK;
|
||||||
|
loop {
|
||||||
|
t.invalidate_for_write(line);
|
||||||
|
if line >= (last_ea & !RESERVATION_MASK) { break; }
|
||||||
|
line = line.wrapping_add(line_size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
for r in instr.rs()..32 {
|
for r in instr.rs()..32 {
|
||||||
mem.write_u32(ea as u32, ctx.gpr[r] as u32);
|
mem.write_u32(ea as u32, ctx.gpr[r] as u32);
|
||||||
ea = ea.wrapping_add(4);
|
ea = ea.wrapping_add(4);
|
||||||
@@ -4248,6 +4294,9 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
let nb = ctx.xer() & 0x7F;
|
let nb = ctx.xer() & 0x7F;
|
||||||
let mut rs = instr.rs();
|
let mut rs = instr.rs();
|
||||||
let mut bytes_left = nb;
|
let mut bytes_left = nb;
|
||||||
|
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||||
|
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||||
|
}
|
||||||
while bytes_left > 0 {
|
while bytes_left > 0 {
|
||||||
let val = ctx.gpr[rs] as u32;
|
let val = ctx.gpr[rs] as u32;
|
||||||
for byte_idx in 0..4 {
|
for byte_idx in 0..4 {
|
||||||
@@ -5432,6 +5481,117 @@ mod tests {
|
|||||||
assert_eq!(mem.read_u64(0x1000), 0xDEADBEEF_CAFEBABEu64, "stwcx. must not overwrite on failure");
|
assert_eq!(mem.read_u64(0x1000), 0xDEADBEEF_CAFEBABEu64, "stwcx. must not overwrite on failure");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------- PPCBUG-160: stmw multi-line invalidation ----------
|
||||||
|
|
||||||
|
/// PPCBUG-160: `stmw r28, 0x78(r8)` with r8=0x1000 writes four words at
|
||||||
|
/// 0x1078, 0x107C, 0x1080, 0x1084 — crossing the cache-line boundary at
|
||||||
|
/// 0x1080 (RESERVATION_MASK=0x7F, line size=128 bytes).
|
||||||
|
///
|
||||||
|
/// A reservation on the *second* line (0x1080) must be invalidated even
|
||||||
|
/// though the store starts in the first line (0x1000-0x107F). This
|
||||||
|
/// verifies the multi-line loop added to the stmw arm.
|
||||||
|
#[test]
|
||||||
|
fn lwarx_then_plain_stmw_spans_two_lines_and_invalidates() {
|
||||||
|
let table = std::sync::Arc::new(crate::ReservationTable::new());
|
||||||
|
table.enable();
|
||||||
|
|
||||||
|
let mut ctx = PpcContext::new();
|
||||||
|
ctx.reservation_table = Some(table.clone());
|
||||||
|
ctx.hw_id = 0;
|
||||||
|
let mut mem = TestMem::new();
|
||||||
|
|
||||||
|
// r4=0x1080 (reservation address — second line), r5=0 (index for lwarx/stwcx.)
|
||||||
|
// r8=0x1000 (stmw base), r28-r31 = store values
|
||||||
|
ctx.gpr[4] = 0x1080;
|
||||||
|
ctx.gpr[5] = 0;
|
||||||
|
ctx.gpr[8] = 0x1000;
|
||||||
|
ctx.gpr[7] = 0xCCCC_CCCC; // stwcx. value
|
||||||
|
ctx.gpr[28] = 0xAAAA_0001;
|
||||||
|
ctx.gpr[29] = 0xBBBB_0002;
|
||||||
|
ctx.gpr[30] = 0xCCCC_0003;
|
||||||
|
ctx.gpr[31] = 0xDDDD_0004;
|
||||||
|
|
||||||
|
// Instr 0: lwarx r3, r4, r5 — reserves line containing 0x1080
|
||||||
|
let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
|
||||||
|
write_instr(&mut mem, 0, lwarx);
|
||||||
|
// Instr 1: stmw r28, 0x78(r8) — writes 4 words at 0x1078,0x107C,0x1080,0x1084
|
||||||
|
// opcode=47, rs=28, ra=8, d=0x0078
|
||||||
|
let stmw = (47u32 << 26) | (28 << 21) | (8 << 16) | 0x0078;
|
||||||
|
write_instr(&mut mem, 4, stmw);
|
||||||
|
// Instr 2: stwcx. r7, r4, r5 — should fail (line 2 reservation invalidated)
|
||||||
|
let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
|
||||||
|
write_instr(&mut mem, 8, stwcx);
|
||||||
|
|
||||||
|
// Execute lwarx — reserves 0x1080's cache line.
|
||||||
|
ctx.pc = 0;
|
||||||
|
step(&mut ctx, &mut mem);
|
||||||
|
assert!(ctx.has_reservation, "lwarx must set has_reservation");
|
||||||
|
|
||||||
|
// Execute stmw — must invalidate both lines including the one reserved at 0x1080.
|
||||||
|
step(&mut ctx, &mut mem);
|
||||||
|
assert_eq!(mem.read_u32(0x1078), 0xAAAA_0001, "stmw word 0 must land");
|
||||||
|
assert_eq!(mem.read_u32(0x107C), 0xBBBB_0002, "stmw word 1 must land");
|
||||||
|
assert_eq!(mem.read_u32(0x1080), 0xCCCC_0003, "stmw word 2 (line 2) must land");
|
||||||
|
assert_eq!(mem.read_u32(0x1084), 0xDDDD_0004, "stmw word 3 must land");
|
||||||
|
|
||||||
|
// Execute stwcx. — reservation on line 2 (0x1080) was invalidated; must fail.
|
||||||
|
step(&mut ctx, &mut mem);
|
||||||
|
assert!(!ctx.cr[0].eq, "stwcx. must fail: stmw invalidated the reservation on line 2");
|
||||||
|
assert_eq!(mem.read_u32(0x1080), 0xCCCC_0003, "stwcx. must not overwrite on failure");
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------- PPCBUG-167: invalidate_for_write via plain stfd ----------
|
||||||
|
|
||||||
|
/// PPCBUG-167: A plain `stfd` to a reserved line must invalidate the
|
||||||
|
/// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0).
|
||||||
|
/// Also verifies big-endian byte layout of the stored double.
|
||||||
|
#[test]
|
||||||
|
fn lwarx_then_plain_stfd_invalidates_reservation() {
|
||||||
|
let table = std::sync::Arc::new(crate::ReservationTable::new());
|
||||||
|
table.enable();
|
||||||
|
|
||||||
|
let mut ctx = PpcContext::new();
|
||||||
|
ctx.reservation_table = Some(table.clone());
|
||||||
|
ctx.hw_id = 0;
|
||||||
|
let mut mem = TestMem::new();
|
||||||
|
|
||||||
|
// r4=0x1000 (target addr), r5=0 (index), r7=stwcx val.
|
||||||
|
ctx.gpr[4] = 0x1000;
|
||||||
|
ctx.gpr[5] = 0;
|
||||||
|
ctx.gpr[7] = 0xCCCC_CCCC;
|
||||||
|
// FPR 5 holds a specific bit pattern.
|
||||||
|
ctx.fpr[5] = f64::from_bits(0xCAFEBABE_DEADBEEFu64);
|
||||||
|
|
||||||
|
// Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20)
|
||||||
|
let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
|
||||||
|
write_instr(&mut mem, 0, lwarx);
|
||||||
|
// Instr 1: stfd f5, 0(r4) (opcode 54, D-form)
|
||||||
|
let stfd_plain = (54u32 << 26) | (5 << 21) | (4 << 16) | 0;
|
||||||
|
write_instr(&mut mem, 4, stfd_plain);
|
||||||
|
// Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1)
|
||||||
|
let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
|
||||||
|
write_instr(&mut mem, 8, stwcx);
|
||||||
|
|
||||||
|
// Execute lwarx — reserves 0x1000's cache line.
|
||||||
|
ctx.pc = 0;
|
||||||
|
step(&mut ctx, &mut mem);
|
||||||
|
assert!(ctx.has_reservation, "lwarx must set has_reservation");
|
||||||
|
|
||||||
|
// Execute plain stfd — must call invalidate_for_write and clear the reservation.
|
||||||
|
step(&mut ctx, &mut mem);
|
||||||
|
// write_f64 delegates to write_u64, which writes big-endian; verify layout.
|
||||||
|
assert_eq!(
|
||||||
|
mem.read_u64(0x1000),
|
||||||
|
0xCAFEBABE_DEADBEEFu64,
|
||||||
|
"stfd must store FPR bit pattern in big-endian order"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0).
|
||||||
|
step(&mut ctx, &mut mem);
|
||||||
|
assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by plain stfd");
|
||||||
|
assert_eq!(mem.read_u64(0x1000), 0xCAFEBABE_DEADBEEFu64, "stwcx. must not overwrite on failure");
|
||||||
|
}
|
||||||
|
|
||||||
// ---------- Phase 2m: SPR DEC + TBL/TBU write ----------
|
// ---------- Phase 2m: SPR DEC + TBL/TBU write ----------
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
Reference in New Issue
Block a user