From 4538fa9e70bfc8d76efc12d7b0b8295c98c369a1 Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Fri, 1 May 2026 16:57:05 +0200 Subject: [PATCH 1/7] fix(cpu): PPCBUG-107 PPCBUG-140-144 add invalidate_for_write to word stores Word stores (stw, stwu, stwx, stwux, stwbrx) now invalidate the reservation table for the target line before writing. Without this, plain stores by other host threads silently fail to clear reservations held by lwarx, causing stwcx. to spuriously succeed under --parallel. Affected: PPCBUG-107 ReservationTable::invalidate_for_write never called from any store PPCBUG-140 stw missing invalidate_for_write (interpreter.rs:1183) PPCBUG-141 stwu missing invalidate_for_write (interpreter.rs:1189) PPCBUG-142 stwx missing invalidate_for_write (interpreter.rs:1195) PPCBUG-143 stwux missing invalidate_for_write (interpreter.rs:1201) PPCBUG-144 stwbrx missing invalidate_for_write (interpreter.rs:1568) Tests: lwarx_then_plain_stw_invalidates_reservation, lwarx_then_stwcx_succeeds_without_intervening_store Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/xenia-cpu/src/interpreter.rs | 93 +++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs index 4d26a97..3eb9309 100644 --- a/crates/xenia-cpu/src/interpreter.rs +++ b/crates/xenia-cpu/src/interpreter.rs @@ -1183,11 +1183,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::stw => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u32(ea, ctx.gpr[instr.rs()] as u32); ctx.pc += 4; } PpcOpcode::stwu => { let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| 
t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u32(ea, ctx.gpr[instr.rs()] as u32); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; @@ -1195,11 +1201,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::stwx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u32(ea, ctx.gpr[instr.rs()] as u32); ctx.pc += 4; } PpcOpcode::stwux => { let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u32(ea, ctx.gpr[instr.rs()] as u32); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; @@ -1568,6 +1580,9 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::stwbrx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u32(ea, (ctx.gpr[instr.rs()] as u32).swap_bytes()); ctx.pc += 4; } @@ -5207,6 +5222,84 @@ mod tests { assert_eq!(mem.read_u32(0x1080), 0, "memory not written on failure"); } + // ---------- PPCBUG-107/140: invalidate_for_write via plain stw ---------- + + /// PPCBUG-107/140: A plain `stw` to a reserved line must invalidate the + /// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0). 
+ #[test] + fn lwarx_then_plain_stw_invalidates_reservation() { + let table = std::sync::Arc::new(crate::ReservationTable::new()); + table.enable(); + + let mut ctx = PpcContext::new(); + ctx.reservation_table = Some(table.clone()); + ctx.hw_id = 0; + let mut mem = TestMem::new(); + + // Set up registers: r4=0x1000 (target addr), r5=0 (index), r6=plain store val, r7=stwcx val. + ctx.gpr[4] = 0x1000; + ctx.gpr[5] = 0; + ctx.gpr[6] = 0xBBBB_BBBB; + ctx.gpr[7] = 0xCCCC_CCCC; + + // Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20) + let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1); + write_instr(&mut mem, 0, lwarx); + // Instr 1: stw r6, 0(r4) (opcode 36, D-form) + let stw_plain = (36u32 << 26) | (6 << 21) | (4 << 16) | 0; + write_instr(&mut mem, 4, stw_plain); + // Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1) + let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1; + write_instr(&mut mem, 8, stwcx); + + // Execute lwarx — reserves 0x1000's cache line. + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert!(ctx.has_reservation, "lwarx must set has_reservation"); + + // Execute plain stw — must call invalidate_for_write and clear the reservation. + step(&mut ctx, &mut mem); + assert_eq!(mem.read_u32(0x1000), 0xBBBB_BBBB, "plain stw must land"); + + // Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0). + step(&mut ctx, &mut mem); + assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by plain stw"); + // Memory must still hold the value from the plain stw, not from stwcx.. + assert_eq!(mem.read_u32(0x1000), 0xBBBB_BBBB, "stwcx. must not overwrite on failure"); + } + + /// Regression: without any intervening store, `lwarx` + `stwcx.` must still + /// succeed (CR0.EQ=1). Ensures the fix didn't accidentally break the happy path. 
+ #[test] + fn lwarx_then_stwcx_succeeds_without_intervening_store() { + let table = std::sync::Arc::new(crate::ReservationTable::new()); + table.enable(); + + let mut ctx = PpcContext::new(); + ctx.reservation_table = Some(table.clone()); + ctx.hw_id = 0; + let mut mem = TestMem::new(); + + ctx.gpr[4] = 0x1000; + ctx.gpr[5] = 0; + ctx.gpr[7] = 0xDEAD_BEEF; + + // Instr 0: lwarx r3, r4, r5 + let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1); + write_instr(&mut mem, 0, lwarx); + // Instr 1: stwcx. r7, r4, r5 + let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1; + write_instr(&mut mem, 4, stwcx); + + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert!(ctx.has_reservation, "lwarx must set has_reservation"); + + step(&mut ctx, &mut mem); + assert!(ctx.cr[0].eq, "stwcx. must succeed when reservation is intact"); + assert_eq!(mem.read_u32(0x1000), 0xDEAD_BEEF, "stwcx. must write on success"); + } + // ---------- Phase 2m: SPR DEC + TBL/TBU write ---------- #[test] From 24d347436a4daec9f8d7fa3625c1bbbc542c45cd Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Fri, 1 May 2026 17:13:31 +0200 Subject: [PATCH 2/7] fix(cpu): PPCBUG-130 PPCBUG-150 add invalidate_for_write to byte/halfword/doubleword stores Continuation of the PPCBUG-107 cascade sweep (batch 1: word stores landed in 4538fa9). Plain stb/stbu/stbx/stbux, sth/sthu/sthx/sthux/sthbrx, and std/stdu/stdx/stdux/stdbrx now invalidate the reservation table before writing, so cross-thread lwarx/stwcx. atomicity holds when these widths are written by another host thread. 
Affected: PPCBUG-130 9 byte/halfword stores missing invalidate_for_write stb, stbu, stbx, stbux, sth, sthu, sthx, sthux, sthbrx PPCBUG-150 5 doubleword stores missing invalidate_for_write std, stdu, stdx, stdux, stdbrx Tests: lwarx_then_plain_stb_invalidates_reservation, lwarx_then_plain_std_invalidates_reservation Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/xenia-cpu/src/interpreter.rs | 132 ++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs index 3eb9309..f76bd0f 100644 --- a/crates/xenia-cpu/src/interpreter.rs +++ b/crates/xenia-cpu/src/interpreter.rs @@ -1219,11 +1219,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::stb => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u8(ea, ctx.gpr[instr.rs()] as u8); ctx.pc += 4; } PpcOpcode::stbu => { let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u8(ea, ctx.gpr[instr.rs()] as u8); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; @@ -1231,11 +1237,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::stbx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u8(ea, ctx.gpr[instr.rs()] as u8); ctx.pc += 4; } PpcOpcode::stbux => { let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as 
u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u8(ea, ctx.gpr[instr.rs()] as u8); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; @@ -1243,11 +1255,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::sth => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u16(ea, ctx.gpr[instr.rs()] as u16); ctx.pc += 4; } PpcOpcode::sthu => { let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u16(ea, ctx.gpr[instr.rs()] as u16); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; @@ -1255,11 +1273,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::sthx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u16(ea, ctx.gpr[instr.rs()] as u16); ctx.pc += 4; } PpcOpcode::sthux => { let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u16(ea, ctx.gpr[instr.rs()] as u16); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; @@ -1267,23 +1291,35 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::std => { let ea = if instr.ra() == 0 { 0u64 } else { 
ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(instr.ds() as i64 as u64) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u64(ea, ctx.gpr[instr.rs()]); ctx.pc += 4; } PpcOpcode::stdx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u64(ea, ctx.gpr[instr.rs()]); ctx.pc += 4; } PpcOpcode::stdu => { let ea = ctx.gpr[instr.ra()].wrapping_add(instr.ds() as i64 as u64) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u64(ea, ctx.gpr[instr.rs()]); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::stdux => { let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u64(ea, ctx.gpr[instr.rs()]); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; @@ -1589,6 +1625,9 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::sthbrx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u16(ea, (ctx.gpr[instr.rs()] as u16).swap_bytes()); ctx.pc += 4; } @@ -4175,6 +4214,9 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - } PpcOpcode::stdbrx => { let ea = ea_indexed(ctx, instr); + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if 
t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u64(ea, ctx.gpr[instr.rs()].swap_bytes()); ctx.pc += 4; } @@ -5300,6 +5342,96 @@ mod tests { assert_eq!(mem.read_u32(0x1000), 0xDEAD_BEEF, "stwcx. must write on success"); } + // ---------- PPCBUG-130: invalidate_for_write via plain stb ---------- + + /// PPCBUG-130: A plain `stb` to a reserved line must invalidate the + /// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0). + #[test] + fn lwarx_then_plain_stb_invalidates_reservation() { + let table = std::sync::Arc::new(crate::ReservationTable::new()); + table.enable(); + + let mut ctx = PpcContext::new(); + ctx.reservation_table = Some(table.clone()); + ctx.hw_id = 0; + let mut mem = TestMem::new(); + + // r4=0x1000 (target addr), r5=0 (index), r6=byte store val, r7=stwcx val. + ctx.gpr[4] = 0x1000; + ctx.gpr[5] = 0; + ctx.gpr[6] = 0xAB; + ctx.gpr[7] = 0xCCCC_CCCC; + + // Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20) + let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1); + write_instr(&mut mem, 0, lwarx); + // Instr 1: stb r6, 0(r4) (opcode 38, D-form) + let stb_plain = (38u32 << 26) | (6 << 21) | (4 << 16) | 0; + write_instr(&mut mem, 4, stb_plain); + // Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1) + let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1; + write_instr(&mut mem, 8, stwcx); + + // Execute lwarx — reserves 0x1000's cache line. + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert!(ctx.has_reservation, "lwarx must set has_reservation"); + + // Execute plain stb — must call invalidate_for_write and clear the reservation. + step(&mut ctx, &mut mem); + assert_eq!(mem.read_u8(0x1000), 0xAB, "plain stb must land"); + + // Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0). + step(&mut ctx, &mut mem); + assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by plain stb"); + assert_eq!(mem.read_u8(0x1000), 0xAB, "stwcx. 
must not overwrite on failure"); + } + + // ---------- PPCBUG-150: invalidate_for_write via plain std ---------- + + /// PPCBUG-150: A plain `std` to a reserved line must invalidate the + /// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0). + #[test] + fn lwarx_then_plain_std_invalidates_reservation() { + let table = std::sync::Arc::new(crate::ReservationTable::new()); + table.enable(); + + let mut ctx = PpcContext::new(); + ctx.reservation_table = Some(table.clone()); + ctx.hw_id = 0; + let mut mem = TestMem::new(); + + // r4=0x1000 (target addr), r5=0 (index), r6=doubleword store val, r7=stwcx val. + ctx.gpr[4] = 0x1000; + ctx.gpr[5] = 0; + ctx.gpr[6] = 0xDEADBEEF_CAFEBABEu64; + ctx.gpr[7] = 0xCCCC_CCCC; + + // Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20) + let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1); + write_instr(&mut mem, 0, lwarx); + // Instr 1: std r6, 0(r4) (opcode 62, DS-form, XO=0b00) + let std_plain = (62u32 << 26) | (6 << 21) | (4 << 16) | 0; + write_instr(&mut mem, 4, std_plain); + // Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1) + let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1; + write_instr(&mut mem, 8, stwcx); + + // Execute lwarx — reserves 0x1000's cache line. + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert!(ctx.has_reservation, "lwarx must set has_reservation"); + + // Execute plain std — must call invalidate_for_write and clear the reservation. + step(&mut ctx, &mut mem); + assert_eq!(mem.read_u64(0x1000), 0xDEADBEEF_CAFEBABEu64, "plain std must land"); + + // Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0). + step(&mut ctx, &mut mem); + assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by plain std"); + assert_eq!(mem.read_u64(0x1000), 0xDEADBEEF_CAFEBABEu64, "stwcx. 
must not overwrite on failure"); + } + // ---------- Phase 2m: SPR DEC + TBL/TBU write ---------- #[test] From af54eb28bdb6c236064f610785f592d7a1e64e1b Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Fri, 1 May 2026 17:24:46 +0200 Subject: [PATCH 3/7] fix(cpu): PPCBUG-160 PPCBUG-167 add invalidate_for_write to multiple/string + FP stores Continuation of the PPCBUG-107 cascade sweep. stmw/stswi/stswx (multiple and string stores) and the 9 floating-point stores now invalidate the reservation table before writing. stmw can span two cache lines when the writeback range crosses a line boundary; the guard iterates over all touched lines so multi-line atomic holds the same guarantee as single-line stores. Affected: PPCBUG-160 3 multiple/string stores: stmw, stswi, stswx PPCBUG-167 9 FP stores: stfs, stfsu, stfsx, stfsux, stfd, stfdu, stfdx, stfdux, stfiwx Tests: lwarx_then_plain_stmw_spans_two_lines_and_invalidates, lwarx_then_plain_stfd_invalidates_reservation Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/xenia-cpu/src/interpreter.rs | 160 ++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs index f76bd0f..f30b25f 100644 --- a/crates/xenia-cpu/src/interpreter.rs +++ b/crates/xenia-cpu/src/interpreter.rs @@ -1329,11 +1329,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::stfs => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_f32(ea, ctx.fpr[instr.rs()] as f32); ctx.pc += 4; } PpcOpcode::stfsu => { let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { 
t.invalidate_for_write(ea); } + } mem.write_f32(ea, ctx.fpr[instr.rs()] as f32); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; @@ -1341,11 +1347,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::stfsx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_f32(ea, ctx.fpr[instr.rs()] as f32); ctx.pc += 4; } PpcOpcode::stfsux => { let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_f32(ea, ctx.fpr[instr.rs()] as f32); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; @@ -1353,11 +1365,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::stfd => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_f64(ea, ctx.fpr[instr.rs()]); ctx.pc += 4; } PpcOpcode::stfdu => { let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_f64(ea, ctx.fpr[instr.rs()]); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; @@ -1365,11 +1383,17 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::stfdx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; + if let Some(t) = 
ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_f64(ea, ctx.fpr[instr.rs()]); ctx.pc += 4; } PpcOpcode::stfdux => { let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_f64(ea, ctx.fpr[instr.rs()]); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; @@ -1378,6 +1402,9 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - // Store FP as integer word: stores low 32 bits of FPR as-is let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } mem.write_u32(ea, ctx.fpr[instr.rs()].to_bits() as u32); ctx.pc += 4; } @@ -1407,6 +1434,9 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let nb = if instr.rb() == 0 { 32 } else { instr.rb() as u32 }; let mut rs = instr.rs(); let mut bytes_left = nb; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } while bytes_left > 0 { let val = ctx.gpr[rs] as u32; for byte_idx in 0..4 { @@ -1569,6 +1599,22 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::stmw => { let mut ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; ea = ea.wrapping_add(instr.d() as i64 as u64); + // PPCBUG-160: stmw can span two cache lines when its (32-rs)*4 bytes cross a line boundary. + // Iterate over every touched line so any reservation on a later line + // is also invalidated (same guarantee as single-word stores). 
+ if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { + let start_ea = ea as u32; + let last_ea = start_ea.wrapping_add((32 - instr.rs() as u32) * 4).wrapping_sub(1); + let line_size = RESERVATION_MASK + 1; + let mut line = start_ea & !RESERVATION_MASK; + loop { + t.invalidate_for_write(line); + if line >= (last_ea & !RESERVATION_MASK) { break; } + line = line.wrapping_add(line_size); + } + } + } for r in instr.rs()..32 { mem.write_u32(ea as u32, ctx.gpr[r] as u32); ea = ea.wrapping_add(4); @@ -4248,6 +4294,9 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let nb = ctx.xer() & 0x7F; let mut rs = instr.rs(); let mut bytes_left = nb; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } while bytes_left > 0 { let val = ctx.gpr[rs] as u32; for byte_idx in 0..4 { @@ -5432,6 +5481,117 @@ mod tests { assert_eq!(mem.read_u64(0x1000), 0xDEADBEEF_CAFEBABEu64, "stwcx. must not overwrite on failure"); } + // ---------- PPCBUG-160: stmw multi-line invalidation ---------- + + /// PPCBUG-160: `stmw r28, 0x78(r8)` with r8=0x1000 writes four words at + /// 0x1078, 0x107C, 0x1080, 0x1084 — crossing the cache-line boundary at + /// 0x1080 (RESERVATION_MASK=0x7F, line size=128 bytes). + /// + /// A reservation on the *second* line (0x1080) must be invalidated even + /// though the store starts in the first line (0x1000-0x107F). This + /// verifies the multi-line loop added to the stmw arm. + #[test] + fn lwarx_then_plain_stmw_spans_two_lines_and_invalidates() { + let table = std::sync::Arc::new(crate::ReservationTable::new()); + table.enable(); + + let mut ctx = PpcContext::new(); + ctx.reservation_table = Some(table.clone()); + ctx.hw_id = 0; + let mut mem = TestMem::new(); + + // r4=0x1080 (reservation address — second line), r5=0 (index for lwarx/stwcx.) 
+ // r8=0x1000 (stmw base), r28-r31 = store values + ctx.gpr[4] = 0x1080; + ctx.gpr[5] = 0; + ctx.gpr[8] = 0x1000; + ctx.gpr[7] = 0xCCCC_CCCC; // stwcx. value + ctx.gpr[28] = 0xAAAA_0001; + ctx.gpr[29] = 0xBBBB_0002; + ctx.gpr[30] = 0xCCCC_0003; + ctx.gpr[31] = 0xDDDD_0004; + + // Instr 0: lwarx r3, r4, r5 — reserves line containing 0x1080 + let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1); + write_instr(&mut mem, 0, lwarx); + // Instr 1: stmw r28, 0x78(r8) — writes 4 words at 0x1078,0x107C,0x1080,0x1084 + // opcode=47, rs=28, ra=8, d=0x0078 + let stmw = (47u32 << 26) | (28 << 21) | (8 << 16) | 0x0078; + write_instr(&mut mem, 4, stmw); + // Instr 2: stwcx. r7, r4, r5 — should fail (line 2 reservation invalidated) + let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1; + write_instr(&mut mem, 8, stwcx); + + // Execute lwarx — reserves 0x1080's cache line. + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert!(ctx.has_reservation, "lwarx must set has_reservation"); + + // Execute stmw — must invalidate both lines including the one reserved at 0x1080. + step(&mut ctx, &mut mem); + assert_eq!(mem.read_u32(0x1078), 0xAAAA_0001, "stmw word 0 must land"); + assert_eq!(mem.read_u32(0x107C), 0xBBBB_0002, "stmw word 1 must land"); + assert_eq!(mem.read_u32(0x1080), 0xCCCC_0003, "stmw word 2 (line 2) must land"); + assert_eq!(mem.read_u32(0x1084), 0xDDDD_0004, "stmw word 3 must land"); + + // Execute stwcx. — reservation on line 2 (0x1080) was invalidated; must fail. + step(&mut ctx, &mut mem); + assert!(!ctx.cr[0].eq, "stwcx. must fail: stmw invalidated the reservation on line 2"); + assert_eq!(mem.read_u32(0x1080), 0xCCCC_0003, "stwcx. must not overwrite on failure"); + } + + // ---------- PPCBUG-167: invalidate_for_write via plain stfd ---------- + + /// PPCBUG-167: A plain `stfd` to a reserved line must invalidate the + /// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0). 
+ /// Also verifies big-endian byte layout of the stored double. + #[test] + fn lwarx_then_plain_stfd_invalidates_reservation() { + let table = std::sync::Arc::new(crate::ReservationTable::new()); + table.enable(); + + let mut ctx = PpcContext::new(); + ctx.reservation_table = Some(table.clone()); + ctx.hw_id = 0; + let mut mem = TestMem::new(); + + // r4=0x1000 (target addr), r5=0 (index), r7=stwcx val. + ctx.gpr[4] = 0x1000; + ctx.gpr[5] = 0; + ctx.gpr[7] = 0xCCCC_CCCC; + // FPR 5 holds a specific bit pattern. + ctx.fpr[5] = f64::from_bits(0xCAFEBABE_DEADBEEFu64); + + // Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20) + let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1); + write_instr(&mut mem, 0, lwarx); + // Instr 1: stfd f5, 0(r4) (opcode 54, D-form) + let stfd_plain = (54u32 << 26) | (5 << 21) | (4 << 16) | 0; + write_instr(&mut mem, 4, stfd_plain); + // Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1) + let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1; + write_instr(&mut mem, 8, stwcx); + + // Execute lwarx — reserves 0x1000's cache line. + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert!(ctx.has_reservation, "lwarx must set has_reservation"); + + // Execute plain stfd — must call invalidate_for_write and clear the reservation. + step(&mut ctx, &mut mem); + // write_f64 delegates to write_u64, which writes big-endian; verify layout. + assert_eq!( + mem.read_u64(0x1000), + 0xCAFEBABE_DEADBEEFu64, + "stfd must store FPR bit pattern in big-endian order" + ); + + // Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0). + step(&mut ctx, &mut mem); + assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by plain stfd"); + assert_eq!(mem.read_u64(0x1000), 0xCAFEBABE_DEADBEEFu64, "stwcx. 
must not overwrite on failure"); + } + // ---------- Phase 2m: SPR DEC + TBL/TBU write ---------- #[test] From d4e227eeabb350f61045aa331d14bd8dda9b857b Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Fri, 1 May 2026 17:36:17 +0200 Subject: [PATCH 4/7] fix(cpu): PPCBUG-511 PPCBUG-512 PPCBUG-513 PPCBUG-514 add invalidate_for_write to VMX stores Continuation of the PPCBUG-107 cascade sweep. All 16 VMX store opcodes (stvx/stvxl, stvebx/stvehx/stvewx, stvlx/stvrx and 128 variants of each) now invalidate the reservation table before writing. stvlx/stvrx partial-vector stores can write at non-16-byte-aligned EAs; they invalidate both potentially-touched cache lines. stvewx128 currently writes 16 bytes at the wrong EA scope (PPCBUG-510); the invalidate guard fires at that over-wide EA today and will narrow automatically when PPCBUG-510 is fixed in P3. Affected: PPCBUG-511 stvx, stvx128, stvxl, stvxl128 PPCBUG-512 stvebx, stvehx, stvewx, stvewx128 PPCBUG-513 stvlx, stvlx128, stvlxl, stvlxl128 PPCBUG-514 stvrx, stvrx128, stvrxl, stvrxl128 Tests: lwarx_then_plain_stvx_invalidates_reservation, lwarx_then_plain_stvlx_invalidates_reservation Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/xenia-cpu/src/interpreter.rs | 169 ++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs index f30b25f..c4694b5 100644 --- a/crates/xenia-cpu/src/interpreter.rs +++ b/crates/xenia-cpu/src/interpreter.rs @@ -1698,6 +1698,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::stvx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32; + // PPCBUG-511: stvx was missing invalidate_for_write. 
+ if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } let bytes = ctx.vr[instr.rs()].as_bytes(); for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); } ctx.pc += 4; @@ -1705,6 +1709,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::stvx128 => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32; + // PPCBUG-511: stvx128 was missing invalidate_for_write. + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } let bytes = ctx.vr[instr.vs128()].as_bytes(); for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); } ctx.pc += 4; @@ -1756,6 +1764,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - // Store vS[EA & 0xF] (1 byte) to memory at EA. let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = base.wrapping_add(ctx.gpr[instr.rb()]) as u32; + // PPCBUG-512: stvebx was missing invalidate_for_write. + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } let slot = (ea & 0xF) as usize; let bytes = ctx.vr[instr.rs()].as_bytes(); mem.write_u8(ea, bytes[slot]); @@ -1766,6 +1778,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea_unaligned = base.wrapping_add(ctx.gpr[instr.rb()]) as u32; let ea = ea_unaligned & !0x1u32; + // PPCBUG-512: stvehx was missing invalidate_for_write. 
+ if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } let slot = ((ea_unaligned & 0xF) >> 1) as usize; let bytes = ctx.vr[instr.rs()].as_bytes(); let h = ((bytes[slot * 2] as u16) << 8) | (bytes[slot * 2 + 1] as u16); @@ -1777,6 +1793,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea_unaligned = base.wrapping_add(ctx.gpr[instr.rb()]) as u32; let ea = ea_unaligned & !0x3u32; + // PPCBUG-512: stvewx was missing invalidate_for_write. + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } let slot = ((ea_unaligned & 0xF) >> 2) as usize; let bytes = ctx.vr[instr.rs()].as_bytes(); let w = ((bytes[slot * 4] as u32) << 24) @@ -1799,6 +1819,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::stvxl | PpcOpcode::stvxl128 => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32; + // PPCBUG-511: stvxl/stvxl128 were missing invalidate_for_write. + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } let vs = if matches!(instr.opcode, PpcOpcode::stvxl128) { instr.vs128() } else { instr.rs() }; let bytes = ctx.vr[vs].as_bytes(); for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); } @@ -2845,21 +2869,63 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - } PpcOpcode::stvlx | PpcOpcode::stvlxl => { let ea = ea_indexed(ctx, instr); + // PPCBUG-513: stvlx/stvlxl were missing invalidate_for_write. 
+ // store_vector_left writes [ea, (ea & !0xF)+15]: at most 16 bytes, always inside one + // 16-byte block and therefore one 128-byte line. `ea + 15` over-approximates that end, + // so the second call may conservatively invalidate the next line (spurious stwcx. fail at worst). + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { + let first_line = ea & !RESERVATION_MASK; + let last_line = ea.wrapping_add(15) & !RESERVATION_MASK; + t.invalidate_for_write(first_line); + if last_line != first_line { t.invalidate_for_write(last_line); } + } + } crate::vmx::store_vector_left(mem, ea, ctx.vr[instr.rs()]); ctx.pc += 4; } PpcOpcode::stvlx128 | PpcOpcode::stvlxl128 => { let ea = ea_indexed(ctx, instr); + // PPCBUG-513: stvlx128/stvlxl128 were missing invalidate_for_write. + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { + let first_line = ea & !RESERVATION_MASK; + let last_line = ea.wrapping_add(15) & !RESERVATION_MASK; + t.invalidate_for_write(first_line); + if last_line != first_line { t.invalidate_for_write(last_line); } + } + } crate::vmx::store_vector_left(mem, ea, ctx.vr[instr.vs128()]); ctx.pc += 4; } PpcOpcode::stvrx | PpcOpcode::stvrxl => { let ea = ea_indexed(ctx, instr); + // PPCBUG-514: stvrx/stvrxl were missing invalidate_for_write. + // store_vector_right writes [ea & !0xF, ea-1] (up to 15 bytes, all within a single + // 16-byte-aligned block). Two-call form is kept for defensive correctness. + // stvrx at shift==0 is a no-op; the guard fires unconditionally (cheap). 
+ if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { + let first_line = ea & !RESERVATION_MASK; + let last_line = ea.wrapping_add(15) & !RESERVATION_MASK; + t.invalidate_for_write(first_line); + if last_line != first_line { t.invalidate_for_write(last_line); } + } + } crate::vmx::store_vector_right(mem, ea, ctx.vr[instr.rs()]); ctx.pc += 4; } PpcOpcode::stvrx128 | PpcOpcode::stvrxl128 => { let ea = ea_indexed(ctx, instr); + // PPCBUG-514: stvrx128/stvrxl128 were missing invalidate_for_write. + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { + let first_line = ea & !RESERVATION_MASK; + let last_line = ea.wrapping_add(15) & !RESERVATION_MASK; + t.invalidate_for_write(first_line); + if last_line != first_line { t.invalidate_for_write(last_line); } + } + } crate::vmx::store_vector_right(mem, ea, ctx.vr[instr.vs128()]); ctx.pc += 4; } @@ -2875,6 +2941,13 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - } PpcOpcode::stvewx128 => { let ea = ea_indexed(ctx, instr) & !0xF; + // TODO PPCBUG-510: stvewx128 currently writes 16 bytes at ea & !0xF; the EA scope is + // wrong (should be word-aligned, 4 bytes only). When P3 fixes EA, this invalidate's + // range narrows automatically. + // PPCBUG-512: stvewx128 was missing invalidate_for_write. + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } let bytes = ctx.vr[instr.vs128()].as_bytes(); for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); } ctx.pc += 4; @@ -5906,6 +5979,102 @@ mod tests { } } + // ---------- PPCBUG-511/513: invalidate_for_write via VMX stores ---------- + + /// PPCBUG-511: A plain `stvx` to a reserved line must invalidate the + /// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0). 
+ #[test] + fn lwarx_then_plain_stvx_invalidates_reservation() { + let table = std::sync::Arc::new(crate::ReservationTable::new()); + table.enable(); + + let mut ctx = PpcContext::new(); + ctx.reservation_table = Some(table.clone()); + ctx.hw_id = 0; + let mut mem = TestMem::new(); + + // r4=0x1000 (reservation + store address), r5=0 (index for lwarx/stwcx.), r7=stwcx val. + ctx.gpr[4] = 0x1000; + ctx.gpr[5] = 0; + ctx.gpr[7] = 0xCCCC_CCCC; + // VR 0: recognizable pattern to confirm the store lands. + ctx.vr[0] = xenia_types::Vec128::from_bytes([0xAA; 16]); + + // Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20) + let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1); + write_instr(&mut mem, 0, lwarx); + // Instr 1: stvx v0, r0, r4 (opcode 31, XO 231; rA=0 → base=0, EA = 0 + r4 = 0x1000, aligned) + // (31<<26) | (vs=0<<21) | (ra=0<<16) | (rb=4<<11) | (231<<1) + let stvx = (31u32 << 26) | (0 << 21) | (0 << 16) | (4 << 11) | (231 << 1); + write_instr(&mut mem, 4, stvx); + // Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1) + let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1; + write_instr(&mut mem, 8, stwcx); + + // Execute lwarx — reserves 0x1000's cache line. + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert!(ctx.has_reservation, "lwarx must set has_reservation"); + + // Execute stvx — must call invalidate_for_write and clear the reservation. + step(&mut ctx, &mut mem); + assert_eq!(mem.read_u8(0x1000), 0xAA, "stvx must write the VR bytes"); + + // Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0). + step(&mut ctx, &mut mem); + assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by stvx"); + assert_eq!(mem.read_u8(0x1000), 0xAA, "stwcx. must not overwrite on failure"); + } + + /// PPCBUG-513: A plain `stvlx` to a reserved line must invalidate the + /// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0). 
+ /// stvlx with EA=0x1003 writes bytes 0x1003-0x100F (13 bytes from VR0's high lanes). + #[test] + fn lwarx_then_plain_stvlx_invalidates_reservation() { + let table = std::sync::Arc::new(crate::ReservationTable::new()); + table.enable(); + + let mut ctx = PpcContext::new(); + ctx.reservation_table = Some(table.clone()); + ctx.hw_id = 0; + let mut mem = TestMem::new(); + + // Reserve at 0x1000 (same cache line as the stvlx target 0x1003). + ctx.gpr[4] = 0x1000; // lwarx/stwcx. reservation address + ctx.gpr[5] = 0; // index register (0 for lwarx/stwcx.) + ctx.gpr[6] = 0x1003; // stvlx EA: rb=6, ra=0 → ea = 0 + 0x1003 = 0x1003 + ctx.gpr[7] = 0xCCCC_CCCC; // stwcx. value + // VR 0: recognizable pattern. + ctx.vr[0] = xenia_types::Vec128::from_bytes([0xBB; 16]); + + // Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20) + let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1); + write_instr(&mut mem, 0, lwarx); + // Instr 1: stvlx v0, r0, r6 (opcode 31, XO 647; rA=0 → base=0, EA = r6 = 0x1003) + // store_vector_left writes shift=3 skipped bytes, then bytes 3..15 of VR0 → 0x1003..0x100F + // (31<<26) | (vs=0<<21) | (ra=0<<16) | (rb=6<<11) | (647<<1) + let stvlx = (31u32 << 26) | (0 << 21) | (0 << 16) | (6 << 11) | (647 << 1); + write_instr(&mut mem, 4, stvlx); + // Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1) + let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1; + write_instr(&mut mem, 8, stwcx); + + // Execute lwarx — reserves 0x1000's cache line. + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert!(ctx.has_reservation, "lwarx must set has_reservation"); + + // Execute stvlx — must call invalidate_for_write and clear the reservation. + step(&mut ctx, &mut mem); + // store_vector_left(ea=0x1003): shift=3, n=13 → writes bytes 0x1003-0x100F = 0xBB. 
+ assert_eq!(mem.read_u8(0x1003), 0xBB, "stvlx must write VR bytes starting at EA"); + assert_eq!(mem.read_u8(0x100F), 0xBB, "stvlx must write up to (ea & !0xF)+15"); + + // Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0). + step(&mut ctx, &mut mem); + assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by stvlx"); + } + /// Regression: `lvebx` must preserve the prior contents of the /// destination VR for lanes other than the loaded byte. Previously /// the handler started from a zeroed buffer. From a107ac9ae73031eb5f371c0994885c4f103007b6 Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Fri, 1 May 2026 17:44:48 +0200 Subject: [PATCH 5/7] fix(cpu): PPCBUG-151 add reservation_width discriminator to stwcx./stdcx. Track lwarx vs ldarx reservation width in PpcContext as a u8 (4 = word, 8 = doubleword, 0 = none). stwcx. requires width==4; stdcx. requires width==8. Cross-width pairs (lwarx + stdcx., ldarx + stwcx.) now fail deterministically with CR0.EQ=0 instead of spuriously succeeding. The width is held per-thread; the cross-thread reservation table keeps its existing slot encoding because each host thread consults its own ctx.reservation_width before committing. Affected: PPCBUG-151 stwcx./stdcx. 
shared the same reservation slot without width discriminator; cross-width commits silently succeeded Tests: lwarx_then_stdcx_cross_width_fails, ldarx_then_stwcx_cross_width_fails Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/xenia-cpu/src/context.rs | 7 +++ crates/xenia-cpu/src/interpreter.rs | 96 ++++++++++++++++++++++++++++- 2 files changed, 101 insertions(+), 2 deletions(-) diff --git a/crates/xenia-cpu/src/context.rs b/crates/xenia-cpu/src/context.rs index 26b9733..5826120 100644 --- a/crates/xenia-cpu/src/context.rs +++ b/crates/xenia-cpu/src/context.rs @@ -101,6 +101,12 @@ pub struct PpcContext { pub reserved_line: u32, pub reserved_val: u64, pub has_reservation: bool, + /// PPCBUG-151 — width of the active reservation: 4 = `lwarx` (word), + /// 8 = `ldarx` (doubleword), 0 = no reservation. `stwcx.` requires + /// width==4; `stdcx.` requires width==8. Cross-width pairs fail + /// deterministically with CR0.EQ=0. Cleared alongside `has_reservation` + /// on every `stwcx.`/`stdcx.` exit (success or failure). + pub reservation_width: u8, /// M3.7 — generation stamp returned by [`crate::ReservationTable::reserve`] /// at the most recent `lwarx`/`ldarx`. Paired with `reserved_line`; /// `stwcx.`/`stdcx.` pass this back to `try_commit`. 
Meaningful only @@ -159,6 +165,7 @@ impl PpcContext { reserved_line: 0, reserved_val: 0, has_reservation: false, + reservation_width: 0, reserved_generation: 0, reservation_table: None, hw_id: 0, diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs index c4694b5..63db6a4 100644 --- a/crates/xenia-cpu/src/interpreter.rs +++ b/crates/xenia-cpu/src/interpreter.rs @@ -1124,6 +1124,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - ctx.reserved_line = ea & !RESERVATION_MASK; ctx.reserved_val = val as u64; ctx.has_reservation = true; + ctx.reservation_width = 4; // PPCBUG-151: word reservation if let Some(t) = &ctx.reservation_table { if t.is_enabled() { ctx.reserved_generation = t.reserve(ea, ctx.hw_id); @@ -1140,17 +1141,21 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - .as_ref() .filter(|t| t.is_enabled()) .cloned(); + // PPCBUG-151: stwcx. requires a word (lwarx) reservation; + // a doubleword (ldarx) reservation must not commit here. + let width_ok = ctx.reservation_width == 4; let success = if let Some(t) = &table_route { // Table-routed: success iff the slot still holds our // reservation AND the per-ctx flag agrees (the per-ctx // flag would be cleared by an intervening write or // context switch). ctx.has_reservation + && width_ok && ctx.reserved_line == line && t.try_commit(ea, ctx.reserved_generation, ctx.hw_id) } else { // Legacy per-ctx path (M2 default). 
- ctx.has_reservation && ctx.reserved_line == line + ctx.has_reservation && width_ok && ctx.reserved_line == line }; if success { mem.write_u32(ea, ctx.gpr[instr.rs()] as u32); @@ -1176,6 +1181,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - } } ctx.has_reservation = false; + ctx.reservation_width = 0; // PPCBUG-151: always clear on exit ctx.pc += 4; } @@ -4282,6 +4288,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - ctx.reserved_line = ea & !RESERVATION_MASK; ctx.reserved_val = val; ctx.has_reservation = true; + ctx.reservation_width = 8; // PPCBUG-151: doubleword reservation if let Some(t) = &ctx.reservation_table { if t.is_enabled() { ctx.reserved_generation = t.reserve(ea, ctx.hw_id); @@ -4297,12 +4304,16 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - .as_ref() .filter(|t| t.is_enabled()) .cloned(); + // PPCBUG-151: stdcx. requires a doubleword (ldarx) reservation; + // a word (lwarx) reservation must not commit here. + let width_ok = ctx.reservation_width == 8; let success = if let Some(t) = &table_route { ctx.has_reservation + && width_ok && ctx.reserved_line == line && t.try_commit(ea, ctx.reserved_generation, ctx.hw_id) } else { - ctx.has_reservation && ctx.reserved_line == line + ctx.has_reservation && width_ok && ctx.reserved_line == line }; if success { mem.write_u64(ea, ctx.gpr[instr.rs()]); @@ -4324,6 +4335,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - } } ctx.has_reservation = false; + ctx.reservation_width = 0; // PPCBUG-151: always clear on exit ctx.pc += 4; } PpcOpcode::ldbrx => { @@ -6077,6 +6089,86 @@ mod tests { /// Regression: `lvebx` must preserve the prior contents of the /// destination VR for lanes other than the loaded byte. 
Previously + // ---------- PPCBUG-151: cross-width reservation pairs must fail ---------- + + /// PPCBUG-151: `lwarx` (width=4) followed by `stdcx.` (requires width=8) + /// must fail with CR0.EQ=0. Memory must remain unchanged. + #[test] + fn lwarx_then_stdcx_cross_width_fails() { + let table = std::sync::Arc::new(crate::ReservationTable::new()); + table.enable(); + + let mut ctx = PpcContext::new(); + ctx.reservation_table = Some(table.clone()); + ctx.hw_id = 0; + let mut mem = TestMem::new(); + + // r4=0x1000 (target addr), r5=0 (index), r6=value to (attempt to) store. + ctx.gpr[4] = 0x1000; + ctx.gpr[5] = 0; + ctx.gpr[6] = 0xDEAD_BEEF_CAFE_BABEu64; + + // Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20, Rc=0) + let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1); + write_instr(&mut mem, 0, lwarx); + // Instr 1: stdcx. r6, r4, r5 (opcode 31, XO 214, Rc=1) + let stdcx = (31u32 << 26) | (6 << 21) | (4 << 16) | (5 << 11) | (214 << 1) | 1; + write_instr(&mut mem, 4, stdcx); + + // Execute lwarx — must set a word reservation (width=4). + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert!(ctx.has_reservation, "lwarx must set has_reservation"); + assert_eq!(ctx.reservation_width, 4, "lwarx must set reservation_width=4"); + + // Execute stdcx. — width mismatch (needs 8, got 4); must fail. + step(&mut ctx, &mut mem); + assert!(!ctx.cr[0].eq, "stdcx. must fail when reservation was set by lwarx (cross-width)"); + // Memory at 0x1000-0x1007 must be unchanged (still zero). + assert_eq!(mem.read_u64(0x1000), 0, "stdcx. must not write on cross-width failure"); + // Width must be cleared on exit. + assert_eq!(ctx.reservation_width, 0, "stdcx. must clear reservation_width on exit"); + } + + /// PPCBUG-151: `ldarx` (width=8) followed by `stwcx.` (requires width=4) + /// must fail with CR0.EQ=0. Memory must remain unchanged. 
+ #[test] + fn ldarx_then_stwcx_cross_width_fails() { + let table = std::sync::Arc::new(crate::ReservationTable::new()); + table.enable(); + + let mut ctx = PpcContext::new(); + ctx.reservation_table = Some(table.clone()); + ctx.hw_id = 0; + let mut mem = TestMem::new(); + + // r4=0x1000 (target addr), r5=0 (index), r6=value to (attempt to) store. + ctx.gpr[4] = 0x1000; + ctx.gpr[5] = 0; + ctx.gpr[6] = 0xCCCC_CCCCu64; + + // Instr 0: ldarx r3, r4, r5 (opcode 31, XO 84, Rc=0) + let ldarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (84 << 1); + write_instr(&mut mem, 0, ldarx); + // Instr 1: stwcx. r6, r4, r5 (opcode 31, XO 150, Rc=1) + let stwcx = (31u32 << 26) | (6 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1; + write_instr(&mut mem, 4, stwcx); + + // Execute ldarx — must set a doubleword reservation (width=8). + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert!(ctx.has_reservation, "ldarx must set has_reservation"); + assert_eq!(ctx.reservation_width, 8, "ldarx must set reservation_width=8"); + + // Execute stwcx. — width mismatch (needs 4, got 8); must fail. + step(&mut ctx, &mut mem); + assert!(!ctx.cr[0].eq, "stwcx. must fail when reservation was set by ldarx (cross-width)"); + // Memory at 0x1000 must be unchanged (still zero). + assert_eq!(mem.read_u32(0x1000), 0, "stwcx. must not write on cross-width failure"); + // Width must be cleared on exit. + assert_eq!(ctx.reservation_width, 0, "stwcx. must clear reservation_width on exit"); + } + /// the handler started from a zeroed buffer. #[test] fn test_lvebx_preserves_other_lanes() { From d75c4edf67c30d3341033e64b0f6146051ee30f9 Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Fri, 1 May 2026 17:55:13 +0200 Subject: [PATCH 6/7] docs(cpu): PPCBUG-108 document legacy reservation path's strict-lockstep requirement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds doc comments above lwarx/ldarx/stwcx./stdcx. 
clarifying that the legacy per-ctx reservation path is only correct in strict lockstep (single host thread); under --parallel the M3 scheduler must enable the cross-thread ReservationTable before spawning a second host thread. A debug_assert fires in the legacy stwcx./stdcx. branch if a non-primary HW slot (hw_id != 0) takes that path — surfacing ReservationTable-disabled misconfiguration early in debug builds. Note: the primary slot (hw_id==0) racing other parallel slots is not caught by the assert; that case requires the table to be enabled. Affected: PPCBUG-108 legacy per-ctx reservation path cannot invalidate cross-thread; informational — no behavioral change Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/xenia-cpu/src/interpreter.rs | 41 ++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs index 63db6a4..9795ac8 100644 --- a/crates/xenia-cpu/src/interpreter.rs +++ b/crates/xenia-cpu/src/interpreter.rs @@ -1116,6 +1116,15 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - // per-`PpcContext` fields. Both paths leave the per-ctx fields // in a coherent state so a flag flip mid-run doesn't corrupt // outstanding reservations. + // + // PPCBUG-108: lwarx + stwcx. atomicity is provided by `ReservationTable` + // in the M3 multi-HW-thread runtime. The legacy per-ctx fallback (when + // `reservation_table` is None or the table is disabled) cannot observe + // stores from other host threads — a store by thread B cannot clear + // `ctx_A.has_reservation`. This path is only correct in strict lockstep + // (single-host-thread) mode. The M3 scheduler MUST enable the table + // before spawning a second host thread. See stwcx./stdcx. for the + // debug_assert that fires if a non-primary slot takes this path. 
PpcOpcode::lwarx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; @@ -1132,6 +1141,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - } ctx.pc += 4; } + // PPCBUG-108: see lwarx comment above. stwcx. legacy path cannot observe + // cross-thread reservation invalidations; only safe in lockstep mode. PpcOpcode::stwcx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; @@ -1154,7 +1165,19 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - && ctx.reserved_line == line && t.try_commit(ea, ctx.reserved_generation, ctx.hw_id) } else { - // Legacy per-ctx path (M2 default). + // Legacy per-ctx path (M2 default / lockstep). + // PPCBUG-108: fires on non-primary HW slots under misconfig — + // if the table is disabled while workers are active, slots + // 1..N will trip this assert, surfacing the misconfiguration + // early in debug builds. Note: hw_id==0 (primary slot) taking + // this path while other slots run in parallel would NOT be + // caught; that case requires the table to be enabled instead. + debug_assert!( + ctx.hw_id == 0, + "PPCBUG-108: legacy per-ctx stwcx. on non-primary HW slot \ + (hw_id={}) — ReservationTable must be enabled under --parallel", + ctx.hw_id + ); ctx.has_reservation && width_ok && ctx.reserved_line == line }; if success { @@ -4281,6 +4304,11 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - // §4k — Scalar reservation / byte-reverse (doubleword) // ═════════════════════════════════════════════════════════════════ // M3.7 — same table-vs-legacy split as lwarx/stwcx. + // PPCBUG-108: ldarx + stdcx. have the same cross-thread atomicity + // limitation as lwarx/stwcx. in the legacy per-ctx fallback path. + // See the lwarx block comment for the full explanation. 
The M3 + // scheduler must enable `ReservationTable` before spawning a second + // host thread. stdcx. carries the debug_assert (see below). PpcOpcode::ldarx => { let ea = ea_indexed(ctx, instr); let val = mem.read_u64(ea); @@ -4296,6 +4324,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - } ctx.pc += 4; } + // PPCBUG-108: see ldarx comment above. stdcx. legacy path cannot observe + // cross-thread reservation invalidations; only safe in lockstep mode. PpcOpcode::stdcx => { let ea = ea_indexed(ctx, instr); let line = ea & !RESERVATION_MASK; @@ -4313,6 +4343,15 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - && ctx.reserved_line == line && t.try_commit(ea, ctx.reserved_generation, ctx.hw_id) } else { + // Legacy per-ctx path (M2 default / lockstep). + // PPCBUG-108: same sentinel as stwcx. — fires on non-primary + // HW slots if the table is disabled under --parallel. + debug_assert!( + ctx.hw_id == 0, + "PPCBUG-108: legacy per-ctx stdcx. on non-primary HW slot \ + (hw_id={}) — ReservationTable must be enabled under --parallel", + ctx.hw_id + ); ctx.has_reservation && width_ok && ctx.reserved_line == line }; if success { From c9f194dda12f9bef329479425d1a858615f12294 Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Fri, 1 May 2026 20:47:32 +0200 Subject: [PATCH 7/7] =?UTF-8?q?fix(cpu):=20review=20fixes=20=E2=80=94=20st?= =?UTF-8?q?swi/stswx=20two-line=20guard,=20dcbz/dcbz128=20invalidate?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PPCBUG-160 partial: stswi's single invalidate_for_write(ea) only covered the first cache line; with nb up to 32, the write span can cross a 128-byte line boundary. Replace with two-call guard: first_line = ea & !RESERVATION_MASK last_line = ea.wrapping_add(nb - 1) & !RESERVATION_MASK invalidate first; if last != first, invalidate last. 
PPCBUG-160 partial: stswx had the same single-call gap; nb comes from the low 7 bits of XER (`ctx.xer() & 0x7F`, the string byte-count field) and can be up to 127 bytes. Same two-call guard applied; wrapped in `if nb > 0` to guard against `nb - 1` underflow when nb==0 (the XER byte-count field is 0 when there are no bytes to store). dcbz: zeroes 32 bytes at a 32-byte-aligned EA — touches exactly one 128-byte cache line; add canonical single-call invalidate guard (was entirely missing). dcbz128: zeroes 128 bytes at a 128-byte-aligned EA — one full reservation line; add canonical single-call invalidate guard (was entirely missing). Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/xenia-cpu/src/interpreter.rs | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs index 9795ac8..c22cd0b 100644 --- a/crates/xenia-cpu/src/interpreter.rs +++ b/crates/xenia-cpu/src/interpreter.rs @@ -1464,7 +1464,12 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut rs = instr.rs(); let mut bytes_left = nb; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { - if t.has_active_reservers() { t.invalidate_for_write(ea); } + if t.has_active_reservers() { + let first_line = ea & !RESERVATION_MASK; + let last_line = ea.wrapping_add(nb - 1) & !RESERVATION_MASK; + t.invalidate_for_write(first_line); + if last_line != first_line { t.invalidate_for_write(last_line); } + } } while bytes_left > 0 { let val = ctx.gpr[rs] as u32; @@ -1600,6 +1605,9 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - // Zero 32 bytes at effective address let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) as u32) & !31; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } for i in 0..8 { mem.write_u32(ea + i * 4, 0); } @@ -1609,6 +1617,9 @@ fn execute(ctx: &mut 
PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - // Zero 128 bytes let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) as u32) & !127; + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } for i in 0..32 { mem.write_u32(ea + i * 4, 0); } @@ -4418,8 +4429,15 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let nb = ctx.xer() & 0x7F; let mut rs = instr.rs(); let mut bytes_left = nb; - if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { - if t.has_active_reservers() { t.invalidate_for_write(ea); } + if nb > 0 { + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { + let first_line = ea & !RESERVATION_MASK; + let last_line = ea.wrapping_add(nb - 1) & !RESERVATION_MASK; + t.invalidate_for_write(first_line); + if last_line != first_line { t.invalidate_for_write(last_line); } + } + } } while bytes_left > 0 { let val = ctx.gpr[rs] as u32;