diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs index f30b25f..c4694b5 100644 --- a/crates/xenia-cpu/src/interpreter.rs +++ b/crates/xenia-cpu/src/interpreter.rs @@ -1698,6 +1698,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::stvx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32; + // PPCBUG-511: stvx was missing invalidate_for_write. + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } let bytes = ctx.vr[instr.rs()].as_bytes(); for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); } ctx.pc += 4; @@ -1705,6 +1709,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::stvx128 => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32; + // PPCBUG-511: stvx128 was missing invalidate_for_write. + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } let bytes = ctx.vr[instr.vs128()].as_bytes(); for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); } ctx.pc += 4; @@ -1756,6 +1764,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - // Store vS[EA & 0xF] (1 byte) to memory at EA. let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = base.wrapping_add(ctx.gpr[instr.rb()]) as u32; + // PPCBUG-512: stvebx was missing invalidate_for_write. + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } let slot = (ea & 0xF) as usize; let bytes = ctx.vr[instr.rs()].as_bytes(); mem.write_u8(ea, bytes[slot]); @@ -1766,6 +1778,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea_unaligned = base.wrapping_add(ctx.gpr[instr.rb()]) as u32; let ea = ea_unaligned & !0x1u32; + // PPCBUG-512: stvehx was missing invalidate_for_write. + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } let slot = ((ea_unaligned & 0xF) >> 1) as usize; let bytes = ctx.vr[instr.rs()].as_bytes(); let h = ((bytes[slot * 2] as u16) << 8) | (bytes[slot * 2 + 1] as u16); @@ -1777,6 +1793,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea_unaligned = base.wrapping_add(ctx.gpr[instr.rb()]) as u32; let ea = ea_unaligned & !0x3u32; + // PPCBUG-512: stvewx was missing invalidate_for_write. + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } let slot = ((ea_unaligned & 0xF) >> 2) as usize; let bytes = ctx.vr[instr.rs()].as_bytes(); let w = ((bytes[slot * 4] as u32) << 24) @@ -1799,6 +1819,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::stvxl | PpcOpcode::stvxl128 => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32; + // PPCBUG-511: stvxl/stvxl128 were missing invalidate_for_write. + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } let vs = if matches!(instr.opcode, PpcOpcode::stvxl128) { instr.vs128() } else { instr.rs() }; let bytes = ctx.vr[vs].as_bytes(); for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); } @@ -2845,21 +2869,63 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - } PpcOpcode::stvlx | PpcOpcode::stvlxl => { let ea = ea_indexed(ctx, instr); + // PPCBUG-513: stvlx/stvlxl were missing invalidate_for_write. + // store_vector_left writes [ea, (ea & !0xF)+15]; in the worst case (ea & 0xF == 0) + // that is exactly 16 bytes all within the same 16-byte block, so ea+15 lands in the + // same 128-byte cache line. Two-call form is kept for defensive correctness. + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { + let first_line = ea & !RESERVATION_MASK; + let last_line = ea.wrapping_add(15) & !RESERVATION_MASK; + t.invalidate_for_write(first_line); + if last_line != first_line { t.invalidate_for_write(last_line); } + } + } crate::vmx::store_vector_left(mem, ea, ctx.vr[instr.rs()]); ctx.pc += 4; } PpcOpcode::stvlx128 | PpcOpcode::stvlxl128 => { let ea = ea_indexed(ctx, instr); + // PPCBUG-513: stvlx128/stvlxl128 were missing invalidate_for_write. + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { + let first_line = ea & !RESERVATION_MASK; + let last_line = ea.wrapping_add(15) & !RESERVATION_MASK; + t.invalidate_for_write(first_line); + if last_line != first_line { t.invalidate_for_write(last_line); } + } + } crate::vmx::store_vector_left(mem, ea, ctx.vr[instr.vs128()]); ctx.pc += 4; } PpcOpcode::stvrx | PpcOpcode::stvrxl => { let ea = ea_indexed(ctx, instr); + // PPCBUG-514: stvrx/stvrxl were missing invalidate_for_write. + // store_vector_right writes [ea & !0xF, ea-1] (up to 15 bytes, all within a single + // 16-byte-aligned block). Two-call form is kept for defensive correctness. + // stvrx at shift==0 is a no-op; the guard fires unconditionally (cheap). + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { + let first_line = ea & !RESERVATION_MASK; + let last_line = ea.wrapping_add(15) & !RESERVATION_MASK; + t.invalidate_for_write(first_line); + if last_line != first_line { t.invalidate_for_write(last_line); } + } + } crate::vmx::store_vector_right(mem, ea, ctx.vr[instr.rs()]); ctx.pc += 4; } PpcOpcode::stvrx128 | PpcOpcode::stvrxl128 => { let ea = ea_indexed(ctx, instr); + // PPCBUG-514: stvrx128/stvrxl128 were missing invalidate_for_write. + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { + let first_line = ea & !RESERVATION_MASK; + let last_line = ea.wrapping_add(15) & !RESERVATION_MASK; + t.invalidate_for_write(first_line); + if last_line != first_line { t.invalidate_for_write(last_line); } + } + } crate::vmx::store_vector_right(mem, ea, ctx.vr[instr.vs128()]); ctx.pc += 4; } @@ -2875,6 +2941,13 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - } PpcOpcode::stvewx128 => { let ea = ea_indexed(ctx, instr) & !0xF; + // TODO PPCBUG-510: stvewx128 currently writes 16 bytes at ea & !0xF; the EA scope is + // wrong (should be word-aligned, 4 bytes only). When P3 fixes EA, this invalidate's + // range narrows automatically. + // PPCBUG-512: stvewx128 was missing invalidate_for_write. + if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { + if t.has_active_reservers() { t.invalidate_for_write(ea); } + } let bytes = ctx.vr[instr.vs128()].as_bytes(); for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); } ctx.pc += 4; @@ -5906,6 +5979,102 @@ mod tests { } } + // ---------- PPCBUG-511/513: invalidate_for_write via VMX stores ---------- + + /// PPCBUG-511: A plain `stvx` to a reserved line must invalidate the + /// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0). + #[test] + fn lwarx_then_plain_stvx_invalidates_reservation() { + let table = std::sync::Arc::new(crate::ReservationTable::new()); + table.enable(); + + let mut ctx = PpcContext::new(); + ctx.reservation_table = Some(table.clone()); + ctx.hw_id = 0; + let mut mem = TestMem::new(); + + // r4=0x1000 (reservation + store address), r5=0 (index for lwarx/stwcx.), r7=stwcx val. + ctx.gpr[4] = 0x1000; + ctx.gpr[5] = 0; + ctx.gpr[7] = 0xCCCC_CCCC; + // VR 0: recognizable pattern to confirm the store lands. + ctx.vr[0] = xenia_types::Vec128::from_bytes([0xAA; 16]); + + // Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20) + let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1); + write_instr(&mut mem, 0, lwarx); + // Instr 1: stvx v0, r0, r4 (opcode 31, XO 231; rA=0 → base=0, EA = 0 + r4 = 0x1000, aligned) + // (31<<26) | (vs=0<<21) | (ra=0<<16) | (rb=4<<11) | (231<<1) + let stvx = (31u32 << 26) | (0 << 21) | (0 << 16) | (4 << 11) | (231 << 1); + write_instr(&mut mem, 4, stvx); + // Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1) + let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1; + write_instr(&mut mem, 8, stwcx); + + // Execute lwarx — reserves 0x1000's cache line. + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert!(ctx.has_reservation, "lwarx must set has_reservation"); + + // Execute stvx — must call invalidate_for_write and clear the reservation. + step(&mut ctx, &mut mem); + assert_eq!(mem.read_u8(0x1000), 0xAA, "stvx must write the VR bytes"); + + // Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0). + step(&mut ctx, &mut mem); + assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by stvx"); + assert_eq!(mem.read_u8(0x1000), 0xAA, "stwcx. must not overwrite on failure"); + } + + /// PPCBUG-513: A plain `stvlx` to a reserved line must invalidate the + /// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0). + /// stvlx with EA=0x1003 writes bytes 0x1003-0x100F (13 bytes from VR0's high lanes). + #[test] + fn lwarx_then_plain_stvlx_invalidates_reservation() { + let table = std::sync::Arc::new(crate::ReservationTable::new()); + table.enable(); + + let mut ctx = PpcContext::new(); + ctx.reservation_table = Some(table.clone()); + ctx.hw_id = 0; + let mut mem = TestMem::new(); + + // Reserve at 0x1000 (same cache line as the stvlx target 0x1003). + ctx.gpr[4] = 0x1000; // lwarx/stwcx. reservation address + ctx.gpr[5] = 0; // index register (0 for lwarx/stwcx.) + ctx.gpr[6] = 0x1003; // stvlx EA: rb=6, ra=0 → ea = 0 + 0x1003 = 0x1003 + ctx.gpr[7] = 0xCCCC_CCCC; // stwcx. value + // VR 0: recognizable pattern. + ctx.vr[0] = xenia_types::Vec128::from_bytes([0xBB; 16]); + + // Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20) + let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1); + write_instr(&mut mem, 0, lwarx); + // Instr 1: stvlx v0, r0, r6 (opcode 31, XO 647; rA=0 → base=0, EA = r6 = 0x1003) + // store_vector_left writes shift=3 skipped bytes, then bytes 3..15 of VR0 → 0x1003..0x100F + // (31<<26) | (vs=0<<21) | (ra=0<<16) | (rb=6<<11) | (647<<1) + let stvlx = (31u32 << 26) | (0 << 21) | (0 << 16) | (6 << 11) | (647 << 1); + write_instr(&mut mem, 4, stvlx); + // Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1) + let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1; + write_instr(&mut mem, 8, stwcx); + + // Execute lwarx — reserves 0x1000's cache line. + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert!(ctx.has_reservation, "lwarx must set has_reservation"); + + // Execute stvlx — must call invalidate_for_write and clear the reservation. + step(&mut ctx, &mut mem); + // store_vector_left(ea=0x1003): shift=3, n=13 → writes bytes 0x1003-0x100F = 0xBB. + assert_eq!(mem.read_u8(0x1003), 0xBB, "stvlx must write VR bytes starting at EA"); + assert_eq!(mem.read_u8(0x100F), 0xBB, "stvlx must write up to (ea & !0xF)+15"); + + // Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0). + step(&mut ctx, &mut mem); + assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by stvlx"); + } + /// Regression: `lvebx` must preserve the prior contents of the /// destination VR for lanes other than the loaded byte. Previously /// the handler started from a zeroed buffer.