fix(cpu): PPCBUG-510 stvewx128 writes 16 bytes instead of 4

stvewx128 was aligning EA to 16 bytes and writing all 16 bytes of the
vector, corrupting 12 adjacent bytes on every call. ISA semantics:
word-align EA, extract word lane (EA & 0xF) >> 2, write 4 bytes only.

The non-128 stvewx was already correct; stvewx128 was never updated.
Mirror the stvewx body with instr.vs128() substituted for instr.rs().
The invalidate_for_write call from P1 now covers the correct word-aligned
EA rather than the over-wide 16-byte range.

interpreter.rs: stvewx128 arm (~line 2984)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-02 10:05:37 +02:00
parent a8c918cf9e
commit cedee3c385

View File

@@ -2982,16 +2982,21 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
ctx.pc += 4;
}
PpcOpcode::stvewx128 => {
let ea = ea_indexed(ctx, instr) & !0xF;
// TODO PPCBUG-510: stvewx128 currently writes 16 bytes at ea & !0xF; the EA scope is
// wrong (should be word-aligned, 4 bytes only). When P3 fixes EA, this invalidate's
// range narrows automatically.
// PPCBUG-512: stvewx128 was missing invalidate_for_write.
// Mirror of stvewx: word-align EA, extract one 32-bit lane, write 4 bytes only.
// Previous code used & !0xF (16-byte) and wrote all 16 bytes, corrupting 12
// adjacent bytes on every execution (PPCBUG-510).
let ea_unaligned = ea_indexed(ctx, instr);
let ea = ea_unaligned & !0x3u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
let slot = ((ea_unaligned & 0xF) >> 2) as usize;
let bytes = ctx.vr[instr.vs128()].as_bytes();
for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); }
let w = ((bytes[slot * 4] as u32) << 24)
| ((bytes[slot * 4 + 1] as u32) << 16)
| ((bytes[slot * 4 + 2] as u32) << 8)
| (bytes[slot * 4 + 3] as u32);
mem.write_u32(ea, w);
ctx.pc += 4;
}
@@ -6463,4 +6468,47 @@ mod tests {
assert_eq!(r[2], 0x3333_3333, "pack=1 shift=3: lane 2 from prev");
assert_eq!(r[3], 0x4444_4444, "pack=1 shift=3: lane 3 from prev");
}
// ---- PPCBUG-510: stvewx128 should write one word (4 bytes), not 16 ----
fn encode_stvewx128(vs_lo: u32, ra: u32, rb: u32) -> u32 {
    // Builds the raw instruction word for stvewx128 from its three register fields.
    //
    // Layout produced (host bit numbering, bit 0 = least significant):
    //   bits 31-26  primary opcode 4
    //   bits 25-21  vs_lo (low five bits of the VS128 register number)
    //   bits 20-16  ra
    //   bits 15-11  rb
    //   bits 10-4   0b0011000  } together the decode key 0b00110000011
    //   bits  1-0   0b11       }
    //
    // Only the low five VS128 bits are parameterised; nothing else is OR-ed in.
    // NOTE(review): where the decoder takes the upper VS128 bits from is not visible
    // here — confirm against DecodedInstr::vs128() before using registers above 31.
    const PRIMARY_OPCODE: u32 = 4 << 26;
    const DECODE_KEY: u32 = (0b0011000 << 4) | 0b11;

    let register_fields = (vs_lo << 21) | (ra << 16) | (rb << 11);
    PRIMARY_OPCODE | register_fields | DECODE_KEY
}
#[test]
fn stvewx128_writes_one_word_at_word_aligned_ea() {
    // PPCBUG-510 regression test.
    // Old behaviour: all 16 vector bytes were written at ea & !0xF, clobbering the
    // 12 bytes around the target word.
    // Expected behaviour: word-align the EA, pick lane (ea & 0xF) >> 2, store 4 bytes.
    //
    // encode_stvewx128 only fills the low five VS128 bits, so with vs_lo = 3 the
    // instruction is expected to decode to vs128 = 3; the source vector is
    // therefore placed in ctx.vr[3].
    let raw = encode_stvewx128(3, 1, 2);

    // 0x008 is already word-aligned; 0x00A is not. Both lie in the same word, so
    // both must store lane 2 at 0x1008. The unaligned case is what actually
    // exercises the `& !0x3` truncation of the effective address.
    for offset in [0x008, 0x00A] {
        let mut ctx = PpcContext::new();
        let mem = TestMem::new();
        ctx.vr[3] = xenia_types::Vec128::from_u32x4(
            0x1111_1111,
            0x2222_2222,
            0x3333_3333,
            0x4444_4444,
        );
        ctx.gpr[1] = 0x1000; // base
        ctx.gpr[2] = offset; // EA = 0x1000 + offset → aligned EA 0x1008, slot 2
        write_instr(&mem, 0, raw);
        ctx.pc = 0;
        step(&mut ctx, &mem);

        assert_eq!(ctx.pc, 4, "PC must advance");
        // Slot 2 → lane 2 = 0x3333_3333
        assert_eq!(mem.read_u32(0x1008), 0x3333_3333, "only lane 2 word at ea");
        // Every other word of the enclosing 16-byte line must stay zero
        // (the assertions rely on TestMem starting out zero-filled).
        assert_eq!(mem.read_u32(0x1000), 0x0000_0000, "word at 0x1000 must be untouched");
        assert_eq!(mem.read_u32(0x1004), 0x0000_0000, "word below must be untouched");
        assert_eq!(mem.read_u32(0x100C), 0x0000_0000, "word above must be untouched");
    }
}
}