fix(cpu): PPCBUG-510 stvewx128 writes 16 bytes instead of 4
stvewx128 was aligning EA to 16 bytes and writing all 16 bytes of the vector, corrupting 12 adjacent bytes on every call. ISA semantics: word-align EA, extract word lane (EA & 0xF) >> 2, write 4 bytes only. The non-128 stvewx was already correct; stvewx128 was never updated. Mirror the stvewx body with instr.vs128() substituted for instr.rs(). The invalidate_for_write call from P1 now covers the correct word-aligned EA rather than the over-wide 16-byte range. interpreter.rs: stvewx128 arm (~line 2984) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2982,16 +2982,21 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
ctx.pc += 4;
|
||||
}
|
||||
PpcOpcode::stvewx128 => {
|
||||
let ea = ea_indexed(ctx, instr) & !0xF;
|
||||
// TODO PPCBUG-510: stvewx128 currently writes 16 bytes at ea & !0xF; the EA scope is
|
||||
// wrong (should be word-aligned, 4 bytes only). When P3 fixes EA, this invalidate's
|
||||
// range narrows automatically.
|
||||
// PPCBUG-512: stvewx128 was missing invalidate_for_write.
|
||||
// Mirror of stvewx: word-align EA, extract one 32-bit lane, write 4 bytes only.
|
||||
// Previous code used & !0xF (16-byte) and wrote all 16 bytes, corrupting 12
|
||||
// adjacent bytes on every execution (PPCBUG-510).
|
||||
let ea_unaligned = ea_indexed(ctx, instr);
|
||||
let ea = ea_unaligned & !0x3u32;
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
let slot = ((ea_unaligned & 0xF) >> 2) as usize;
|
||||
let bytes = ctx.vr[instr.vs128()].as_bytes();
|
||||
for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); }
|
||||
let w = ((bytes[slot * 4] as u32) << 24)
|
||||
| ((bytes[slot * 4 + 1] as u32) << 16)
|
||||
| ((bytes[slot * 4 + 2] as u32) << 8)
|
||||
| (bytes[slot * 4 + 3] as u32);
|
||||
mem.write_u32(ea, w);
|
||||
ctx.pc += 4;
|
||||
}
|
||||
|
||||
@@ -6463,4 +6468,47 @@ mod tests {
|
||||
assert_eq!(r[2], 0x3333_3333, "pack=1 shift=3: lane 2 from prev");
|
||||
assert_eq!(r[3], 0x4444_4444, "pack=1 shift=3: lane 3 from prev");
|
||||
}
|
||||
|
||||
// ---- PPCBUG-510: stvewx128 should write one word (4 bytes), not 16 ----
|
||||
|
||||
fn encode_stvewx128(vs_lo: u32, ra: u32, rb: u32) -> u32 {
|
||||
// stvewx128 is a VMX128 load/store at op6=4.
|
||||
// decode_op4 key1 = (bits21-27 << 4) | bits30-31 = 0b00110000011 for stvewx128.
|
||||
// bits21-27 = 0b0011000 (host bits 10-4), bits30-31 = 0b11 (host bits 1-0).
|
||||
// VS128[4:0] at host bits 25-21; RA at host bits 20-16; RB at host bits 15-11.
|
||||
// VS128[5] at host bit 3 (PPC bit 28); VS128[6] at host bit 1 (PPC bit 30).
|
||||
(4u32 << 26)
|
||||
| (vs_lo << 21) // VS128[4:0]
|
||||
| (ra << 16) // RA
|
||||
| (rb << 11) // RB
|
||||
| (0b0011000 << 4) // bits 21-27 of key1 pattern
|
||||
| 0b11 // bits 30-31 of key1 pattern
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stvewx128_writes_one_word_at_word_aligned_ea() {
|
||||
// PPCBUG-510: old code wrote all 16 bytes at ea & !0xF, corrupting 12 adjacent bytes.
|
||||
// Fix: word-align EA, extract lane from (ea & 0xF) >> 2, write 4 bytes only.
|
||||
let mut ctx = PpcContext::new();
|
||||
let mem = TestMem::new();
|
||||
// VS128 = v96 (vs_lo=0 | key bits → vs128=0 since key bits 21-27 set bit4=1 and bit5=1
|
||||
// in the key, but vs128 uses bits 6-10 for low 5 bits).
|
||||
// Actually: vs128 uses decode bits 6-10 (host 25-21) and bits 21,22 (host 10,9).
|
||||
// encode_stvewx128 sets vs_lo in bits 25-21 and key bits at bits 10-4.
|
||||
// vs128 = bits6-10 | (bit21<<5) | (bit22<<6) = vs_lo | 0 | 0 = vs_lo.
|
||||
// So vs128 = vs_lo. We'll use vs_lo=3 → vs128=3.
|
||||
let raw = encode_stvewx128(3, 1, 2);
|
||||
ctx.vr[3] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);
|
||||
ctx.gpr[1] = 0x1000; // base
|
||||
ctx.gpr[2] = 0x008; // offset → EA = 0x1008 → word-aligned EA = 0x1008, slot = (0x8 & 0xF)>>2 = 2
|
||||
write_instr(&mem, 0, raw);
|
||||
ctx.pc = 0;
|
||||
step(&mut ctx, &mem);
|
||||
assert_eq!(ctx.pc, 4, "PC must advance");
|
||||
// Slot 2 → lane 2 = 0x3333_3333
|
||||
assert_eq!(mem.read_u32(0x1008), 0x3333_3333, "only lane 2 word at ea");
|
||||
// Adjacent words must be untouched (mem is zero-init)
|
||||
assert_eq!(mem.read_u32(0x1000), 0x0000_0000, "byte below must be untouched");
|
||||
assert_eq!(mem.read_u32(0x100C), 0x0000_0000, "byte above must be untouched");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user