fix(cpu): PPCBUG-700 VMX128 register accessors match canary bitfield layout

Independent review of P3 batch 2 (52ece4b) found that all three VMX128
register accessors disagreed with canary's FormatVX128/VX128_R bitfield
struct (`xenia-canary/src/xenia/cpu/ppc/ppc_decode_data.h:484-663`). The
audit at line 2958 had marked these "confirmed-clean" but had miscounted
LSB-first bitfield offsets.

Canary's actual layout (LSB-first, GCC/Clang/MSVC on x86):
  VA128 = VA128l(5) | VA128h(1)<<5 | VA128H(1)<<6
        = PPC[11:15] | PPC[26]<<5 | PPC[21]<<6  (7-bit selector, 3 fields)
  VB128 = VB128l(5) | VB128h(2)<<5
        = PPC[16:20] | PPC[30:31]<<5            (7-bit selector, 2 fields)
  VD128 = VD128l(5) | VD128h(2)<<5
        = PPC[6:10]  | PPC[28:29]<<5            (7-bit selector, 2 fields)
  VX128_R Rc = PPC[25]  (host bit 6)             — not PPC[27], as the prior fix assumed

The buggy convention was internally consistent with hand-crafted test
fixtures (which set bits 29/21/22 to encode the high registers, matching
the buggy accessor). Real Xbox 360 game code follows canary's convention,
so any production VMX128 instruction with VR >= 32 was silently mis-decoded
— but no unit test exercised that path until the va128 fix in 52ece4b
exposed the inconsistency.

Changes:
- decoder.rs: rewrite va128/vb128/vd128/vx128r_rc_bit to canary positions.
  Drop the speculative `key4_dt` dot-form dispatch in decode_op6 — canary
  has no separate dot-form opcodes for VX128_R compute ops; Rc is a
  runtime modifier read by the interpreter via vx128r_rc_bit().
- decoder.rs tests: rewrite vmx128_test_word helper for canary layout;
  rename/re-encode vmx128_vd128_*, vmx128_va128_*, vmx128_vb128_* tests.
- interpreter.rs: update encode_vpkd3d128 test helper to encode VD via
  canary's VD128h field; tests now pass vd=96 explicitly.
- tests/disasm_goldens.rs: replace the vrlimi128/vsrw128/vpermwi128/
  vperm128 hand-encoded raws with canary-compliant encodings; introduce
  a shared `encode_vx128` helper.
- tests/golden/vmx128_registers.json: re-encode 9 entries (vperm128,
  vsrw128 ×2, vpermwi128, vrlimi128 ×2, vmaddfp128, vmaddcfp128,
  vnmsubfp128) to canary-compliant raws preserving the same expected
  operand strings.
- audit-findings.md: new PPCBUG-700 entry documenting the discovery and
  invalidating the audit's "confirmed-clean" assessment.

Affects every VMX128 op that decodes a VA128/VB128/VD128 register selector
(vaddfp128, vsubfp128, vmulfp128, vand128, vor128, vxor128, vnor128,
vandc128, vsel128, vslo128, vsro128, vperm128, vsrw128, vmaddfp128,
vmaddcfp128, vnmsubfp128, vpkd3d128, vpkshss128, vpkshus128, vpkswss128,
vpkswus128, vpkuhum128, vpkuhus128, vpkuwum128, vpkuwus128, vmsum3fp128,
vmsum4fp128, vrlimi128, vpermwi128 — roughly 30 opcodes), plus VX128_R
compare dot-forms.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-02 11:22:20 +02:00
parent 2be25bdd41
commit 7609dcd406
5 changed files with 207 additions and 157 deletions

View File

@@ -6484,13 +6484,25 @@ mod tests {
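// For vpkd3d128, PPC bits 21-22 are always 1 (key2 discriminant). Under the
// canary layout they do not feed vd128: the two high VD bits live at PPC 28-29,
// so callers pass the full register number (e.g. vd=96) to the encoder.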
fn encode_vpkd3d128(vd_lo: u32, vb_lo: u32, imm: u32, z: u32) -> u32 {
// op6=6 (all VMX128 compute ops); VD[4:0] at host 25-21; IMM at host 20-16;
// VB[4:0] at host 15-11; host bits 10,9 = 1,1 (PPC bits 21,22, key2=0b110);
// z (2-bit) at host 7-6 (PPC bits 24-25); host bit 4 = 1 (PPC bit 27, key2 low=0b01).
// decode_op6 key2 = (bits21-23<<4)|(bits26-27) = (0b110<<4)|0b01 = 0b1100001.
(6u32 << 26) | (vd_lo << 21) | (imm << 16) | (vb_lo << 11)
| (1 << 10) | (1 << 9) | (z << 6) | (1 << 4)
// Assembles the raw 32-bit instruction word for a vpkd3d128 test case.
//
// op6=6, FormatVX128_4 layout (canary). PPC bit n sits at host bit 31-n:
//   VD low  (5 bits) PPC 6-10  -> host 21-25
//   VD high (2 bits) PPC 28-29 -> host 2-3
//   IMM     (5 bits) PPC 11-15 -> host 16-20
//   VB low  (5 bits) PPC 16-20 -> host 11-15
//   z       (2 bits) PPC 24-25 -> host 6-7
//   key2 = 0b1100001 over PPC 21-23 + 26-27:
//     bits 21-23 = 0b110 -> bit 21=1, bit 22=1, bit 23=0
//     bits 26-27 = 0b01  -> bit 26=0, bit 27=1
//
// `vd` is the full 7-bit register number (e.g. 96); it is split into its low
// and high fields here. `vb_lo` carries only the low five VB bits; the VB
// high field is left at zero.
//
// Every operand is masked to its field width so that an out-of-range argument
// cannot bleed into a neighbouring field and silently change the encoding.
fn encode_vpkd3d128(vd: u32, vb_lo: u32, imm: u32, z: u32) -> u32 {
    const OP6: u32 = 6 << 26;
    // PPC bits 21, 22 and 27 (host bits 10, 9 and 4) form the fixed key2 pattern.
    const KEY2: u32 = (1 << 10) | (1 << 9) | (1 << 4);

    let vd_lo = vd & 0x1F;
    let vd_hi = (vd >> 5) & 0x3;
    let imm = imm & 0x1F;
    let vb_lo = vb_lo & 0x1F;
    let z = z & 0x3;

    OP6
        | KEY2
        | (vd_lo << 21)
        | (imm << 16)
        | (vb_lo << 11)
        | (z << 6)
        | (vd_hi << 2)
}
#[test]
@@ -6504,7 +6516,7 @@ mod tests {
ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0);
// prev vd=96: sentinel values that should NOT appear in result
ctx.vr[96] = xenia_types::Vec128::from_u32x4(0xAAAA_AAAA, 0xBBBB_BBBB, 0xCCCC_CCCC, 0xDDDD_DDDD);
write_instr(&mem, 0, encode_vpkd3d128(0, 1, 0, 0));
write_instr(&mem, 0, encode_vpkd3d128(96, 1, 0, 0));
ctx.pc = 0;
step(&mut ctx, &mem);
let r = ctx.vr[96].as_u32x4();
@@ -6524,7 +6536,7 @@ mod tests {
let mem = TestMem::new();
ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0); // out[3]=0x00FF_0000
ctx.vr[96] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);
write_instr(&mem, 0, encode_vpkd3d128(0, 1, 1, 0));
write_instr(&mem, 0, encode_vpkd3d128(96, 1, 1, 0));
ctx.pc = 0;
step(&mut ctx, &mem);
let r = ctx.vr[96].as_u32x4();
@@ -6543,7 +6555,7 @@ mod tests {
let mem = TestMem::new();
ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0); // out[3]=0x00FF_0000
ctx.vr[96] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);
write_instr(&mem, 0, encode_vpkd3d128(0, 1, 1, 3));
write_instr(&mem, 0, encode_vpkd3d128(96, 1, 1, 3));
ctx.pc = 0;
step(&mut ctx, &mem);
let r = ctx.vr[96].as_u32x4();