fix(cpu): PPCBUG-700 VMX128 register accessors match canary bitfield layout
Independent review of P3 batch 2 (52ece4b) found that all three VMX128 register accessors disagreed with canary's FormatVX128/VX128_R bitfield struct (`xenia-canary/src/xenia/cpu/ppc/ppc_decode_data.h:484-663`). The audit at line 2958 had marked these "confirmed-clean" but had miscounted LSB-first bitfield offsets.

Canary's actual layout (LSB-first, GCC/Clang/MSVC on x86):

    VA128 = VA128l(5) | VA128h(1)<<5 | VA128H(1)<<6 = PPC[11:15] | PPC[26]<<5 | PPC[21]<<6   (7-bit selector, 3 fields)
    VB128 = VB128l(5) | VB128h(2)<<5 = PPC[16:20] | PPC[30:31]<<5   (7-bit selector, 2 fields)
    VD128 = VD128l(5) | VD128h(2)<<5 = PPC[6:10] | PPC[28:29]<<5   (7-bit selector, 2 fields)
    VX128_R Rc = PPC[25] (host bit 6), not PPC[27] as the prior fix had

The buggy convention was internally consistent with hand-crafted test fixtures (which set bits 29/21/22 to encode the high registers, matching the buggy accessor). Real Xbox 360 game code follows canary's convention, so any production VMX128 instruction with VR >= 32 was silently mis-decoded — but no unit test exercised that path until the va128 fix in 52ece4b exposed the inconsistency.

Changes:
- decoder.rs: rewrite va128/vb128/vd128/vx128r_rc_bit to canary positions. Drop the speculative `key4_dt` dot-form dispatch in decode_op6 — canary has no separate dot-form opcodes for VX128_R compute ops; Rc is a runtime modifier read by the interpreter via vx128r_rc_bit().
- decoder.rs tests: rewrite vmx128_test_word helper for canary layout; rename/re-encode vmx128_vd128_*, vmx128_va128_*, vmx128_vb128_* tests.
- interpreter.rs: update encode_vpkd3d128 test helper to encode VD via canary's VD128h field; tests now pass vd=96 explicitly.
- tests/disasm_goldens.rs: replace the vrlimi128/vsrw128/vpermwi128/vperm128 hand-encoded raws with canary-compliant encodings; introduce a shared `encode_vx128` helper.
- tests/golden/vmx128_registers.json: re-encode 9 entries (vperm128, vsrw128 ×2, vpermwi128, vrlimi128 ×2, vmaddfp128, vmaddcfp128, vnmsubfp128) to canary-compliant raws preserving the same expected operand strings.
- audit-findings.md: new PPCBUG-700 entry documenting the discovery and invalidating the audit's "confirmed-clean" assessment.

Affects all VMX128 binary ops (vaddfp128, vsubfp128, vmulfp128, vand128, vor128, vxor128, vnor128, vandc128, vsel128, vslo128, vsro128, vperm128, vsrw128, vmaddfp128, vmaddcfp128, vnmsubfp128, vpkd3d128, vpkshss128, vpkshus128, vpkswss128, vpkswus128, vpkuhum128, vpkuhus128, vpkuwum128, vpkuwus128, vmsum3fp128, vmsum4fp128, vrlimi128, vpermwi128 — 30+ opcodes), plus VX128_R compare dot-forms.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -50,6 +50,23 @@ fn fixture_path(name: &str) -> PathBuf {
|
||||
.join(name)
|
||||
}
|
||||
|
||||
/// Encode a VMX128 VX128-form (or VX128_R/_2) instruction word using canary's
/// 7-bit register layout.
///
/// Field placement, given in PPC bit numbering (host bit = 31 - PPC bit):
///
/// * `op6` — primary opcode, PPC 0-5.
/// * `vd`  — low 5 bits at PPC 6-10, high 2 bits (VD128h) at PPC 28-29.
/// * `va`  — low 5 bits at PPC 11-15, mid bit (VA128h) at PPC 26, high bit
///   (VA128H) at PPC 21.
/// * `vb`  — low 5 bits at PPC 16-20, high 2 bits (VB128h) at PPC 30-31.
///
/// `secondary_bits` carries any secondary opcode + VC + Rc + key bits the
/// caller needs; it is OR-ed into the word unchanged, so the caller is
/// responsible for keeping it clear of the register fields above.
///
/// Every input is masked to its field width: bits of `op6` above the low 6
/// and bits of `vd`/`va`/`vb` above the low 7 are silently discarded.
fn encode_vx128(op6: u32, vd: u32, va: u32, vb: u32, secondary_bits: u32) -> u32 {
    let opcode = (op6 & 0x3F) << 26;

    // VD128l -> host 21-25, VD128h -> host 2-3.
    let vd_bits = ((vd & 0x1F) << 21) | (((vd >> 5) & 0x3) << 2);

    // VA128l -> host 16-20, VA128h -> host 5, VA128H -> host 10.
    let va_bits = ((va & 0x1F) << 16) | (((va >> 5) & 0x1) << 5) | (((va >> 6) & 0x1) << 10);

    // VB128l -> host 11-15, VB128h -> host 0-1 (already in place, no shift).
    let vb_bits = ((vb & 0x1F) << 11) | ((vb >> 5) & 0x3);

    opcode | vd_bits | va_bits | vb_bits | secondary_bits
}
|
||||
|
||||
fn build_rows(cases: &[(u32, u32, &str)]) -> Vec<GoldenRow> {
|
||||
cases
|
||||
.iter()
|
||||
@@ -428,77 +445,57 @@ fn vmx128_registers() {
|
||||
((4u32 << 26) | (5 << 11) | 1604, 0x82000000, "mtvscr v5"),
|
||||
];
|
||||
|
||||
// VMX128 op=5 — uses vd128/va128/vb128 (7-bit registers, high bits at
|
||||
// 21+22). These are the silent-bug-area encodings; we exercise low
|
||||
// register indices here because the secondary-opcode key for op=5
|
||||
// includes bits 21-22, constraining vd128 high bits to 0 in this form.
|
||||
// High-index examples for vd128 live in the op=6 series below.
|
||||
// VMX128 op=5: vperm128 v3, v4, v5, vc=0. Canary FormatVX128: VD low
|
||||
// at PPC 6-10, VA low at PPC 11-15, VB low at PPC 16-20, VC at PPC 23-25.
|
||||
// key1 = (bit22<<5)|bit27 = 0 selects vperm128.
|
||||
let vmx128_op5 = [
|
||||
// vaddfp128 v3, v4, v5 : op=5, key2=0b000001
|
||||
((5u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (0 << 6) | (1 << 0), 0x82000000, "vaddfp128 (encoded sloppily)"),
|
||||
(encode_vx128(5, 3, 4, 5, 0), 0x82000000, "vperm128 v3, v4, v5, 0 (canary)"),
|
||||
];
|
||||
|
||||
// VMX128 op=6 — vrlimi128 has secondary key in bits 23-25 + 26-27, so
|
||||
// bits 21-22 ARE the high bits of vd128 (canonical silent-bug-area).
|
||||
// These instructions exercise vd128 = 32, 64, 96 — covering the bit-21
|
||||
// and bit-22 split that ppc.rs's old extractor (now deleted) miscoded.
|
||||
let vrlimi128 = |vd: u32, vb: u32, imm: u32, z: u32| -> u32 {
|
||||
// op=6, vd128 = bits 6-10 + bit 21 + bit 22, vb128 = bits 16-20 + bits 30+31,
|
||||
// IMM = bits 11-15, Z = bits 24-25, key2 = (bits 23-25 << 4) | bits 26-27 = 0b1110001
|
||||
let vd_lo = vd & 0x1F;
|
||||
let vd_b21 = (vd >> 5) & 1;
|
||||
let vd_b22 = (vd >> 6) & 1;
|
||||
let vb_lo = vb & 0x1F;
|
||||
let vb_b30 = (vb >> 5) & 1;
|
||||
let vb_b31 = (vb >> 6) & 1;
|
||||
// bits 23-25 = 111, bits 26-27 = 00, bit 27 = 1 → key2 lower 4 bits = 0001
|
||||
// Encoded: bits 23-25 = 111, bits 26-27 = 00 are actually overlapping with z field (bits 24-25)
|
||||
// The plan view: (bits 23 << 6) | (bits 24-25 << 4) | (bits 26-27 << 2) but the table uses different.
|
||||
// Easiest: hand-encode known bit pattern matching decoder.rs's match:
|
||||
// key2 = (extract_bits(code, 23, 25) << 4) | extract_bits(code, 26, 27) = 0b1110001
|
||||
// bits 23-25 = 111, bits 26-27 = 01
|
||||
// Bit positions 23-27 = 11101 (5 bits, MSB at 23).
|
||||
// PPC bit 23 (LSB index 8): set
|
||||
// PPC bit 24 (LSB index 7): set -- this is z bit 0
|
||||
// PPC bit 25 (LSB index 6): set -- this is z bit 1
|
||||
// PPC bit 26 (LSB index 5): unset
|
||||
// PPC bit 27 (LSB index 4): set
|
||||
// We let z = bits 24-25 stored with vd128 bits at 21-22.
|
||||
// To preserve key2 = 0b1110001, we need bits 24-25 = 11, bit 26 = 0, bit 27 = 1.
|
||||
// BUT bits 24-25 ARE the z field; if we set them = 11 the z value is 3.
|
||||
// So Z is constrained for vrlimi128. Choose Z = 3 (matches Sylpheed examples).
|
||||
let z3 = z & 0x3;
|
||||
(6u32 << 26)
|
||||
| (vd_lo << 21)
|
||||
| (imm << 16)
|
||||
| (vb_lo << 11)
|
||||
| (vd_b21 << 10) // bit 21 (LSB pos 10)
|
||||
| (vd_b22 << 9) // bit 22 (LSB pos 9)
|
||||
| (1 << 8) // bit 23
|
||||
| (z3 << 6) // bits 24-25
|
||||
| (0 << 5) // bit 26
|
||||
| (1 << 4) // bit 27
|
||||
| (vb_b30 << 1) // bit 30
|
||||
| vb_b31 // bit 31
|
||||
// VMX128 op=6 — exercise full 0-127 vd128 range under canary's layout.
|
||||
// VD128h is at PPC 28-29 (host 2-3): no overlap with secondary opcode key,
|
||||
// so vd can be freely 0-127 for any op6 instruction.
|
||||
let vsrw128 = |vd: u32, vb: u32| -> u32 {
|
||||
// vsrw128 secondary: 0x000001D0 (decode_op6 key5 = 0b011101).
|
||||
encode_vx128(6, vd, 0, vb, 0x000001D0)
|
||||
};
|
||||
let vpermwi128 = |vd: u32, vb: u32, perm: u32| -> u32 {
|
||||
// vpermwi128: PERMl at PPC 11-15, PERMh at PPC 23-25, key1 sets bit 22 + bit 27.
|
||||
let perml = perm & 0x1F;
|
||||
let permh = (perm >> 5) & 0x7;
|
||||
let mut raw = (6u32 << 26)
|
||||
| ((vd & 0x1F) << 21)
|
||||
| (((vd >> 5) & 0x3) << 2) // VD128h
|
||||
| (perml << 16)
|
||||
| ((vb & 0x1F) << 11)
|
||||
| (((vb >> 5) & 0x3) << 0) // VB128h
|
||||
| (permh << 6) // PERMh at PPC 23-25
|
||||
| (1 << 9) // bit 22 (key1 high)
|
||||
| (1 << 4); // bit 27 (key1 low)
|
||||
raw &= !(1 << 10); // PPC 21 = 0 for vpermwi128
|
||||
raw
|
||||
};
|
||||
let vrlimi128 = |vd: u32, vb: u32, imm: u32, z: u32| -> u32 {
|
||||
// vrlimi128: IMM at PPC 11-15, z at PPC 24-25, key2 = 0b1110001 over
|
||||
// bits 21-23 + 26-27 → bits 21,22,23 = 1, bit 26 = 0, bit 27 = 1.
|
||||
(6u32 << 26)
|
||||
| ((vd & 0x1F) << 21)
|
||||
| (((vd >> 5) & 0x3) << 2) // VD128h
|
||||
| ((imm & 0x1F) << 16)
|
||||
| ((vb & 0x1F) << 11)
|
||||
| (((vb >> 5) & 0x3) << 0) // VB128h
|
||||
| ((z & 0x3) << 6) // z at PPC 24-25 = host 6-7
|
||||
| (1 << 8) // bit 23 (key2)
|
||||
| (1 << 9) // bit 22 (key2)
|
||||
| (1 << 10) // bit 21 (key2)
|
||||
| (1 << 4) // bit 27 (key2)
|
||||
};
|
||||
// Note: VMX128 op6 secondary keys constrain bits 21-23. For
|
||||
// vrlimi128 (key2 = 0b1110001 over bits 21-23 + 26-27) the only
|
||||
// valid vd128 range is 96..=127 — lower values change the secondary
|
||||
// key into some other instruction. The cases below record what the
|
||||
// disassembler emits for the borderline encodings, so a regression
|
||||
// in either the lookup table or the formatter would surface here.
|
||||
let vmx128_high = [
|
||||
// bits 21-22 = 00 → key2 ≠ vrlimi128 → decodes to vsrw128 (key5
|
||||
// branch). Locks current behavior; shows the silent-bug-area
|
||||
// encoding constraint.
|
||||
(vrlimi128(0, 12, 4, 3), 0x82000000, "encoding vd_hi=00: actually vsrw128"),
|
||||
// bits 21-22 = 10 → still not vrlimi128.
|
||||
(vrlimi128(32, 12, 4, 3), 0x82000000, "encoding vd_hi=10: actually vsrw128 v32"),
|
||||
// bits 21-22 = 01 → key1 matches vpermwi128.
|
||||
(vrlimi128(64, 12, 4, 3), 0x82000000, "encoding vd_hi=01: actually vpermwi128"),
|
||||
// bits 21-22 = 11 → key2 matches vrlimi128 with vd128=96.
|
||||
(vrlimi128(96, 12, 4, 3), 0x82000000, "vrlimi128 v96, v12, 4, 3 (real)"),
|
||||
(vrlimi128(127, 127, 4, 3), 0x82000000, "vrlimi128 v127, v127, 4, 3 (real)"),
|
||||
(vsrw128(0, 12), 0x82000000, "vsrw128 v0, v0, v12 (canary, vd_hi=00)"),
|
||||
(vsrw128(32, 12), 0x82000000, "vsrw128 v32, v0, v12 (canary, VD128h=01)"),
|
||||
(vpermwi128(64, 12, 0xE4), 0x82000000, "vpermwi128 v64, v12, 0xE4 (canary, VD128h=10)"),
|
||||
(vrlimi128(96, 12, 4, 3), 0x82000000, "vrlimi128 v96, v12, 4, 3 (canary, VD128h=11)"),
|
||||
(vrlimi128(127, 95, 4, 3), 0x82000000, "vrlimi128 v127, v95, 4, 3 (canary)"),
|
||||
];
|
||||
|
||||
// Fix 4: VMX128 multiply-add 4-operand layouts. Per canary, the addend
|
||||
@@ -514,12 +511,11 @@ fn vmx128_registers() {
|
||||
// vmaddcfp128 VD, VA, VD, VB → "v3, v35, v3, v5"
|
||||
// vnmsubfp128 VD, VA, VD, VB → "v3, v35, v3, v5"
|
||||
let vmx128_4op = [
|
||||
// vmaddfp128: vd=3(bits 6-10), va=35(bits 11-15=3 + bit29=1), vb=5(bits 16-20), key2=0b001101
|
||||
(0x146328D4u32, 0x82000000, "vmaddfp128 v3, v35, v5, v3"),
|
||||
// vmaddcfp128: same vd/va/vb layout, key2=0b010001
|
||||
(0x14632914u32, 0x82000000, "vmaddcfp128 v3, v35, v3, v5"),
|
||||
// vnmsubfp128: same vd/va/vb layout, key2=0b010101
|
||||
(0x14632954u32, 0x82000000, "vnmsubfp128 v3, v35, v3, v5"),
|
||||
// Canary FormatVX128 layout: vd=3 (PPC 6-10), va=35 (low 3 at PPC 11-15 + VA128h=1 at PPC 26),
|
||||
// vb=5 (PPC 16-20), key2 at PPC 22-25 + bit 27.
|
||||
(0x146328F0u32, 0x82000000, "vmaddfp128 v3, v35, v5, v3"),
|
||||
(0x14632930u32, 0x82000000, "vmaddcfp128 v3, v35, v3, v5"),
|
||||
(0x14632970u32, 0x82000000, "vnmsubfp128 v3, v35, v3, v5"),
|
||||
];
|
||||
|
||||
let mut all = Vec::new();
|
||||
|
||||
Reference in New Issue
Block a user