fix(cpu): PPCBUG-700 VMX128 register accessors match canary bitfield layout
Independent review of P3 batch 2 (52ece4b) found that all three VMX128 register accessors disagreed with canary's FormatVX128/VX128_R bitfield struct (`xenia-canary/src/xenia/cpu/ppc/ppc_decode_data.h:484-663`). The audit at line 2958 had marked these "confirmed-clean" but had miscounted LSB-first bitfield offsets. Canary's actual layout (LSB-first, GCC/Clang/MSVC on x86): VA128 = VA128l(5) | VA128h(1)<<5 | VA128H(1)<<6 = PPC[11:15] | PPC[26]<<5 | PPC[21]<<6 (7-bit selector, 3 fields) VB128 = VB128l(5) | VB128h(2)<<5 = PPC[16:20] | PPC[30:31]<<5 (7-bit selector, 2 fields) VD128 = VD128l(5) | VD128h(2)<<5 = PPC[6:10] | PPC[28:29]<<5 (7-bit selector, 2 fields) VX128_R Rc = PPC[25] (host bit 6) not PPC[27] as prior fix had The buggy convention was internally consistent with hand-crafted test fixtures (which set bits 29/21/22 to encode the high registers, matching the buggy accessor). Real Xbox 360 game code follows canary's convention, so any production VMX128 instruction with VR >= 32 was silently mis-decoded — but no unit test exercised that path until the va128 fix in 52ece4b exposed the inconsistency. Changes: - decoder.rs: rewrite va128/vb128/vd128/vx128r_rc_bit to canary positions. Drop the speculative `key4_dt` dot-form dispatch in decode_op6 — canary has no separate dot-form opcodes for VX128_R compute ops; Rc is a runtime modifier read by the interpreter via vx128r_rc_bit(). - decoder.rs tests: rewrite vmx128_test_word helper for canary layout; rename/re-encode vmx128_vd128_*, vmx128_va128_*, vmx128_vb128_* tests. - interpreter.rs: update encode_vpkd3d128 test helper to encode VD via canary's VD128h field; tests now pass vd=96 explicitly. - tests/disasm_goldens.rs: replace the vrlimi128/vsrw128/vpermwi128/ vperm128 hand-encoded raws with canary-compliant encodings; introduce a shared `encode_vx128` helper. 
- tests/golden/vmx128_registers.json: re-encode 9 entries (vperm128, vsrw128 ×2, vpermwi128, vrlimi128 ×2, vmaddfp128, vmaddcfp128, vnmsubfp128) to canary-compliant raws preserving the same expected operand strings. - audit-findings.md: new PPCBUG-700 entry documenting the discovery and invalidating the audit's "confirmed-clean" assessment. Affects all VMX128 binary ops (vaddfp128, vsubfp128, vmulfp128, vand128, vor128, vxor128, vnor128, vandc128, vsel128, vslo128, vsro128, vperm128, vsrw128, vmaddfp128, vmaddcfp128, vnmsubfp128, vpkd3d128, vpkshss128, vpkshus128, vpkswss128, vpkswus128, vpkuhum128, vpkuhus128, vpkuwum128, vpkuwus128, vmsum3fp128, vmsum4fp128, vrlimi128, vpermwi128 — 30+ opcodes), plus VX128_R compare dot-forms. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -77,7 +77,9 @@ impl DecodedInstr {
|
||||
/// Rc for VC-form vector compare instructions — PPC bit 21 = host bit 10.
|
||||
#[inline] pub fn vc_rc_bit(&self) -> bool { (self.raw >> 10) & 1 != 0 }
|
||||
/// Rc for VX128_R-form vector compare instructions — PPC bit 27 = host bit 4.
|
||||
#[inline] pub fn vx128r_rc_bit(&self) -> bool { (self.raw >> 4) & 1 != 0 }
|
||||
/// VX128_R Rc bit — PPC bit 25 (host bit 6) per canary's FormatVX128_R
|
||||
/// bitfield layout. PPCBUG-700.
|
||||
#[inline] pub fn vx128r_rc_bit(&self) -> bool { (self.raw >> 6) & 1 != 0 }
|
||||
|
||||
/// IMM field for VX128_4-form instructions (vrlimi128) — 5-bit blend mask at PPC bits 11-15.
|
||||
#[inline] pub fn vx128_4_imm(&self) -> u32 { extract_bits(self.raw, 11, 15) }
|
||||
@@ -133,25 +135,30 @@ impl DecodedInstr {
|
||||
/// crbB (bits 16-20)
|
||||
#[inline] pub fn crbb(&self) -> u32 { extract_bits(self.raw, 16, 20) }
|
||||
|
||||
// VMX128 field extractors
|
||||
// VMX128 field extractors — bit positions match canary's
|
||||
// FormatVX128/VX128_2/VX128_4/VX128_5/VX128_R bitfield layout
|
||||
// (xenia-canary `ppc_decode_data.h:484-663`, LSB-first packed). PPCBUG-700.
|
||||
|
||||
/// VA128 (bits 11-15, plus bit from 29)
|
||||
/// VA128 = VA128l(5) | VA128h(1) << 5 | VA128H(1) << 6.
|
||||
/// Canonical 7-bit register selector: PPC 11-15 (low), PPC 26 (mid), PPC 21 (high).
|
||||
#[inline] pub fn va128(&self) -> usize {
|
||||
(extract_bits(self.raw, 11, 15) | (extract_bits(self.raw, 29, 29) << 5)) as usize
|
||||
(extract_bits(self.raw, 11, 15)
|
||||
| (extract_bits(self.raw, 26, 26) << 5)
|
||||
| (extract_bits(self.raw, 21, 21) << 6)) as usize
|
||||
}
|
||||
|
||||
/// VB128 (bits 16-20, plus bits from 28, 30)
|
||||
/// VB128 = VB128l(5) | VB128h(2) << 5. Canary's VB128h is a 2-bit
|
||||
/// contiguous field at PPC 30-31 (host bits 0-1).
|
||||
#[inline] pub fn vb128(&self) -> usize {
|
||||
(extract_bits(self.raw, 16, 20)
|
||||
| (extract_bits(self.raw, 28, 28) << 5)
|
||||
| (extract_bits(self.raw, 30, 30) << 6)) as usize
|
||||
| (extract_bits(self.raw, 30, 31) << 5)) as usize
|
||||
}
|
||||
|
||||
/// VD128 (bits 6-10, plus bits from 21, 22)
|
||||
/// VD128 = VD128l(5) | VD128h(2) << 5. Canary's VD128h is a 2-bit
|
||||
/// contiguous field at PPC 28-29 (host bits 2-3).
|
||||
#[inline] pub fn vd128(&self) -> usize {
|
||||
(extract_bits(self.raw, 6, 10)
|
||||
| (extract_bits(self.raw, 21, 21) << 5)
|
||||
| (extract_bits(self.raw, 22, 22) << 6)) as usize
|
||||
| (extract_bits(self.raw, 28, 29) << 5)) as usize
|
||||
}
|
||||
|
||||
/// VS128 - same encoding as VD128
|
||||
@@ -664,11 +671,11 @@ fn decode_op6(code: u32) -> PpcOpcode {
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// VMX128 compare (non-dot and dot forms).
|
||||
// Non-dot: bit 27 = 0. Dot: bit 27 = 1, but bit 25 must also be 0 to
|
||||
// distinguish from the shift/merge group (which has bit 25 = 1 when bit 27 = 1).
|
||||
// key4_nd uses bits 22-24 + bit 27 (same as original, covers non-dot).
|
||||
// key4_dt uses bits 22-24 + bit 25 + bit 27 (narrower, covers dot-only).
|
||||
// VMX128 compare (VX128_R form). Single dispatch path: bit 27 = 0 always
|
||||
// for these opcodes per canary's table (`ppc_opcode_table_gen.cc:295-305`).
|
||||
// The Rc bit is at PPC 25 (host bit 6) per the FormatVX128_R bitfield —
|
||||
// it's a runtime modifier read by the interpreter, NOT part of the
|
||||
// secondary-opcode discrimination. PPCBUG-700.
|
||||
let key4_nd = (extract_bits(code, 22, 24) << 3) | extract_bits(code, 27, 27);
|
||||
match key4_nd {
|
||||
0b000000 => return PpcOpcode::vcmpeqfp128,
|
||||
@@ -678,16 +685,6 @@ fn decode_op6(code: u32) -> PpcOpcode {
|
||||
0b100000 => return PpcOpcode::vcmpequw128,
|
||||
_ => {}
|
||||
}
|
||||
// Dot forms: bit 27 = 1, bit 25 = 0 (key = bits22-24 + bit25 + bit27, low 3 bits)
|
||||
let key4_dt = (extract_bits(code, 22, 24) << 2) | (extract_bits(code, 25, 25) << 1) | extract_bits(code, 27, 27);
|
||||
match key4_dt {
|
||||
0b00001 => return PpcOpcode::vcmpeqfp128, // bits22-24=000, bit25=0, bit27=1
|
||||
0b00101 => return PpcOpcode::vcmpgefp128, // bits22-24=001, bit25=0, bit27=1
|
||||
0b01001 => return PpcOpcode::vcmpgtfp128, // bits22-24=010, bit25=0, bit27=1
|
||||
0b01101 => return PpcOpcode::vcmpbfp128, // bits22-24=011, bit25=0, bit27=1
|
||||
0b10001 => return PpcOpcode::vcmpequw128, // bits22-24=100, bit25=0, bit27=1
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// VMX128 shift/merge
|
||||
let key5 = (extract_bits(code, 22, 25) << 2) | extract_bits(code, 27, 27);
|
||||
@@ -1055,21 +1052,26 @@ mod tests {
|
||||
// used those extractors). Each test poke-bits exactly the slots the
|
||||
// accessor reads and asserts the assembled register number.
|
||||
|
||||
fn vmx128_test_word(va21: u32, vd6_10: u32, vd21: u32, vd22: u32,
|
||||
vb16_20: u32, vb28: u32, vb30: u32) -> u32 {
|
||||
// PPC bit i -> LSB position 31-i.
|
||||
(vd6_10 << (31 - 10))
|
||||
| (va21 << (31 - 21)) // va128 high bit at PPC 29 in some forms — kept 0 here
|
||||
| (vd21 << (31 - 21))
|
||||
| (vd22 << (31 - 22))
|
||||
| (vb16_20 << (31 - 20))
|
||||
| (vb28 << (31 - 28))
|
||||
| (vb30 << (31 - 30))
|
||||
/// Build a VMX128 test word for the canary-compliant register layout.
|
||||
/// `vd128 = vd_lo | (vd_hi << 5)` where vd_lo is 5 bits (PPC 6-10) and
|
||||
/// vd_hi is 2 bits (PPC 28-29). Same shape for vb128 (vb_lo at PPC 16-20,
|
||||
/// vb_hi 2 bits at PPC 30-31). va128 = va_lo | (va_h26<<5) | (va_h21<<6)
|
||||
/// per canary's 7-bit VA selector.
|
||||
fn vmx128_test_word(vd_lo: u32, vd_hi: u32, va_lo: u32, va_h26: u32, va_h21: u32,
|
||||
vb_lo: u32, vb_hi: u32) -> u32 {
|
||||
// PPC bit i -> host bit (31-i).
|
||||
(vd_lo << (31 - 10)) // VD128l: PPC 6-10 = host 21-25
|
||||
| (vd_hi << (31 - 29)) // VD128h: PPC 28-29 = host 2-3 (LSB at host 2)
|
||||
| (va_lo << (31 - 15)) // VA128l: PPC 11-15 = host 16-20
|
||||
| (va_h26 << (31 - 26)) // VA128h: PPC 26 = host 5
|
||||
| (va_h21 << (31 - 21)) // VA128H: PPC 21 = host 10
|
||||
| (vb_lo << (31 - 20)) // VB128l: PPC 16-20 = host 11-15
|
||||
| (vb_hi << (31 - 31)) // VB128h: PPC 30-31 = host 0-1 (LSB at host 0)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmx128_vd128_low_5_bits_only() {
|
||||
// vd_lo = 0..31, vd_b21 = 0, vd_b22 = 0 → vd128 = vd_lo
|
||||
// vd_lo = 0..31, vd_hi = 0 → vd128 = vd_lo
|
||||
for r in 0..32u32 {
|
||||
let raw = (r as u32) << (31 - 10);
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
@@ -1078,45 +1080,51 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmx128_vd128_bit21_adds_32() {
|
||||
// vd_lo = 0, vd_b21 = 1, vd_b22 = 0 → vd128 = 32
|
||||
let raw = (0u32 << (31 - 10)) | (1u32 << (31 - 21));
|
||||
fn vmx128_vd128_high_low_bit_adds_32() {
|
||||
// vd_lo = 0, VD128h = 0b01 (LSB only at host bit 2 = PPC 29) → vd128 = 32
|
||||
let raw = (1u32 << (31 - 29));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vd128(), 32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmx128_vd128_bit22_adds_64() {
|
||||
// vd_lo = 0, vd_b21 = 0, vd_b22 = 1 → vd128 = 64
|
||||
let raw = (0u32 << (31 - 10)) | (1u32 << (31 - 22));
|
||||
fn vmx128_vd128_high_high_bit_adds_64() {
|
||||
// vd_lo = 0, VD128h = 0b10 (MSB only at host bit 3 = PPC 28) → vd128 = 64
|
||||
let raw = (1u32 << (31 - 28));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vd128(), 64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmx128_vd128_full_127() {
|
||||
// vd_lo = 31, vd_b21 = 1, vd_b22 = 1 → vd128 = 127
|
||||
// vd_lo = 31, VD128h = 0b11 → vd128 = 127
|
||||
let raw = (31u32 << (31 - 10))
|
||||
| (1u32 << (31 - 21))
|
||||
| (1u32 << (31 - 22));
|
||||
| (1u32 << (31 - 28))
|
||||
| (1u32 << (31 - 29));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vd128(), 127);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmx128_va128_uses_bit29() {
|
||||
// va128 = bits 11-15 + bit 29. va_lo = 7, bit 29 = 1 → va128 = 7 | 32 = 39.
|
||||
let raw = (7u32 << (31 - 15)) | (1u32 << (31 - 29));
|
||||
fn vmx128_va128_canary_layout() {
|
||||
// va_lo = 7 at PPC 11-15, VA128h = 1 at PPC 26 → va128 = 7 | 32 = 39
|
||||
let raw = (7u32 << (31 - 15)) | (1u32 << (31 - 26));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.va128(), 39);
|
||||
// VA128H = 1 at PPC 21 → va128 += 64 = 103
|
||||
let raw = raw | (1u32 << (31 - 21));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.va128(), 7 | 32 | 64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmx128_vb128_uses_bits28_and_30() {
|
||||
// vb128 = bits 16-20 + bit 28 + bit 30. Low 5 = 5, bit 28 = 1 → +32, bit 30 = 1 → +64.
|
||||
let raw = (5u32 << (31 - 20))
|
||||
| (1u32 << (31 - 28))
|
||||
| (1u32 << (31 - 30));
|
||||
fn vmx128_vb128_uses_bits30_31() {
|
||||
// vb_lo = 5 at PPC 16-20. VB128h = 0b01 (LSB at PPC 31 = host 0) → +32.
|
||||
// VB128h = 0b11 → +96.
|
||||
let raw = (5u32 << (31 - 20)) | (1u32 << (31 - 31));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vb128(), 5 | 32);
|
||||
let raw = raw | (1u32 << (31 - 30));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vb128(), 5 | 32 | 64);
|
||||
}
|
||||
@@ -1126,11 +1134,9 @@ mod tests {
|
||||
// vs128 must always equal vd128.
|
||||
for r in [0u32, 31, 32, 64, 96, 127] {
|
||||
let lo = r & 0x1F;
|
||||
let b21 = (r >> 5) & 1;
|
||||
let b22 = (r >> 6) & 1;
|
||||
let hi = (r >> 5) & 0x3;
|
||||
let raw = (lo << (31 - 10))
|
||||
| (b21 << (31 - 21))
|
||||
| (b22 << (31 - 22));
|
||||
| (hi << (31 - 29));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vd128(), r as usize, "vd128 mismatch for r={r}");
|
||||
assert_eq!(d.vs128(), r as usize, "vs128 mismatch for r={r}");
|
||||
@@ -1142,7 +1148,8 @@ mod tests {
|
||||
#[allow(dead_code)]
|
||||
fn _vmx128_test_word_helper_compiles() {
|
||||
// Keep the helper validated against the real accessor.
|
||||
let raw = vmx128_test_word(0, 5, 1, 1, 0, 0, 0);
|
||||
// vd_lo=5, vd_hi=0b11 → vd128 = 5 | 96 = 101
|
||||
let raw = vmx128_test_word(5, 3, 0, 0, 0, 0, 0);
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vd128(), 5 | 32 | 64);
|
||||
}
|
||||
|
||||
@@ -6484,13 +6484,25 @@ mod tests {
|
||||
// For vpkd3d128, PPC bits 21-22 are always 1 (key2 discriminant), but they
|
||||
// are not part of vd128: its high bits are VD128h (PPC 28-29), so tests pass vd=96 explicitly.
|
||||
|
||||
fn encode_vpkd3d128(vd_lo: u32, vb_lo: u32, imm: u32, z: u32) -> u32 {
|
||||
// op6=6 (all VMX128 compute ops); VD[4:0] at host 25-21; IMM at host 20-16;
|
||||
// VB[4:0] at host 15-11; host bits 10,9 = 1,1 (PPC bits 21,22, key2=0b110);
|
||||
// z (2-bit) at host 7-6 (PPC bits 24-25); host bit 4 = 1 (PPC bit 27, key2 low=0b01).
|
||||
// decode_op6 key2 = (bits21-23<<4)|(bits26-27) = (0b110<<4)|0b01 = 0b1100001.
|
||||
(6u32 << 26) | (vd_lo << 21) | (imm << 16) | (vb_lo << 11)
|
||||
| (1 << 10) | (1 << 9) | (z << 6) | (1 << 4)
|
||||
fn encode_vpkd3d128(vd: u32, vb_lo: u32, imm: u32, z: u32) -> u32 {
|
||||
// op6=6, FormatVX128_4 layout (canary):
|
||||
// VD low at PPC 6-10 (host 21-25); VD high (2 bits) at PPC 28-29 (host 2-3).
|
||||
// IMM at PPC 11-15; VB low at PPC 16-20.
|
||||
// z (2-bit) at PPC 24-25 (host 6-7).
|
||||
// key2 = 0b1100001 over bits 21-23 + 26-27:
|
||||
// bits 21-23 = 0b110 → bit 21=1, bit 22=1, bit 23=0
|
||||
// bits 26-27 = 0b01 → bit 26=0, bit 27=1
|
||||
let vd_lo = vd & 0x1F;
|
||||
let vd_hi = (vd >> 5) & 0x3;
|
||||
(6u32 << 26)
|
||||
| (vd_lo << 21)
|
||||
| (vd_hi << 2)
|
||||
| (imm << 16)
|
||||
| (vb_lo << 11)
|
||||
| (1 << 10) // bit 21 (key2)
|
||||
| (1 << 9) // bit 22 (key2)
|
||||
| (z << 6) // z at PPC 24-25
|
||||
| (1 << 4) // bit 27 (key2)
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -6504,7 +6516,7 @@ mod tests {
|
||||
ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0);
|
||||
// prev vd=96: sentinel values that should NOT appear in result
|
||||
ctx.vr[96] = xenia_types::Vec128::from_u32x4(0xAAAA_AAAA, 0xBBBB_BBBB, 0xCCCC_CCCC, 0xDDDD_DDDD);
|
||||
write_instr(&mem, 0, encode_vpkd3d128(0, 1, 0, 0));
|
||||
write_instr(&mem, 0, encode_vpkd3d128(96, 1, 0, 0));
|
||||
ctx.pc = 0;
|
||||
step(&mut ctx, &mem);
|
||||
let r = ctx.vr[96].as_u32x4();
|
||||
@@ -6524,7 +6536,7 @@ mod tests {
|
||||
let mem = TestMem::new();
|
||||
ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0); // out[3]=0x00FF_0000
|
||||
ctx.vr[96] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);
|
||||
write_instr(&mem, 0, encode_vpkd3d128(0, 1, 1, 0));
|
||||
write_instr(&mem, 0, encode_vpkd3d128(96, 1, 1, 0));
|
||||
ctx.pc = 0;
|
||||
step(&mut ctx, &mem);
|
||||
let r = ctx.vr[96].as_u32x4();
|
||||
@@ -6543,7 +6555,7 @@ mod tests {
|
||||
let mem = TestMem::new();
|
||||
ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0); // out[3]=0x00FF_0000
|
||||
ctx.vr[96] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);
|
||||
write_instr(&mem, 0, encode_vpkd3d128(0, 1, 1, 3));
|
||||
write_instr(&mem, 0, encode_vpkd3d128(96, 1, 1, 3));
|
||||
ctx.pc = 0;
|
||||
step(&mut ctx, &mem);
|
||||
let r = ctx.vr[96].as_u32x4();
|
||||
|
||||
Reference in New Issue
Block a user