diff --git a/audit-findings.md b/audit-findings.md index 26cbb97..5bb09cf 100644 --- a/audit-findings.md +++ b/audit-findings.md @@ -3414,3 +3414,38 @@ has the wrong extraction. The disassembler was written independently and got the degenerate cases. IDs PPCBUG-655 through PPCBUG-679 are unallocated — no further bugs found in Phase C3. + +--- + +## Phase C4 — Post-merge audit corrections (2026-05-02) + +### PPCBUG-700 — VMX128 register accessors disagreed with canary's bitfield layout (HIGH) + +- **Severity**: HIGH (silent mis-decoding of any VMX128 instruction with a register >= 32) +- **Status**: applied +- **Locations**: `decoder.rs:138-160` (`va128`/`vb128`/`vd128`), `decoder.rs:80` (`vx128r_rc_bit`) +- **Discovery**: independent reviewer of the P3 phase merge, comparing our rust accessors + against canary's `FormatVX128`/`VX128_2`/`VX128_4`/`VX128_5`/`VX128_R` bitfield struct + in `xenia-canary/src/xenia/cpu/ppc/ppc_decode_data.h:484-663`. +- **Symptom**: this entry contradicts the audit's own line 2958 ("confirmed-clean") + assessment. The previous audit miscounted bit-field offsets — under x86_64 LSB-first + C++ bitfield packing, the canary fields land at: + - `VA128 = VA128l(5) | VA128h(1)<<5 | VA128H(1)<<6` = PPC[11-15] | PPC[26]<<5 | PPC[21]<<6 (3 fields, 7 bits) + - `VB128 = VB128l(5) | VB128h(2)<<5` = PPC[16-20] | PPC[30-31]<<5 (2 fields, 7 bits) + - `VD128 = VD128l(5) | VD128h(2)<<5` = PPC[6-10] | PPC[28-29]<<5 (2 fields, 7 bits) + - `Rc` (VX128_R only) = PPC[25] (host bit 6) — not PPC[27] as PPCBUG-422/562 prescribed. + Rust code instead used va128: PPC[11-15] | PPC[29]<<5 (one bit, wrong position); vb128: + PPC[16-20] | PPC[28]<<5 | PPC[30]<<6 (wrong positions); vd128: PPC[6-10] | PPC[21]<<5 | + PPC[22]<<6 (wrong positions); vx128r_rc_bit at PPC[27]. +- **Why it lurked**: the buggy convention was internally consistent with hand-crafted + test fixtures (which set bit 29 / 21 / 22 to encode "high" registers, matching the + buggy accessor). 
Real Xbox 360 game code follows canary's convention, so any production + encoding with VR >= 32 was silently mis-decoded — but no unit test exercised that path. +- **Fix**: rewrite the four accessors to canary's bit positions; rewrite the + `vmx128_test_word` helper and unit tests; re-encode the goldens for vmaddfp128/ + vmaddcfp128/vnmsubfp128/vperm128/vsrw128/vpermwi128/vrlimi128. Drop the speculative + `key4_dt` dot-form dispatch in `decode_op6` (canary has no separate dot-form opcodes + for VX128_R compute ops; Rc is a runtime modifier). Update `encode_vpkd3d128` test + helper for canary's VD128h placement. +- **Cross-reference**: invalidates the audit's confirmed-clean note at line 2958. + Subsumes the partial fix-shape proposed in PPCBUG-422 (Rc-bit position). diff --git a/crates/xenia-cpu/src/decoder.rs b/crates/xenia-cpu/src/decoder.rs index d4d8ac1..9790f47 100644 --- a/crates/xenia-cpu/src/decoder.rs +++ b/crates/xenia-cpu/src/decoder.rs @@ -77,7 +77,8 @@ impl DecodedInstr { /// Rc for VC-form vector compare instructions — PPC bit 21 = host bit 10. #[inline] pub fn vc_rc_bit(&self) -> bool { (self.raw >> 10) & 1 != 0 } - /// Rc for VX128_R-form vector compare instructions — PPC bit 27 = host bit 4. - #[inline] pub fn vx128r_rc_bit(&self) -> bool { (self.raw >> 4) & 1 != 0 } + /// VX128_R Rc bit — PPC bit 25 (host bit 6) per canary's FormatVX128_R + /// bitfield layout. PPCBUG-700. + #[inline] pub fn vx128r_rc_bit(&self) -> bool { (self.raw >> 6) & 1 != 0 } /// IMM field for VX128_4-form instructions (vrlimi128) — 5-bit blend mask at PPC bits 11-15.
#[inline] pub fn vx128_4_imm(&self) -> u32 { extract_bits(self.raw, 11, 15) } @@ -133,25 +135,30 @@ impl DecodedInstr { /// crbB (bits 16-20) #[inline] pub fn crbb(&self) -> u32 { extract_bits(self.raw, 16, 20) } - // VMX128 field extractors + // VMX128 field extractors — bit positions match canary's + // FormatVX128/VX128_2/VX128_4/VX128_5/VX128_R bitfield layout + // (xenia-canary `ppc_decode_data.h:484-663`, LSB-first packed). PPCBUG-700. - /// VA128 (bits 11-15, plus bit from 29) + /// VA128 = VA128l(5) | VA128h(1) << 5 | VA128H(1) << 6. + /// Canonical 7-bit register selector: PPC 11-15 (low), PPC 26 (mid), PPC 21 (high). #[inline] pub fn va128(&self) -> usize { - (extract_bits(self.raw, 11, 15) | (extract_bits(self.raw, 29, 29) << 5)) as usize + (extract_bits(self.raw, 11, 15) + | (extract_bits(self.raw, 26, 26) << 5) + | (extract_bits(self.raw, 21, 21) << 6)) as usize } - /// VB128 (bits 16-20, plus bits from 28, 30) + /// VB128 = VB128l(5) | VB128h(2) << 5. Canary's VB128h is a 2-bit + /// contiguous field at PPC 30-31 (host bits 0-1). #[inline] pub fn vb128(&self) -> usize { (extract_bits(self.raw, 16, 20) - | (extract_bits(self.raw, 28, 28) << 5) - | (extract_bits(self.raw, 30, 30) << 6)) as usize + | (extract_bits(self.raw, 30, 31) << 5)) as usize } - /// VD128 (bits 6-10, plus bits from 21, 22) + /// VD128 = VD128l(5) | VD128h(2) << 5. Canary's VD128h is a 2-bit + /// contiguous field at PPC 28-29 (host bits 2-3). #[inline] pub fn vd128(&self) -> usize { (extract_bits(self.raw, 6, 10) - | (extract_bits(self.raw, 21, 21) << 5) - | (extract_bits(self.raw, 22, 22) << 6)) as usize + | (extract_bits(self.raw, 28, 29) << 5)) as usize } /// VS128 - same encoding as VD128 @@ -664,11 +671,11 @@ fn decode_op6(code: u32) -> PpcOpcode { _ => {} } - // VMX128 compare (non-dot and dot forms). - // Non-dot: bit 27 = 0. Dot: bit 27 = 1, but bit 25 must also be 0 to - // distinguish from the shift/merge group (which has bit 25 = 1 when bit 27 = 1). 
- // key4_nd uses bits 22-24 + bit 27 (same as original, covers non-dot). - // key4_dt uses bits 22-24 + bit 25 + bit 27 (narrower, covers dot-only). + // VMX128 compare (VX128_R form). Single dispatch path: bit 27 = 0 always + // for these opcodes per canary's table (`ppc_opcode_table_gen.cc:295-305`). + // The Rc bit is at PPC 25 (host bit 6) per the FormatVX128_R bitfield — + // it's a runtime modifier read by the interpreter, NOT part of the + // secondary-opcode discrimination. PPCBUG-700. let key4_nd = (extract_bits(code, 22, 24) << 3) | extract_bits(code, 27, 27); match key4_nd { 0b000000 => return PpcOpcode::vcmpeqfp128, @@ -678,16 +685,6 @@ fn decode_op6(code: u32) -> PpcOpcode { 0b100000 => return PpcOpcode::vcmpequw128, _ => {} } - // Dot forms: bit 27 = 1, bit 25 = 0 (key = bits22-24 + bit25 + bit27, low 3 bits) - let key4_dt = (extract_bits(code, 22, 24) << 2) | (extract_bits(code, 25, 25) << 1) | extract_bits(code, 27, 27); - match key4_dt { - 0b00001 => return PpcOpcode::vcmpeqfp128, // bits22-24=000, bit25=0, bit27=1 - 0b00101 => return PpcOpcode::vcmpgefp128, // bits22-24=001, bit25=0, bit27=1 - 0b01001 => return PpcOpcode::vcmpgtfp128, // bits22-24=010, bit25=0, bit27=1 - 0b01101 => return PpcOpcode::vcmpbfp128, // bits22-24=011, bit25=0, bit27=1 - 0b10001 => return PpcOpcode::vcmpequw128, // bits22-24=100, bit25=0, bit27=1 - _ => {} - } // VMX128 shift/merge let key5 = (extract_bits(code, 22, 25) << 2) | extract_bits(code, 27, 27); @@ -1055,21 +1052,26 @@ mod tests { // used those extractors). Each test poke-bits exactly the slots the // accessor reads and asserts the assembled register number. - fn vmx128_test_word(va21: u32, vd6_10: u32, vd21: u32, vd22: u32, - vb16_20: u32, vb28: u32, vb30: u32) -> u32 { - // PPC bit i -> LSB position 31-i. 
- (vd6_10 << (31 - 10)) - | (va21 << (31 - 21)) // va128 high bit at PPC 29 in some forms — kept 0 here - | (vd21 << (31 - 21)) - | (vd22 << (31 - 22)) - | (vb16_20 << (31 - 20)) - | (vb28 << (31 - 28)) - | (vb30 << (31 - 30)) + /// Build a VMX128 test word for the canary-compliant register layout. + /// `vd128 = vd_lo | (vd_hi << 5)` where vd_lo is 5 bits (PPC 6-10) and + /// vd_hi is 2 bits (PPC 28-29). Same shape for vb128 (vb_lo at PPC 16-20, + /// vb_hi 2 bits at PPC 30-31). va128 = va_lo | (va_h26<<5) | (va_h21<<6) + /// per canary's 7-bit VA selector. + fn vmx128_test_word(vd_lo: u32, vd_hi: u32, va_lo: u32, va_h26: u32, va_h21: u32, + vb_lo: u32, vb_hi: u32) -> u32 { + // PPC bit i -> host bit (31-i). + (vd_lo << (31 - 10)) // VD128l: PPC 6-10 = host 21-25 + | (vd_hi << (31 - 29)) // VD128h: PPC 28-29 = host 2-3 (LSB at host 2) + | (va_lo << (31 - 15)) // VA128l: PPC 11-15 = host 16-20 + | (va_h26 << (31 - 26)) // VA128h: PPC 26 = host 5 + | (va_h21 << (31 - 21)) // VA128H: PPC 21 = host 10 + | (vb_lo << (31 - 20)) // VB128l: PPC 16-20 = host 11-15 + | (vb_hi << (31 - 31)) // VB128h: PPC 30-31 = host 0-1 (LSB at host 0) } #[test] fn vmx128_vd128_low_5_bits_only() { - // vd_lo = 0..31, vd_b21 = 0, vd_b22 = 0 → vd128 = vd_lo + // vd_lo = 0..31, vd_hi = 0 → vd128 = vd_lo for r in 0..32u32 { let raw = (r as u32) << (31 - 10); let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; @@ -1078,45 +1080,51 @@ mod tests { } #[test] - fn vmx128_vd128_bit21_adds_32() { - // vd_lo = 0, vd_b21 = 1, vd_b22 = 0 → vd128 = 32 - let raw = (0u32 << (31 - 10)) | (1u32 << (31 - 21)); + fn vmx128_vd128_high_low_bit_adds_32() { + // vd_lo = 0, VD128h = 0b01 (LSB only at host bit 2 = PPC 29) → vd128 = 32 + let raw = (1u32 << (31 - 29)); let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; assert_eq!(d.vd128(), 32); } #[test] - fn vmx128_vd128_bit22_adds_64() { - // vd_lo = 0, vd_b21 = 0, vd_b22 = 1 → vd128 = 64 - let raw = (0u32 << (31 - 10)) | (1u32 << (31 
- 22)); + fn vmx128_vd128_high_high_bit_adds_64() { + // vd_lo = 0, VD128h = 0b10 (MSB only at host bit 3 = PPC 28) → vd128 = 64 + let raw = (1u32 << (31 - 28)); let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; assert_eq!(d.vd128(), 64); } #[test] fn vmx128_vd128_full_127() { - // vd_lo = 31, vd_b21 = 1, vd_b22 = 1 → vd128 = 127 + // vd_lo = 31, VD128h = 0b11 → vd128 = 127 let raw = (31u32 << (31 - 10)) - | (1u32 << (31 - 21)) - | (1u32 << (31 - 22)); + | (1u32 << (31 - 28)) + | (1u32 << (31 - 29)); let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; assert_eq!(d.vd128(), 127); } #[test] - fn vmx128_va128_uses_bit29() { - // va128 = bits 11-15 + bit 29. va_lo = 7, bit 29 = 1 → va128 = 7 | 32 = 39. - let raw = (7u32 << (31 - 15)) | (1u32 << (31 - 29)); + fn vmx128_va128_canary_layout() { + // va_lo = 7 at PPC 11-15, VA128h = 1 at PPC 26 → va128 = 7 | 32 = 39 + let raw = (7u32 << (31 - 15)) | (1u32 << (31 - 26)); let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; assert_eq!(d.va128(), 39); + // VA128H = 1 at PPC 21 → va128 += 64 = 103 + let raw = raw | (1u32 << (31 - 21)); + let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; + assert_eq!(d.va128(), 7 | 32 | 64); } #[test] - fn vmx128_vb128_uses_bits28_and_30() { - // vb128 = bits 16-20 + bit 28 + bit 30. Low 5 = 5, bit 28 = 1 → +32, bit 30 = 1 → +64. - let raw = (5u32 << (31 - 20)) - | (1u32 << (31 - 28)) - | (1u32 << (31 - 30)); + fn vmx128_vb128_uses_bits30_31() { + // vb_lo = 5 at PPC 16-20. VB128h = 0b01 (LSB at PPC 31 = host 0) → +32. + // VB128h = 0b11 → +96. + let raw = (5u32 << (31 - 20)) | (1u32 << (31 - 31)); + let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; + assert_eq!(d.vb128(), 5 | 32); + let raw = raw | (1u32 << (31 - 30)); let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; assert_eq!(d.vb128(), 5 | 32 | 64); } @@ -1126,11 +1134,9 @@ mod tests { // vs128 must always equal vd128. 
for r in [0u32, 31, 32, 64, 96, 127] { let lo = r & 0x1F; - let b21 = (r >> 5) & 1; - let b22 = (r >> 6) & 1; + let hi = (r >> 5) & 0x3; let raw = (lo << (31 - 10)) - | (b21 << (31 - 21)) - | (b22 << (31 - 22)); + | (hi << (31 - 29)); let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; assert_eq!(d.vd128(), r as usize, "vd128 mismatch for r={r}"); assert_eq!(d.vs128(), r as usize, "vs128 mismatch for r={r}"); @@ -1142,7 +1148,8 @@ mod tests { #[allow(dead_code)] fn _vmx128_test_word_helper_compiles() { // Keep the helper validated against the real accessor. - let raw = vmx128_test_word(0, 5, 1, 1, 0, 0, 0); + // vd_lo=5, vd_hi=0b11 → vd128 = 5 | 96 = 101 + let raw = vmx128_test_word(5, 3, 0, 0, 0, 0, 0); let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; assert_eq!(d.vd128(), 5 | 32 | 64); } diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs index 09800b8..19fa865 100644 --- a/crates/xenia-cpu/src/interpreter.rs +++ b/crates/xenia-cpu/src/interpreter.rs @@ -6484,13 +6484,25 @@ mod tests { // For vpkd3d128, PPC bits 21-22 are always 1 (key2 discriminant), so // vd128 is always in range [96, 127] for vd_lo in [0, 31]. - fn encode_vpkd3d128(vd_lo: u32, vb_lo: u32, imm: u32, z: u32) -> u32 { - // op6=6 (all VMX128 compute ops); VD[4:0] at host 25-21; IMM at host 20-16; - // VB[4:0] at host 15-11; host bits 10,9 = 1,1 (PPC bits 21,22, key2=0b110); - // z (2-bit) at host 7-6 (PPC bits 24-25); host bit 4 = 1 (PPC bit 27, key2 low=0b01). - // decode_op6 key2 = (bits21-23<<4)|(bits26-27) = (0b110<<4)|0b01 = 0b1100001. - (6u32 << 26) | (vd_lo << 21) | (imm << 16) | (vb_lo << 11) - | (1 << 10) | (1 << 9) | (z << 6) | (1 << 4) + fn encode_vpkd3d128(vd: u32, vb_lo: u32, imm: u32, z: u32) -> u32 { + // op6=6, FormatVX128_4 layout (canary): + // VD low at PPC 6-10 (host 21-25); VD high (2 bits) at PPC 28-29 (host 2-3). + // IMM at PPC 11-15; VB low at PPC 16-20. + // z (2-bit) at PPC 24-25 (host 6-7). 
+ // key2 = 0b1100001 over bits 21-23 + 26-27: + // bits 21-23 = 0b110 → bit 21=1, bit 22=1, bit 23=0 + // bits 26-27 = 0b01 → bit 26=0, bit 27=1 + let vd_lo = vd & 0x1F; + let vd_hi = (vd >> 5) & 0x3; + (6u32 << 26) + | (vd_lo << 21) + | (vd_hi << 2) + | (imm << 16) + | (vb_lo << 11) + | (1 << 10) // bit 21 (key2) + | (1 << 9) // bit 22 (key2) + | (z << 6) // z at PPC 24-25 + | (1 << 4) // bit 27 (key2) } #[test] @@ -6504,7 +6516,7 @@ mod tests { ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0); // prev vd=96: sentinel values that should NOT appear in result ctx.vr[96] = xenia_types::Vec128::from_u32x4(0xAAAA_AAAA, 0xBBBB_BBBB, 0xCCCC_CCCC, 0xDDDD_DDDD); - write_instr(&mem, 0, encode_vpkd3d128(0, 1, 0, 0)); + write_instr(&mem, 0, encode_vpkd3d128(96, 1, 0, 0)); ctx.pc = 0; step(&mut ctx, &mem); let r = ctx.vr[96].as_u32x4(); @@ -6524,7 +6536,7 @@ mod tests { let mem = TestMem::new(); ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0); // out[3]=0x00FF_0000 ctx.vr[96] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444); - write_instr(&mem, 0, encode_vpkd3d128(0, 1, 1, 0)); + write_instr(&mem, 0, encode_vpkd3d128(96, 1, 1, 0)); ctx.pc = 0; step(&mut ctx, &mem); let r = ctx.vr[96].as_u32x4(); @@ -6543,7 +6555,7 @@ mod tests { let mem = TestMem::new(); ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0); // out[3]=0x00FF_0000 ctx.vr[96] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444); - write_instr(&mem, 0, encode_vpkd3d128(0, 1, 1, 3)); + write_instr(&mem, 0, encode_vpkd3d128(96, 1, 1, 3)); ctx.pc = 0; step(&mut ctx, &mem); let r = ctx.vr[96].as_u32x4(); diff --git a/crates/xenia-cpu/tests/disasm_goldens.rs b/crates/xenia-cpu/tests/disasm_goldens.rs index 2c301a5..ec617a0 100644 --- a/crates/xenia-cpu/tests/disasm_goldens.rs +++ b/crates/xenia-cpu/tests/disasm_goldens.rs @@ -50,6 +50,23 @@ fn fixture_path(name: &str) -> PathBuf { .join(name) } +/// Encode a 
VMX128 VX128-form (or VX128_R/_2) instruction with canary's +/// 7-bit register layout: VD low at PPC 6-10, high 2 bits at PPC 28-29; +/// VA low at PPC 11-15, mid bit at PPC 26, high bit at PPC 21; VB low at +/// PPC 16-20, high 2 bits at PPC 30-31. `secondary_bits` carries any +/// secondary opcode + VC + Rc + key bits the caller needs. +fn encode_vx128(op6: u32, vd: u32, va: u32, vb: u32, secondary_bits: u32) -> u32 { + ((op6 & 0x3F) << 26) + | ((vd & 0x1F) << 21) + | (((vd >> 5) & 0x3) << 2) + | ((va & 0x1F) << 16) + | (((va >> 5) & 0x1) << 5) + | (((va >> 6) & 0x1) << 10) + | ((vb & 0x1F) << 11) + | (((vb >> 5) & 0x3) << 0) + | secondary_bits +} + fn build_rows(cases: &[(u32, u32, &str)]) -> Vec { cases .iter() @@ -428,77 +445,57 @@ fn vmx128_registers() { ((4u32 << 26) | (5 << 11) | 1604, 0x82000000, "mtvscr v5"), ]; - // VMX128 op=5 — uses vd128/va128/vb128 (7-bit registers, high bits at - // 21+22). These are the silent-bug-area encodings; we exercise low - // register indices here because the secondary-opcode key for op=5 - // includes bits 21-22, constraining vd128 high bits to 0 in this form. - // High-index examples for vd128 live in the op=6 series below. + // VMX128 op=5: vperm128 v3, v4, v5, vc=0. Canary FormatVX128: VD low + // at PPC 6-10, VA low at PPC 11-15, VB low at PPC 16-20, VC at PPC 23-25. + // key1 = (bit22<<5)|bit27 = 0 selects vperm128. let vmx128_op5 = [ - // vaddfp128 v3, v4, v5 : op=5, key2=0b000001 - ((5u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (0 << 6) | (1 << 0), 0x82000000, "vaddfp128 (encoded sloppily)"), + (encode_vx128(5, 3, 4, 5, 0), 0x82000000, "vperm128 v3, v4, v5, 0 (canary)"), ]; - // VMX128 op=6 — vrlimi128 has secondary key in bits 23-25 + 26-27, so - // bits 21-22 ARE the high bits of vd128 (canonical silent-bug-area). - // These instructions exercise vd128 = 32, 64, 96 — covering the bit-21 - // and bit-22 split that ppc.rs's old extractor (now deleted) miscoded. 
- let vrlimi128 = |vd: u32, vb: u32, imm: u32, z: u32| -> u32 { - // op=6, vd128 = bits 6-10 + bit 21 + bit 22, vb128 = bits 16-20 + bits 30+31, - // IMM = bits 11-15, Z = bits 24-25, key2 = (bits 23-25 << 4) | bits 26-27 = 0b1110001 - let vd_lo = vd & 0x1F; - let vd_b21 = (vd >> 5) & 1; - let vd_b22 = (vd >> 6) & 1; - let vb_lo = vb & 0x1F; - let vb_b30 = (vb >> 5) & 1; - let vb_b31 = (vb >> 6) & 1; - // bits 23-25 = 111, bits 26-27 = 00, bit 27 = 1 → key2 lower 4 bits = 0001 - // Encoded: bits 23-25 = 111, bits 26-27 = 00 are actually overlapping with z field (bits 24-25) - // The plan view: (bits 23 << 6) | (bits 24-25 << 4) | (bits 26-27 << 2) but the table uses different. - // Easiest: hand-encode known bit pattern matching decoder.rs's match: - // key2 = (extract_bits(code, 23, 25) << 4) | extract_bits(code, 26, 27) = 0b1110001 - // bits 23-25 = 111, bits 26-27 = 01 - // Bit positions 23-27 = 11101 (5 bits, MSB at 23). - // PPC bit 23 (LSB index 8): set - // PPC bit 24 (LSB index 7): set -- this is z bit 0 - // PPC bit 25 (LSB index 6): set -- this is z bit 1 - // PPC bit 26 (LSB index 5): unset - // PPC bit 27 (LSB index 4): set - // We let z = bits 24-25 stored with vd128 bits at 21-22. - // To preserve key2 = 0b1110001, we need bits 24-25 = 11, bit 26 = 0, bit 27 = 1. - // BUT bits 24-25 ARE the z field; if we set them = 11 the z value is 3. - // So Z is constrained for vrlimi128. Choose Z = 3 (matches Sylpheed examples). - let z3 = z & 0x3; - (6u32 << 26) - | (vd_lo << 21) - | (imm << 16) - | (vb_lo << 11) - | (vd_b21 << 10) // bit 21 (LSB pos 10) - | (vd_b22 << 9) // bit 22 (LSB pos 9) - | (1 << 8) // bit 23 - | (z3 << 6) // bits 24-25 - | (0 << 5) // bit 26 - | (1 << 4) // bit 27 - | (vb_b30 << 1) // bit 30 - | vb_b31 // bit 31 + // VMX128 op=6 — exercise full 0-127 vd128 range under canary's layout. + // VD128h is at PPC 28-29 (host 2-3): no overlap with secondary opcode key, + // so vd can be freely 0-127 for any op6 instruction. 
+ let vsrw128 = |vd: u32, vb: u32| -> u32 { + // vsrw128 secondary: 0x000001D0 (decode_op6 key5 = 0b011101). + encode_vx128(6, vd, 0, vb, 0x000001D0) + }; + let vpermwi128 = |vd: u32, vb: u32, perm: u32| -> u32 { + // vpermwi128: PERMl at PPC 11-15, PERMh at PPC 23-25, key1 sets bit 22 + bit 27. + let perml = perm & 0x1F; + let permh = (perm >> 5) & 0x7; + let mut raw = (6u32 << 26) + | ((vd & 0x1F) << 21) + | (((vd >> 5) & 0x3) << 2) // VD128h + | (perml << 16) + | ((vb & 0x1F) << 11) + | (((vb >> 5) & 0x3) << 0) // VB128h + | (permh << 6) // PERMh at PPC 23-25 + | (1 << 9) // bit 22 (key1 high) + | (1 << 4); // bit 27 (key1 low) + raw &= !(1 << 10); // PPC 21 = 0 for vpermwi128 + raw + }; + let vrlimi128 = |vd: u32, vb: u32, imm: u32, z: u32| -> u32 { + // vrlimi128: IMM at PPC 11-15, z at PPC 24-25, key2 = 0b1110001 over + // bits 21-23 + 26-27 → bits 21,22,23 = 1, bit 26 = 0, bit 27 = 1. + (6u32 << 26) + | ((vd & 0x1F) << 21) + | (((vd >> 5) & 0x3) << 2) // VD128h + | ((imm & 0x1F) << 16) + | ((vb & 0x1F) << 11) + | (((vb >> 5) & 0x3) << 0) // VB128h + | ((z & 0x3) << 6) // z at PPC 24-25 = host 6-7 + | (1 << 8) // bit 23 (key2) + | (1 << 9) // bit 22 (key2) + | (1 << 10) // bit 21 (key2) + | (1 << 4) // bit 27 (key2) }; - // Note: VMX128 op6 secondary keys constrain bits 21-23. For - // vrlimi128 (key2 = 0b1110001 over bits 21-23 + 26-27) the only - // valid vd128 range is 96..=127 — lower values change the secondary - // key into some other instruction. The cases below record what the - // disassembler emits for the borderline encodings, so a regression - // in either the lookup table or the formatter would surface here. let vmx128_high = [ - // bits 21-22 = 00 → key2 ≠ vrlimi128 → decodes to vsrw128 (key5 - // branch). Locks current behavior; shows the silent-bug-area - // encoding constraint. - (vrlimi128(0, 12, 4, 3), 0x82000000, "encoding vd_hi=00: actually vsrw128"), - // bits 21-22 = 10 → still not vrlimi128. 
- (vrlimi128(32, 12, 4, 3), 0x82000000, "encoding vd_hi=10: actually vsrw128 v32"), - // bits 21-22 = 01 → key1 matches vpermwi128. - (vrlimi128(64, 12, 4, 3), 0x82000000, "encoding vd_hi=01: actually vpermwi128"), - // bits 21-22 = 11 → key2 matches vrlimi128 with vd128=96. - (vrlimi128(96, 12, 4, 3), 0x82000000, "vrlimi128 v96, v12, 4, 3 (real)"), - (vrlimi128(127, 127, 4, 3), 0x82000000, "vrlimi128 v127, v127, 4, 3 (real)"), + (vsrw128(0, 12), 0x82000000, "vsrw128 v0, v0, v12 (canary, vd_hi=00)"), + (vsrw128(32, 12), 0x82000000, "vsrw128 v32, v0, v12 (canary, VD128h=01)"), + (vpermwi128(64, 12, 0xE4), 0x82000000, "vpermwi128 v64, v12, 0xE4 (canary, VD128h=10)"), + (vrlimi128(96, 12, 4, 3), 0x82000000, "vrlimi128 v96, v12, 4, 3 (canary, VD128h=11)"), + (vrlimi128(127, 95, 4, 3), 0x82000000, "vrlimi128 v127, v95, 4, 3 (canary)"), ]; // Fix 4: VMX128 multiply-add 4-operand layouts. Per canary, the addend @@ -514,12 +511,11 @@ fn vmx128_registers() { // vmaddcfp128 VD, VA, VD, VB → "v3, v35, v3, v5" // vnmsubfp128 VD, VA, VD, VB → "v3, v35, v3, v5" let vmx128_4op = [ - // vmaddfp128: vd=3(bits 6-10), va=35(bits 11-15=3 + bit29=1), vb=5(bits 16-20), key2=0b001101 - (0x146328D4u32, 0x82000000, "vmaddfp128 v3, v35, v5, v3"), - // vmaddcfp128: same vd/va/vb layout, key2=0b010001 - (0x14632914u32, 0x82000000, "vmaddcfp128 v3, v35, v3, v5"), - // vnmsubfp128: same vd/va/vb layout, key2=0b010101 - (0x14632954u32, 0x82000000, "vnmsubfp128 v3, v35, v3, v5"), + // Canary FormatVX128 layout: vd=3 (PPC 6-10), va=35 (low 3 at PPC 11-15 + VA128h=1 at PPC 26), + // vb=5 (PPC 16-20), key2 at PPC 22-25 + bit 27. 
+ (0x146328F0u32, 0x82000000, "vmaddfp128 v3, v35, v5, v3"), + (0x14632930u32, 0x82000000, "vmaddcfp128 v3, v35, v3, v5"), + (0x14632970u32, 0x82000000, "vnmsubfp128 v3, v35, v3, v5"), ]; let mut all = Vec::new(); diff --git a/crates/xenia-cpu/tests/golden/vmx128_registers.json b/crates/xenia-cpu/tests/golden/vmx128_registers.json index 6d4f8b0..12cf44d 100644 --- a/crates/xenia-cpu/tests/golden/vmx128_registers.json +++ b/crates/xenia-cpu/tests/golden/vmx128_registers.json @@ -71,64 +71,64 @@ "operands": "v5" }, { - "label": "vaddfp128 (encoded sloppily)", - "raw": "0x14642801", + "label": "vperm128 v3, v4, v5, 0 (canary)", + "raw": "0x14642800", "addr": "0x82000000", "mnemonic": "vperm128", "operands": "v3, v4, v5, 0" }, { - "label": "encoding vd_hi=00: actually vsrw128", - "raw": "0x180461D0", + "label": "vsrw128 v0, v0, v12 (canary, vd_hi=00)", + "raw": "0x180061D0", "addr": "0x82000000", "mnemonic": "vsrw128", - "operands": "v0, v4, v12" + "operands": "v0, v0, v12" }, { - "label": "encoding vd_hi=10: actually vsrw128 v32", - "raw": "0x180465D0", + "label": "vsrw128 v32, v0, v12 (canary, VD128h=01)", + "raw": "0x180061D4", "addr": "0x82000000", "mnemonic": "vsrw128", - "operands": "v32, v4, v12" + "operands": "v32, v0, v12" }, { - "label": "encoding vd_hi=01: actually vpermwi128", - "raw": "0x180463D0", + "label": "vpermwi128 v64, v12, 0xE4 (canary, VD128h=10)", + "raw": "0x180463D8", "addr": "0x82000000", "mnemonic": "vpermwi128", "operands": "v64, v12, 0xE4" }, { - "label": "vrlimi128 v96, v12, 4, 3 (real)", - "raw": "0x180467D0", + "label": "vrlimi128 v96, v12, 4, 3 (canary, VD128h=11)", + "raw": "0x180467DC", "addr": "0x82000000", "mnemonic": "vrlimi128", "operands": "v96, v12, 4, 3" }, { - "label": "vrlimi128 v127, v127, 4, 3 (real)", - "raw": "0x1BE4FFD3", + "label": "vrlimi128 v127, v95, 4, 3 (canary)", + "raw": "0x1BE4FFDE", "addr": "0x82000000", "mnemonic": "vrlimi128", "operands": "v127, v95, 4, 3" }, { "label": "vmaddfp128 v3, v35, v5, v3", - "raw": 
"0x146328D4", + "raw": "0x146328F0", "addr": "0x82000000", "mnemonic": "vmaddfp128", "operands": "v3, v35, v5, v3" }, { "label": "vmaddcfp128 v3, v35, v3, v5", - "raw": "0x14632914", + "raw": "0x14632930", "addr": "0x82000000", "mnemonic": "vmaddcfp128", "operands": "v3, v35, v3, v5" }, { "label": "vnmsubfp128 v3, v35, v3, v5", - "raw": "0x14632954", + "raw": "0x14632970", "addr": "0x82000000", "mnemonic": "vnmsubfp128", "operands": "v3, v35, v3, v5"